summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: ville <ville@localhost> 2009-12-14 20:44:49 +0000
committer: ville <ville@localhost> 2009-12-14 20:44:49 +0000
commit: 2b1246e03c2cb1ea40c8fa61dec784b5d973bb92 (patch)
tree: 5e4a3c05ee0b7cddb6cc49fabbd951fba979c37b
parent: 21719c14099a85161ed5488a4c7b1e3db6611073 (diff)
download: markup-validator-2b1246e03c2cb1ea40c8fa61dec784b5d973bb92.zip
markup-validator-2b1246e03c2cb1ea40c8fa61dec784b5d973bb92.tar.gz
markup-validator-2b1246e03c2cb1ea40c8fa61dec784b5d973bb92.tar.bz2
Make LibXML transcoding-passing regex stricter and more readable.
-rwxr-xr-x httpd/cgi-bin/check 15
1 files changed, 10 insertions, 5 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 0cf6ca6..91adf57 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.751 2009-12-12 20:06:36 ville Exp $
+# $Id: check,v 1.752 2009-12-14 20:44:49 ville Exp $
#
# We need Perl 5.8.0+.
@@ -197,7 +197,7 @@ EOF
#
# Strings
- $VERSION = q$Revision: 1.751 $;
+ $VERSION = q$Revision: 1.752 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
# Read friendly error message file
@@ -619,9 +619,14 @@ if (&is_xml($File)) {
# the XML parser will check the value of encoding attribute in XML
# declaration so we have to amend it to reflect transcoding.
# see Bug 4867
- $xml_string =~ s/(<\?xml.*)
- (encoding[\x20|\x09|\x0D|\x0A]*=[\x20|\x09|\x0D|\x0A]*(?:"[A-Za-z][a-zA-Z0-9_-]+"|'[A-Za-z][a-zA-Z0-9_-]+'))
- (.*\?>)/$1encoding="utf-8"$3/sx;
+ $xml_string =~ s/
+ (^<\?xml\b[^>]*[\x20\x09\x0D\x0A])
+ (encoding[\x20\x09\x0D\x0A]*=[\x20\x09\x0D\x0A]*
+ (?:(["'])[A-Za-z][a-zA-Z0-9_-]+\3)
+ )
+ ([^>].*\?>)
+ /$1encoding="UTF-8"$4/sx;
+
eval { $xmlparser->parse_string($xml_string); };
$xml_string = undef;
my $xml_parse_errors_line = undef;