diff options
-rwxr-xr-x | httpd/cgi-bin/check | 44 |
1 files changed, 23 insertions, 21 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 9818f9e..1c5f7da 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -9,7 +9,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.226 2002-08-31 04:04:02 duerst Exp $ +# $Id: check,v 1.227 2002-08-31 07:14:45 duerst Exp $ # # Disable buffering on STDOUT! @@ -95,7 +95,7 @@ BEGIN { # # Strings - $VERSION = q$Revision: 1.226 $; + $VERSION = q$Revision: 1.227 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; @@ -151,7 +151,7 @@ $File->{'Header'} = &prepSSI({ }); $File->{'Footer'} = &prepSSI({ File => $CFG->{'Footer'}, - Date => q$Date: 2002-08-31 04:04:02 $, + Date => q$Date: 2002-08-31 07:14:45 $, }); # @@ -1238,13 +1238,13 @@ sub normalize_newlines { local $_ = shift; #charset my $pattern = ''; - # patterns are not allowed to have parentheses - $pattern = '\x00\015\x00\012|\x00\015|\x00\012' if /^utf-16be$/; - $pattern = '\015\x00\012\x00|\015\x00|\012\x00' if /^utf-16le$/; - $pattern = '\x00\x00\x00\015\x00\x00\x00\012|\x00\x00\x00\015|\x00\x00\x00\012' if /^UCS-4be$/; - $pattern = '\015\x00\x00\x00\012\x00\x00\x00|\015\x00\x00\x00|\012\x00\x00\x00' if /^UCS-4le$/; + # don't use backreference parentheses! + $pattern = '\0\xD(?:\0\xA)?|\0\xA' if /^utf-16be$/; + $pattern = '\xD\0(?:\xA\0)?|\xA\0' if /^utf-16le$/; + $pattern = '\0\0\0\xD(?:\0\0\0\xA)?|\0\0\0\xA' if /^UCS-4be$/; + $pattern = '\xD\0\0\0(?:\xA\0\0\0)?|\xA\0\0\0' if /^UCS-4le$/; # insert other special cases here, such as EBCDIC - $pattern = '\015\012|\015|\012' if !$pattern; # all other cases + $pattern = '\xD(?:\xA)?|\xA' if !$pattern; # all other cases return [split /$pattern/, $file]; } @@ -2422,25 +2422,25 @@ sub find_base_encoding { local $_ = shift; # With a Byte Order Mark: - return ('ucs-4be', 4, 4, '\x00\x00\x00(.)') + return ('ucs-4be', 4, 4, '\0\0\0(.)') if /^\x00\x00\xFE\xFF/; # UCS-4, big-endian machine (1234) - return ('ucs-4le', 4, 4, '(.)\x00\x00\x00') + return ('ucs-4le', 4, 4, '(.)\0\0\0') if /^\xFF\xFE\x00\x00/; # UCS-4, little-endian machine (4321) - return ('utf-16be', 2, 2, '\x00(.)') + return ('utf-16be', 2, 2, '\0(.)') if /^\xFE\xFF/; # UTF-16, big-endian. - return ('utf-16le', 2, 2, '(.)\x00') + return ('utf-16le', 2, 2, '(.)\0') if /^\xFF\xFE/; # UTF-16, little-endian. return ('utf-8', 3, 1, '') if /^\xEF\xBB\xBF/; # UTF-8. # Without a Byte Order Mark: - return ('ucs-4be', 0, 4, '\x00\x00\x00(.)') + return ('ucs-4be', 0, 4, '\0\0\0(.)') if /^\x00\x00\x00\x3C/; # UCS-4 or 32bit; big-endian machine (1234 order). - return ('ucs-4le', 0, 4, '(.)\x00\x00\x00') + return ('ucs-4le', 0, 4, '(.)\0\0\0') if /^\x3C\x00\x00\x00/; # UCS-4 or 32bit; little-endian machine (4321 order). - return ('utf-16be', 0, 2, '\x00(.)') + return ('utf-16be', 0, 2, '\0(.)') if /^\x00\x3C\x00\x3F/; # UCS-2, UTF-16, or 16bit; big-endian. - return ('utf-16le', 0, 2, '(.)\x00') + return ('utf-16le', 0, 2, '(.)\0') if /^\x3C\x00\x3F\x00/; # UCS-2, UTF-16, or 16bit; little-endian. return ('utf-8', 0, 1, '') if /^\x3C\x3F\x78\x6D/; # UTF-8, ISO-646, ASCII, ISO-8859-*, Shift-JIS, EUC, etc. @@ -2472,14 +2472,16 @@ sub find_xml_encoding { $someText = $someBytes; # efficiency shortcut } else { # generic code for UTF-16/UCS-4 - $someBytes =~ /^(($Pattern)*)/; + $someBytes =~ /^(($Pattern)*)/s; $someText = $1; # get initial piece without chars >255 - $someText = s/$Pattern/$1/g; # select the relevant bytes + $someText = s/$Pattern/$1/sg; # select the relevant bytes } # try to find encoding pseudo-attribute - $someText =~ m("^<\?xml[ \t\n\r]+version[ \t\n\r]?=[ \t\n\r]?([\'\"])[-._:a-zA-Z0-9]+\1[ -\t\n\r]+encoding[ \t\n\r]?=[ \t\n\r]?([\'\"])([A-Za-z][-._A-Za-z0-9]*)\2); + my $s = '[\ \t\n\r]'; + $someText =~ m(^<\?xml $s+ version $s? = $s? ([\'\"]) [-._:a-zA-Z0-9]+ \1 $s+ + encoding $s? = $s? ([\'\"]) ([A-Za-z][-._A-Za-z0-9]*) \2 + )xso; $File->{Charset}->{XML} = lc $3; return $File; |