summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x httpd/cgi-bin/check 44
1 files changed, 23 insertions, 21 deletions
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 9818f9e..1c5f7da 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -9,7 +9,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.226 2002-08-31 04:04:02 duerst Exp $
+# $Id: check,v 1.227 2002-08-31 07:14:45 duerst Exp $
#
# Disable buffering on STDOUT!
@@ -95,7 +95,7 @@ BEGIN {
#
# Strings
- $VERSION = q$Revision: 1.226 $;
+ $VERSION = q$Revision: 1.227 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
@@ -151,7 +151,7 @@ $File->{'Header'} = &prepSSI({
});
$File->{'Footer'} = &prepSSI({
File => $CFG->{'Footer'},
- Date => q$Date: 2002-08-31 04:04:02 $,
+ Date => q$Date: 2002-08-31 07:14:45 $,
});
#
@@ -1238,13 +1238,13 @@ sub normalize_newlines {
local $_ = shift; #charset
my $pattern = '';
- # patterns are not allowed to have parentheses
- $pattern = '\x00\015\x00\012|\x00\015|\x00\012' if /^utf-16be$/;
- $pattern = '\015\x00\012\x00|\015\x00|\012\x00' if /^utf-16le$/;
- $pattern = '\x00\x00\x00\015\x00\x00\x00\012|\x00\x00\x00\015|\x00\x00\x00\012' if /^UCS-4be$/;
- $pattern = '\015\x00\x00\x00\012\x00\x00\x00|\015\x00\x00\x00|\012\x00\x00\x00' if /^UCS-4le$/;
+ # don't use backreference parentheses!
+ $pattern = '\0\xD(?:\0\xA)?|\0\xA' if /^utf-16be$/;
+ $pattern = '\xD\0(?:\xA\0)?|\xA\0' if /^utf-16le$/;
+ $pattern = '\0\0\0\xD(?:\0\0\0\xA)?|\0\0\0\xA' if /^UCS-4be$/;
+ $pattern = '\xD\0\0\0(?:\xA\0\0\0)?|\xA\0\0\0' if /^UCS-4le$/;
# insert other special cases here, such as EBCDIC
- $pattern = '\015\012|\015|\012' if !$pattern; # all other cases
+ $pattern = '\xD(?:\xA)?|\xA' if !$pattern; # all other cases
return [split /$pattern/, $file];
}
@@ -2422,25 +2422,25 @@ sub find_base_encoding {
local $_ = shift;
# With a Byte Order Mark:
- return ('ucs-4be', 4, 4, '\x00\x00\x00(.)')
+ return ('ucs-4be', 4, 4, '\0\0\0(.)')
if /^\x00\x00\xFE\xFF/; # UCS-4, big-endian machine (1234)
- return ('ucs-4le', 4, 4, '(.)\x00\x00\x00')
+ return ('ucs-4le', 4, 4, '(.)\0\0\0')
if /^\xFF\xFE\x00\x00/; # UCS-4, little-endian machine (4321)
- return ('utf-16be', 2, 2, '\x00(.)')
+ return ('utf-16be', 2, 2, '\0(.)')
if /^\xFE\xFF/; # UTF-16, big-endian.
- return ('utf-16le', 2, 2, '(.)\x00')
+ return ('utf-16le', 2, 2, '(.)\0')
if /^\xFF\xFE/; # UTF-16, little-endian.
return ('utf-8', 3, 1, '')
if /^\xEF\xBB\xBF/; # UTF-8.
# Without a Byte Order Mark:
- return ('ucs-4be', 0, 4, '\x00\x00\x00(.)')
+ return ('ucs-4be', 0, 4, '\0\0\0(.)')
if /^\x00\x00\x00\x3C/; # UCS-4 or 32bit; big-endian machine (1234 order).
- return ('ucs-4le', 0, 4, '(.)\x00\x00\x00')
+ return ('ucs-4le', 0, 4, '(.)\0\0\0')
if /^\x3C\x00\x00\x00/; # UCS-4 or 32bit; little-endian machine (4321 order).
- return ('utf-16be', 0, 2, '\x00(.)')
+ return ('utf-16be', 0, 2, '\0(.)')
if /^\x00\x3C\x00\x3F/; # UCS-2, UTF-16, or 16bit; big-endian.
- return ('utf-16le', 0, 2, '(.)\x00')
+ return ('utf-16le', 0, 2, '(.)\0')
if /^\x3C\x00\x3F\x00/; # UCS-2, UTF-16, or 16bit; little-endian.
return ('utf-8', 0, 1, '')
if /^\x3C\x3F\x78\x6D/; # UTF-8, ISO-646, ASCII, ISO-8859-*, Shift-JIS, EUC, etc.
@@ -2472,14 +2472,16 @@ sub find_xml_encoding {
$someText = $someBytes; # efficiency shortcut
}
else { # generic code for UTF-16/UCS-4
- $someBytes =~ /^(($Pattern)*)/;
+ $someBytes =~ /^(($Pattern)*)/s;
$someText = $1; # get initial piece without chars >255
- $someText = s/$Pattern/$1/g; # select the relevant bytes
+ $someText = s/$Pattern/$1/sg; # select the relevant bytes
}
# try to find encoding pseudo-attribute
- $someText =~ m("^<\?xml[ \t\n\r]+version[ \t\n\r]?=[ \t\n\r]?([\'\"])[-._:a-zA-Z0-9]+\1[
-\t\n\r]+encoding[ \t\n\r]?=[ \t\n\r]?([\'\"])([A-Za-z][-._A-Za-z0-9]*)\2);
+ my $s = '[\ \t\n\r]';
+ $someText =~ m(^<\?xml $s+ version $s? = $s? ([\'\"]) [-._:a-zA-Z0-9]+ \1 $s+
+ encoding $s? = $s? ([\'\"]) ([A-Za-z][-._A-Za-z0-9]*) \2
+ )xso;
$File->{Charset}->{XML} = lc $3;
return $File;