diff options
-rw-r--r-- | htdocs/config/charset.cfg | 18 |
-rwxr-xr-x | httpd/cgi-bin/check | 36 |
2 files changed, 20 insertions, 34 deletions
diff --git a/htdocs/config/charset.cfg b/htdocs/config/charset.cfg index 22b9358..8e47c85 100644 --- a/htdocs/config/charset.cfg +++ b/htdocs/config/charset.cfg @@ -1,7 +1,7 @@ # # list of accepted/preferred character encodings # -# $Id: charset.cfg,v 1.16 2007-07-20 02:58:10 ot Exp $ +# $Id: charset.cfg,v 1.17 2009-06-29 19:57:51 ville Exp $ # # Syntax: # @@ -11,11 +11,13 @@ # # ? indicates the action to take: # 1: OK, character supported +# A: OK, character supported, Encode::Alias it to result # X: frequent error, e.g. starting with x-; ask user to replace with result # ERR: a charset we refuse, per some policy. Reason stated after ERR #e.g: # utf-8 = 1 +# some-alias = A perl-Encode-name # odd-alias = X good-alias # bad_charset = ERR explain reason @@ -30,11 +32,11 @@ iso-8859-4 = 1 iso-8859-5 = 1 iso-8859-6 = 1 # implicit bidi, but character encoding is the same -iso-8859-6-i = 1 +iso-8859-6-i = A iso-8859-6 iso-8859-7 = 1 iso-8859-8 = 1 # implicit bidi, but character encoding is the same -iso-8859-8-i = 1 +iso-8859-8-i = A iso-8859-8 iso-8859-9 = 1 iso-8859-10 = 1 iso-8859-11 = 1 @@ -53,7 +55,9 @@ big5-hkscs = 1 iso-2022-kr = 1 euc-kr = 1 gb18030 = 1 -tis-620 = 1 +# 0xA0 is U+00A0 in ISO-8859-11 but undefined in tis-620 +# other than that the character encodings are equivalent +tis-620 = A iso-8859-11 koi8-r = 1 koi8-u = 1 iso-ir-111 = 1 @@ -66,9 +70,11 @@ windows-1255 = 1 windows-1256 = 1 windows-1257 = 1 # windows-1258 = 1 -macintosh = 1 +# Encode::Byte does not know 'macintosh' but MacRoman +macintosh = A MacRoman ks_c_5601-1987 = 1 -ksc_5601 = 1 +# Encode only knows the long hand version of 'ksc_5601' +ksc_5601 = A KS_C_5601-1987 x-mac-roman = X macintosh x-sjis = X shift_jis diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 8a12b4b..6dcb610 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -14,7 +14,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 
1.664 2009-06-29 18:21:16 ville Exp $ +# $Id: check,v 1.665 2009-06-29 19:57:51 ville Exp $ # # Disable buffering on STDOUT! $| = 1; @@ -191,6 +191,12 @@ Directory not readable (permission denied): @_r } # + # Register Encode aliases. + while (my ($key, $value) = each %{$CFG->{Charsets}}) { + Encode::Alias::define_alias($key, $1) if ($value =~ /^[AX] (\S+)/); + } + + # # Set debug flag. if ($CFG->{'Allow Debug'}) { $DEBUG = TRUE if $ENV{W3C_VALIDATOR_DEBUG} || $CFG->{'Enable Debug'}; @@ -200,7 +206,7 @@ Directory not readable (permission denied): @_r # # Strings - $VERSION = q$Revision: 1.664 $; + $VERSION = q$Revision: 1.665 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; # @@ -568,31 +574,6 @@ unless ($File->{Charset}->{XML} || $File->{Charset}->{META}){ #suggest character # Abort if an error was flagged while finding the encoding. &abort_if_error_flagged($File, O_CHARSET|O_DOCTYPE); -# -# Encode alias definitions. This might not be the best -# place for them, feel free to move them elsewhere. - -# implicit bidi, but character encoding is the same -Encode::Alias::define_alias('iso-8859-6-i', 'iso-8859-6'); - -# implicit bidi, but character encoding is the same -Encode::Alias::define_alias('iso-8859-8-i', 'iso-8859-8'); - -# 0xA0 is U+00A0 in ISO-8859-11 but undefined in tis-620 -# other than that the character encodings are equivalent -Encode::Alias::define_alias('tis-620', 'iso-8859-11'); - -# Encode::Byte does not know 'macintosh' but MacRoman -Encode::Alias::define_alias('macintosh', 'MacRoman'); - -# x-mac-roman is the non-standard version of 'macintosh' -Encode::Alias::define_alias('x-mac-roman', 'MacRoman'); - -# Encode only knows the long hand version of 'ksc_5601' -Encode::Alias::define_alias('ksc_5601', 'KS_C_5601-1987'); - -# gb18030 requires Encode::HanExtra but no additional alias - $File->{Charset}->{Default} = FALSE; unless ($File->{Charset}->{Use}) { # No charset given... 
$File->{Charset}->{Use} = 'utf-8'; @@ -601,7 +582,6 @@ unless ($File->{Charset}->{Use}) { # No charset given... &add_warning('W04', {W04_charset => "UTF-8"}); } - # Always transcode, even if the content claims to be UTF-8 $File = transcode($File); if (($File->{ContentType} eq "text/html") and ($File->{Charset}->{Default}) and $File->{'Error Flagged'}) { |