summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: ville <ville@localhost> 2009-06-29 19:57:51 +0000
committer: ville <ville@localhost> 2009-06-29 19:57:51 +0000
commit: 5448358f592b34ec2fa262ad5f50d1e0fa9c471f (patch)
tree: aec2cb2132dddd1716d97d567d85c244f94b80a1
parent: 196dc838d3ea01180fb60348f8437818385e772b (diff)
download: markup-validator-5448358f592b34ec2fa262ad5f50d1e0fa9c471f.zip
markup-validator-5448358f592b34ec2fa262ad5f50d1e0fa9c471f.tar.gz
markup-validator-5448358f592b34ec2fa262ad5f50d1e0fa9c471f.tar.bz2
Move Encode alias definitions to charset.cfg; also register our non-recommended but recognized aliases.
-rw-r--r-- htdocs/config/charset.cfg 18
-rwxr-xr-x httpd/cgi-bin/check 36
2 files changed, 20 insertions, 34 deletions
diff --git a/htdocs/config/charset.cfg b/htdocs/config/charset.cfg
index 22b9358..8e47c85 100644
--- a/htdocs/config/charset.cfg
+++ b/htdocs/config/charset.cfg
@@ -1,7 +1,7 @@
#
# list of accepted/preferred character encodings
#
-# $Id: charset.cfg,v 1.16 2007-07-20 02:58:10 ot Exp $
+# $Id: charset.cfg,v 1.17 2009-06-29 19:57:51 ville Exp $
#
# Syntax:
#
@@ -11,11 +11,13 @@
#
# ? indicates the action to take:
# 1: OK, character supported
+# A: OK, character supported, Encode::Alias it to result
# X: frequent error, e.g. starting with x-; ask user to replace with result
# ERR: a charset we refuse, per some policy. Reason stated after ERR
#e.g:
# utf-8 = 1
+# some-alias = A perl-Encode-name
# odd-alias = X good-alias
# bad_charset = ERR explain reason
@@ -30,11 +32,11 @@ iso-8859-4 = 1
iso-8859-5 = 1
iso-8859-6 = 1
# implicit bidi, but character encoding is the same
-iso-8859-6-i = 1
+iso-8859-6-i = A iso-8859-6
iso-8859-7 = 1
iso-8859-8 = 1
# implicit bidi, but character encoding is the same
-iso-8859-8-i = 1
+iso-8859-8-i = A iso-8859-8
iso-8859-9 = 1
iso-8859-10 = 1
iso-8859-11 = 1
@@ -53,7 +55,9 @@ big5-hkscs = 1
iso-2022-kr = 1
euc-kr = 1
gb18030 = 1
-tis-620 = 1
+# 0xA0 is U+00A0 in ISO-8859-11 but undefined in tis-620
+# other than that the character encodings are equivalent
+tis-620 = A iso-8859-11
koi8-r = 1
koi8-u = 1
iso-ir-111 = 1
@@ -66,9 +70,11 @@ windows-1255 = 1
windows-1256 = 1
windows-1257 = 1
# windows-1258 = 1
-macintosh = 1
+# Encode::Byte does not know 'macintosh' but MacRoman
+macintosh = A MacRoman
ks_c_5601-1987 = 1
-ksc_5601 = 1
+# Encode only knows the long hand version of 'ksc_5601'
+ksc_5601 = A KS_C_5601-1987
x-mac-roman = X macintosh
x-sjis = X shift_jis
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 8a12b4b..6dcb610 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.664 2009-06-29 18:21:16 ville Exp $
+# $Id: check,v 1.665 2009-06-29 19:57:51 ville Exp $
#
# Disable buffering on STDOUT!
$| = 1;
@@ -191,6 +191,12 @@ Directory not readable (permission denied): @_r
}
#
+ # Register Encode aliases.
+ while (my ($key, $value) = each %{$CFG->{Charsets}}) {
+ Encode::Alias::define_alias($key, $1) if ($value =~ /^[AX] (\S+)/);
+ }
+
+ #
# Set debug flag.
if ($CFG->{'Allow Debug'}) {
$DEBUG = TRUE if $ENV{W3C_VALIDATOR_DEBUG} || $CFG->{'Enable Debug'};
@@ -200,7 +206,7 @@ Directory not readable (permission denied): @_r
#
# Strings
- $VERSION = q$Revision: 1.664 $;
+ $VERSION = q$Revision: 1.665 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
#
@@ -568,31 +574,6 @@ unless ($File->{Charset}->{XML} || $File->{Charset}->{META}){ #suggest character
# Abort if an error was flagged while finding the encoding.
&abort_if_error_flagged($File, O_CHARSET|O_DOCTYPE);
-#
-# Encode alias definitions. This might not be the best
-# place for them, feel free to move them elsewhere.
-
-# implicit bidi, but character encoding is the same
-Encode::Alias::define_alias('iso-8859-6-i', 'iso-8859-6');
-
-# implicit bidi, but character encoding is the same
-Encode::Alias::define_alias('iso-8859-8-i', 'iso-8859-8');
-
-# 0xA0 is U+00A0 in ISO-8859-11 but undefined in tis-620
-# other than that the character encodings are equivalent
-Encode::Alias::define_alias('tis-620', 'iso-8859-11');
-
-# Encode::Byte does not know 'macintosh' but MacRoman
-Encode::Alias::define_alias('macintosh', 'MacRoman');
-
-# x-mac-roman is the non-standard version of 'macintosh'
-Encode::Alias::define_alias('x-mac-roman', 'MacRoman');
-
-# Encode only knows the long hand version of 'ksc_5601'
-Encode::Alias::define_alias('ksc_5601', 'KS_C_5601-1987');
-
-# gb18030 requires Encode::HanExtra but no additional alias
-
$File->{Charset}->{Default} = FALSE;
unless ($File->{Charset}->{Use}) { # No charset given...
$File->{Charset}->{Use} = 'utf-8';
@@ -601,7 +582,6 @@ unless ($File->{Charset}->{Use}) { # No charset given...
&add_warning('W04', {W04_charset => "UTF-8"});
}
-
# Always transcode, even if the content claims to be UTF-8
$File = transcode($File);
if (($File->{ContentType} eq "text/html") and ($File->{Charset}->{Default}) and $File->{'Error Flagged'}) {