diff options
author | ot <ot@localhost> | 2007-07-19 08:18:31 +0000 |
---|---|---|
committer | ot <ot@localhost> | 2007-07-19 08:18:31 +0000 |
commit | f11f7e9050d68a18a70b2035f56c82a9baf720fd (patch) | |
tree | 1e65468018e693e9c02614ffbcadae02733f2c09 | |
parent | df7f4b4de4c1770008fd15cef497d7642877d43f (diff) | |
download | markup-validator-f11f7e9050d68a18a70b2035f56c82a9baf720fd.zip markup-validator-f11f7e9050d68a18a70b2035f56c82a9baf720fd.tar.gz markup-validator-f11f7e9050d68a18a70b2035f56c82a9baf720fd.tar.bz2 |
Re-populating the list of charsets (from older revisions in both the 0.7 and 0.8 branches),
to be used thus:
* no fatal error if the charset is supported by encode
* a warning suggesting a better alias if we know one
* a warning that the encoding may be "odd" if it is not in the list but encode says it's OK
-rw-r--r-- | htdocs/config/charset.cfg | 89 | ||||
-rwxr-xr-x | httpd/cgi-bin/check | 20 | ||||
-rw-r--r-- | share/templates/en_US/soap_warnings.tmpl | 3 | ||||
-rw-r--r-- | share/templates/en_US/ucn_warnings.tmpl | 3 | ||||
-rw-r--r-- | share/templates/en_US/warnings.tmpl | 17 |
5 files changed, 110 insertions, 22 deletions
diff --git a/htdocs/config/charset.cfg b/htdocs/config/charset.cfg index aed81a6..f007cc4 100644 --- a/htdocs/config/charset.cfg +++ b/htdocs/config/charset.cfg @@ -1,23 +1,78 @@ # -# List of encodings aliases and forbidden encodings +# list of accepted/preferred character encodings # -# $Id: charset.cfg,v 1.14 2007-07-19 03:59:23 ot Exp $ - -# This list indicates character encoding aliases that are -# not recommended, along with a recommended equivalent, e.g: -# encoding-obscure = encoding-well-known - -# It also lists encoding names that the validator will refuse to treat: -# bogus_encoding = Encoding Forbidden (Reason why) +# $Id: charset.cfg,v 1.15 2007-07-19 08:18:30 ot Exp $ +# +# Syntax: +# +# charset/encoding = ? result +# +# Note: charsets and results are lowercase, actions are uppercase +# +# ? indicates the action to take: +# 1: OK, character supported +# X: frequent error, e.g. starting with x-; ask user to replace with result +# ERR: a charset we refuse, per some policy. Reason stated after ERR -# The list is independent of what -# is supported on a specific system but subject to the Validator -# policy for acceptable encodings. +#e.g: +# utf-8 = 1 +# odd-alias = X good-alias +# bad_charset = ERR explain reason +utf-8 = 1 +utf-16 = 1 +utf-16be = 1 +utf-16le = 1 +iso-8859-1 = 1 +iso-8859-2 = 1 +iso-8859-3 = 1 +iso-8859-4 = 1 +iso-8859-5 = 1 +iso-8859-6 = 1 +# implicit bidi, but character encoding is the same +iso-8859-6-i = 1 +iso-8859-7 = 1 +iso-8859-8 = 1 +# implicit bidi, but character encoding is the same +iso-8859-8-i = 1 +iso-8859-9 = 1 +iso-8859-10 = 1 +iso-8859-11 = 1 +# iso-8859-12 doesn't exist (yet?) 
+iso-8859-13 = 1 +iso-8859-14 = 1 +iso-8859-15 = 1 +iso-8859-16 = 1 +us-ascii = 1 +iso-2022-jp = 1 +shift_jis = 1 +euc-jp = 1 +gb2312 = 1 +big5 = 1 +iso-2022-kr = 1 +euc-kr = 1 +gb18030 = 1 +tis-620 = 1 +koi8-r = 1 +koi8-u = 1 +iso-ir-111 = 1 +windows-1250 = 1 +windows-1251 = 1 +windows-1252 = 1 +windows-1253 = 1 +windows-1254 = 1 +windows-1255 = 1 +windows-1256 = 1 +windows-1257 = 1 +# windows-1258 = 1 +macintosh = 1 +ks_c_5601-1987 = 1 +ksc_5601 = 1 -x-mac-roman = macintosh -x-sjis = shift_jis -iso8859-1 = iso-8859-1 -ascii = us-ascii +x-mac-roman = X macintosh +x-sjis = X shift_jis +iso8859-1 = X iso-8859-1 +ascii = X us-ascii +8859_1 = X iso-8859-1 # this one is in IANA, but better use only windows-1252 -iso-8859-1-Windows-3.1-Latin-1 = windows-1252 +iso-8859-1-Windows-3.1-Latin-1 = X windows-1252 diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 5a6387c..ad5f1e2 100755 --- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -14,7 +14,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.542 2007-07-19 06:21:03 ot Exp $ +# $Id: check,v 1.543 2007-07-19 08:18:30 ot Exp $ # # Disable buffering on STDOUT! 
@@ -183,7 +183,7 @@ Directory not readable (permission denied): @_r # # Strings - $VERSION = q$Revision: 1.542 $; + $VERSION = q$Revision: 1.543 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; # @@ -2090,7 +2090,7 @@ sub transcode { my $cs = $exact_charset; if ($CFG->{Charsets}->{$cs}) { - if ($CFG->{Charsets}->{$cs} =~ /Encoding Forbidden/) { + if ($CFG->{Charsets}->{$cs} =~ /ERR /) { # The encoding is not supported due to policy $File->{'Error Flagged'} = TRUE; @@ -2102,10 +2102,13 @@ sub transcode { "This encoding is not supported by the validator."); return $File; } - else { + elsif ($CFG->{Charsets}->{$cs} =~ /X /) { + # possibly problematic, we recommend another alias + my $recommended_charset = $CFG->{Charsets}->{$cs}; + $recommended_charset =~ s/X //; &add_warning('W22', { W22_declared => $cs, - W22_suggested => $CFG->{Charsets}->{$cs}, + W22_suggested => $recommended_charset, }); } } @@ -2127,6 +2130,13 @@ sub transcode { return $File; } + elsif (!$CFG->{Charsets}->{$cs}) { + # not in the list, but technically OK -> we warn + &add_warning('W23', { + W23_declared => $cs, + }); + + } my $output; my $input = $File->{Bytes}; diff --git a/share/templates/en_US/soap_warnings.tmpl b/share/templates/en_US/soap_warnings.tmpl index 463480d..e8040e0 100644 --- a/share/templates/en_US/soap_warnings.tmpl +++ b/share/templates/en_US/soap_warnings.tmpl @@ -77,6 +77,9 @@ <m:warning><m:message>Character Encoding suggestion: use <TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"> instead of <TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></m:warning></m:message> </TMPL_IF> +<TMPL_IF NAME="W23"> + <m:warning><m:message>Rare or unregistered Character Encoding detected</m:message></m:warning> +</TMPL_IF> <TMPL_IF NAME="W@@"> <m:warning><m:message></m:message></m:warning> diff --git a/share/templates/en_US/ucn_warnings.tmpl b/share/templates/en_US/ucn_warnings.tmpl index 5222cba..6a31f1b 100644 --- a/share/templates/en_US/ucn_warnings.tmpl +++ b/share/templates/en_US/ucn_warnings.tmpl @@ 
-77,6 +77,9 @@ <warning><message>Character Encoding suggestion: use <TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"> instead of <TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></warning></message> </TMPL_IF> +<TMPL_IF NAME="W23"> + <warning><message>Rare or unregistered Character Encoding detected</message></warning> +</TMPL_IF> <TMPL_IF NAME="W@@"> <warning><message></message></warning> diff --git a/share/templates/en_US/warnings.tmpl b/share/templates/en_US/warnings.tmpl index d99dcec..fe64041 100644 --- a/share/templates/en_US/warnings.tmpl +++ b/share/templates/en_US/warnings.tmpl @@ -443,6 +443,23 @@ </p> </li> </TMPL_IF> +<TMPL_IF NAME="W23"> + <li class="msg_warn" id="W23"><span class="err_type"><img src="images/info_icons/warning.png" alt="Warning" title="Warning" /></span> <span class="msg">Rare or unregistered character encoding detected</span> + + <p> + The character encoding declared for this document + (<code><TMPL_VAR NAME="W23_declared" ESCAPE="HTML"></code>) + is supported by the validator, but may not be widely supported across platforms. + For the sake of interoperability, it is best to use a unicode character encoding + such as <code>UTF-8</code>, or one of the + <a href="http://www.iana.org/assignments/character-sets">registered character + encodings</a>. + </p> + <p> + More information on <a href="http://www.w3.org/International/O-charset.html">declaring a character encoding on your Web server or in your document</a> can be found on the W3C Internationalization site. + </p> + </li> +</TMPL_IF> |