diff options
author | ot <ot@localhost> | 2007-07-19 03:59:24 +0000 |
---|---|---|
committer | ot <ot@localhost> | 2007-07-19 03:59:24 +0000 |
commit | 901f132d79f2beb50feff0c0aca3b2045bdb0316 (patch) | |
tree | 7f7b9345309a550435de9ab8442f49d8295d2ca3 | |
parent | 3d8de8e5e8a8ec95d120a8b054dbbd4afe30890f (diff) | |
download | markup-validator-901f132d79f2beb50feff0c0aca3b2045bdb0316.zip markup-validator-901f132d79f2beb50feff0c0aca3b2045bdb0316.tar.gz markup-validator-901f132d79f2beb50feff0c0aca3b2045bdb0316.tar.bz2 |
Removing the hard-coded list of supported character encodings,
as we now have a better and more reliable technical way to check whether an encoding can be decoded.
Replacing with a mechanism to:
* suggest a better alias if a "bad" encoding alias is used
* refuse to transcode if encoding used is forbidden by policy
(I don't know any, but the mechanism is here...)
More details:
http://lists.w3.org/Archives/Public/public-qa-dev/2007Jul/0009.html
and surrounding thread.
-rw-r--r-- | htdocs/config/charset.cfg | 73 | ||||
-rwxr-xr-x | httpd/cgi-bin/check | 34 | ||||
-rw-r--r-- | share/templates/en_US/soap_warnings.tmpl | 4 | ||||
-rw-r--r-- | share/templates/en_US/ucn_warnings.tmpl | 4 | ||||
-rw-r--r-- | share/templates/en_US/warnings.tmpl | 15 |
5 files changed, 61 insertions, 69 deletions
diff --git a/htdocs/config/charset.cfg b/htdocs/config/charset.cfg index 69ef847..aed81a6 100644 --- a/htdocs/config/charset.cfg +++ b/htdocs/config/charset.cfg @@ -1,60 +1,23 @@ # -# List of accepted encodings. +# List of encoding aliases and forbidden encodings # -# $Id: charset.cfg,v 1.13 2006-10-12 01:15:50 ot Exp $ -# -# The Validator will refuse to decode documents in an encoding -# other than those listed here. The list is independent of what +# $Id: charset.cfg,v 1.14 2007-07-19 03:59:23 ot Exp $ + +# This list indicates character encoding aliases that are +# not recommended, along with a recommended equivalent, e.g.: +# encoding-obscure = encoding-well-known + +# It also lists encoding names that the validator will refuse to process: +# bogus_encoding = Encoding Forbidden (Reason why) + +# The list is independent of what # is supported on a specific system but subject to the Validator # policy for acceptable encodings. -# -utf-8 = 1 -utf-16 = 1 -utf-16be = 1 -utf-16le = 1 -iso-8859-1 = 1 -iso-8859-2 = 1 -iso-8859-3 = 1 -iso-8859-4 = 1 -iso-8859-5 = 1 -iso-8859-6 = 1 -iso-8859-6-i = 1 -iso-8859-7 = 1 -iso-8859-8 = 1 -iso-8859-8-i = 1 -iso-8859-9 = 1 -iso-8859-10 = 1 -iso-8859-11 = 1 -iso-8859-13 = 1 -iso-8859-14 = 1 -iso-8859-15 = 1 -iso-8859-16 = 1 -us-ascii = 1 -iso-2022-jp = 1 -shift_jis = 1 -euc-jp = 1 -gb2312 = 1 -big5 = 1 -iso-2022-kr = 1 -euc-kr = 1 -gb18030 = 1 -tis-620 = 1 -koi8-r = 1 -koi8-u = 1 -windows-1250 = 1 -windows-1251 = 1 -windows-1252 = 1 -windows-1253 = 1 -windows-1254 = 1 -windows-1255 = 1 -windows-1256 = 1 -windows-1257 = 1 -macintosh = 1 -x-mac-roman = 1 -x-sjis = 1 -iso8859-1 = 1 -ascii = 1 -iso-8859-1-Windows-3.1-Latin-1 = 1 -ks_c_5601-1987 = 1 -ksc_5601 = 1 + +x-mac-roman = macintosh +x-sjis = shift_jis +iso8859-1 = iso-8859-1 +ascii = us-ascii +# this one is in IANA, but better use only windows-1252 +iso-8859-1-Windows-3.1-Latin-1 = windows-1252 diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check index 97063af..2a4a79d 100755 
--- a/httpd/cgi-bin/check +++ b/httpd/cgi-bin/check @@ -14,7 +14,7 @@ # This source code is available under the license at: # http://www.w3.org/Consortium/Legal/copyright-software # -# $Id: check,v 1.540 2007-07-17 02:32:36 ot Exp $ +# $Id: check,v 1.541 2007-07-19 03:59:24 ot Exp $ # # Disable buffering on STDOUT! @@ -183,7 +183,7 @@ Directory not readable (permission denied): @_r # # Strings - $VERSION = q$Revision: 1.540 $; + $VERSION = q$Revision: 1.541 $; $VERSION =~ s/Revision: ([\d\.]+) /$1/; # @@ -2083,19 +2083,25 @@ sub transcode { my $cs = $exact_charset; - if (!$CFG->{Charsets}->{$cs}) { - # The encoding is not supported due to policy - # and possibly other reasons - - $File->{'Error Flagged'} = TRUE; - $File->{Templates}->{Error}->param(fatal_transcode_error => TRUE); - $File->{Templates}->{Error}->param(fatal_transcode_charset => $cs); - - # @@FIXME might need better text - $File->{Templates}->{Error}->param(fatal_transcode_errmsg => - "Encoding not supported."); + if ($CFG->{Charsets}->{$cs}) { + if ($CFG->{Charsets}->{$cs} =~ /Encoding Forbidden/) { + # The encoding is not supported due to policy + + $File->{'Error Flagged'} = TRUE; + $File->{Templates}->{Error}->param(fatal_transcode_error => TRUE); + $File->{Templates}->{Error}->param(fatal_transcode_charset => $cs); - return $File; + # @@FIXME might need better text + $File->{Templates}->{Error}->param(fatal_transcode_errmsg => + "This encoding is not supported by the validator."); + return $File; + } + else { + &add_warning('W22', { + W22_declared => $cs, + W22_suggested => $CFG->{Charsets}->{$cs}, + }); + } } # Does the system support decoding this encoding? 
diff --git a/share/templates/en_US/soap_warnings.tmpl b/share/templates/en_US/soap_warnings.tmpl index 4275972..463480d 100644 --- a/share/templates/en_US/soap_warnings.tmpl +++ b/share/templates/en_US/soap_warnings.tmpl @@ -73,6 +73,10 @@ <TMPL_IF NAME="W21"> <m:warning><m:message>Byte-Order Mark found in UTF-8 File.</m:message></m:warning> </TMPL_IF> +<TMPL_IF NAME="W22"> +<m:warning><m:message>Character Encoding suggestion: use +<TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"> instead of <TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></m:message></m:warning> +</TMPL_IF> <TMPL_IF NAME="W@@"> <m:warning><m:message></m:message></m:warning> diff --git a/share/templates/en_US/ucn_warnings.tmpl b/share/templates/en_US/ucn_warnings.tmpl index ca33c71..5222cba 100644 --- a/share/templates/en_US/ucn_warnings.tmpl +++ b/share/templates/en_US/ucn_warnings.tmpl @@ -73,6 +73,10 @@ <TMPL_IF NAME="W21"> <warning><message>Byte-Order Mark found in UTF-8 File.</message></warning> </TMPL_IF> +<TMPL_IF NAME="W22"> +<warning><message>Character Encoding suggestion: use +<TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"> instead of <TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></message></warning> +</TMPL_IF> <TMPL_IF NAME="W@@"> <warning><message></message></warning> diff --git a/share/templates/en_US/warnings.tmpl b/share/templates/en_US/warnings.tmpl index 6393aca..d99dcec 100644 --- a/share/templates/en_US/warnings.tmpl +++ b/share/templates/en_US/warnings.tmpl @@ -428,6 +428,21 @@ </p> </li> </TMPL_IF> +<TMPL_IF NAME="W22"> + <li class="msg_warn" id="W22"><span class="err_type"><img src="images/info_icons/warning.png" alt="Warning" title="Warning" /></span> <span class="msg">Character Encoding suggestion: use + <code><TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"></code> instead of <code><TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></code></span> + + <p> + The character encoding declared for this document + (<code><TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></code>) may not be widely supported. 
+ This encoding is equivalent to <code><TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"></code> + which may be better supported across platforms. + </p> + <p> + More information on <a href="http://www.w3.org/International/O-charset.html">declaring a character encoding on your Web server or in your document</a> can be found on the W3C Internationalization site. + </p> + </li> +</TMPL_IF> |