summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorot <ot@localhost>2007-07-19 03:59:24 +0000
committerot <ot@localhost>2007-07-19 03:59:24 +0000
commit901f132d79f2beb50feff0c0aca3b2045bdb0316 (patch)
tree7f7b9345309a550435de9ab8442f49d8295d2ca3
parent3d8de8e5e8a8ec95d120a8b054dbbd4afe30890f (diff)
downloadmarkup-validator-901f132d79f2beb50feff0c0aca3b2045bdb0316.zip
markup-validator-901f132d79f2beb50feff0c0aca3b2045bdb0316.tar.gz
markup-validator-901f132d79f2beb50feff0c0aca3b2045bdb0316.tar.bz2
removing the list of supported character encodings,
as we have a better and more reliable technical way to do this. Replacing it with a mechanism to:
* suggest a better alias if a "bad" encoding alias is used
* refuse to transcode if the encoding used is forbidden by policy (I don't know of any, but the mechanism is here...)
More details: http://lists.w3.org/Archives/Public/public-qa-dev/2007Jul/0009.html and surrounding thread.
-rw-r--r--htdocs/config/charset.cfg73
-rwxr-xr-xhttpd/cgi-bin/check34
-rw-r--r--share/templates/en_US/soap_warnings.tmpl4
-rw-r--r--share/templates/en_US/ucn_warnings.tmpl4
-rw-r--r--share/templates/en_US/warnings.tmpl15
5 files changed, 61 insertions, 69 deletions
diff --git a/htdocs/config/charset.cfg b/htdocs/config/charset.cfg
index 69ef847..aed81a6 100644
--- a/htdocs/config/charset.cfg
+++ b/htdocs/config/charset.cfg
@@ -1,60 +1,23 @@
#
-# List of accepted encodings.
+# List of encoding aliases and forbidden encodings
#
-# $Id: charset.cfg,v 1.13 2006-10-12 01:15:50 ot Exp $
-#
-# The Validator will refuse to decode documents in an encoding
-# other than those listed here. The list is independent of what
+# $Id: charset.cfg,v 1.14 2007-07-19 03:59:23 ot Exp $
+
+# This list indicates character encoding aliases that are
+# not recommended, along with a recommended equivalent, e.g.:
+# encoding-obscure = encoding-well-known
+
+# It also lists encoding names that the validator will refuse to process:
+# bogus_encoding = Encoding Forbidden (Reason why)
+
+# The list is independent of what
# is supported on a specific system but subject to the Validator
# policy for acceptable encodings.
-#
-utf-8 = 1
-utf-16 = 1
-utf-16be = 1
-utf-16le = 1
-iso-8859-1 = 1
-iso-8859-2 = 1
-iso-8859-3 = 1
-iso-8859-4 = 1
-iso-8859-5 = 1
-iso-8859-6 = 1
-iso-8859-6-i = 1
-iso-8859-7 = 1
-iso-8859-8 = 1
-iso-8859-8-i = 1
-iso-8859-9 = 1
-iso-8859-10 = 1
-iso-8859-11 = 1
-iso-8859-13 = 1
-iso-8859-14 = 1
-iso-8859-15 = 1
-iso-8859-16 = 1
-us-ascii = 1
-iso-2022-jp = 1
-shift_jis = 1
-euc-jp = 1
-gb2312 = 1
-big5 = 1
-iso-2022-kr = 1
-euc-kr = 1
-gb18030 = 1
-tis-620 = 1
-koi8-r = 1
-koi8-u = 1
-windows-1250 = 1
-windows-1251 = 1
-windows-1252 = 1
-windows-1253 = 1
-windows-1254 = 1
-windows-1255 = 1
-windows-1256 = 1
-windows-1257 = 1
-macintosh = 1
-x-mac-roman = 1
-x-sjis = 1
-iso8859-1 = 1
-ascii = 1
-iso-8859-1-Windows-3.1-Latin-1 = 1
-ks_c_5601-1987 = 1
-ksc_5601 = 1
+
+x-mac-roman = macintosh
+x-sjis = shift_jis
+iso8859-1 = iso-8859-1
+ascii = us-ascii
+# this one is registered with IANA, but it is better to use only windows-1252
+iso-8859-1-Windows-3.1-Latin-1 = windows-1252
diff --git a/httpd/cgi-bin/check b/httpd/cgi-bin/check
index 97063af..2a4a79d 100755
--- a/httpd/cgi-bin/check
+++ b/httpd/cgi-bin/check
@@ -14,7 +14,7 @@
# This source code is available under the license at:
# http://www.w3.org/Consortium/Legal/copyright-software
#
-# $Id: check,v 1.540 2007-07-17 02:32:36 ot Exp $
+# $Id: check,v 1.541 2007-07-19 03:59:24 ot Exp $
#
# Disable buffering on STDOUT!
@@ -183,7 +183,7 @@ Directory not readable (permission denied): @_r
#
# Strings
- $VERSION = q$Revision: 1.540 $;
+ $VERSION = q$Revision: 1.541 $;
$VERSION =~ s/Revision: ([\d\.]+) /$1/;
#
@@ -2083,19 +2083,25 @@ sub transcode {
my $cs = $exact_charset;
- if (!$CFG->{Charsets}->{$cs}) {
- # The encoding is not supported due to policy
- # and possibly other reasons
-
- $File->{'Error Flagged'} = TRUE;
- $File->{Templates}->{Error}->param(fatal_transcode_error => TRUE);
- $File->{Templates}->{Error}->param(fatal_transcode_charset => $cs);
-
- # @@FIXME might need better text
- $File->{Templates}->{Error}->param(fatal_transcode_errmsg =>
- "Encoding not supported.");
+ if ($CFG->{Charsets}->{$cs}) {
+ if ($CFG->{Charsets}->{$cs} =~ /Encoding Forbidden/) {
+ # The encoding is not supported due to policy
+
+ $File->{'Error Flagged'} = TRUE;
+ $File->{Templates}->{Error}->param(fatal_transcode_error => TRUE);
+ $File->{Templates}->{Error}->param(fatal_transcode_charset => $cs);
- return $File;
+ # @@FIXME might need better text
+ $File->{Templates}->{Error}->param(fatal_transcode_errmsg =>
+ "This encoding is not supported by the validator.");
+ return $File;
+ }
+ else {
+ &add_warning('W22', {
+ W22_declared => $cs,
+ W22_suggested => $CFG->{Charsets}->{$cs},
+ });
+ }
}
# Does the system support decoding this encoding?
diff --git a/share/templates/en_US/soap_warnings.tmpl b/share/templates/en_US/soap_warnings.tmpl
index 4275972..463480d 100644
--- a/share/templates/en_US/soap_warnings.tmpl
+++ b/share/templates/en_US/soap_warnings.tmpl
@@ -73,6 +73,10 @@
<TMPL_IF NAME="W21">
<m:warning><m:message>Byte-Order Mark found in UTF-8 File.</m:message></m:warning>
</TMPL_IF>
+<TMPL_IF NAME="W22">
+<m:warning><m:message>Character Encoding suggestion: use
+<TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"> instead of <TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></m:message></m:warning>
+</TMPL_IF>
<TMPL_IF NAME="W@@">
<m:warning><m:message></m:message></m:warning>
diff --git a/share/templates/en_US/ucn_warnings.tmpl b/share/templates/en_US/ucn_warnings.tmpl
index ca33c71..5222cba 100644
--- a/share/templates/en_US/ucn_warnings.tmpl
+++ b/share/templates/en_US/ucn_warnings.tmpl
@@ -73,6 +73,10 @@
<TMPL_IF NAME="W21">
<warning><message>Byte-Order Mark found in UTF-8 File.</message></warning>
</TMPL_IF>
+<TMPL_IF NAME="W22">
+<warning><message>Character Encoding suggestion: use
+<TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"> instead of <TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></message></warning>
+</TMPL_IF>
<TMPL_IF NAME="W@@">
<warning><message></message></warning>
diff --git a/share/templates/en_US/warnings.tmpl b/share/templates/en_US/warnings.tmpl
index 6393aca..d99dcec 100644
--- a/share/templates/en_US/warnings.tmpl
+++ b/share/templates/en_US/warnings.tmpl
@@ -428,6 +428,21 @@
</p>
</li>
</TMPL_IF>
+<TMPL_IF NAME="W22">
+ <li class="msg_warn" id="W22"><span class="err_type"><img src="images/info_icons/warning.png" alt="Warning" title="Warning" /></span> <span class="msg">Character Encoding suggestion: use
+ <code><TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"></code> instead of <code><TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></code></span>
+
+ <p>
+ The character encoding declared for this document
+ (<code><TMPL_VAR NAME="W22_declared" ESCAPE="HTML"></code>) may not be widely supported.
+ This encoding is equivalent to <code><TMPL_VAR NAME="W22_suggested" ESCAPE="HTML"></code>
+ which may be better supported across platforms.
+ </p>
+ <p>
+ More information on <a href="http://www.w3.org/International/O-charset.html">declaring a character encoding on your Web server or in your document</a> can be found on the W3C Internationalization site.
+ </p>
+ </li>
+</TMPL_IF>