summary | refs | log | tree | commit | diff | stats
path: root/htdocs/config
diff options
context:
space:
mode:
author: ot <ot@localhost> 2007-07-19 08:18:31 +0000
committer: ot <ot@localhost> 2007-07-19 08:18:31 +0000
commit: f11f7e9050d68a18a70b2035f56c82a9baf720fd (patch)
tree: 1e65468018e693e9c02614ffbcadae02733f2c09 /htdocs/config
parent: df7f4b4de4c1770008fd15cef497d7642877d43f (diff)
download: markup-validator-f11f7e9050d68a18a70b2035f56c82a9baf720fd.zip
markup-validator-f11f7e9050d68a18a70b2035f56c82a9baf720fd.tar.gz
markup-validator-f11f7e9050d68a18a70b2035f56c82a9baf720fd.tar.bz2
re-populating the list of charsets (from older revisions in both branch 0.7 and 0.8)
to be used thus:
* no fatal error if the charset is supported by encode
* a warning with a suggestion for a better alias if we know one
* a warning that the encoding may be "odd" if it is not in the list but encode says it's OK
Diffstat (limited to 'htdocs/config')
-rw-r--r-- htdocs/config/charset.cfg 89
1 files changed, 72 insertions, 17 deletions
diff --git a/htdocs/config/charset.cfg b/htdocs/config/charset.cfg
index aed81a6..f007cc4 100644
--- a/htdocs/config/charset.cfg
+++ b/htdocs/config/charset.cfg
@@ -1,23 +1,78 @@
#
-# List of encodings aliases and forbidden encodings
+# list of accepted/preferred character encodings
#
-# $Id: charset.cfg,v 1.14 2007-07-19 03:59:23 ot Exp $
-
-# This list indicates character encoding aliases that are
-# not recommended, along with a recommended equivalent, e.g:
-# encoding-obscure = encoding-well-known
-
-# It also lists encoding names that the validator will refuse to treat:
-# bogus_encoding = Encoding Forbidden (Reason why)
+# $Id: charset.cfg,v 1.15 2007-07-19 08:18:30 ot Exp $
+#
+# Syntax:
+#
+# charset/encoding = ? result
+#
+# Note: charsets and results are lowercase, actions are uppercase
+#
+# ? indicates the action to take:
+# 1: OK, charset supported
+# X: frequent error, e.g. starting with x-; ask user to replace with result
+# ERR: a charset we refuse, per some policy. Reason stated after ERR
-# The list is independent of what
-# is supported on a specific system but subject to the Validator
-# policy for acceptable encodings.
+# e.g.:
+# utf-8 = 1
+# odd-alias = X good-alias
+# bad_charset = ERR explain reason
+utf-8 = 1
+utf-16 = 1
+utf-16be = 1
+utf-16le = 1
+iso-8859-1 = 1
+iso-8859-2 = 1
+iso-8859-3 = 1
+iso-8859-4 = 1
+iso-8859-5 = 1
+iso-8859-6 = 1
+# implicit bidi, but character encoding is the same
+iso-8859-6-i = 1
+iso-8859-7 = 1
+iso-8859-8 = 1
+# implicit bidi, but character encoding is the same
+iso-8859-8-i = 1
+iso-8859-9 = 1
+iso-8859-10 = 1
+iso-8859-11 = 1
+# iso-8859-12 doesn't exist (yet?)
+iso-8859-13 = 1
+iso-8859-14 = 1
+iso-8859-15 = 1
+iso-8859-16 = 1
+us-ascii = 1
+iso-2022-jp = 1
+shift_jis = 1
+euc-jp = 1
+gb2312 = 1
+big5 = 1
+iso-2022-kr = 1
+euc-kr = 1
+gb18030 = 1
+tis-620 = 1
+koi8-r = 1
+koi8-u = 1
+iso-ir-111 = 1
+windows-1250 = 1
+windows-1251 = 1
+windows-1252 = 1
+windows-1253 = 1
+windows-1254 = 1
+windows-1255 = 1
+windows-1256 = 1
+windows-1257 = 1
+# windows-1258 = 1
+macintosh = 1
+ks_c_5601-1987 = 1
+ksc_5601 = 1
-x-mac-roman = macintosh
-x-sjis = shift_jis
-iso8859-1 = iso-8859-1
-ascii = us-ascii
+x-mac-roman = X macintosh
+x-sjis = X shift_jis
+iso8859-1 = X iso-8859-1
+ascii = X us-ascii
+8859_1 = X iso-8859-1
# this one is in IANA, but better use only windows-1252
-iso-8859-1-Windows-3.1-Latin-1 = windows-1252
+iso-8859-1-Windows-3.1-Latin-1 = X windows-1252