#
# List of accepted/preferred character encodings
#
# $Id: charset.cfg,v 1.18 2009-09-13 19:29:24 ville Exp $
#
# Syntax:
#
#   charset/encoding = ? result
#
# Note: charsets and results are lowercase, actions are uppercase.
# The one exception is the result of an "A" action: it is the name that
# Perl's Encode knows the encoding by, which may be mixed case
# (e.g. MacRoman).
#
# ? indicates the action to take:
#   1:   OK, charset supported
#   A:   OK, charset supported; Encode::Alias it to result
#   X:   frequent error, e.g. a name starting with x-; ask the user to
#        replace it with result
#   ERR: a charset we refuse, per some policy; the reason is stated after ERR
#
# Examples:
#   utf-8 = 1
#   some-alias = A perl-Encode-name
#   odd-alias = X good-alias
#   bad_charset = ERR explain reason

utf-8 = 1
utf-16 = 1
utf-16be = 1
utf-16le = 1

iso-8859-1 = 1
iso-8859-2 = 1
iso-8859-3 = 1
iso-8859-4 = 1
iso-8859-5 = 1
iso-8859-6 = 1
# implicit bidi, but the character encoding is the same
iso-8859-6-i = A iso-8859-6
iso-8859-7 = 1
iso-8859-8 = 1
# implicit bidi, but the character encoding is the same
iso-8859-8-i = A iso-8859-8
iso-8859-9 = 1
iso-8859-10 = 1
iso-8859-11 = 1
# iso-8859-12 doesn't exist (yet?)
iso-8859-13 = 1
iso-8859-14 = 1
iso-8859-15 = 1
iso-8859-16 = 1

us-ascii = 1

iso-2022-jp = 1
shift_jis = 1
euc-jp = 1
gb2312 = 1
big5 = 1
big5-hkscs = 1
iso-2022-kr = 1
euc-kr = 1
gb18030 = 1

# 0xA0 is U+00A0 in ISO-8859-11 but undefined in tis-620;
# other than that the character encodings are equivalent
tis-620 = A iso-8859-11

koi8-r = 1
koi8-u = 1
iso-ir-111 = 1

windows-1250 = 1
windows-1251 = 1
windows-1252 = 1
windows-1253 = 1
windows-1254 = 1
windows-1255 = 1
windows-1256 = 1
windows-1257 = 1
# NOTE(review): windows-1258 is disabled; the reason is not recorded here.
# windows-1258 = 1

# Encode::Byte does not know 'macintosh' but MacRoman
macintosh = A MacRoman

ks_c_5601-1987 = 1
# Encode only knows the longhand version of 'ksc_5601'
ksc_5601 = A KS_C_5601-1987

# Frequent errors (action X): ask the user to use the result instead
x-mac-roman = X macintosh
x-sjis = X shift_jis
iso8859-1 = X iso-8859-1
ascii = X us-ascii
8859_1 = X iso-8859-1
# This one is in IANA (as ISO-8859-1-Windows-3.1-Latin-1), but better use
# only windows-1252. The key is written in lowercase because charsets in
# this file are lowercase (see the note in the header).
iso-8859-1-windows-3.1-latin-1 = X windows-1252