blob: bbf979ff3ab44a30c75e828523ce19b1292f38f3 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
#
# List of accepted/preferred character encodings
#
# Syntax:
#
# charset/encoding = ? result
#
# Note: charset names are lowercase and actions are uppercase. Results are
# lowercase too, except where an A action needs the exact Perl Encode name
# (e.g. MacRoman).
#
# ? indicates the action to take:
# 1: OK, character encoding supported
# A: OK, character encoding supported; Encode::Alias it to result
# X: frequent error, e.g. a name starting with x-; ask the user to replace it with result
# ERR: a charset we refuse, per some policy. The reason is stated after ERR
#
# Examples:
# utf-8 = 1
# some-alias = A perl-Encode-name
# odd-alias = X good-alias
# bad_charset = ERR explain reason
# Encodings accepted under their own name (action 1), interleaved with
# A entries that alias a label onto another encoding name.
utf-8 = 1
utf-16 = 1
utf-16be = 1
utf-16le = 1
iso-8859-1 = 1
iso-8859-2 = 1
iso-8859-3 = 1
iso-8859-4 = 1
iso-8859-5 = 1
iso-8859-6 = 1
# The -i variant denotes implicit bidi, but the character encoding is the
# same, so it is aliased to iso-8859-6.
iso-8859-6-i = A iso-8859-6
iso-8859-7 = 1
iso-8859-8 = 1
# The -i variant denotes implicit bidi, but the character encoding is the
# same, so it is aliased to iso-8859-8.
iso-8859-8-i = A iso-8859-8
iso-8859-9 = 1
iso-8859-10 = 1
iso-8859-11 = 1
# iso-8859-12 doesn't exist (yet?), hence the gap in the numbering.
iso-8859-13 = 1
iso-8859-14 = 1
iso-8859-15 = 1
iso-8859-16 = 1
us-ascii = 1
iso-2022-jp = 1
shift_jis = 1
euc-jp = 1
gb2312 = 1
big5 = 1
big5-hkscs = 1
iso-2022-kr = 1
euc-kr = 1
gb18030 = 1
# 0xA0 is U+00A0 in iso-8859-11 but undefined in tis-620; apart from that
# byte the two character encodings are equivalent, so tis-620 is aliased.
tis-620 = A iso-8859-11
koi8-r = 1
koi8-u = 1
iso-ir-111 = 1
windows-1250 = 1
windows-1251 = 1
windows-1252 = 1
windows-1253 = 1
windows-1254 = 1
windows-1255 = 1
windows-1256 = 1
windows-1257 = 1
# NOTE(review): windows-1258 is disabled below with no recorded reason;
# confirm whether it should stay commented out.
# windows-1258 = 1
# Encode::Byte has no encoding named 'macintosh'; it calls it MacRoman.
macintosh = A MacRoman
ks_c_5601-1987 = 1
# Encode only knows the longhand version of 'ksc_5601'.
ksc_5601 = A KS_C_5601-1987
# Frequently seen incorrect labels (action X): the user is asked to use the
# name on the right-hand side instead.
x-mac-roman = X macintosh
x-sjis = X shift_jis
iso8859-1 = X iso-8859-1
ascii = X us-ascii
8859_1 = X iso-8859-1
# This label is registered with IANA, but windows-1252 is the preferred name.
# NOTE(review): this key is mixed case although the syntax note at the top of
# the file says charsets are lowercase; confirm the lookup is
# case-insensitive, otherwise this entry may never match.
iso-8859-1-Windows-3.1-Latin-1 = X windows-1252
|