blob: 859b17480272c4bc8abad45d2b525a6bce40e9a1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
(*-*-coding: utf-8;-*-*)
#use "unicode_build.ml"
#load "unix.cma"
(* Compile unicode_tables.ml with ocamlc unless unicode_tables.cmo is already
   present.  The exit status returned by Unix.system is deliberately
   discarded. *)
let _ =
  let build_tables =
    "test -f unicode_tables.cmo || ocamlc -c unicode_tables.ml" in
  Unix.system build_tables;;
#load "unicode_tables.cmo"
#use "unicode.ml"
#use "reorder.ml"
(* Normalization test over the code points 0 .. 0x1ffff, skipping the
   surrogate range 0xd800 .. 0xdfff.
   Two strings are built in parallel: the concatenation of [encode_utf8 cp]
   and the concatenation of [conv cp].  [conv] is defined outside this file;
   the assertions below treat its output as the expected result of
   [normalize] -- TODO confirm against its definition. *)
let _ =
  let utf8_buf = Buffer.create 1024
  and conv_buf = Buffer.create 1024 in
  let outside_surrogates cp = cp < 0xd800 || cp > 0xdfff in
  for cp = 0 to 0x1ffff do
    if outside_surrogates cp then begin
      Buffer.add_string utf8_buf (encode_utf8 cp);
      Buffer.add_string conv_buf (conv cp)
    end
  done;
  let encoded = Buffer.contents utf8_buf in
  let expected = Buffer.contents conv_buf in
  prerr_endline "===";
  (* Report the sizes before and after normalization. *)
  let encoded_len = String.length encoded
  and expected_len = String.length expected in
  Format.printf "%d %d@." encoded_len expected_len;
  let normalized_len = String.length (normalize encoded) in
  Format.printf "%d %d@." normalized_len expected_len;
  (* Normalizing the raw encoding must give the [conv] output ... *)
  assert (normalize encoded = expected);
  (* ... which must itself be left unchanged by [normalize] ... *)
  assert (normalize expected = expected);
  (* ... and [compose] followed by [normalize] must give it back. *)
  assert (normalize (compose expected) = expected);
  (* Converting to UTF-16 and back must be lossless. *)
  assert (from_utf_16 (to_utf_16 encoded) = encoded)
(* Composition test around the Hangul syllable block: covers the range
   hangul_sbase .. hangul_sbase + hangul_scount - 1 extended by 128 code
   points on each side.  Composing the concatenated [conv] outputs must
   reproduce the concatenated [encode_utf8] outputs. *)
let _ =
  let margin = 128 in
  let first_cp = hangul_sbase - margin in
  let last_cp = hangul_sbase + hangul_scount - 1 + margin in
  let utf8_buf = Buffer.create 1024
  and conv_buf = Buffer.create 1024 in
  for cp = first_cp to last_cp do
    Buffer.add_string utf8_buf (encode_utf8 cp);
    Buffer.add_string conv_buf (conv cp)
  done;
  let encoded = Buffer.contents utf8_buf in
  let converted = Buffer.contents conv_buf in
  assert (compose converted = encoded)
(* Table-driven checks of [compare]: each entry is (left, right, expected
   result).  The expected values show that strings differing only in letter
   case compare equal, so this is presumably a [compare] brought into scope
   by the #use'd files rather than the standard polymorphic one -- verify. *)
let _ =
  List.iter
    (fun (left, right, expected) -> assert (compare left right = expected))
    [ ("abcdéfgh",  "ABCDÉFGH",  0);
      ("abcdéfghi", "ABCDÉFGH",  1);
      ("abcdefghi", "ABCDeFGH",  1);
      ("abcdéfgh",  "ABCDÉFGHi", (-1));
      ("abcdefgh",  "ABCDeFGHi", (-1));
      ("abcdéfgh",  "ACCDÉFGH",  (-1));
      ("abcdéfgh",  "ABCDÉFFH",  1) ]
(* Ordering check on the output of [conv] for every code point in
   0 .. 0xffff outside the surrogate range: in the UTF-16 form of [conv cp],
   whenever two adjacent 16-bit units both have a non-zero combining class,
   the first class must not exceed the second. *)
let _ =
  (* 16-bit unit number [k] of [s], read low byte first. *)
  let unit_at s k = get s (2 * k) + 256 * get s (2 * k + 1) in
  for cp = 0 to 0xffff do
    if not (cp >= 0xd800 && cp <= 0xdfff) then begin
      let utf16 = to_utf_16 (conv cp) in
      let unit_count = String.length utf16 / 2 in
      (* Visit every adjacent pair (k, k + 1) of units. *)
      for k = 0 to unit_count - 2 do
        let left = unit_at utf16 k in
        let right = unit_at utf16 (k + 1) in
        let left_class = combining_class left in
        let right_class = combining_class right in
        (* Debugging aid:
           Format.printf "%04x %04x => %02x %02x@."
             left right left_class right_class; *)
        assert (not (left_class <> 0 && right_class <> 0
                     && left_class > right_class))
      done
    end
  done
(* Checks [order] on two hand-written inputs given as UTF-16 byte strings
   (low byte first).  [order] is called for its effect and the same value is
   compared afterwards, so it presumably reorders its argument in place --
   confirm against its definition.
   Case 1: U+0061 U+0301 U+0063 U+0301 U+0327
        -> U+0061 U+0301 U+0063 U+0327 U+0301
   Case 2: U+0301 U+0327 -> U+0327 U+0301 *)
let _ =
  let check_order input_utf16 expected_utf16 =
    let text = from_utf_16 input_utf16 in
    order text;
    assert (text = from_utf_16 expected_utf16)
  in
  check_order
    "\x61\x00\x01\x03\x63\x00\x01\x03\x27\x03"
    "\x61\x00\x01\x03\x63\x00\x27\x03\x01\x03";
  check_order
    "\x01\x03\x27\x03"
    "\x27\x03\x01\x03"
(*
0061;LATIN SMALL LETTER A;...0;...
0063;LATIN SMALL LETTER C;...0;...
00E1;LATIN SMALL LETTER A WITH ACUTE;...0;...0061 0301;...
0107;LATIN SMALL LETTER C WITH ACUTE;...0;...0063 0301;...
0301;COMBINING ACUTE ACCENT;...230;...
0327;COMBINING CEDILLA;...202;...
*)
|