open Unicode_tables exception Invalid let fail () = raise Invalid let get s i = Char.code (String.unsafe_get s i) let set s i v = String.unsafe_set s i (Char.unsafe_chr v) (****) let hangul_sbase = 0xAC00 let hangul_lbase = 0x1100 let hangul_vbase = 0x1161 let hangul_tbase = 0x11A7 let hangul_scount = 11172 let hangul_lcount = 19 let hangul_vcount = 21 let hangul_tcount = 28 let hangul_ncount = hangul_vcount * hangul_tcount let set_char_3 s i c = set s i (c lsr 12 + 0xE0); set s (i + 1) ((c lsr 6) land 0x3f + 0x80); set s (i + 2) (c land 0x3f + 0x80) let rec norm s i l s' j = if i < l then begin let c = get s i in if c < 0x80 then begin set s' j (get ascii_lower c); norm s (i + 1) l s' (j + 1) end else if c < 0xE0 then begin (* 80 - 7FF *) if c < 0xc2 || i + 1 >= l then raise Invalid; let c1 = get s (i + 1) in if c1 land 0xc0 <> 0x80 then raise Invalid; let idx = get norm_prim (c - 0xc0) in let idx = idx lsl 6 + c1 - 0x80 in let k = get norm_second_high idx in if k = 0 then begin set s' j c; set s' (j + 1) c1; norm s (i + 2) l s' (j + 2) end else begin let k = (k - 2) lsl 8 + get norm_second_low idx in let n = get norm_repl k in String.blit norm_repl (k + 1) s' j n; norm s (i + 2) l s' (j + n) end end else if c < 0xF0 then begin (* 800 - FFFF *) if i + 2 >= l then raise Invalid; let c1 = get s (i + 1) in if c1 land 0xc0 <> 0x80 then raise Invalid; let idx = c lsl 6 + c1 - 0x3880 in if idx < 0x20 then raise Invalid; let c2 = get s (i + 2) in if c2 land 0xc0 <> 0x80 then raise Invalid; let idx = get norm_prim idx in let idx = idx lsl 6 + c2 - 0x80 in let k = get norm_second_high idx in if k = 0 then begin set s' j c; set s' (j + 1) c1; set s' (j + 2) c2; norm s (i + 3) l s' (j + 3) end else if k = 1 then begin let v = c lsl 12 + c1 lsl 6 + c2 - (0x000E2080 + hangul_sbase) in if v >= hangul_scount then begin set s' j c; set s' (j + 1) c1; set s' (j + 2) c2; norm s (i + 3) l s' (j + 3) end else begin set_char_3 s' j (v / hangul_ncount + hangul_lbase); set_char_3 s' (j + 3) ((v mod hangul_ncount) / hangul_tcount + hangul_vbase); if v mod hangul_tcount = 0 then norm s (i + 3) l s' (j + 6) else begin set_char_3 s' (j + 6) ((v mod hangul_tcount) + hangul_tbase); norm s (i + 3) l s' (j + 9) end end end else begin let k = (k - 2) lsl 8 + get norm_second_low idx in let n = get norm_repl k in String.blit norm_repl (k + 1) s' j n; norm s (i + 3) l s' (j + n) end end else begin (* 10000 - 10FFFF *) if i + 3 >= l then raise Invalid; let c1 = get s (i + 1) in let c2 = get s (i + 2) in let c3 = get s (i + 3) in if (c1 lor c2 lor c3) land 0xc0 <> 0x80 then raise Invalid; let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in if v < 0x10000 || v > 0x10ffff then raise Invalid; set s' j c; set s' (j + 1) c1; set s' (j + 2) c2; set s' (j + 3) c3; norm s (i + 4) l s' (j + 4) end end else String.sub s' 0 j let normalize s = let l = String.length s in let s' = Bytes.create (3 * l) in try norm s 0 l s' 0 with Invalid -> s (****) let rec compare_rec s s' i l = if i = l then begin if l < String.length s then 1 else if l < String.length s' then -1 else 0 end else begin let c = get s i in let c' = get s' i in if c < 0x80 && c' < 0x80 then begin let v = compare (get ascii_lower c) (get ascii_lower c') in if v <> 0 then v else compare_rec s s' (i + 1) l end else compare (normalize s) (normalize s') end let compare s s' = compare_rec s s' 0 (min (String.length s) (String.length s')) (****) let rec decode_char s i l = if i = l then fail () else let c = get s i in if c < 0x80 then cont s (i + 1) l c else if c < 0xE0 then begin (* 80 - 7FF *) if c < 0xc2 || i + 1 >= l then fail () else let c1 = get s (i + 1) in if c1 land 0xc0 <> 0x80 then fail () else let v = c lsl 6 + c1 - 0x3080 in cont s (i + 2) l v end else if c < 0xF0 then begin (* 800 - FFFF *) if i + 2 >= l then fail () else let c1 = get s (i + 1) in let c2 = get s (i + 2) in if (c1 lor c2) land 0xc0 <> 0x80 then fail () else let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in if v < 0x800 then fail () else cont s (i + 3) l v end else begin (* 10000 - 10FFFF *) if i + 3 >= l then fail () else let c1 = get s (i + 1) in let c2 = get s (i + 2) in let c3 = get s (i + 3) in if (c1 lor c2 lor c3) land 0xc0 <> 0x80 then fail () else let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in if v < 0x10000 || v > 0x10ffff then fail () else cont s (i + 4) l v end and cont s i l v = (v, i) let encode_char s i l c = if c < 0x80 then begin if i >= l then fail () else begin set s i c; i + 1 end end else if c < 0x800 then begin if i + 1 >= l then fail () else begin set s i (c lsr 6 + 0xC0); set s (i + 1) (c land 0x3f + 0x80); i + 2 end end else if c < 0x10000 then begin if i + 1 >= l then fail () else begin set s i (c lsr 12 + 0xE0); set s (i + 1) ((c lsr 6) land 0x3f + 0x80); set s (i + 2) (c land 0x3f + 0x80); i + 3 end end else begin if i + 1 >= l then fail () else begin set s i (c lsr 18 + 0xF0); set s (i + 1) ((c lsr 12) land 0x3f + 0x80); set s (i + 2) ((c lsr 6) land 0x3f + 0x80); set s (i + 3) (c land 0x3f + 0x80); i + 4 end end let rec prev_char s i = let i = i - 1 in if i < 0 then fail () else if (get s i) land 0xc0 <> 0x80 then i else prev_char s i (****) let uniCharPrecompSourceTable = [| 0x00000300; 0x00540000; 0x00000301; 0x00750054; 0x00000302; 0x002000C9; 0x00000303; 0x001C00E9; 0x00000304; 0x002C0105; 0x00000306; 0x00200131; 0x00000307; 0x002E0151; 0x00000308; 0x0036017F; 0x00000309; 0x001801B5; 0x0000030A; 0x000601CD; 0x0000030B; 0x000601D3; 0x0000030C; 0x002501D9; 0x0000030F; 0x000E01FE; 0x00000311; 0x000C020C; 0x00000313; 0x000E0218; 0x00000314; 0x00100226; 0x0000031B; 0x00040236; 0x00000323; 0x002A023A; 0x00000324; 0x00020264; 0x00000325; 0x00020266; 0x00000326; 0x00040268; 0x00000327; 0x0016026C; 0x00000328; 0x000A0282; 0x0000032D; 0x000C028C; 0x0000032E; 0x00020298; 0x00000330; 0x0006029A; 0x00000331; 0x001102A0; 0x00000338; 0x002C02B1; 0x00000342; 0x001D02DD; 0x00000345; 0x003F02FA; 0x00000653; 0x00010339; 0x00000654; 0x0006033A; 0x00000655; 0x00010340; 0x0000093C; 0x00030341; 0x000009BE; 0x00010344; 0x000009D7; 0x00010345; 0x00000B3E; 0x00010346; 0x00000B56; 0x00010347; 0x00000B57; 0x00010348; 0x00000BBE; 0x00020349; 0x00000BD7; 0x0002034B; 0x00000C56; 0x0001034D; 0x00000CC2; 0x0001034E; 0x00000CD5; 0x0003034F; 0x00000CD6; 0x00010352; 0x00000D3E; 0x00020353; 0x00000D57; 0x00010355; 0x00000DCA; 0x00020356; 0x00000DCF; 0x00010358; 0x00000DDF; 0x00010359; 0x0000102E; 0x0001035A; 0x00003099; 0x0030035B; 0x0000309A; 0x000A038B |] let uniCharBMPPrecompDestinationTable = [| 0x0041; 0x00C0; 0x0045; 0x00C8; 0x0049; 0x00CC; 0x004E; 0x01F8; 0x004F; 0x00D2; 0x0055; 0x00D9; 0x0057; 0x1E80; 0x0059; 0x1EF2; 0x0061; 0x00E0; 0x0065; 0x00E8; 0x0069; 0x00EC; 0x006E; 0x01F9; 0x006F; 0x00F2; 0x0075; 0x00F9; 0x0077; 0x1E81; 0x0079; 0x1EF3; 0x00A8; 0x1FED; 0x00C2; 0x1EA6; 0x00CA; 0x1EC0; 0x00D4; 0x1ED2; 0x00DC; 0x01DB; 0x00E2; 0x1EA7; 0x00EA; 0x1EC1; 0x00F4; 0x1ED3; 0x00FC; 0x01DC; 0x0102; 0x1EB0; 0x0103; 0x1EB1; 0x0112; 0x1E14; 0x0113; 0x1E15; 0x014C; 0x1E50; 0x014D; 0x1E51; 0x01A0; 0x1EDC; 0x01A1; 0x1EDD; 0x01AF; 0x1EEA; 0x01B0; 0x1EEB; 0x0391; 0x1FBA; 0x0395; 0x1FC8; 0x0397; 0x1FCA; 0x0399; 0x1FDA; 0x039F; 0x1FF8; 0x03A5; 0x1FEA; 0x03A9; 0x1FFA; 0x03B1; 0x1F70; 0x03B5; 0x1F72; 0x03B7; 0x1F74; 0x03B9; 0x1F76; 0x03BF; 0x1F78; 0x03C5; 0x1F7A; 0x03C9; 0x1F7C; 0x03CA; 0x1FD2; 0x03CB; 0x1FE2; 0x0415; 0x0400; 0x0418; 0x040D; 0x0435; 0x0450; 0x0438; 0x045D; 0x1F00; 0x1F02; 0x1F01; 0x1F03; 0x1F08; 0x1F0A; 0x1F09; 0x1F0B; 0x1F10; 0x1F12; 0x1F11; 0x1F13; 0x1F18; 0x1F1A; 0x1F19; 0x1F1B; 0x1F20; 0x1F22; 0x1F21; 0x1F23; 0x1F28; 0x1F2A; 0x1F29; 0x1F2B; 0x1F30; 0x1F32; 0x1F31; 0x1F33; 0x1F38; 0x1F3A; 0x1F39; 0x1F3B; 0x1F40; 0x1F42; 0x1F41; 0x1F43; 0x1F48; 0x1F4A; 0x1F49; 0x1F4B; 0x1F50; 0x1F52; 0x1F51; 0x1F53; 0x1F59; 0x1F5B; 0x1F60; 0x1F62; 0x1F61; 0x1F63; 0x1F68; 0x1F6A; 0x1F69; 0x1F6B; 0x1FBF; 0x1FCD; 0x1FFE; 0x1FDD; 0x0041; 0x00C1; 0x0043; 0x0106; 0x0045; 0x00C9; 0x0047; 0x01F4; 0x0049; 0x00CD; 0x004B; 0x1E30; 0x004C; 0x0139; 0x004D; 0x1E3E; 0x004E; 0x0143; 0x004F; 0x00D3; 0x0050; 0x1E54; 0x0052; 0x0154; 0x0053; 0x015A; 0x0055; 0x00DA; 0x0057; 0x1E82; 0x0059; 0x00DD; 0x005A; 0x0179; 0x0061; 0x00E1; 0x0063; 0x0107; 0x0065; 0x00E9; 0x0067; 0x01F5; 0x0069; 0x00ED; 0x006B; 0x1E31; 0x006C; 0x013A; 0x006D; 0x1E3F; 0x006E; 0x0144; 0x006F; 0x00F3; 0x0070; 0x1E55; 0x0072; 0x0155; 0x0073; 0x015B; 0x0075; 0x00FA; 0x0077; 0x1E83; 0x0079; 0x00FD; 0x007A; 0x017A; 0x00A8; 0x0385; 0x00C2; 0x1EA4; 0x00C5; 0x01FA; 0x00C6; 0x01FC; 0x00C7; 0x1E08; 0x00CA; 0x1EBE; 0x00CF; 0x1E2E; 0x00D4; 0x1ED0; 0x00D5; 0x1E4C; 0x00D8; 0x01FE; 0x00DC; 0x01D7; 0x00E2; 0x1EA5; 0x00E5; 0x01FB; 0x00E6; 0x01FD; 0x00E7; 0x1E09; 0x00EA; 0x1EBF; 0x00EF; 0x1E2F; 0x00F4; 0x1ED1; 0x00F5; 0x1E4D; 0x00F8; 0x01FF; 0x00FC; 0x01D8; 0x0102; 0x1EAE; 0x0103; 0x1EAF; 0x0112; 0x1E16; 0x0113; 0x1E17; 0x014C; 0x1E52; 0x014D; 0x1E53; 0x0168; 0x1E78; 0x0169; 0x1E79; 0x01A0; 0x1EDA; 0x01A1; 0x1EDB; 0x01AF; 0x1EE8; 0x01B0; 0x1EE9; 0x0391; 0x0386; 0x0395; 0x0388; 0x0397; 0x0389; 0x0399; 0x038A; 0x039F; 0x038C; 0x03A5; 0x038E; 0x03A9; 0x038F; 0x03B1; 0x03AC; 0x03B5; 0x03AD; 0x03B7; 0x03AE; 0x03B9; 0x03AF; 0x03BF; 0x03CC; 0x03C5; 0x03CD; 0x03C9; 0x03CE; 0x03CA; 0x0390; 0x03CB; 0x03B0; 0x03D2; 0x03D3; 0x0413; 0x0403; 0x041A; 0x040C; 0x0433; 0x0453; 0x043A; 0x045C; 0x1F00; 0x1F04; 0x1F01; 0x1F05; 0x1F08; 0x1F0C; 0x1F09; 0x1F0D; 0x1F10; 0x1F14; 0x1F11; 0x1F15; 0x1F18; 0x1F1C; 0x1F19; 0x1F1D; 0x1F20; 0x1F24; 0x1F21; 0x1F25; 0x1F28; 0x1F2C; 0x1F29; 0x1F2D; 0x1F30; 0x1F34; 0x1F31; 0x1F35; 0x1F38; 0x1F3C; 0x1F39; 0x1F3D; 0x1F40; 0x1F44; 0x1F41; 0x1F45; 0x1F48; 0x1F4C; 0x1F49; 0x1F4D; 0x1F50; 0x1F54; 0x1F51; 0x1F55; 0x1F59; 0x1F5D; 0x1F60; 0x1F64; 0x1F61; 0x1F65; 0x1F68; 0x1F6C; 0x1F69; 0x1F6D; 0x1FBF; 0x1FCE; 0x1FFE; 0x1FDE; 0x0041; 0x00C2; 0x0043; 0x0108; 0x0045; 0x00CA; 0x0047; 0x011C; 0x0048; 0x0124; 0x0049; 0x00CE; 0x004A; 0x0134; 0x004F; 0x00D4; 0x0053; 0x015C; 0x0055; 0x00DB; 0x0057; 0x0174; 0x0059; 0x0176; 0x005A; 0x1E90; 0x0061; 0x00E2; 0x0063; 0x0109; 0x0065; 0x00EA; 0x0067; 0x011D; 0x0068; 0x0125; 0x0069; 0x00EE; 0x006A; 0x0135; 0x006F; 0x00F4; 0x0073; 0x015D; 0x0075; 0x00FB; 0x0077; 0x0175; 0x0079; 0x0177; 0x007A; 0x1E91; 0x1EA0; 0x1EAC; 0x1EA1; 0x1EAD; 0x1EB8; 0x1EC6; 0x1EB9; 0x1EC7; 0x1ECC; 0x1ED8; 0x1ECD; 0x1ED9; 0x0041; 0x00C3; 0x0045; 0x1EBC; 0x0049; 0x0128; 0x004E; 0x00D1; 0x004F; 0x00D5; 0x0055; 0x0168; 0x0056; 0x1E7C; 0x0059; 0x1EF8; 0x0061; 0x00E3; 0x0065; 0x1EBD; 0x0069; 0x0129; 0x006E; 0x00F1; 0x006F; 0x00F5; 0x0075; 0x0169; 0x0076; 0x1E7D; 0x0079; 0x1EF9; 0x00C2; 0x1EAA; 0x00CA; 0x1EC4; 0x00D4; 0x1ED6; 0x00E2; 0x1EAB; 0x00EA; 0x1EC5; 0x00F4; 0x1ED7; 0x0102; 0x1EB4; 0x0103; 0x1EB5; 0x01A0; 0x1EE0; 0x01A1; 0x1EE1; 0x01AF; 0x1EEE; 0x01B0; 0x1EEF; 0x0041; 0x0100; 0x0045; 0x0112; 0x0047; 0x1E20; 0x0049; 0x012A; 0x004F; 0x014C; 0x0055; 0x016A; 0x0059; 0x0232; 0x0061; 0x0101; 0x0065; 0x0113; 0x0067; 0x1E21; 0x0069; 0x012B; 0x006F; 0x014D; 0x0075; 0x016B; 0x0079; 0x0233; 0x00C4; 0x01DE; 0x00C6; 0x01E2; 0x00D5; 0x022C; 0x00D6; 0x022A; 0x00DC; 0x01D5; 0x00E4; 0x01DF; 0x00E6; 0x01E3; 0x00F5; 0x022D; 0x00F6; 0x022B; 0x00FC; 0x01D6; 0x01EA; 0x01EC; 0x01EB; 0x01ED; 0x0226; 0x01E0; 0x0227; 0x01E1; 0x022E; 0x0230; 0x022F; 0x0231; 0x0391; 0x1FB9; 0x0399; 0x1FD9; 0x03A5; 0x1FE9; 0x03B1; 0x1FB1; 0x03B9; 0x1FD1; 0x03C5; 0x1FE1; 0x0418; 0x04E2; 0x0423; 0x04EE; 0x0438; 0x04E3; 0x0443; 0x04EF; 0x1E36; 0x1E38; 0x1E37; 0x1E39; 0x1E5A; 0x1E5C; 0x1E5B; 0x1E5D; 0x0041; 0x0102; 0x0045; 0x0114; 0x0047; 0x011E; 0x0049; 0x012C; 0x004F; 0x014E; 0x0055; 0x016C; 0x0061; 0x0103; 0x0065; 0x0115; 0x0067; 0x011F; 0x0069; 0x012D; 0x006F; 0x014F; 0x0075; 0x016D; 0x0228; 0x1E1C; 0x0229; 0x1E1D; 0x0391; 0x1FB8; 0x0399; 0x1FD8; 0x03A5; 0x1FE8; 0x03B1; 0x1FB0; 0x03B9; 0x1FD0; 0x03C5; 0x1FE0; 0x0410; 0x04D0; 0x0415; 0x04D6; 0x0416; 0x04C1; 0x0418; 0x0419; 0x0423; 0x040E; 0x0430; 0x04D1; 0x0435; 0x04D7; 0x0436; 0x04C2; 0x0438; 0x0439; 0x0443; 0x045E; 0x1EA0; 0x1EB6; 0x1EA1; 0x1EB7; 0x0041; 0x0226; 0x0042; 0x1E02; 0x0043; 0x010A; 0x0044; 0x1E0A; 0x0045; 0x0116; 0x0046; 0x1E1E; 0x0047; 0x0120; 0x0048; 0x1E22; 0x0049; 0x0130; 0x004D; 0x1E40; 0x004E; 0x1E44; 0x004F; 0x022E; 0x0050; 0x1E56; 0x0052; 0x1E58; 0x0053; 0x1E60; 0x0054; 0x1E6A; 0x0057; 0x1E86; 0x0058; 0x1E8A; 0x0059; 0x1E8E; 0x005A; 0x017B; 0x0061; 0x0227; 0x0062; 0x1E03; 0x0063; 0x010B; 0x0064; 0x1E0B; 0x0065; 0x0117; 0x0066; 0x1E1F; 0x0067; 0x0121; 0x0068; 0x1E23; 0x006D; 0x1E41; 0x006E; 0x1E45; 0x006F; 0x022F; 0x0070; 0x1E57; 0x0072; 0x1E59; 0x0073; 0x1E61; 0x0074; 0x1E6B; 0x0077; 0x1E87; 0x0078; 0x1E8B; 0x0079; 0x1E8F; 0x007A; 0x017C; 0x015A; 0x1E64; 0x015B; 0x1E65; 0x0160; 0x1E66; 0x0161; 0x1E67; 0x017F; 0x1E9B; 0x1E62; 0x1E68; 0x1E63; 0x1E69; 0x0041; 0x00C4; 0x0045; 0x00CB; 0x0048; 0x1E26; 0x0049; 0x00CF; 0x004F; 0x00D6; 0x0055; 0x00DC; 0x0057; 0x1E84; 0x0058; 0x1E8C; 0x0059; 0x0178; 0x0061; 0x00E4; 0x0065; 0x00EB; 0x0068; 0x1E27; 0x0069; 0x00EF; 0x006F; 0x00F6; 0x0074; 0x1E97; 0x0075; 0x00FC; 0x0077; 0x1E85; 0x0078; 0x1E8D; 0x0079; 0x00FF; 0x00D5; 0x1E4E; 0x00F5; 0x1E4F; 0x016A; 0x1E7A; 0x016B; 0x1E7B; 0x0399; 0x03AA; 0x03A5; 0x03AB; 0x03B9; 0x03CA; 0x03C5; 0x03CB; 0x03D2; 0x03D4; 0x0406; 0x0407; 0x0410; 0x04D2; 0x0415; 0x0401; 0x0416; 0x04DC; 0x0417; 0x04DE; 0x0418; 0x04E4; 0x041E; 0x04E6; 0x0423; 0x04F0; 0x0427; 0x04F4; 0x042B; 0x04F8; 0x042D; 0x04EC; 0x0430; 0x04D3; 0x0435; 0x0451; 0x0436; 0x04DD; 0x0437; 0x04DF; 0x0438; 0x04E5; 0x043E; 0x04E7; 0x0443; 0x04F1; 0x0447; 0x04F5; 0x044B; 0x04F9; 0x044D; 0x04ED; 0x0456; 0x0457; 0x04D8; 0x04DA; 0x04D9; 0x04DB; 0x04E8; 0x04EA; 0x04E9; 0x04EB; 0x0041; 0x1EA2; 0x0045; 0x1EBA; 0x0049; 0x1EC8; 0x004F; 0x1ECE; 0x0055; 0x1EE6; 0x0059; 0x1EF6; 0x0061; 0x1EA3; 0x0065; 0x1EBB; 0x0069; 0x1EC9; 0x006F; 0x1ECF; 0x0075; 0x1EE7; 0x0079; 0x1EF7; 0x00C2; 0x1EA8; 0x00CA; 0x1EC2; 0x00D4; 0x1ED4; 0x00E2; 0x1EA9; 0x00EA; 0x1EC3; 0x00F4; 0x1ED5; 0x0102; 0x1EB2; 0x0103; 0x1EB3; 0x01A0; 0x1EDE; 0x01A1; 0x1EDF; 0x01AF; 0x1EEC; 0x01B0; 0x1EED; 0x0041; 0x00C5; 0x0055; 0x016E; 0x0061; 0x00E5; 0x0075; 0x016F; 0x0077; 0x1E98; 0x0079; 0x1E99; 0x004F; 0x0150; 0x0055; 0x0170; 0x006F; 0x0151; 0x0075; 0x0171; 0x0423; 0x04F2; 0x0443; 0x04F3; 0x0041; 0x01CD; 0x0043; 0x010C; 0x0044; 0x010E; 0x0045; 0x011A; 0x0047; 0x01E6; 0x0048; 0x021E; 0x0049; 0x01CF; 0x004B; 0x01E8; 0x004C; 0x013D; 0x004E; 0x0147; 0x004F; 0x01D1; 0x0052; 0x0158; 0x0053; 0x0160; 0x0054; 0x0164; 0x0055; 0x01D3; 0x005A; 0x017D; 0x0061; 0x01CE; 0x0063; 0x010D; 0x0064; 0x010F; 0x0065; 0x011B; 0x0067; 0x01E7; 0x0068; 0x021F; 0x0069; 0x01D0; 0x006A; 0x01F0; 0x006B; 0x01E9; 0x006C; 0x013E; 0x006E; 0x0148; 0x006F; 0x01D2; 0x0072; 0x0159; 0x0073; 0x0161; 0x0074; 0x0165; 0x0075; 0x01D4; 0x007A; 0x017E; 0x00DC; 0x01D9; 0x00FC; 0x01DA; 0x01B7; 0x01EE; 0x0292; 0x01EF; 0x0041; 0x0200; 0x0045; 0x0204; 0x0049; 0x0208; 0x004F; 0x020C; 0x0052; 0x0210; 0x0055; 0x0214; 0x0061; 0x0201; 0x0065; 0x0205; 0x0069; 0x0209; 0x006F; 0x020D; 0x0072; 0x0211; 0x0075; 0x0215; 0x0474; 0x0476; 0x0475; 0x0477; 0x0041; 0x0202; 0x0045; 0x0206; 0x0049; 0x020A; 0x004F; 0x020E; 0x0052; 0x0212; 0x0055; 0x0216; 0x0061; 0x0203; 0x0065; 0x0207; 0x0069; 0x020B; 0x006F; 0x020F; 0x0072; 0x0213; 0x0075; 0x0217; 0x0391; 0x1F08; 0x0395; 0x1F18; 0x0397; 0x1F28; 0x0399; 0x1F38; 0x039F; 0x1F48; 0x03A9; 0x1F68; 0x03B1; 0x1F00; 0x03B5; 0x1F10; 0x03B7; 0x1F20; 0x03B9; 0x1F30; 0x03BF; 0x1F40; 0x03C1; 0x1FE4; 0x03C5; 0x1F50; 0x03C9; 0x1F60; 0x0391; 0x1F09; 0x0395; 0x1F19; 0x0397; 0x1F29; 0x0399; 0x1F39; 0x039F; 0x1F49; 0x03A1; 0x1FEC; 0x03A5; 0x1F59; 0x03A9; 0x1F69; 0x03B1; 0x1F01; 0x03B5; 0x1F11; 0x03B7; 0x1F21; 0x03B9; 0x1F31; 0x03BF; 0x1F41; 0x03C1; 0x1FE5; 0x03C5; 0x1F51; 0x03C9; 0x1F61; 0x004F; 0x01A0; 0x0055; 0x01AF; 0x006F; 0x01A1; 0x0075; 0x01B0; 0x0041; 0x1EA0; 0x0042; 0x1E04; 0x0044; 0x1E0C; 0x0045; 0x1EB8; 0x0048; 0x1E24; 0x0049; 0x1ECA; 0x004B; 0x1E32; 0x004C; 0x1E36; 0x004D; 0x1E42; 0x004E; 0x1E46; 0x004F; 0x1ECC; 0x0052; 0x1E5A; 0x0053; 0x1E62; 0x0054; 0x1E6C; 0x0055; 0x1EE4; 0x0056; 0x1E7E; 0x0057; 0x1E88; 0x0059; 0x1EF4; 0x005A; 0x1E92; 0x0061; 0x1EA1; 0x0062; 0x1E05; 0x0064; 0x1E0D; 0x0065; 0x1EB9; 0x0068; 0x1E25; 0x0069; 0x1ECB; 0x006B; 0x1E33; 0x006C; 0x1E37; 0x006D; 0x1E43; 0x006E; 0x1E47; 0x006F; 0x1ECD; 0x0072; 0x1E5B; 0x0073; 0x1E63; 0x0074; 0x1E6D; 0x0075; 0x1EE5; 0x0076; 0x1E7F; 0x0077; 0x1E89; 0x0079; 0x1EF5; 0x007A; 0x1E93; 0x01A0; 0x1EE2; 0x01A1; 0x1EE3; 0x01AF; 0x1EF0; 0x01B0; 0x1EF1; 0x0055; 0x1E72; 0x0075; 0x1E73; 0x0041; 0x1E00; 0x0061; 0x1E01; 0x0053; 0x0218; 0x0054; 0x021A; 0x0073; 0x0219; 0x0074; 0x021B; 0x0043; 0x00C7; 0x0044; 0x1E10; 0x0045; 0x0228; 0x0047; 0x0122; 0x0048; 0x1E28; 0x004B; 0x0136; 0x004C; 0x013B; 0x004E; 0x0145; 0x0052; 0x0156; 0x0053; 0x015E; 0x0054; 0x0162; 0x0063; 0x00E7; 0x0064; 0x1E11; 0x0065; 0x0229; 0x0067; 0x0123; 0x0068; 0x1E29; 0x006B; 0x0137; 0x006C; 0x013C; 0x006E; 0x0146; 0x0072; 0x0157; 0x0073; 0x015F; 0x0074; 0x0163; 0x0041; 0x0104; 0x0045; 0x0118; 0x0049; 0x012E; 0x004F; 0x01EA; 0x0055; 0x0172; 0x0061; 0x0105; 0x0065; 0x0119; 0x0069; 0x012F; 0x006F; 0x01EB; 0x0075; 0x0173; 0x0044; 0x1E12; 0x0045; 0x1E18; 0x004C; 0x1E3C; 0x004E; 0x1E4A; 0x0054; 0x1E70; 0x0055; 0x1E76; 0x0064; 0x1E13; 0x0065; 0x1E19; 0x006C; 0x1E3D; 0x006E; 0x1E4B; 0x0074; 0x1E71; 0x0075; 0x1E77; 0x0048; 0x1E2A; 0x0068; 0x1E2B; 0x0045; 0x1E1A; 0x0049; 0x1E2C; 0x0055; 0x1E74; 0x0065; 0x1E1B; 0x0069; 0x1E2D; 0x0075; 0x1E75; 0x0042; 0x1E06; 0x0044; 0x1E0E; 0x004B; 0x1E34; 0x004C; 0x1E3A; 0x004E; 0x1E48; 0x0052; 0x1E5E; 0x0054; 0x1E6E; 0x005A; 0x1E94; 0x0062; 0x1E07; 0x0064; 0x1E0F; 0x0068; 0x1E96; 0x006B; 0x1E35; 0x006C; 0x1E3B; 0x006E; 0x1E49; 0x0072; 0x1E5F; 0x0074; 0x1E6F; 0x007A; 0x1E95; 0x003C; 0x226E; 0x003D; 0x2260; 0x003E; 0x226F; 0x2190; 0x219A; 0x2192; 0x219B; 0x2194; 0x21AE; 0x21D0; 0x21CD; 0x21D2; 0x21CF; 0x21D4; 0x21CE; 0x2203; 0x2204; 0x2208; 0x2209; 0x220B; 0x220C; 0x2223; 0x2224; 0x2225; 0x2226; 0x223C; 0x2241; 0x2243; 0x2244; 0x2245; 0x2247; 0x2248; 0x2249; 0x224D; 0x226D; 0x2261; 0x2262; 0x2264; 0x2270; 0x2265; 0x2271; 0x2272; 0x2274; 0x2273; 0x2275; 0x2276; 0x2278; 0x2277; 0x2279; 0x227A; 0x2280; 0x227B; 0x2281; 0x227C; 0x22E0; 0x227D; 0x22E1; 0x2282; 0x2284; 0x2283; 0x2285; 0x2286; 0x2288; 0x2287; 0x2289; 0x2291; 0x22E2; 0x2292; 0x22E3; 0x22A2; 0x22AC; 0x22A8; 0x22AD; 0x22A9; 0x22AE; 0x22AB; 0x22AF; 0x22B2; 0x22EA; 0x22B3; 0x22EB; 0x22B4; 0x22EC; 0x22B5; 0x22ED; 0x00A8; 0x1FC1; 0x03B1; 0x1FB6; 0x03B7; 0x1FC6; 0x03B9; 0x1FD6; 0x03C5; 0x1FE6; 0x03C9; 0x1FF6; 0x03CA; 0x1FD7; 0x03CB; 0x1FE7; 0x1F00; 0x1F06; 0x1F01; 0x1F07; 0x1F08; 0x1F0E; 0x1F09; 0x1F0F; 0x1F20; 0x1F26; 0x1F21; 0x1F27; 0x1F28; 0x1F2E; 0x1F29; 0x1F2F; 0x1F30; 0x1F36; 0x1F31; 0x1F37; 0x1F38; 0x1F3E; 0x1F39; 0x1F3F; 0x1F50; 0x1F56; 0x1F51; 0x1F57; 0x1F59; 0x1F5F; 0x1F60; 0x1F66; 0x1F61; 0x1F67; 0x1F68; 0x1F6E; 0x1F69; 0x1F6F; 0x1FBF; 0x1FCF; 0x1FFE; 0x1FDF; 0x0391; 0x1FBC; 0x0397; 0x1FCC; 0x03A9; 0x1FFC; 0x03AC; 0x1FB4; 0x03AE; 0x1FC4; 0x03B1; 0x1FB3; 0x03B7; 0x1FC3; 0x03C9; 0x1FF3; 0x03CE; 0x1FF4; 0x1F00; 0x1F80; 0x1F01; 0x1F81; 0x1F02; 0x1F82; 0x1F03; 0x1F83; 0x1F04; 0x1F84; 0x1F05; 0x1F85; 0x1F06; 0x1F86; 0x1F07; 0x1F87; 0x1F08; 0x1F88; 0x1F09; 0x1F89; 0x1F0A; 0x1F8A; 0x1F0B; 0x1F8B; 0x1F0C; 0x1F8C; 0x1F0D; 0x1F8D; 0x1F0E; 0x1F8E; 0x1F0F; 0x1F8F; 0x1F20; 0x1F90; 0x1F21; 0x1F91; 0x1F22; 0x1F92; 0x1F23; 0x1F93; 0x1F24; 0x1F94; 0x1F25; 0x1F95; 0x1F26; 0x1F96; 0x1F27; 0x1F97; 0x1F28; 0x1F98; 0x1F29; 0x1F99; 0x1F2A; 0x1F9A; 0x1F2B; 0x1F9B; 0x1F2C; 0x1F9C; 0x1F2D; 0x1F9D; 0x1F2E; 0x1F9E; 0x1F2F; 0x1F9F; 0x1F60; 0x1FA0; 0x1F61; 0x1FA1; 0x1F62; 0x1FA2; 0x1F63; 0x1FA3; 0x1F64; 0x1FA4; 0x1F65; 0x1FA5; 0x1F66; 0x1FA6; 0x1F67; 0x1FA7; 0x1F68; 0x1FA8; 0x1F69; 0x1FA9; 0x1F6A; 0x1FAA; 0x1F6B; 0x1FAB; 0x1F6C; 0x1FAC; 0x1F6D; 0x1FAD; 0x1F6E; 0x1FAE; 0x1F6F; 0x1FAF; 0x1F70; 0x1FB2; 0x1F74; 0x1FC2; 0x1F7C; 0x1FF2; 0x1FB6; 0x1FB7; 0x1FC6; 0x1FC7; 0x1FF6; 0x1FF7; 0x0627; 0x0622; 0x0627; 0x0623; 0x0648; 0x0624; 0x064A; 0x0626; 0x06C1; 0x06C2; 0x06D2; 0x06D3; 0x06D5; 0x06C0; 0x0627; 0x0625; 0x0928; 0x0929; 0x0930; 0x0931; 0x0933; 0x0934; 0x09C7; 0x09CB; 0x09C7; 0x09CC; 0x0B47; 0x0B4B; 0x0B47; 0x0B48; 0x0B47; 0x0B4C; 0x0BC6; 0x0BCA; 0x0BC7; 0x0BCB; 0x0B92; 0x0B94; 0x0BC6; 0x0BCC; 0x0C46; 0x0C48; 0x0CC6; 0x0CCA; 0x0CBF; 0x0CC0; 0x0CC6; 0x0CC7; 0x0CCA; 0x0CCB; 0x0CC6; 0x0CC8; 0x0D46; 0x0D4A; 0x0D47; 0x0D4B; 0x0D46; 0x0D4C; 0x0DD9; 0x0DDA; 0x0DDC; 0x0DDD; 0x0DD9; 0x0DDC; 0x0DD9; 0x0DDE; 0x1025; 0x1026; 0x3046; 0x3094; 0x304B; 0x304C; 0x304D; 0x304E; 0x304F; 0x3050; 0x3051; 0x3052; 0x3053; 0x3054; 0x3055; 0x3056; 0x3057; 0x3058; 0x3059; 0x305A; 0x305B; 0x305C; 0x305D; 0x305E; 0x305F; 0x3060; 0x3061; 0x3062; 0x3064; 0x3065; 0x3066; 0x3067; 0x3068; 0x3069; 0x306F; 0x3070; 0x3072; 0x3073; 0x3075; 0x3076; 0x3078; 0x3079; 0x307B; 0x307C; 0x309D; 0x309E; 0x30A6; 0x30F4; 0x30AB; 0x30AC; 0x30AD; 0x30AE; 0x30AF; 0x30B0; 0x30B1; 0x30B2; 0x30B3; 0x30B4; 0x30B5; 0x30B6; 0x30B7; 0x30B8; 0x30B9; 0x30BA; 0x30BB; 0x30BC; 0x30BD; 0x30BE; 0x30BF; 0x30C0; 0x30C1; 0x30C2; 0x30C4; 0x30C5; 0x30C6; 0x30C7; 0x30C8; 0x30C9; 0x30CF; 0x30D0; 0x30D2; 0x30D3; 0x30D5; 0x30D6; 0x30D8; 0x30D9; 0x30DB; 0x30DC; 0x30EF; 0x30F7; 0x30F0; 0x30F8; 0x30F1; 0x30F9; 0x30F2; 0x30FA; 0x30FD; 0x30FE; 0x306F; 0x3071; 0x3072; 0x3074; 0x3075; 0x3077; 0x3078; 0x307A; 0x307B; 0x307D; 0x30CF; 0x30D1; 0x30D2; 0x30D4; 0x30D5; 0x30D7; 0x30D8; 0x30DA; 0x30DB; 0x30DD |] let uniCharCombiningBitmap = "\ \x00\x00\x00\x01\x02\x03\x04\x05\ \x00\x06\x07\x08\x09\x0A\x0B\x0C\ \x0D\x14\x00\x00\x00\x00\x00\x0E\ \x0F\x00\x00\x00\x00\x00\x00\x00\ \x10\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x11\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x12\x00\x00\x13\x00\ \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ \xFF\xFF\x00\x00\xFF\xFF\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x78\x03\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\xFE\xFF\xFB\xFF\xFF\xBB\ \x16\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\xF8\x3F\x00\x00\x00\x01\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\xC0\xFF\x9F\x3D\x00\x00\ \x00\x00\x02\x00\x00\x00\xFF\xFF\ \xFF\x07\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\xC0\xFF\x01\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x0E\x00\x00\x00\x00\x00\x00\xD0\ \xFF\x3F\x1E\x00\x0C\x00\x00\x00\ \x0E\x00\x00\x00\x00\x00\x00\xD0\ \x9F\x39\x80\x00\x0C\x00\x00\x00\ \x04\x00\x00\x00\x00\x00\x00\xD0\ \x87\x39\x00\x00\x00\x00\x03\x00\ \x0E\x00\x00\x00\x00\x00\x00\xD0\ \xBF\x3B\x00\x00\x00\x00\x00\x00\ \x0E\x00\x00\x00\x00\x00\x00\xD0\ \x8F\x39\xC0\x00\x00\x00\x00\x00\ \x04\x00\x00\x00\x00\x00\x00\xC0\ \xC7\x3D\x80\x00\x00\x00\x00\x00\ \x0E\x00\x00\x00\x00\x00\x00\xC0\ \xDF\x3D\x60\x00\x00\x00\x00\x00\ \x0C\x00\x00\x00\x00\x00\x00\xC0\ \xDF\x3D\x60\x00\x00\x00\x00\x00\ \x0C\x00\x00\x00\x00\x00\x00\xC0\ \xCF\x3D\x80\x00\x00\x00\x00\x00\ \x0C\x00\x00\x00\x00\x00\x00\x00\ \x00\x84\x5F\xFF\x00\x00\x0C\x00\ \x00\x00\x00\x00\x00\x00\xF2\x07\ \x80\x7F\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\xF2\x1B\ \x00\x3F\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x03\x00\x00\xA0\xC2\ \x00\x00\x00\x00\x00\x00\xFE\xFF\ \xDF\x00\xFF\xFE\xFF\xFF\xFF\x1F\ \x40\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\xF0\xC7\x03\ \x00\x00\xC0\x03\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x1C\x00\x00\x00\x1C\x00\ \x00\x00\x0C\x00\x00\x00\x0C\x00\ \x00\x00\x00\x00\x00\x00\xF0\xFF\ \xFF\xFF\x0F\x00\x00\x00\x00\x00\ \x00\x38\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x02\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\xFF\xFF\xFF\x07\x00\x00\ \x00\x00\x00\x00\x00\xFC\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x06\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x40\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \xFF\xFF\x00\x00\x0F\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\xFE\xFF\x3F\x00\ \x00\x00\x00\x00\x00\xFF\xFF\xFF\ \x07\x00\x00\x00\x00\x00\x00\x00" (****) let bitmap_test base bitmap character = character >= base && character < 0x10000 && (let value = get bitmap ((character lsr 8) land 0xFF) in value = 0xFF || (value <> 0 && get bitmap ((value - 1) * 32 + 256 + (character land 0xFF) / 8) land (1 lsl (character land 7)) <> 0)) let unicode_combinable character = bitmap_test 0x0300 uniCharCombiningBitmap character let rec find_rec t i j v = if i + 1 = j then begin if t.(i * 2) = v then t.(i * 2 + 1) else 0 end else begin let k = (i + j) / 2 in if v < t.(k * 2) then find_rec t i k v else find_rec t k j v end let find t i n v = let j = i + n in if v < t.(2 * i) || v > t.(2 * (j - 1)) then 0 else find_rec t i j v let uniCharPrecompSourceTableLen = Array.length uniCharPrecompSourceTable / 2 let combine v v' = if v' >= hangul_vbase && v' < hangul_tbase + hangul_tcount then begin if v' < hangul_vbase + hangul_vcount && v >= hangul_lbase && v < hangul_lbase + hangul_lcount then hangul_sbase + ((v - hangul_lbase) * (hangul_vcount * hangul_tcount)) + ((v' - hangul_vbase) * hangul_tcount) else if v' > hangul_tbase && v >= hangul_sbase && v < hangul_sbase + hangul_scount then if (v - hangul_sbase) mod hangul_tcount <> 0 then 0 else v + v' - hangul_tbase else 0 end else begin let k = find uniCharPrecompSourceTable 0 uniCharPrecompSourceTableLen v' in if k = 0 then 0 else find uniCharBMPPrecompDestinationTable (k land 0xFFFF) (k lsr 16) v end (****) let rec scan d s i l = if i < l then begin let c = get s i in if c < 0x80 then cont d s i l (i + 1) c else if c < 0xE0 then begin (* 80 - 7FF *) if c < 0xc2 || i + 1 >= l then fail () else let c1 = get s (i + 1) in if c1 land 0xc0 <> 0x80 then fail () else let v = c lsl 6 + c1 - 0x3080 in cont d s i l (i + 2) v end else if c < 0xF0 then begin (* 800 - FFFF *) if i + 2 >= l then fail () else let c1 = get s (i + 1) in let c2 = get s (i + 2) in if (c1 lor c2) land 0xc0 <> 0x80 then fail () else let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in if v < 0x800 then fail () else cont d s i l (i + 3) v end else begin (* 10000 - 10FFFF *) if i + 3 >= l then fail () else let c1 = get s (i + 1) in let c2 = get s (i + 2) in let c3 = get s (i + 3) in if (c1 lor c2 lor c3) land 0xc0 <> 0x80 then fail () else let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in if v < 0x10000 || v > 0x10ffff then fail () else cont d s i l (i + 4) v end end else begin let (i1, i2) = d in String.blit s i2 s i1 (l - i2); String.sub s 0 (i1 + l - i2) end and cont d s i l j v' = if unicode_combinable v' then begin let i = prev_char s i in let (v, _) = decode_char s i l in let v'' = combine v v' in if v'' = 0 then scan d s j l else begin let (i1, i2) = d in String.blit s i2 s i1 (i - i2); let i1 = i1 + i - i2 in let (v'', i) = compose_rec s j l v'' in let i1 = encode_char s i1 l v'' in scan (i1, i) s i l end end else scan d s j l and compose_rec s i l v = try let (v', j) = decode_char s i l in if unicode_combinable v' then begin let v'' = combine v v' in if v'' = 0 then (v, i) else compose_rec s j l v'' end else (v, i) with Invalid -> (v, i) let compose s = try scan (0, 0) (String.copy s) 0 (String.length s) with Invalid -> s (***) let set_2 s i v = set s i (v land 0xff); set s (i + 1) (v lsr 8) let get_2 s i = (get s (i + 1)) lsl 8 + get s i let rec scan s' j s i l = if i < l then begin let c = get s i in if c < 0x80 then cont s' j s (i + 1) l c else if c < 0xE0 then begin (* 80 - 7FF *) if c < 0xc2 || i + 1 >= l then fail () else let c1 = get s (i + 1) in if c1 land 0xc0 <> 0x80 then fail () else let v = c lsl 6 + c1 - 0x3080 in cont s' j s (i + 2) l v end else if c < 0xF0 then begin (* 800 - FFFF *) if i + 2 >= l then fail () else let c1 = get s (i + 1) in let c2 = get s (i + 2) in if (c1 lor c2) land 0xc0 <> 0x80 then fail () else let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in if v < 0x800 then fail () else cont s' j s (i + 3) l v end else begin (* 10000 - 10FFFF *) if i + 3 >= l then fail () else let c1 = get s (i + 1) in let c2 = get s (i + 2) in let c3 = get s (i + 3) in if (c1 lor c2 lor c3) land 0xc0 <> 0x80 then fail () else let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in if v < 0x10000 || v > 0x10ffff then fail () else cont s' j s (i + 4) l v end end else String.sub s' 0 j and cont s' j s i l v = if v < 0x10000 then begin set_2 s' j v; scan s' (j + 2) s i l end else begin let v = v - 0x10000 in set_2 s' j (v lsr 10 + 0xD800); set_2 s' (j + 2) (v land 0x3FF + 0xDC00); scan s' (j + 4) s i l end let to_utf_16 s = let l = String.length s in let s' = Bytes.create (2 * l) in scan s' 0 s 0 l (****) let rec scan s' i' l' s i l = if i + 2 <= l then begin let v = get_2 s i in if v < 0xD800 || v > 0xDFFF then let i' = encode_char s' i' l' v in scan s' i' l' s (i + 2) l else if v >= 0xdc00 || i + 4 > l then fail () else begin let v' = get_2 s (i + 2) in if v' < 0xDC00 || v' > 0XDFFF then fail () else let i' = encode_char s' i' l' ((v - 0xD800) lsl 10 + (v' - 0xDC00) + 0x10000) in scan s' i' l' s (i + 4) l end end else if i < l then fail () else String.sub s' 0 i' let from_utf_16 s = let l = String.length s in let l' = 3 * l / 2 in let s' = Bytes.create l' in scan s' 0 l' s 0 l (* from_utf16 2 bytes -> 3 bytes 4 bytes -> 4 bytes *)