diff options
author | Mathias Bynens <mathias@qiwi.be> | 2013-08-07 11:02:12 +0200 |
---|---|---|
committer | Mathias Bynens <mathias@qiwi.be> | 2013-08-07 11:48:20 +0200 |
commit | 2a82b01b18ce383e5a0c0ec9bd58da83d2e8dab1 (patch) | |
tree | 0ac1d8e5c8fb5528f53fb938195cf175c944aef3 /scripts/export-data.js | |
parent | ba9b58488a5a0c5997922124ffefb078c1c6159f (diff) | |
download | he-2a82b01b18ce383e5a0c0ec9bd58da83d2e8dab1.zip he-2a82b01b18ce383e5a0c0ec9bd58da83d2e8dab1.tar.gz he-2a82b01b18ce383e5a0c0ec9bd58da83d2e8dab1.tar.bz2 |
encode: Add `encodeEverything` option
Ref. #12.
Diffstat (limited to 'scripts/export-data.js')
-rw-r--r-- | scripts/export-data.js | 51 |
1 files changed, 44 insertions, 7 deletions
diff --git a/scripts/export-data.js b/scripts/export-data.js
index 7f5f111..0d1686b 100644
--- a/scripts/export-data.js
+++ b/scripts/export-data.js
@@ -1,5 +1,5 @@
 var fs = require('fs');
-var stringEscape = require('jsesc');
+var jsesc = require('jsesc');
 var regenerate = require('regenerate');
 
 var object = {};
@@ -14,24 +14,61 @@ var readJSON = function(fileName) {
 	if (isArray(object)) {
 		return object;
 	}
-	return stringEscape(object, {
+	return jsesc(object, {
 		'compact': true,
 		'quotes': 'single'
 	});
 };
 
+var joinStrings = function(a, b) {
+	if (a && b) {
+		return a + '|' + b;
+	}
+	return a + b;
+};
+
+var loneCodePoints = readJSON('encode-lone-code-points');
+var arrayEncodeMultipleSymbols = readJSON('encode-paired-symbols');
+var arrayEncodeMultipleSymbolsASCII = arrayEncodeMultipleSymbols
+	.filter(function(string) {
+		return /^[\0-\x7F]+$/.test(string);
+	});
+
+var encodeSingleSymbolsASCII = regenerate(loneCodePoints)
+	.removeRange(0x7F + 1, 0x10FFFF).toString();
+var encodeSingleSymbolsNonASCII = regenerate(loneCodePoints)
+	.removeRange(0x00, 0x7F).toString();
+var encodeMultipleSymbolsASCII = jsesc(
+	arrayEncodeMultipleSymbolsASCII.join('|')
+);
+var encodeMultipleSymbolsNonASCII = jsesc(
+	regenerate.difference(
+		arrayEncodeMultipleSymbols,
+		arrayEncodeMultipleSymbolsASCII
+	).join('|')
+);
+var encodeASCII = joinStrings(
+	encodeMultipleSymbolsASCII,
+	encodeSingleSymbolsASCII
+);
+var encodeNonASCII = joinStrings(
+	encodeMultipleSymbolsNonASCII,
+	encodeSingleSymbolsNonASCII
+);
+
 module.exports = {
 	'encodeMap': readJSON('encode-map'),
-	'encodeSingleSymbols': regenerate.fromCodePoints(readJSON('encode-lone-code-points')),
-	'encodeMultipleSymbols': stringEscape(readJSON('encode-paired-symbols').join('|')),
+	'encodeASCII': encodeASCII, // not used
+	'encodeNonASCII': encodeNonASCII,
 	'decodeOverrides': readJSON('decode-map-overrides'),
 	'decodeMap': readJSON('decode-map'),
 	'decodeMapLegacy': readJSON('decode-map-legacy'),
-	'astralSymbols': regenerate.fromCodePointRange(0x010000, 0x10FFFF),
-	'invalidCodePoints': '[' + readJSON('invalid-code-points').join(',') + ']',
+	'astralSymbol': regenerate.fromCodePointRange(0x010000, 0x10FFFF),
+	'invalidCodePoints': jsesc(readJSON('invalid-code-points')),
 	'regexDecimalEscapeSource': '&#([0-9]+)(;?)',
 	'regexHexadecimalEscapeSource': '&#[xX]([a-fA-F0-9]+)(;?)',
 	'regexNamedReferenceSource': '&([0-9a-zA-Z]+);',
-	'regexLegacyReferenceSource': '&(' + readJSON('decode-legacy-named-references').join('|') + ')([=a-zA-Z0-9])?',
+	'regexLegacyReferenceSource': '&(' +
+		readJSON('decode-legacy-named-references').join('|') + ')([=a-zA-Z0-9])?',
 	'version': JSON.parse(fs.readFileSync('package.json', 'utf-8')).version
 };