summary | refs | log | tree | commit | diff | stats
path: root/scripts/export-data.js
diff options
context:
space:
mode:
author: Mathias Bynens <mathias@qiwi.be> 2013-08-07 11:02:12 +0200
committer: Mathias Bynens <mathias@qiwi.be> 2013-08-07 11:48:20 +0200
commit: 2a82b01b18ce383e5a0c0ec9bd58da83d2e8dab1 (patch)
tree: 0ac1d8e5c8fb5528f53fb938195cf175c944aef3 /scripts/export-data.js
parent: ba9b58488a5a0c5997922124ffefb078c1c6159f (diff)
download: he-2a82b01b18ce383e5a0c0ec9bd58da83d2e8dab1.zip
he-2a82b01b18ce383e5a0c0ec9bd58da83d2e8dab1.tar.gz
he-2a82b01b18ce383e5a0c0ec9bd58da83d2e8dab1.tar.bz2
encode: Add `encodeEverything` option
Ref. #12.
Diffstat (limited to 'scripts/export-data.js')
-rw-r--r-- scripts/export-data.js 51
1 files changed, 44 insertions, 7 deletions
diff --git a/scripts/export-data.js b/scripts/export-data.js
index 7f5f111..0d1686b 100644
--- a/scripts/export-data.js
+++ b/scripts/export-data.js
@@ -1,5 +1,5 @@
var fs = require('fs');
-var stringEscape = require('jsesc');
+var jsesc = require('jsesc');
var regenerate = require('regenerate');
var object = {};
@@ -14,24 +14,61 @@ var readJSON = function(fileName) {
if (isArray(object)) {
return object;
}
- return stringEscape(object, {
+ return jsesc(object, {
'compact': true,
'quotes': 'single'
});
};
+var joinStrings = function(a, b) {
+ if (a && b) {
+ return a + '|' + b;
+ }
+ return a + b;
+};
+
+var loneCodePoints = readJSON('encode-lone-code-points');
+var arrayEncodeMultipleSymbols = readJSON('encode-paired-symbols');
+var arrayEncodeMultipleSymbolsASCII = arrayEncodeMultipleSymbols
+ .filter(function(string) {
+ return /^[\0-\x7F]+$/.test(string);
+ });
+
+var encodeSingleSymbolsASCII = regenerate(loneCodePoints)
+ .removeRange(0x7F + 1, 0x10FFFF).toString();
+var encodeSingleSymbolsNonASCII = regenerate(loneCodePoints)
+ .removeRange(0x00, 0x7F).toString();
+var encodeMultipleSymbolsASCII = jsesc(
+ arrayEncodeMultipleSymbolsASCII.join('|')
+);
+var encodeMultipleSymbolsNonASCII = jsesc(
+ regenerate.difference(
+ arrayEncodeMultipleSymbols,
+ arrayEncodeMultipleSymbolsASCII
+ ).join('|')
+);
+var encodeASCII = joinStrings(
+ encodeMultipleSymbolsASCII,
+ encodeSingleSymbolsASCII
+);
+var encodeNonASCII = joinStrings(
+ encodeMultipleSymbolsNonASCII,
+ encodeSingleSymbolsNonASCII
+);
+
module.exports = {
'encodeMap': readJSON('encode-map'),
- 'encodeSingleSymbols': regenerate.fromCodePoints(readJSON('encode-lone-code-points')),
- 'encodeMultipleSymbols': stringEscape(readJSON('encode-paired-symbols').join('|')),
+ 'encodeASCII': encodeASCII, // not used
+ 'encodeNonASCII': encodeNonASCII,
'decodeOverrides': readJSON('decode-map-overrides'),
'decodeMap': readJSON('decode-map'),
'decodeMapLegacy': readJSON('decode-map-legacy'),
- 'astralSymbols': regenerate.fromCodePointRange(0x010000, 0x10FFFF),
- 'invalidCodePoints': '[' + readJSON('invalid-code-points').join(',') + ']',
+ 'astralSymbol': regenerate.fromCodePointRange(0x010000, 0x10FFFF),
+ 'invalidCodePoints': jsesc(readJSON('invalid-code-points')),
'regexDecimalEscapeSource': '&#([0-9]+)(;?)',
'regexHexadecimalEscapeSource': '&#[xX]([a-fA-F0-9]+)(;?)',
'regexNamedReferenceSource': '&([0-9a-zA-Z]+);',
- 'regexLegacyReferenceSource': '&(' + readJSON('decode-legacy-named-references').join('|') + ')([=a-zA-Z0-9])?',
+ 'regexLegacyReferenceSource': '&(' +
+ readJSON('decode-legacy-named-references').join('|') + ')([=a-zA-Z0-9])?',
'version': JSON.parse(fs.readFileSync('package.json', 'utf-8')).version
};