summaryrefslogtreecommitdiffstats
path: root/scripts/export-data.js
diff options
context:
space:
mode:
authorMathias Bynens <mathias@qiwi.be>2014-05-24 16:00:43 +0200
committerMathias Bynens <mathias@qiwi.be>2014-05-24 16:00:43 +0200
commit31bc08bb9af3a0d51c5326ec5e00fa4fef875d05 (patch)
tree950bdf0891f3f8bf7bd7986b6437d4d4730ce471 /scripts/export-data.js
parentf397e8ffa59e45763dd88cab0f7468ef29ecfdcc (diff)
downloadhe-31bc08bb9af3a0d51c5326ec5e00fa4fef875d05.zip
he-31bc08bb9af3a0d51c5326ec5e00fa4fef875d05.tar.gz
he-31bc08bb9af3a0d51c5326ec5e00fa4fef875d05.tar.bz2
Clean up scripts
Diffstat (limited to 'scripts/export-data.js')
-rw-r--r--scripts/export-data.js47
1 files changed, 20 insertions, 27 deletions
diff --git a/scripts/export-data.js b/scripts/export-data.js
index d9f374a..ef1ca05 100644
--- a/scripts/export-data.js
+++ b/scripts/export-data.js
@@ -4,12 +4,8 @@ var regenerate = require('regenerate');
var difference = require('lodash.difference');
require('string.fromcodepoint');
-var readJSON = function(fileName) {
- var contents = fs.readFileSync('data/' + fileName + '.json', 'utf-8');
- var object = JSON.parse(contents);
- if (Array.isArray(object)) {
- return object;
- }
+var formatJSON = function(fileName) {
+ var object = require('../data/' + fileName + '.json');
return jsesc(object, {
'compact': true,
'quotes': 'single'
@@ -23,8 +19,8 @@ var joinStrings = function(a, b) {
return a + b;
};
-var loneCodePoints = readJSON('encode-lone-code-points');
-var arrayEncodeMultipleSymbols = readJSON('encode-paired-symbols');
+var loneCodePoints = require('../data/encode-lone-code-points.json');
+var arrayEncodeMultipleSymbols = require('../data/encode-paired-symbols.json');
var arrayEncodeMultipleSymbolsASCII = arrayEncodeMultipleSymbols
.filter(function(string) {
return /^[\0-\x7F]+$/.test(string);
@@ -43,48 +39,45 @@ var encodeMultipleSymbolsNonASCII = jsesc(
arrayEncodeMultipleSymbolsASCII
).join('|')
);
-var encodeASCII = joinStrings(
+var regexEncodeASCII = joinStrings(
encodeMultipleSymbolsASCII,
encodeSingleSymbolsASCII
);
-var encodeNonASCII = joinStrings(
+var regexEncodeNonASCII = joinStrings(
encodeMultipleSymbolsNonASCII,
encodeSingleSymbolsNonASCII
);
-var invalidRawCodePoints = readJSON('invalid-raw-code-points');
+var invalidRawCodePoints = require('../data/invalid-raw-code-points.json');
// U+0000 is a parse error in the Data state (which is the state where `he`’s
// input and output are supposed to end up), so add it to the set of invalid
// raw code points. http://whatwg.org/html/tokenization.html#data-state
invalidRawCodePoints.unshift(0x0000);
var overrides = Object.keys(
- JSON.parse(fs.readFileSync('data/decode-map-overrides.json', 'utf-8'))
-).map(function(codePoint) {
- return Number(codePoint);
-});
+ require('../data/decode-map-overrides.json')
+).map(Number);
module.exports = {
- 'encodeMap': readJSON('encode-map'),
- 'encodeASCII': encodeASCII, // not used
- 'regexEncodeNonASCII': encodeNonASCII,
- 'decodeOverrides': readJSON('decode-map-overrides'),
- 'decodeMap': readJSON('decode-map'),
- 'decodeMapLegacy': readJSON('decode-map-legacy'),
- 'invalidReferenceCodePoints': (function() {
- return jsesc(readJSON('invalid-character-reference-code-points'));
- }()),
+ 'encodeMap': formatJSON('encode-map'),
+ 'decodeMapOverrides': formatJSON('decode-map-overrides'),
+ 'decodeMap': formatJSON('decode-map'),
+ 'decodeMapLegacy': formatJSON('decode-map-legacy'),
+ 'invalidReferenceCodePoints': formatJSON('invalid-character-reference-code-points'),
'invalidCodePointsString': (function() {
var string = String.fromCodePoint.apply(0, invalidRawCodePoints);
return jsesc(string, { 'wrap': true });
}()),
+ 'regexEncodeASCII': regexEncodeASCII, // not used
+ 'regexEncodeNonASCII': regexEncodeNonASCII,
'regexInvalidRawCodePoints': regenerate(invalidRawCodePoints).toString(),
'regexAstralSymbol': regenerate().addRange(0x010000, 0x10FFFF).toString(),
'regexDecimalEscapeSource': '&#([0-9]+)(;?)',
'regexHexadecimalEscapeSource': '&#[xX]([a-fA-F0-9]+)(;?)',
'regexNamedReferenceSource': '&([0-9a-zA-Z]+);',
'regexLegacyReferenceSource': (function() {
- return '&(' + readJSON('decode-legacy-named-references').join('|') +
+ return '&(' +
+ require('../data/decode-legacy-named-references.json').join('|') +
')([=a-zA-Z0-9])?';
}()),
'regexLoneSurrogate': '[\\uD800-\\uDBFF](?:[^\\uDC00-\\uDFFF]|$)|(?:[^\\uD800-\uDBFF]|^)[\\uDC00-\\uDFFF]',
@@ -110,6 +103,6 @@ module.exports = {
.remove(overrides)
.toString();
}()),
- 'testData': fs.readFileSync('data/entities.json', 'utf-8').trim(),
- 'version': JSON.parse(fs.readFileSync('package.json', 'utf-8')).version
+ 'testData': formatJSON('entities'),
+ 'version': require('../package.json').version
};