summary refs log tree commit diff stats
path: root/scripts/encode-non-ascii-regex.js
diff options
context:
space:
mode:
author Mathias Bynens <mathias@qiwi.be> 2014-05-24 17:35:01 +0200
committer Mathias Bynens <mathias@qiwi.be> 2014-05-24 20:29:40 +0200
commit 65f5b4904fb0c04818c1ed86735999f7dbf07b2b (patch)
tree acf9f51f8dfe6d1ea7f50ac32966b4f43183463b /scripts/encode-non-ascii-regex.js
parent 31bc08bb9af3a0d51c5326ec5e00fa4fef875d05 (diff)
download he-65f5b4904fb0c04818c1ed86735999f7dbf07b2b.zip
he-65f5b4904fb0c04818c1ed86735999f7dbf07b2b.tar.gz
he-65f5b4904fb0c04818c1ed86735999f7dbf07b2b.tar.bz2
Split scripts into separate files per exported value
Diffstat (limited to 'scripts/encode-non-ascii-regex.js')
-rw-r--r-- scripts/encode-non-ascii-regex.js 44
1 files changed, 44 insertions, 0 deletions
diff --git a/scripts/encode-non-ascii-regex.js b/scripts/encode-non-ascii-regex.js
new file mode 100644
index 0000000..374aa1d
--- /dev/null
+++ b/scripts/encode-non-ascii-regex.js
@@ -0,0 +1,44 @@
+var jsesc = require('jsesc');
+var regenerate = require('regenerate');
+var difference = require('lodash.difference');
+
+var joinStrings = function(a, b) { // Join two pattern fragments, adding a `|` separator only when both are truthy.
+ if (a && b) {
+ return a + '|' + b; // both present: `a|b`
+ }
+ return a + b; // at least one side is falsy (e.g. an empty string), so no separator is inserted
+};
+
+var loneCodePoints = require('../data/encode-lone-code-points.json'); // fed to `regenerate()` further down; presumably an array of code point numbers -- confirm against the data file
+var arrayEncodeMultipleSymbols = require('../data/encode-paired-symbols.json'); // array of strings (it is filtered here and `join`ed later)
+var arrayEncodeMultipleSymbolsAscii = arrayEncodeMultipleSymbols // subset consisting solely of ASCII characters
+ .filter(function(string) {
+ return /^[\0-\x7F]+$/.test(string); // true only for non-empty strings whose every code unit is in U+0000..U+007F
+ });
+
+var encodeSingleSymbolsAscii = regenerate(loneCodePoints) // ASCII half of the lone code points, as a string
+ .removeRange(0x7F + 1, 0x10FFFF).toString(); // removes U+0080..U+10FFFF
+var encodeSingleSymbolsNonAscii = regenerate(loneCodePoints) // non-ASCII half of the lone code points, as a string
+ .removeRange(0x00, 0x7F).toString(); // removes U+0000..U+007F
+var encodeMultipleSymbolsAscii = jsesc( // `|`-joined ASCII-only paired symbols, passed through jsesc
+ arrayEncodeMultipleSymbolsAscii.join('|')
+);
+var encodeMultipleSymbolsNonAscii = jsesc( // same, for the remaining paired symbols
+ difference( // entries of the full list that are absent from the ASCII-only list
+ arrayEncodeMultipleSymbols,
+ arrayEncodeMultipleSymbolsAscii
+ ).join('|')
+);
+
+var regexEncodeAscii = joinStrings( // ASCII pattern: multi-symbol alternatives first, then the single-symbol part
+ encodeMultipleSymbolsAscii,
+ encodeSingleSymbolsAscii
+);
+
+var regexEncodeNonAscii = joinStrings( // non-ASCII pattern, same ordering; NOTE(review): presumably so longer sequences are tried first -- confirm
+ encodeMultipleSymbolsNonAscii,
+ encodeSingleSymbolsNonAscii
+);
+
+// Note: `regexEncodeAscii` is computed above but is not exported or otherwise used in this file.
+module.exports = regexEncodeNonAscii; // sole export: the non-ASCII pattern as a string