summary refs log tree commit diff stats
path: root/scripts/encode-non-ascii-regex.js
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/encode-non-ascii-regex.js')
-rw-r--r-- scripts/encode-non-ascii-regex.js 44
1 files changed, 44 insertions, 0 deletions
diff --git a/scripts/encode-non-ascii-regex.js b/scripts/encode-non-ascii-regex.js
new file mode 100644
index 0000000..374aa1d
--- /dev/null
+++ b/scripts/encode-non-ascii-regex.js
@@ -0,0 +1,44 @@
+var jsesc = require('jsesc');
+var regenerate = require('regenerate');
+var difference = require('lodash.difference');
+
+var joinStrings = function(a, b) { // Joins two pattern fragments with `|`, skipping the separator when either is empty.
+ if (a && b) { // Both fragments are truthy (non-empty strings), so a separator is needed.
+ return a + '|' + b;
+ }
+ return a + b; // At least one operand is falsy (e.g. ''), so plain concatenation avoids a stray `|`.
+};
+
+var loneCodePoints = require('../data/encode-lone-code-points.json'); // Passed to `regenerate()` below; presumably a list of code points — confirm against the data file.
+var arrayEncodeMultipleSymbols = require('../data/encode-paired-symbols.json'); // Array (it is filtered and joined below); presumably of multi-symbol strings.
+var arrayEncodeMultipleSymbolsAscii = arrayEncodeMultipleSymbols
+ .filter(function(string) {
+ return /^[\0-\x7F]+$/.test(string); // Keep only entries consisting entirely of characters in U+0000..U+007F.
+ });
+
+var encodeSingleSymbolsAscii = regenerate(loneCodePoints)
+ .removeRange(0x7F + 1, 0x10FFFF).toString(); // Remove U+0080..U+10FFFF so only code points <= 0x7F remain, then stringify the set.
+var encodeSingleSymbolsNonAscii = regenerate(loneCodePoints)
+ .removeRange(0x00, 0x7F).toString(); // Remove U+0000..U+007F so only code points above 0x7F remain, then stringify the set.
+var encodeMultipleSymbolsAscii = jsesc( // `|`-joined ASCII-only entries, passed through jsesc.
+ arrayEncodeMultipleSymbolsAscii.join('|')
+);
+var encodeMultipleSymbolsNonAscii = jsesc( // Same treatment for the remaining (not ASCII-only) entries.
+ difference( // Entries of the full list that are absent from the ASCII-only list.
+ arrayEncodeMultipleSymbols,
+ arrayEncodeMultipleSymbolsAscii
+ ).join('|')
+);
+
+var regexEncodeAscii = joinStrings( // Multi-symbol alternatives come first, followed by the single-symbol fragment.
+ encodeMultipleSymbolsAscii,
+ encodeSingleSymbolsAscii
+);
+
+var regexEncodeNonAscii = joinStrings( // Same ordering, built from the non-ASCII fragments.
+ encodeMultipleSymbolsNonAscii,
+ encodeSingleSymbolsNonAscii
+);
+
+// Note: `regexEncodeAscii` is not used.
+module.exports = regexEncodeNonAscii; // The module's sole export is the non-ASCII pattern string.