summaryrefslogtreecommitdiffstats
path: root/scripts/encode-non-ascii-regex.js
blob: 0c7894d06cb7293c6ad91e8151e463fa18098de5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
'use strict';

const jsesc = require('jsesc');
const regenerate = require('regenerate');
const difference = require('lodash').difference;

/**
 * Combines two pattern fragments into one alternation.
 *
 * @param {string} a - First fragment; may be empty.
 * @param {string} b - Second fragment; may be empty.
 * @returns {string} `a|b` when both fragments are truthy, otherwise the
 *   plain concatenation `a + b` (so an empty fragment adds no stray `|`).
 */
const joinStrings = function(a, b) {
	// A separator is only wanted when there is something on both sides of it.
	const needsSeparator = Boolean(a && b);
	return needsSeparator ? a + '|' + b : a + b;
};

// Input data, read from the JSON files in the sibling `data` directory.
const loneCodePoints = require('../data/encode-lone-code-points.json');
const arrayEncodeMultipleSymbols = require('../data/encode-paired-symbols.json');

// Subset of the multi-symbol entries made up solely of characters in the
// range U+0000 through U+007F (the `+` also excludes empty strings).
const arrayEncodeMultipleSymbolsAscii = arrayEncodeMultipleSymbols.filter(
	(symbols) => /^[\0-\x7F]+$/.test(symbols)
);

// Upper bounds of the ASCII range and of the whole code point space.
const highestAsciiCodePoint = 0x7F;
const highestCodePoint = 0x10FFFF;

// String form of the lone code point set with everything above U+007F removed.
const encodeSingleSymbolsAscii = regenerate(loneCodePoints)
	.removeRange(highestAsciiCodePoint + 1, highestCodePoint)
	.toString();

// String form of the lone code point set with U+0000 through U+007F removed.
const encodeSingleSymbolsNonAscii = regenerate(loneCodePoints)
	.removeRange(0x00, highestAsciiCodePoint)
	.toString();
// ASCII-only multi-symbol entries, joined with `|` and passed through jsesc.
const asciiAlternatives = arrayEncodeMultipleSymbolsAscii.join('|');
const encodeMultipleSymbolsAscii = jsesc(asciiAlternatives);

// Every multi-symbol entry that is not in the ASCII-only subset, joined with
// `|` and passed through jsesc.
const nonAsciiMultipleSymbols = difference(
	arrayEncodeMultipleSymbols,
	arrayEncodeMultipleSymbolsAscii
);
const nonAsciiAlternatives = nonAsciiMultipleSymbols.join('|');
const encodeMultipleSymbolsNonAscii = jsesc(nonAsciiAlternatives);

// In both patterns the multi-symbol alternatives come first, followed by the
// lone-symbol part; `joinStrings` omits the `|` when either side is empty.
const regexEncodeAscii = joinStrings(encodeMultipleSymbolsAscii, encodeSingleSymbolsAscii);
const regexEncodeNonAscii = joinStrings(encodeMultipleSymbolsNonAscii, encodeSingleSymbolsNonAscii);

// Only the non-ASCII pattern is exported. `regexEncodeAscii` is computed
// above but is neither exported nor used anywhere in this file.
module.exports = regexEncodeNonAscii;