Skip to content

Commit 78238de

Browse files
committed
Perf: speed up build
1 parent 30cab8f commit 78238de

20 files changed

+155
-136
lines changed

Build/build-anti-bogus-domain.js

+10-3
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,35 @@ const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('.
66
const { processLine } = require('./lib/process-line');
77
const { task } = require('./lib/trace-runner');
88

9-
const buildAntiBogusDomain = task(__filename, async () => {
9+
const getBogusNxDomainIPs = async () => {
1010
/** @type {string[]} */
1111
const res = [];
1212
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
1313
if (line.startsWith('bogus-nxdomain=')) {
1414
res.push(line.replace('bogus-nxdomain=', ''));
1515
}
1616
}
17+
return res;
18+
};
1719

20+
const buildAntiBogusDomain = task(__filename, async () => {
1821
const filePath = path.resolve(__dirname, '../Source/ip/reject.conf');
1922

23+
const bogusIpPromise = getBogusNxDomainIPs();
24+
2025
/** @type {string[]} */
2126
const result = [];
2227
for await (const line of readFileByLine(filePath)) {
2328
if (line === '# --- [Anti Bogus Domain Replace Me] ---') {
24-
res.forEach(ip => {
29+
(await bogusIpPromise).forEach(ip => {
2530
if (isIPv4(ip)) {
2631
result.push(`IP-CIDR,${ip}/32,no-resolve`);
2732
} else if (isIPv6(ip)) {
2833
result.push(`IP-CIDR6,${ip}/128,no-resolve`);
2934
}
3035
});
36+
37+
continue;
3138
} else {
3239
const l = processLine(line);
3340
if (l) {
@@ -47,7 +54,7 @@ const buildAntiBogusDomain = task(__filename, async () => {
4754
' - https://github.com/felixonmars/dnsmasq-china-list'
4855
];
4956

50-
await Promise.all(createRuleset(
57+
return Promise.all(createRuleset(
5158
'Sukka\'s Ruleset - Anti Bogus Domain',
5259
description,
5360
new Date(),

Build/build-apple-cdn.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ const buildAppleCdn = task(__filename, async () => {
2020
const ruleset = res.map(domain => `DOMAIN-SUFFIX,${domain}`);
2121
const domainset = res.map(i => `.${i}`);
2222

23-
await Promise.all([
23+
return Promise.all([
2424
...createRuleset(
2525
'Sukka\'s Ruleset - Apple CDN',
2626
description,

Build/build-cdn-conf.js

+10-4
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ const { processLine } = require('./lib/process-line');
99

1010
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt');
1111

12-
const buildCdnConf = task(__filename, async () => {
12+
const getS3OSSDomains = async () => {
1313
const trie = new Trie();
1414

1515
if (fs.existsSync(publicSuffixPath)) {
@@ -46,13 +46,19 @@ const buildCdnConf = task(__filename, async () => {
4646
}
4747
});
4848

49+
return S3OSSDomains;
50+
};
51+
52+
const buildCdnConf = task(__filename, async () => {
4953
/** @type {string[]} */
5054
const cdnDomainsList = [];
55+
56+
const getS3OSSDomainsPromise = getS3OSSDomains();
57+
5158
for await (const l of readFileByLine(path.resolve(__dirname, '../Source/non_ip/cdn.conf'))) {
5259
if (l === '# --- [AWS S3 Replace Me] ---') {
53-
S3OSSDomains.forEach(domain => {
54-
cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`);
55-
});
60+
(await getS3OSSDomainsPromise).forEach(domain => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
61+
continue;
5662
}
5763
const line = processLine(l);
5864
if (line) {

Build/build-chn-cidr.js

+6-5
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,21 @@ const EXCLUDE_CIDRS = [
1313
];
1414

1515
const buildChnCidr = task(__filename, async () => {
16-
const { exclude: excludeCidrs } = await import('cidr-tools-wasm');
16+
const [{ exclude: excludeCidrs }, rl] = await Promise.all([
17+
import('cidr-tools-wasm'),
18+
fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')
19+
]);
1720

1821
/** @type {string[]} */
1922
const cidr = [];
20-
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
23+
for await (const line of rl) {
2124
const l = processLine(line);
2225
if (l) {
2326
cidr.push(l);
2427
}
2528
}
2629

27-
console.log('Before Merge:', cidr.length);
2830
const filteredCidr = excludeCidrs(cidr, EXCLUDE_CIDRS, true);
29-
console.log('After Merge:', filteredCidr.length);
3031

3132
const description = [
3233
'License: CC BY-SA 2.0',
@@ -36,7 +37,7 @@ const buildChnCidr = task(__filename, async () => {
3637
'Data from https://misaka.io (misakaio @ GitHub)'
3738
];
3839

39-
await Promise.all([
40+
return Promise.all([
4041
compareAndWriteFile(
4142
withBannerArray(
4243
'Sukka\'s Ruleset - Mainland China IPv4 CIDR',

Build/build-common.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ const outputSurgeDir = path.resolve(__dirname, '../List');
1717
const outputClashDir = path.resolve(__dirname, '../Clash');
1818

1919
const buildCommon = task(__filename, async () => {
20-
/** @type {Promise<void>[]} */
20+
/** @type {Promise<unknown>[]} */
2121
const promises = [];
2222

2323
const pw = new PathScurry(sourceDir);
@@ -107,7 +107,7 @@ async function transformDomainset(sourcePath, relativePath) {
107107
)
108108
];
109109

110-
await Promise.all(createRuleset(
110+
return Promise.all(createRuleset(
111111
title,
112112
description,
113113
new Date(),
@@ -140,7 +140,7 @@ async function transformRuleset(sourcePath, relativePath) {
140140
)
141141
];
142142

143-
await Promise.all(createRuleset(
143+
return Promise.all(createRuleset(
144144
title,
145145
description,
146146
new Date(),

Build/build-domestic-ruleset.js

+3-6
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@ const domainSorter = require('./lib/stable-sort-domain');
88
const { task } = require('./lib/trace-runner');
99

1010
const buildDomesticRuleset = task(__filename, async () => {
11-
const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'));
1211
const results = [];
13-
for await (const l of rl) {
12+
for await (const l of readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'))) {
1413
const line = processLine(l);
1514
if (line) {
1615
results.push(line);
@@ -21,9 +20,7 @@ const buildDomesticRuleset = task(__filename, async () => {
2120
...Object.entries(DOMESTICS)
2221
.reduce(
2322
(acc, [key, { domains }]) => {
24-
if (key === 'SYSTEM') {
25-
return acc;
26-
}
23+
if (key === 'SYSTEM') return acc;
2724
return [...acc, ...domains];
2825
},
2926
/** @type {string[]} */([])
@@ -40,7 +37,7 @@ const buildDomesticRuleset = task(__filename, async () => {
4037
'This file contains known addresses that are avaliable in the Mainland China.'
4138
];
4239

43-
await Promise.all([
40+
return Promise.all([
4441
...createRuleset(
4542
'Sukka\'s Ruleset - Domestic Domains',
4643
rulesetDescription,

Build/build-internal-cdn-rules.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ const buildInternalCDNDomains = task(__filename, async () => {
8080
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
8181
]);
8282

83-
await compareAndWriteFile(
83+
return compareAndWriteFile(
8484
[
8585
...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`),
8686
...Array.from(keywords).sort().map(i => `REGEX,${i}`)

Build/build-internal-chn-domains.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ const buildInternalChnDomains = task(__filename, async () => {
1111
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
1212
]);
1313

14-
await compareAndWriteFile(
14+
return compareAndWriteFile(
1515
result.map(line => `SUFFIX,${line}`),
1616
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt')
1717
);

Build/build-internal-reverse-chn-cidr.js

+7-4
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,15 @@ const RESERVED_IPV4_CIDR = [
2525
];
2626

2727
const buildInternalReverseChnCIDR = task(__filename, async () => {
28-
const { exclude } = await import('cidr-tools-wasm');
28+
const [{ exclude }, rl] = await Promise.all([
29+
import('cidr-tools-wasm'),
30+
fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'),
31+
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
32+
]);
2933

3034
/** @type {string[]} */
3135
const cidr = [];
32-
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
36+
for await (const line of rl) {
3337
const l = processLine(line);
3438
if (l) {
3539
cidr.push(l);
@@ -42,8 +46,7 @@ const buildInternalReverseChnCIDR = task(__filename, async () => {
4246
true
4347
);
4448

45-
await fse.ensureDir(path.resolve(__dirname, '../List/internal'));
46-
await fs.promises.writeFile(
49+
return fs.promises.writeFile(
4750
path.resolve(__dirname, '../List/internal/reversed-chn-cidr.txt'),
4851
`${reversedCidr.join('\n')}\n`
4952
);

Build/build-phishing-domainset.js

+74-53
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1-
const tldts = require('tldts');
1+
// @ts-check
22
const { processFilterRules } = require('./lib/parse-filter.js');
33
const path = require('path');
44
const { createRuleset } = require('./lib/create-file');
55
const { processLine } = require('./lib/process-line.js');
6-
const domainSorter = require('./lib/stable-sort-domain');
6+
const { createDomainSorter } = require('./lib/stable-sort-domain');
77
const { traceSync, task } = require('./lib/trace-runner.js');
8+
const Trie = require('./lib/trie.js');
9+
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js');
10+
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js');
11+
const tldts = require('tldts');
812

913
const WHITELIST_DOMAIN = new Set([
1014
'w3s.link',
@@ -61,77 +65,94 @@ const BLACK_TLD = new Set([
6165
]);
6266

6367
const buildPhishingDomainSet = task(__filename, async () => {
64-
const domainSet = Array.from((await processFilterRules(
65-
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
66-
// [
67-
// 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
68-
// 'https://malware-filter.pages.dev/phishing-filter-agh.txt',
69-
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
70-
// ]
71-
)).black);
68+
const [{ black: domainSet }, gorhill] = await Promise.all([
69+
processFilterRules(
70+
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
71+
// [
72+
// 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
73+
// 'https://malware-filter.pages.dev/phishing-filter-agh.txt',
74+
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
75+
// ]
76+
),
77+
getGorhillPublicSuffixPromise()
78+
]);
79+
80+
traceSync('* whitelist', () => {
81+
const trieForRemovingWhiteListed = Trie.from(domainSet);
82+
WHITELIST_DOMAIN.forEach(white => {
83+
trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
84+
if (trieForRemovingWhiteListed.has(white)) {
85+
domainSet.delete(white);
86+
}
87+
});
88+
});
89+
7290
const domainCountMap = {};
91+
const getDomain = createCachedGorhillGetDomain(gorhill);
7392

7493
traceSync('* process domain set', () => {
75-
for (let i = 0, len = domainSet.length; i < len; i++) {
76-
const line = processLine(domainSet[i]);
77-
if (!line) continue;
94+
const domainArr = Array.from(domainSet);
7895

79-
const parsed = tldts.parse(line, { allowPrivateDomains: true });
80-
const apexDomain = parsed.domain;
96+
for (let i = 0, len = domainArr.length; i < len; i++) {
97+
const line = processLine(domainArr[i]);
98+
if (!line) continue;
8199

82-
if (apexDomain) {
83-
if (WHITELIST_DOMAIN.has(apexDomain)) {
84-
continue;
85-
}
100+
const apexDomain = getDomain(line);
101+
if (!apexDomain) continue;
86102

87-
domainCountMap[apexDomain] ||= 0;
103+
domainCountMap[apexDomain] ||= 0;
88104

89-
let isPhishingDomainMockingAmazon = false;
90-
if (line.startsWith('.amaz')) {
91-
domainCountMap[apexDomain] += 0.5;
105+
const isPhishingDomainMockingCoJp = line.includes('-co-jp');
106+
if (isPhishingDomainMockingCoJp) {
107+
domainCountMap[apexDomain] += 0.5;
108+
}
92109

93-
isPhishingDomainMockingAmazon = true;
110+
if (line.startsWith('.amaz')) {
111+
domainCountMap[apexDomain] += 0.5;
94112

95-
if (line.startsWith('.amazon-')) {
96-
domainCountMap[apexDomain] += 4.5;
97-
}
98-
} else if (line.startsWith('.customer')) {
99-
domainCountMap[apexDomain] += 0.25;
113+
if (line.startsWith('.amazon-')) {
114+
domainCountMap[apexDomain] += 4.5;
100115
}
101-
if (line.includes('-co-jp')) {
102-
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
116+
if (isPhishingDomainMockingCoJp) {
117+
domainCountMap[apexDomain] += 4;
103118
}
119+
} else if (line.startsWith('.customer')) {
120+
domainCountMap[apexDomain] += 0.25;
121+
}
104122

105-
const tld = parsed.publicSuffix;
106-
if (!tld || !BLACK_TLD.has(tld)) continue;
123+
const tld = gorhill.getPublicSuffix(line[0] === '.' ? line.slice(1) : line);
124+
if (!tld || !BLACK_TLD.has(tld)) continue;
107125

108-
domainCountMap[apexDomain] += 1;
126+
domainCountMap[apexDomain] += 1;
109127

110-
if (line.length > 19) {
111-
// Add more weight if the domain is long enough
112-
if (line.length > 44) {
113-
domainCountMap[apexDomain] += 3.5;
114-
} else if (line.length > 34) {
115-
domainCountMap[apexDomain] += 2.5;
116-
} else if (line.length > 29) {
117-
domainCountMap[apexDomain] += 1.5;
118-
} else if (line.length > 24) {
119-
domainCountMap[apexDomain] += 0.75;
120-
} else if (line.length > 19) {
121-
domainCountMap[apexDomain] += 0.25;
122-
}
128+
const lineLen = line.length;
123129

124-
if (domainCountMap[apexDomain] < 5) {
125-
const subdomain = parsed.subdomain;
126-
if (subdomain?.includes('.')) {
127-
domainCountMap[apexDomain] += 1.5;
128-
}
130+
if (lineLen > 19) {
131+
// Add more weight if the domain is long enough
132+
if (lineLen > 44) {
133+
domainCountMap[apexDomain] += 3.5;
134+
} else if (lineLen > 34) {
135+
domainCountMap[apexDomain] += 2.5;
136+
} else if (lineLen > 29) {
137+
domainCountMap[apexDomain] += 1.5;
138+
} else if (lineLen > 24) {
139+
domainCountMap[apexDomain] += 0.75;
140+
} else {
141+
domainCountMap[apexDomain] += 0.25;
142+
}
143+
144+
if (domainCountMap[apexDomain] < 5) {
145+
const subdomain = tldts.getSubdomain(line);
146+
if (subdomain?.includes('.')) {
147+
domainCountMap[apexDomain] += 1.5;
129148
}
130149
}
131150
}
132151
}
133152
});
134153

154+
const domainSorter = createDomainSorter(gorhill);
155+
135156
const results = traceSync('* get final results', () => Object.entries(domainCountMap)
136157
.reduce((acc, [apexDomain, count]) => {
137158
if (count >= 5) {
@@ -151,7 +172,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
151172
' - https://gitlab.com/malware-filter/phishing-filter'
152173
];
153174

154-
await Promise.all(createRuleset(
175+
return Promise.all(createRuleset(
155176
'Sukka\'s Ruleset - Reject Phishing',
156177
description,
157178
new Date(),

0 commit comments

Comments
 (0)