Skip to content

Commit 78afa59

Browse files
committed
Perf: further speed up infra
1 parent adb8b43 commit 78afa59

25 files changed

+429
-171
lines changed

.eslintrc.json

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
11
{
22
"root": true,
33
"extends": ["sukka/node"],
4-
"rules": {
5-
"no-console": "off"
6-
},
7-
"parserOptions": {
8-
"ecmaVersion": "latest",
9-
"sourceType": "module"
10-
}
4+
"ignorePatterns": [
5+
"node_modules/",
6+
// disable for now
7+
"**/*.d.ts"
8+
],
9+
"overrides": [
10+
{
11+
"files": ["**/*.js"],
12+
"rules": {
13+
"no-console": "off"
14+
},
15+
"parserOptions": {
16+
"ecmaVersion": "latest",
17+
"sourceType": "module"
18+
}
19+
}
20+
]
1121
}

Build/build-cdn-conf.js

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,22 @@ const { minifyRules } = require('./lib/minify-rules');
55
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
66
const Trie = require('./lib/trie');
77
const { runner } = require('./lib/trace-runner');
8+
const fs = require('fs');
9+
10+
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix-list_dat.txt');
811

912
runner(__filename, async () => {
1013
const trie = new Trie();
11-
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
12-
trie.add(line);
14+
15+
if (fs.existsSync(publicSuffixPath)) {
16+
for await (const line of readFileByLine(publicSuffixPath)) {
17+
trie.add(line);
18+
}
19+
} else {
20+
console.log('public_suffix_list.dat not found, fetch directly from remote.');
21+
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
22+
trie.add(line);
23+
}
1324
}
1425

1526
/**
@@ -18,13 +29,16 @@ runner(__filename, async () => {
1829
*/
1930
const S3OSSDomains = new Set();
2031

21-
trie.find('.amazonaws.com')
22-
.filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-'))
23-
.forEach(line => S3OSSDomains.add(line));
24-
25-
trie.find('.scw.cloud')
26-
.filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-'))
27-
.forEach(line => S3OSSDomains.add(line));
32+
trie.find('.amazonaws.com').forEach(line => {
33+
if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) {
34+
S3OSSDomains.add(line);
35+
}
36+
});
37+
trie.find('.scw.cloud').forEach(line => {
38+
if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) {
39+
S3OSSDomains.add(line);
40+
}
41+
});
2842

2943
/** @type {string[]} */
3044
const cdnDomainsList = [];
@@ -45,7 +59,7 @@ runner(__filename, async () => {
4559
];
4660
const ruleset = minifyRules(cdnDomainsList);
4761

48-
await Promise.all(createRuleset(
62+
return Promise.all(createRuleset(
4963
'Sukka\'s Ruleset - CDN Domains',
5064
description,
5165
new Date(),

Build/build-domestic-ruleset.js

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,15 @@ runner(__filename, async () => {
1919

2020
results.push(
2121
...Object.entries(DOMESTICS)
22-
.filter(([key]) => key !== 'SYSTEM')
23-
.flatMap(([, { domains }]) => domains)
22+
.reduce(
23+
(acc, [key, { domains }]) => {
24+
if (key === 'SYSTEM') {
25+
return acc;
26+
}
27+
return [...acc, ...domains];
28+
},
29+
/** @type {string[]} */([])
30+
)
2431
.sort(domainSorter)
2532
.map((domain) => `DOMAIN-SUFFIX,${domain}`)
2633
);

Build/build-internal-cdn-rules.js

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// @ts-check
22
const fse = require('fs-extra');
33
const path = require('path');
4-
const { isDomainLoose } = require('./lib/is-domain-loose');
54
const tldts = require('tldts');
65
const { processLine } = require('./lib/process-line');
76
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
@@ -35,11 +34,15 @@ runner(__filename, async () => {
3534
*/
3635
const processLocalDomainSet = async (domainSetPath) => {
3736
for await (const line of readFileByLine(domainSetPath)) {
38-
if (line[0] === '.') {
39-
addApexDomain(line.slice(1));
40-
} else if (isDomainLoose(line)) {
41-
addApexDomain(line);
42-
} else if (processLine(line)) {
37+
const parsed = tldts.parse(line, { allowPrivateDomains: true });
38+
if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) {
39+
if (parsed.domain) {
40+
set.add(parsed.domain);
41+
}
42+
continue;
43+
}
44+
45+
if (processLine(line)) {
4346
console.warn('[drop line from domainset]', line);
4447
}
4548
}
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
// @ts-check
22
const path = require('path');
33
const fse = require('fs-extra');
4-
const fs = require('fs');
54
const { parseFelixDnsmasq } = require('./lib/parse-dnsmasq');
65
const { runner } = require('./lib/trace-runner');
6+
const { compareAndWriteFile } = require('./lib/create-file');
77

88
runner(__filename, async () => {
99
const [result] = await Promise.all([
1010
parseFelixDnsmasq('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf'),
1111
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
1212
]);
1313

14-
await fs.promises.writeFile(
15-
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt'),
16-
`${result.map(line => `SUFFIX,${line}`).join('\n')}\n`
14+
await compareAndWriteFile(
15+
result.map(line => `SUFFIX,${line}`),
16+
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt')
1717
);
1818
});

Build/build-phishing-domainset.js

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
const { parse } = require('tldts');
1+
const tldts = require('tldts');
22
const { processFilterRules } = require('./lib/parse-filter.js');
33
const path = require('path');
44
const { createRuleset } = require('./lib/create-file');
55
const { processLine } = require('./lib/process-line.js');
66
const domainSorter = require('./lib/stable-sort-domain');
7-
const { runner } = require('./lib/trace-runner.js');
7+
const { runner, traceSync } = require('./lib/trace-runner.js');
88

99
const WHITELIST_DOMAIN = new Set([
1010
'w3s.link',
@@ -61,19 +61,14 @@ const BLACK_TLD = new Set([
6161
]);
6262

6363
runner(__filename, async () => {
64-
const domainSet = Array.from(
65-
(await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black
66-
);
64+
const domainSet = Array.from((await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black);
6765
const domainCountMap = {};
6866

6967
for (let i = 0, len = domainSet.length; i < len; i++) {
7068
const line = processLine(domainSet[i]);
7169
if (!line) continue;
7270

73-
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
74-
75-
const parsed = parse(domain, { allowPrivateDomains: true });
76-
71+
const parsed = tldts.parse(line, { allowPrivateDomains: true });
7772
const apexDomain = parsed.domain;
7873

7974
if (apexDomain) {
@@ -84,19 +79,18 @@ runner(__filename, async () => {
8479
domainCountMap[apexDomain] ||= 0;
8580

8681
let isPhishingDomainMockingAmazon = false;
87-
88-
if (domain.startsWith('amaz')) {
82+
if (line.startsWith('.amaz')) {
8983
domainCountMap[apexDomain] += 0.5;
9084

9185
isPhishingDomainMockingAmazon = true;
9286

93-
if (domain.startsWith('amazon-')) {
87+
if (line.startsWith('.amazon-')) {
9488
domainCountMap[apexDomain] += 4.5;
9589
}
96-
} else if (domain.startsWith('customer')) {
90+
} else if (line.startsWith('.customer')) {
9791
domainCountMap[apexDomain] += 0.25;
9892
}
99-
if (domain.includes('-co-jp')) {
93+
if (line.includes('-co-jp')) {
10094
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
10195
}
10296

@@ -105,17 +99,17 @@ runner(__filename, async () => {
10599

106100
domainCountMap[apexDomain] += 1;
107101

108-
if (domain.length > 19) {
102+
if (line.length > 19) {
109103
// Add more weight if the domain is long enough
110-
if (domain.length > 44) {
104+
if (line.length > 44) {
111105
domainCountMap[apexDomain] += 3.5;
112-
} else if (domain.length > 34) {
106+
} else if (line.length > 34) {
113107
domainCountMap[apexDomain] += 2.5;
114-
} else if (domain.length > 29) {
108+
} else if (line.length > 29) {
115109
domainCountMap[apexDomain] += 1.5;
116-
} else if (domain.length > 24) {
110+
} else if (line.length > 24) {
117111
domainCountMap[apexDomain] += 0.75;
118-
} else if (domain.length > 19) {
112+
} else if (line.length > 19) {
119113
domainCountMap[apexDomain] += 0.25;
120114
}
121115

@@ -129,15 +123,14 @@ runner(__filename, async () => {
129123
}
130124
}
131125

132-
const results = [];
133-
134-
Object.entries(domainCountMap).forEach(([domain, count]) => {
135-
if (count >= 5) {
136-
results.push(`.${domain}`);
137-
}
138-
});
139-
140-
results.sort(domainSorter);
126+
const results = traceSync('* get final results', () => Object.entries(domainCountMap)
127+
.reduce((acc, [apexDomain, count]) => {
128+
if (count >= 5) {
129+
acc.push(`.${apexDomain}`);
130+
}
131+
return acc;
132+
}, /** @type {string[]} */([]))
133+
.sort(domainSorter));
141134

142135
const description = [
143136
'License: AGPL 3.0',

Build/build-reject-domainset.js

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,29 @@
11
// @ts-check
2-
const fs = require('fs');
32
const fse = require('fs-extra');
43
const { resolve: pathResolve } = require('path');
54

6-
const tldts = require('tldts');
7-
85
const { processHosts, processFilterRules } = require('./lib/parse-filter');
96
const Trie = require('./lib/trie');
107

118
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
12-
const { createRuleset } = require('./lib/create-file');
9+
const { createRuleset, compareAndWriteFile } = require('./lib/create-file');
1310
const { processLine } = require('./lib/process-line');
1411
const { domainDeduper } = require('./lib/domain-deduper');
1512
const createKeywordFilter = require('./lib/aho-corasick');
1613
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
17-
const domainSorter = require('./lib/stable-sort-domain');
14+
const { createDomainSorter } = require('./lib/stable-sort-domain');
15+
const { traceSync, runner } = require('./lib/trace-runner');
16+
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix');
17+
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
1818

1919
/** Whitelists */
2020
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
2121
/** @type {Set<string>} Dedupe domains included by DOMAIN-KEYWORD */
2222
const domainKeywordsSet = new Set();
2323
/** @type {Set<string>} Dedupe domains included by DOMAIN-SUFFIX */
2424
const domainSuffixSet = new Set();
25-
(async () => {
25+
26+
runner(__filename, async () => {
2627
/** @type Set<string> */
2728
const domainSets = new Set();
2829

@@ -31,7 +32,8 @@ const domainSuffixSet = new Set();
3132

3233
let shouldStop = false;
3334

34-
await Promise.all([
35+
const [gorhill] = await Promise.all([
36+
getGorhillPublicSuffixPromise,
3537
// Parse from remote hosts & domain lists
3638
...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
3739
hosts.forEach(host => {
@@ -129,7 +131,7 @@ const domainSuffixSet = new Set();
129131
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
130132
console.time('* Dedupe from black keywords/suffixes');
131133

132-
const kwfilter = createKeywordFilter(Array.from(domainKeywordsSet));
134+
const kwfilter = createKeywordFilter(domainKeywordsSet);
133135

134136
const trie1 = Trie.from(domainSets);
135137
domainSuffixSet.forEach(suffix => {
@@ -167,19 +169,35 @@ const domainSuffixSet = new Set();
167169

168170
const START_TIME = Date.now();
169171

170-
const dudupedDominArray = domainDeduper(Array.from(domainSets));
172+
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
171173

172174
console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`);
173175
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
174176

175-
/** @type {Record<string, number>} */
176-
const rejectDomainsStats = dudupedDominArray.reduce((acc, cur) => {
177-
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false });
178-
if (suffix) {
179-
acc[suffix] = (acc[suffix] ?? 0) + 1;
180-
}
181-
return acc;
182-
}, {});
177+
// Create reject stats
178+
const getDomain = createCachedGorhillGetDomain(gorhill);
179+
/** @type {[string, number][]} */
180+
const rejectDomainsStats = traceSync(
181+
'* Collect reject domain stats',
182+
() => Object.entries(
183+
dudupedDominArray.reduce((acc, cur) => {
184+
const suffix = getDomain(cur);
185+
if (suffix) {
186+
acc[suffix] = (acc[suffix] ?? 0) + 1;
187+
}
188+
return acc;
189+
}, {})
190+
).filter(a => a[1] > 2).sort((a, b) => {
191+
const t = b[1] - a[1];
192+
if (t === 0) {
193+
return a[0].localeCompare(b[0]);
194+
}
195+
return t;
196+
})
197+
);
198+
199+
const domainSorter = createDomainSorter(gorhill);
200+
const domainset = traceSync('* Sort reject domainset', () => dudupedDominArray.sort(domainSorter));
183201

184202
const description = [
185203
'License: AGPL 3.0',
@@ -192,7 +210,6 @@ const domainSuffixSet = new Set();
192210
...HOSTS.map(host => ` - ${host[0]}`),
193211
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
194212
];
195-
const domainset = dudupedDominArray.sort(domainSorter);
196213

197214
await Promise.all([
198215
...createRuleset(
@@ -204,21 +221,11 @@ const domainSuffixSet = new Set();
204221
pathResolve(__dirname, '../List/domainset/reject.conf'),
205222
pathResolve(__dirname, '../Clash/domainset/reject.txt')
206223
),
207-
fs.promises.writeFile(
208-
pathResolve(__dirname, '../List/internal/reject-stats.txt'),
209-
Object.entries(rejectDomainsStats)
210-
.filter(a => a[1] > 1)
211-
.sort((a, b) => {
212-
const t = b[1] - a[1];
213-
if (t === 0) {
214-
return a[0].localeCompare(b[0]);
215-
}
216-
return t;
217-
})
218-
.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`)
219-
.join('\n')
224+
compareAndWriteFile(
225+
rejectDomainsStats.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`),
226+
pathResolve(__dirname, '../List/internal/reject-stats.txt')
220227
),
221228
// Copy reject_sukka.conf for backward compatibility
222229
fse.copy(pathResolve(__dirname, '../Source/domainset/reject_sukka.conf'), pathResolve(__dirname, '../List/domainset/reject_sukka.conf'))
223230
]);
224-
})();
231+
});

0 commit comments

Comments
 (0)