11// @ts-check
2- const fs = require ( 'fs' ) ;
32const fse = require ( 'fs-extra' ) ;
43const { resolve : pathResolve } = require ( 'path' ) ;
54
6- const tldts = require ( 'tldts' ) ;
7-
85const { processHosts, processFilterRules } = require ( './lib/parse-filter' ) ;
96const Trie = require ( './lib/trie' ) ;
107
118const { HOSTS , ADGUARD_FILTERS , PREDEFINED_WHITELIST , PREDEFINED_ENFORCED_BACKLIST } = require ( './lib/reject-data-source' ) ;
12- const { createRuleset } = require ( './lib/create-file' ) ;
9+ const { createRuleset, compareAndWriteFile } = require ( './lib/create-file' ) ;
1310const { processLine } = require ( './lib/process-line' ) ;
1411const { domainDeduper } = require ( './lib/domain-deduper' ) ;
1512const createKeywordFilter = require ( './lib/aho-corasick' ) ;
1613const { readFileByLine } = require ( './lib/fetch-remote-text-by-line' ) ;
17- const domainSorter = require ( './lib/stable-sort-domain' ) ;
14+ const { createDomainSorter } = require ( './lib/stable-sort-domain' ) ;
15+ const { traceSync, runner } = require ( './lib/trace-runner' ) ;
16+ const { getGorhillPublicSuffixPromise } = require ( './lib/get-gorhill-publicsuffix' ) ;
17+ const { createCachedGorhillGetDomain } = require ( './lib/cached-tld-parse' ) ;
1818
1919/** Whitelists */
2020const filterRuleWhitelistDomainSets = new Set ( PREDEFINED_WHITELIST ) ;
2121/** @type {Set<string> } Dedupe domains included by DOMAIN-KEYWORD */
2222const domainKeywordsSet = new Set ( ) ;
2323/** @type {Set<string> } Dedupe domains included by DOMAIN-SUFFIX */
2424const domainSuffixSet = new Set ( ) ;
25- ( async ( ) => {
25+
26+ runner ( __filename , async ( ) => {
2627 /** @type Set<string> */
2728 const domainSets = new Set ( ) ;
2829
@@ -31,7 +32,8 @@ const domainSuffixSet = new Set();
3132
3233 let shouldStop = false ;
3334
34- await Promise . all ( [
35+ const [ gorhill ] = await Promise . all ( [
36+ getGorhillPublicSuffixPromise ,
3537 // Parse from remote hosts & domain lists
3638 ...HOSTS . map ( entry => processHosts ( entry [ 0 ] , entry [ 1 ] ) . then ( hosts => {
3739 hosts . forEach ( host => {
@@ -129,7 +131,7 @@ const domainSuffixSet = new Set();
129131 console . log ( `Start deduping from black keywords/suffixes! (${ previousSize } )` ) ;
130132 console . time ( '* Dedupe from black keywords/suffixes' ) ;
131133
132- const kwfilter = createKeywordFilter ( Array . from ( domainKeywordsSet ) ) ;
134+ const kwfilter = createKeywordFilter ( domainKeywordsSet ) ;
133135
134136 const trie1 = Trie . from ( domainSets ) ;
135137 domainSuffixSet . forEach ( suffix => {
@@ -167,19 +169,35 @@ const domainSuffixSet = new Set();
167169
168170 const START_TIME = Date . now ( ) ;
169171
170- const dudupedDominArray = domainDeduper ( Array . from ( domainSets ) ) ;
172+ const dudupedDominArray = traceSync ( '* Dedupe from covered subdomain' , ( ) => domainDeduper ( Array . from ( domainSets ) ) ) ;
171173
172174 console . log ( `* Dedupe from covered subdomain - ${ ( Date . now ( ) - START_TIME ) / 1000 } s` ) ;
173175 console . log ( `Deduped ${ previousSize - dudupedDominArray . length } rules!` ) ;
174176
175- /** @type {Record<string, number> } */
176- const rejectDomainsStats = dudupedDominArray . reduce ( ( acc , cur ) => {
177- const suffix = tldts . getDomain ( cur , { allowPrivateDomains : false } ) ;
178- if ( suffix ) {
179- acc [ suffix ] = ( acc [ suffix ] ?? 0 ) + 1 ;
180- }
181- return acc ;
182- } , { } ) ;
177+ // Create reject stats
178+ const getDomain = createCachedGorhillGetDomain ( gorhill ) ;
179+ /** @type {[string, number][] } */
180+ const rejectDomainsStats = traceSync (
181+ '* Collect reject domain stats' ,
182+ ( ) => Object . entries (
183+ dudupedDominArray . reduce ( ( acc , cur ) => {
184+ const suffix = getDomain ( cur ) ;
185+ if ( suffix ) {
186+ acc [ suffix ] = ( acc [ suffix ] ?? 0 ) + 1 ;
187+ }
188+ return acc ;
189+ } , { } )
190+ ) . filter ( a => a [ 1 ] > 2 ) . sort ( ( a , b ) => {
191+ const t = b [ 1 ] - a [ 1 ] ;
192+ if ( t === 0 ) {
193+ return a [ 0 ] . localeCompare ( b [ 0 ] ) ;
194+ }
195+ return t ;
196+ } )
197+ ) ;
198+
199+ const domainSorter = createDomainSorter ( gorhill ) ;
200+ const domainset = traceSync ( '* Sort reject domainset' , ( ) => dudupedDominArray . sort ( domainSorter ) ) ;
183201
184202 const description = [
185203 'License: AGPL 3.0' ,
@@ -192,7 +210,6 @@ const domainSuffixSet = new Set();
192210 ...HOSTS . map ( host => ` - ${ host [ 0 ] } ` ) ,
193211 ...ADGUARD_FILTERS . map ( filter => ` - ${ Array . isArray ( filter ) ? filter [ 0 ] : filter } ` )
194212 ] ;
195- const domainset = dudupedDominArray . sort ( domainSorter ) ;
196213
197214 await Promise . all ( [
198215 ...createRuleset (
@@ -204,21 +221,11 @@ const domainSuffixSet = new Set();
204221 pathResolve ( __dirname , '../List/domainset/reject.conf' ) ,
205222 pathResolve ( __dirname , '../Clash/domainset/reject.txt' )
206223 ) ,
207- fs . promises . writeFile (
208- pathResolve ( __dirname , '../List/internal/reject-stats.txt' ) ,
209- Object . entries ( rejectDomainsStats )
210- . filter ( a => a [ 1 ] > 1 )
211- . sort ( ( a , b ) => {
212- const t = b [ 1 ] - a [ 1 ] ;
213- if ( t === 0 ) {
214- return a [ 0 ] . localeCompare ( b [ 0 ] ) ;
215- }
216- return t ;
217- } )
218- . map ( ( [ domain , count ] ) => `${ domain } ${ ' ' . repeat ( 100 - domain . length ) } ${ count } ` )
219- . join ( '\n' )
224+ compareAndWriteFile (
225+ rejectDomainsStats . map ( ( [ domain , count ] ) => `${ domain } ${ ' ' . repeat ( 100 - domain . length ) } ${ count } ` ) ,
226+ pathResolve ( __dirname , '../List/internal/reject-stats.txt' )
220227 ) ,
221228 // Copy reject_sukka.conf for backward compatibility
222229 fse . copy ( pathResolve ( __dirname , '../Source/domainset/reject_sukka.conf' ) , pathResolve ( __dirname , '../List/domainset/reject_sukka.conf' ) )
223230 ] ) ;
224- } ) ( ) ;
231+ } ) ;
0 commit comments