1
- const tldts = require ( 'tldts' ) ;
1
+ // @ts -check
2
2
const { processFilterRules } = require ( './lib/parse-filter.js' ) ;
3
3
const path = require ( 'path' ) ;
4
4
const { createRuleset } = require ( './lib/create-file' ) ;
5
5
const { processLine } = require ( './lib/process-line.js' ) ;
6
- const domainSorter = require ( './lib/stable-sort-domain' ) ;
6
+ const { createDomainSorter } = require ( './lib/stable-sort-domain' ) ;
7
7
const { traceSync, task } = require ( './lib/trace-runner.js' ) ;
8
+ const Trie = require ( './lib/trie.js' ) ;
9
+ const { getGorhillPublicSuffixPromise } = require ( './lib/get-gorhill-publicsuffix.js' ) ;
10
+ const { createCachedGorhillGetDomain } = require ( './lib/cached-tld-parse.js' ) ;
11
+ const tldts = require ( 'tldts' ) ;
8
12
9
13
const WHITELIST_DOMAIN = new Set ( [
10
14
'w3s.link' ,
@@ -61,77 +65,94 @@ const BLACK_TLD = new Set([
61
65
] ) ;
62
66
63
67
const buildPhishingDomainSet = task ( __filename , async ( ) => {
64
- const domainSet = Array . from ( ( await processFilterRules (
65
- 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
66
- // [
67
- // 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
68
- // 'https://malware-filter.pages.dev/phishing-filter-agh.txt',
69
- // 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
70
- // ]
71
- ) ) . black ) ;
68
+ const [ { black : domainSet } , gorhill ] = await Promise . all ( [
69
+ processFilterRules (
70
+ 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
71
+ // [
72
+ // 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
73
+ // 'https://malware-filter.pages.dev/phishing-filter-agh.txt',
74
+ // 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
75
+ // ]
76
+ ) ,
77
+ getGorhillPublicSuffixPromise ( )
78
+ ] ) ;
79
+
80
+ traceSync ( '* whitelist' , ( ) => {
81
+ const trieForRemovingWhiteListed = Trie . from ( domainSet ) ;
82
+ WHITELIST_DOMAIN . forEach ( white => {
83
+ trieForRemovingWhiteListed . find ( `.${ white } ` , false ) . forEach ( f => domainSet . delete ( f ) ) ;
84
+ if ( trieForRemovingWhiteListed . has ( white ) ) {
85
+ domainSet . delete ( white ) ;
86
+ }
87
+ } ) ;
88
+ } ) ;
89
+
72
90
const domainCountMap = { } ;
91
+ const getDomain = createCachedGorhillGetDomain ( gorhill ) ;
73
92
74
93
traceSync ( '* process domain set' , ( ) => {
75
- for ( let i = 0 , len = domainSet . length ; i < len ; i ++ ) {
76
- const line = processLine ( domainSet [ i ] ) ;
77
- if ( ! line ) continue ;
94
+ const domainArr = Array . from ( domainSet ) ;
78
95
79
- const parsed = tldts . parse ( line , { allowPrivateDomains : true } ) ;
80
- const apexDomain = parsed . domain ;
96
+ for ( let i = 0 , len = domainArr . length ; i < len ; i ++ ) {
97
+ const line = processLine ( domainArr [ i ] ) ;
98
+ if ( ! line ) continue ;
81
99
82
- if ( apexDomain ) {
83
- if ( WHITELIST_DOMAIN . has ( apexDomain ) ) {
84
- continue ;
85
- }
100
+ const apexDomain = getDomain ( line ) ;
101
+ if ( ! apexDomain ) continue ;
86
102
87
- domainCountMap [ apexDomain ] ||= 0 ;
103
+ domainCountMap [ apexDomain ] ||= 0 ;
88
104
89
- let isPhishingDomainMockingAmazon = false ;
90
- if ( line . startsWith ( '.amaz' ) ) {
91
- domainCountMap [ apexDomain ] += 0.5 ;
105
+ const isPhishingDomainMockingCoJp = line . includes ( '-co-jp' ) ;
106
+ if ( isPhishingDomainMockingCoJp ) {
107
+ domainCountMap [ apexDomain ] += 0.5 ;
108
+ }
92
109
93
- isPhishingDomainMockingAmazon = true ;
110
+ if ( line . startsWith ( '.amaz' ) ) {
111
+ domainCountMap [ apexDomain ] += 0.5 ;
94
112
95
- if ( line . startsWith ( '.amazon-' ) ) {
96
- domainCountMap [ apexDomain ] += 4.5 ;
97
- }
98
- } else if ( line . startsWith ( '.customer' ) ) {
99
- domainCountMap [ apexDomain ] += 0.25 ;
113
+ if ( line . startsWith ( '.amazon-' ) ) {
114
+ domainCountMap [ apexDomain ] += 4.5 ;
100
115
}
101
- if ( line . includes ( '-co-jp' ) ) {
102
- domainCountMap [ apexDomain ] += ( isPhishingDomainMockingAmazon ? 4.5 : 0.5 ) ;
116
+ if ( isPhishingDomainMockingCoJp ) {
117
+ domainCountMap [ apexDomain ] += 4 ;
103
118
}
119
+ } else if ( line . startsWith ( '.customer' ) ) {
120
+ domainCountMap [ apexDomain ] += 0.25 ;
121
+ }
104
122
105
- const tld = parsed . publicSuffix ;
106
- if ( ! tld || ! BLACK_TLD . has ( tld ) ) continue ;
123
+ const tld = gorhill . getPublicSuffix ( line [ 0 ] === '.' ? line . slice ( 1 ) : line ) ;
124
+ if ( ! tld || ! BLACK_TLD . has ( tld ) ) continue ;
107
125
108
- domainCountMap [ apexDomain ] += 1 ;
126
+ domainCountMap [ apexDomain ] += 1 ;
109
127
110
- if ( line . length > 19 ) {
111
- // Add more weight if the domain is long enough
112
- if ( line . length > 44 ) {
113
- domainCountMap [ apexDomain ] += 3.5 ;
114
- } else if ( line . length > 34 ) {
115
- domainCountMap [ apexDomain ] += 2.5 ;
116
- } else if ( line . length > 29 ) {
117
- domainCountMap [ apexDomain ] += 1.5 ;
118
- } else if ( line . length > 24 ) {
119
- domainCountMap [ apexDomain ] += 0.75 ;
120
- } else if ( line . length > 19 ) {
121
- domainCountMap [ apexDomain ] += 0.25 ;
122
- }
128
+ const lineLen = line . length ;
123
129
124
- if ( domainCountMap [ apexDomain ] < 5 ) {
125
- const subdomain = parsed . subdomain ;
126
- if ( subdomain ?. includes ( '.' ) ) {
127
- domainCountMap [ apexDomain ] += 1.5 ;
128
- }
130
+ if ( lineLen > 19 ) {
131
+ // Add more weight if the domain is long enough
132
+ if ( lineLen > 44 ) {
133
+ domainCountMap [ apexDomain ] += 3.5 ;
134
+ } else if ( lineLen > 34 ) {
135
+ domainCountMap [ apexDomain ] += 2.5 ;
136
+ } else if ( lineLen > 29 ) {
137
+ domainCountMap [ apexDomain ] += 1.5 ;
138
+ } else if ( lineLen > 24 ) {
139
+ domainCountMap [ apexDomain ] += 0.75 ;
140
+ } else {
141
+ domainCountMap [ apexDomain ] += 0.25 ;
142
+ }
143
+
144
+ if ( domainCountMap [ apexDomain ] < 5 ) {
145
+ const subdomain = tldts . getSubdomain ( line ) ;
146
+ if ( subdomain ?. includes ( '.' ) ) {
147
+ domainCountMap [ apexDomain ] += 1.5 ;
129
148
}
130
149
}
131
150
}
132
151
}
133
152
} ) ;
134
153
154
+ const domainSorter = createDomainSorter ( gorhill ) ;
155
+
135
156
const results = traceSync ( '* get final results' , ( ) => Object . entries ( domainCountMap )
136
157
. reduce ( ( acc , [ apexDomain , count ] ) => {
137
158
if ( count >= 5 ) {
@@ -151,7 +172,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
151
172
' - https://gitlab.com/malware-filter/phishing-filter'
152
173
] ;
153
174
154
- await Promise . all ( createRuleset (
175
+ return Promise . all ( createRuleset (
155
176
'Sukka\'s Ruleset - Reject Phishing' ,
156
177
description ,
157
178
new Date ( ) ,
0 commit comments