15
15
import geoip2 .database
16
16
import urllib .request
17
17
import whois
18
+ import check
18
19
19
20
from xml .etree import ElementTree
20
21
@@ -121,17 +122,24 @@ def sites(limit=None) -> [[str, str, str]]:
121
122
def build_isps_data(limit=None):
    """Group the known sites by ISP and record whether each site is up.

    For every entry yielded by ``sites(limit=limit)`` the site's ISP is
    looked up; sites without an ISP are skipped.  Each remaining site is
    fetched and analyzed, and a ``[masked_site, is_site_up, classification]``
    entry is appended under its ISP.  ``classification`` is kept only when it
    equals ``'splc'``; any other value is replaced with ``None``.

    Args:
        limit: Passed through unchanged to ``sites``.

    Returns:
        A list of ``(isp, entries)`` pairs sorted by the number of entries,
        largest first.
    """
    isps = collections.defaultdict(list)

    for site, classification, _, page_string in sites(limit=limit):
        isp = site_isp(site)
        if isp is None:
            continue

        # NOTE(review): the rank is no longer used now that entries carry
        # is_site_up instead of rank_to_color(rank).  The call is kept in case
        # site_rank has side effects (e.g. caching) -- confirm and remove.
        site_rank(site)

        hate_site_response = HateSiteLoader(domain=site).load()
        analysis = HateSiteResponseAnalyzer(
            response=hate_site_response,
            page_string=page_string,
        ).analyze()
        # Only a SiteUp verdict counts as "up"; a missing page string or a
        # failed request both count as not up.
        is_site_up = isinstance(analysis, HateSiteResponseSiteUp)
        print(f"site up: {is_site_up}")

        if classification != 'splc':
            classification = None

        isps[isp].append([mask_site(site), is_site_up, classification])

    return sorted(isps.items(), key=lambda x: len(x[1]), reverse=True)
137
145
@@ -143,23 +151,68 @@ def mask_site(site: str) -> str:
143
151
return f"{ site [0 ]} { asterisks } .{ domain } "
144
152
145
153
146
def rank_to_color(rank: typing.Optional[int]) -> str:
    """Map a site rank to a hex color; lower ranks get a brighter yellow.

    A missing rank (``None``) or a rank of ``0`` gets the neutral gray, as
    does any rank of 10,000,000 or more.
    """
    fallback = '#b3b3b3'
    if not rank:
        return fallback

    # Exclusive upper bounds in ascending order; the first match wins.
    tiers = (
        (10_000, '#fff600'),
        (100_000, '#d7d45d'),
        (1_000_000, '#c9c77f'),
        (10_000_000, '#bcbc9d'),
    )
    for ceiling, color in tiers:
        if rank < ceiling:
            return color
    return fallback
157
-
158
-
159
154
def todays_date() -> str:
    """Return the current local date formatted like ``"January 05, 2021"``."""
    today = datetime.datetime.now()
    return f"{today:%B %d, %Y}"
161
156
162
157
158
# Desktop Chrome User-Agent string used for outgoing requests.
CHROME_USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/86.0.4240.75 Safari/537.36'
)
# Header mapping handed to urllib.request.Request.
REQUEST_HEADERS = {'User-Agent': CHROME_USER_AGENT}
160
+
161
+
162
class HateSiteErrorResponse(typing.NamedTuple):
    """Outcome of a request that failed instead of returning a page."""

    # Text describing why the request failed.
    reason: str
    # HTTP status code of the failure, or None when there is none.
    status_code: typing.Optional[int]
165
+
166
+
167
class HateSiteResponse(typing.NamedTuple):
    """Outcome of a request that returned a page."""

    # Raw response body.
    body: bytes
    # HTTP status code of the response.
    status_code: int
170
+
171
+
172
class HateSiteLoader(typing.NamedTuple):
    """Fetches the front page of a domain over plain HTTP."""

    # Bare domain name; 'http://' is prepended to build the URL.
    domain: str

    def load(self, timeout: typing.Optional[float] = 30.0) -> typing.Union[HateSiteResponse, HateSiteErrorResponse]:
        """Request ``http://<domain>`` and report the outcome as a value.

        Args:
            timeout: Seconds to wait on blocking socket operations before
                giving up.  ``None`` waits indefinitely.

        Returns:
            ``HateSiteResponse`` with the body and status on success.
            ``HateSiteErrorResponse`` on failure: with the HTTP status code
            for HTTP errors, or with ``status_code=None`` for URL errors,
            timeouts, connection failures and malformed HTTP responses.
        """
        # Local import: the module is only known to import urllib.request.
        import http.client

        url = 'http://' + self.domain
        request = urllib.request.Request(url, headers=REQUEST_HEADERS)
        try:
            # The context manager closes the connection even if read() fails;
            # read() is inside the try because it can raise mid-transfer.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                return HateSiteResponse(body=response.read(), status_code=response.status)
        except urllib.error.HTTPError as error:
            # Must precede URLError: HTTPError is a subclass of it.
            return HateSiteErrorResponse(reason=str(error.reason), status_code=error.code)
        except urllib.error.URLError as error:
            return HateSiteErrorResponse(reason=str(error.reason), status_code=None)
        except (OSError, http.client.HTTPException) as error:
            # Timeouts / resets while reading and broken HTTP framing are not
            # wrapped in URLError; report them instead of aborting the caller.
            return HateSiteErrorResponse(reason=str(error) or type(error).__name__, status_code=None)
185
+
186
+
187
class HateSiteResponseSiteUp:
    """Marker verdict: the page loaded and contained the expected string."""
189
+
190
+
191
class HateSiteResponsePageStringNotFound:
    """Marker verdict: the page loaded but lacked the expected string."""
193
+
194
+
195
class HateSiteResponseSiteDown(typing.NamedTuple):
    """Verdict: the request failed, so the site is treated as down."""

    # HTTP status code of the failure, or None when there is none.
    status_code: typing.Optional[int]
    # Failure reason copied from the error response.
    reason: str


# Backward-compatible alias: the class was originally declared under this
# misspelled name while analyze() instantiated the correctly spelled one,
# which raised NameError for every failed request.
HateSiteReponseSiteDown = HateSiteResponseSiteDown


class HateSiteResponseAnalyzer(typing.NamedTuple):
    """Turns a loader result into an up / string-missing / down verdict."""

    # Result produced by HateSiteLoader.load().
    response: typing.Union[HateSiteResponse, HateSiteErrorResponse]
    # Text expected to appear in the page body when the site is up.
    page_string: str

    def analyze(self) -> typing.Union[HateSiteResponseSiteUp, HateSiteResponsePageStringNotFound, HateSiteResponseSiteDown]:
        """Classify ``response``.

        Returns:
            ``HateSiteResponseSiteUp`` when a page was returned and its body
            contains ``page_string`` (encoded with the default codec);
            ``HateSiteResponsePageStringNotFound`` when a page was returned
            without it; ``HateSiteResponseSiteDown`` for an error response.
        """
        if isinstance(self.response, HateSiteResponse):
            if self.page_string.encode() in self.response.body:
                return HateSiteResponseSiteUp()
            return HateSiteResponsePageStringNotFound()

        # A falsy status code (None or 0) is normalized to None.
        return HateSiteResponseSiteDown(
            status_code=self.response.status_code or None,
            reason=self.response.reason,
        )
214
+
215
+
163
216
def render (limit = None ):
164
217
env = jinja2 .Environment (
165
218
loader = jinja2 .FileSystemLoader ('templates' ),
0 commit comments