15
15
import geoip2 .database
16
16
import urllib .request
17
17
import whois
18
- import check
19
-
20
- from xml .etree import ElementTree
21
18
22
19
import jinja2
23
- import myawis
24
20
25
21
26
22
HATE_SITES_CSV_DEFAULT_PATH = 'hate-sites.csv'
36
32
}
37
33
38
34
39
- def site_rank (site : str ) -> typing .Optional [int ]:
40
- while True :
41
- obj = myawis .CallAwis (args .aws_access_key_id , args .aws_secret_access_key )
42
- try :
43
- urlinfo = obj .urlinfo (site )
44
- break
45
- except requests .exceptions .ConnectionError :
46
- log_error (site , "AWIS connection error, trying again" )
47
- try :
48
- tree = ElementTree .fromstring (str (urlinfo ))
49
- except ElementTree .ParseError :
50
- log_error (site , "Could not retrieve rank" )
51
- return None
52
- results = tree .findall (
53
- './/aws:TrafficData/aws:Rank' ,
54
- {'aws' : "http://awis.amazonaws.com/doc/2005-07-11" }
55
- )
56
- if not results :
57
- log_error (site , 'Could not find rank' )
58
- return None
59
- rank = tree .findall (
60
- './/aws:TrafficData/aws:Rank' ,
61
- {'aws' : "http://awis.amazonaws.com/doc/2005-07-11" }
62
- )[0 ].text
63
- log_info (site , f"Found site rank: { rank } " )
64
- # TODO fetch `aws:ContributingSubdomain`
65
- return int (rank ) if rank else None
66
-
67
-
68
35
def log_info (site : str , s : str ):
69
36
logging .info (f"{ site } - { s } " )
70
37
@@ -127,14 +94,11 @@ def build_isps_data(limit=None):
127
94
if isp is None :
128
95
continue
129
96
130
- rank = site_rank (site )
131
-
132
97
hate_site_response = HateSiteLoader (domain = site ).load ()
133
98
is_site_up = isinstance (
134
99
HateSiteResponseAnalyzer (response = hate_site_response , page_string = page_string ).analyze (),
135
100
HateSiteResponseSiteUp
136
101
)
137
- print (f"site up: { is_site_up } " )
138
102
139
103
if classification != 'splc' :
140
104
classification = None
@@ -192,7 +156,7 @@ class HateSiteResponsePageStringNotFound:
192
156
pass
193
157
194
158
195
- class HateSiteReponseSiteDown (typing .NamedTuple ):
159
+ class HateSiteResponseSiteDown (typing .NamedTuple ):
196
160
status_code : typing .Optional [int ]
197
161
reason : str
198
162
@@ -201,7 +165,7 @@ class HateSiteResponseAnalyzer(typing.NamedTuple):
201
165
response : typing .Union [HateSiteResponse , HateSiteErrorResponse ]
202
166
page_string : str
203
167
204
- def analyze (self ) -> typing .Union [HateSiteResponseSiteUp , HateSiteResponsePageStringNotFound , HateSiteReponseSiteDown ]:
168
+ def analyze (self ) -> typing .Union [HateSiteResponseSiteUp , HateSiteResponsePageStringNotFound , HateSiteResponseSiteDown ]:
205
169
if isinstance (self .response , HateSiteResponse ):
206
170
if self .page_string .encode () in self .response .body :
207
171
return HateSiteResponseSiteUp ()
@@ -237,8 +201,6 @@ def render(limit=None):
237
201
238
202
if __name__ == "__main__" :
239
203
parser = argparse .ArgumentParser ()
240
- parser .add_argument ('aws_access_key_id' )
241
- parser .add_argument ('aws_secret_access_key' )
242
204
parser .add_argument ('--hate-sites-csv-path' , default = HATE_SITES_CSV_DEFAULT_PATH )
243
205
parser .add_argument ('--log' , action = 'store_true' )
244
206
parser .add_argument ('--limit' , type = int , help = 'Limit the number of sites to process' )
0 commit comments