# ghost_scrape.py
# Forked from iusondemand/open-notice-consent-tracker.
import urllib
import re
import scraperwiki
import lxml.html
import sys
# Matches a plain e-mail address such as "privacy@example.com".
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}")

# Ghostery application pages that getDetails() scrapes by default.
_GHOSTERY_APP_URLS = [
    'http://www.ghostery.com/apps/yahoo_analytics',
    'http://www.ghostery.com/apps/%5Bx+1%5D',
    'http://www.ghostery.com/apps/24%2F7_media_ad_network',
    'http://www.ghostery.com/apps/2leep',
    'http://www.ghostery.com/apps/33across',
    'http://www.ghostery.com/apps/4w_marketplace',
    'http://www.ghostery.com/apps/5min_media',
    'http://www.ghostery.com/apps/accelerator_media',
    'http://www.ghostery.com/apps/accesstrade',
    'http://www.ghostery.com/apps/accuen_media',
    'http://www.ghostery.com/apps/act-on_beacon',
    'http://www.ghostery.com/apps/acuity_ads',
    'http://www.ghostery.com/apps/acxiom',
    'http://www.ghostery.com/apps/ad_decisive',
    'http://www.ghostery.com/apps/ad_spirit',
    'http://www.ghostery.com/apps/ad360',
    'http://www.ghostery.com/apps/ad4game',
    'http://www.ghostery.com/apps/adaction',
    'http://www.ghostery.com/apps/adaptiveblue_smartlinks',
    'http://www.ghostery.com/apps/adara_media',
    'http://www.ghostery.com/apps/adblade',
    'http://www.ghostery.com/apps/adbrite_notice',
    'http://www.ghostery.com/apps/adbuyer.com',
    'http://www.ghostery.com/apps/adcash',
    'http://www.ghostery.com/apps/adclickmedia',
    'http://www.ghostery.com/apps/adconion',
    'http://www.ghostery.com/apps/addthis',
    'http://www.ghostery.com/apps/adecn',
    'http://www.ghostery.com/apps/adengage',
    'http://www.ghostery.com/apps/adf.ly',
    'http://www.ghostery.com/apps/adform',
    'http://www.ghostery.com/apps/adfox',
    'http://www.ghostery.com/apps/adfunky',
    'http://www.ghostery.com/apps/adfusion',
    'http://www.ghostery.com/apps/adgear',
    'http://www.ghostery.com/apps/adgoal',
    'http://www.ghostery.com/apps/adhitz',
    'http://www.ghostery.com/apps/adhood',
    'http://www.ghostery.com/apps/adify',
    'http://www.ghostery.com/apps/adinterax',
    'http://www.ghostery.com/apps/adition',
    'http://www.ghostery.com/apps/adjug',
    'http://www.ghostery.com/apps/adjuggler',
    'http://www.ghostery.com/apps/adk2',
    'http://www.ghostery.com/apps/adknowledge',
    'http://www.ghostery.com/apps/adloox',
    'http://www.ghostery.com/apps/admarvel',
    'http://www.ghostery.com/apps/admaster',
    'http://www.ghostery.com/apps/admeld',
    'http://www.ghostery.com/apps/admob',
    'http://www.ghostery.com/apps/adnet',
    'http://www.ghostery.com/apps/adnet_media',
    'http://www.ghostery.com/apps/adnologies',
    'http://www.ghostery.com/apps/adobe_tag_container',
    'http://www.ghostery.com/apps/adobe_test_%26_target',
    'http://www.ghostery.com/apps/adometry',
    'http://www.ghostery.com/apps/adon_network',
    'http://www.ghostery.com/apps/adoperator',
    'http://www.ghostery.com/apps/adorika',
    'http://www.ghostery.com/apps/adotube',
    'http://www.ghostery.com/apps/adperium',
    'http://www.ghostery.com/apps/adpredictive',
    'http://www.ghostery.com/apps/adreactor',
    'http://www.ghostery.com/apps/adrecord',
    'http://www.ghostery.com/apps/adriver',
    'http://www.ghostery.com/apps/adroit_digital_solutions',
    'http://www.ghostery.com/apps/adrolays',
    'http://www.ghostery.com/apps/adroll',
    'http://www.ghostery.com/apps/adsensecamp',
    'http://www.ghostery.com/apps/adserverpub',
    'http://www.ghostery.com/apps/adshuffle',
    'http://www.ghostery.com/apps/adside',
    'http://www.ghostery.com/apps/adsupply',
    'http://www.ghostery.com/apps/adswizz',
    'http://www.ghostery.com/apps/adtech',
    'http://www.ghostery.com/apps/adult_webmaster_empire',
    'http://www.ghostery.com/apps/adultadworld',
    'http://www.ghostery.com/apps/advanse',
    'http://www.ghostery.com/apps/advert_stream',
    'http://www.ghostery.com/apps/advertpro',
    'http://www.ghostery.com/apps/adxpansion',
    'http://www.ghostery.com/apps/adzerk',
    'http://www.ghostery.com/apps/affectv',
    'http://www.ghostery.com/apps/affiliator',
    'http://www.ghostery.com/apps/afterdownload',
    'http://www.ghostery.com/apps/aggregate_knowledge',
    'http://www.ghostery.com/apps/akavita',
    'http://www.ghostery.com/apps/al_bawaba_advertising',
    'http://www.ghostery.com/apps/alexa_metrics',
    'http://www.ghostery.com/apps/alexa_traffic_rank',
    'http://www.ghostery.com/apps/alexa_widget',
    'http://www.ghostery.com/apps/allure_media',
    'http://www.ghostery.com/apps/allyes',
    'http://www.ghostery.com/apps/almondnet',
    'http://www.ghostery.com/apps/ambient_digital',
    'http://www.ghostery.com/apps/amp_platform',
    'http://www.ghostery.com/apps/aol_oba_notice',
    'http://www.ghostery.com/apps/appnexus',
    'http://www.ghostery.com/apps/at_internet',
    'http://www.ghostery.com/apps/atg_optimization',
    'http://www.ghostery.com/apps/atg_recommendations',
    'http://www.ghostery.com/apps/atlas',
    'http://www.ghostery.com/apps/atlas_profitbuilder',
    'http://www.ghostery.com/apps/audience_science',
    'http://www.ghostery.com/apps/auditude',
    'http://www.ghostery.com/apps/avalanchers',
    'http://www.ghostery.com/apps/avazu_network',
    'http://www.ghostery.com/apps/awstats',
    'http://www.ghostery.com/apps/banner_connect',
    'http://www.ghostery.com/apps/barilliance',
    'http://www.ghostery.com/apps/baynote_observer',
    'http://www.ghostery.com/apps/beencounter',
    'http://www.ghostery.com/apps/begun',
    'http://www.ghostery.com/apps/bigmir',
    'http://www.ghostery.com/apps/binlayer',
    'http://www.ghostery.com/apps/bitcoin_miner',
    'http://www.ghostery.com/apps/bizo',
    'http://www.ghostery.com/apps/blink_new_media',
    'http://www.ghostery.com/apps/blog_rating_tracker',
    'http://www.ghostery.com/apps/blogbang',
    'http://www.ghostery.com/apps/blogcounter',
    'http://www.ghostery.com/apps/bloomreach',
    'http://www.ghostery.com/apps/bluecava',
    'http://www.ghostery.com/apps/bluekai',
    'http://www.ghostery.com/apps/bluelithium',
    'http://www.ghostery.com/apps/bounce_exchange',
    'http://www.ghostery.com/apps/brainient',
    'http://www.ghostery.com/apps/brand_affinity',
    'http://www.ghostery.com/apps/brandreach',
    'http://www.ghostery.com/apps/brandscreen',
    'http://www.ghostery.com/apps/bridgetrack',
    'http://www.ghostery.com/apps/brightcove',
    'http://www.ghostery.com/apps/brighttag',
    'http://www.ghostery.com/apps/brilig',
    'http://www.ghostery.com/apps/browser_update',
    'http://www.ghostery.com/apps/buddy_media',
    'http://www.ghostery.com/apps/buysellads',
    'http://www.ghostery.com/apps/buzzfeed',
    'http://www.ghostery.com/apps/buzzparadise',
    'http://www.ghostery.com/apps/c3_metrics',
    'http://www.ghostery.com/apps/cadreon',
    'http://www.ghostery.com/apps/casale_media',
    'http://www.ghostery.com/apps/cbs_interactive',
    'http://www.ghostery.com/apps/cedexis_radar',
    'http://www.ghostery.com/apps/centro',
    'http://www.ghostery.com/apps/cerberus_speed-trap',
    'http://www.ghostery.com/apps/certifica_metric',
    'http://www.ghostery.com/apps/certona',
    'http://www.ghostery.com/apps/chango',
    'http://www.ghostery.com/apps/channelfinder',
    'http://www.ghostery.com/apps/chartbeat',
    'http://www.ghostery.com/apps/chitika_notice',
    'http://www.ghostery.com/apps/choicestream',
    'http://www.ghostery.com/apps/clarityray',
    'http://www.ghostery.com/apps/clearsaleing',
    'http://www.ghostery.com/apps/clickability_beacon',
    'http://www.ghostery.com/apps/clickequations',
    'http://www.ghostery.com/apps/clicktale',
    'http://www.ghostery.com/apps/clicky',
    'http://www.ghostery.com/apps/clicmanager',
    'http://www.ghostery.com/apps/clixmetrix',
    'http://www.ghostery.com/apps/clixpy',
    'http://www.ghostery.com/apps/clove_network',
    'http://www.ghostery.com/apps/cnzz',
    'http://www.ghostery.com/apps/cobalt_group',
    'http://www.ghostery.com/apps/comm100',
    'http://www.ghostery.com/apps/commission_junction',
    'http://www.ghostery.com/apps/competexl',
    'http://www.ghostery.com/apps/conduit',
    'http://www.ghostery.com/apps/connexity',
    'http://www.ghostery.com/apps/content.ad',
    'http://www.ghostery.com/apps/convert_platform',
    'http://www.ghostery.com/apps/convertro',
    'http://www.ghostery.com/apps/conviva',
    'http://www.ghostery.com/apps/cookieq',
    'http://www.ghostery.com/apps/coremetrics',
    'http://www.ghostery.com/apps/cpa_detective',
    'http://www.ghostery.com/apps/cpmprofit',
    'http://www.ghostery.com/apps/cpx_interactive',
    'http://www.ghostery.com/apps/cq_counter',
    'http://www.ghostery.com/apps/crakmedia_network',
    'http://www.ghostery.com/apps/crazy_egg',
    'http://www.ghostery.com/apps/crimtan',
    'http://www.ghostery.com/apps/criteo',
    'http://www.ghostery.com/apps/crmmetrix',
    'http://www.ghostery.com/apps/cross_pixel_media',
    'http://www.ghostery.com/apps/crowd_ignite',
    'http://www.ghostery.com/apps/cxense',
    'http://www.ghostery.com/apps/daisycon',
    'http://www.ghostery.com/apps/dataium',
    'http://www.ghostery.com/apps/datapoint_media',
    'http://www.ghostery.com/apps/dataxu',
    'http://www.ghostery.com/apps/dc_stormiq',
    'http://www.ghostery.com/apps/degaa_analytics',
    'http://www.ghostery.com/apps/demandbase',
    'http://www.ghostery.com/apps/demandware_analytics',
    'http://www.ghostery.com/apps/didit_blizzard',
    'http://www.ghostery.com/apps/didit_maestro',
    'http://www.ghostery.com/apps/diggthis',
    'http://www.ghostery.com/apps/direct/advert',
    'http://www.ghostery.com/apps/direct_response_group',
    'http://www.ghostery.com/apps/disqus',
    'http://www.ghostery.com/apps/diva',
    'http://www.ghostery.com/apps/domodomain',
    'http://www.ghostery.com/apps/dotomi',
    'http://www.ghostery.com/apps/dotomi_notice',
    'http://www.ghostery.com/apps/doubleclick',
    'http://www.ghostery.com/apps/doubleclick_bid_manager',
    'http://www.ghostery.com/apps/doubleclick_dart',
    'http://www.ghostery.com/apps/doubleclick_floodlight',
    'http://www.ghostery.com/apps/doubleclick_spotlight',
    'http://www.ghostery.com/apps/doublepimp',
    'http://www.ghostery.com/apps/doubleverify',
    'http://www.ghostery.com/apps/doubleverify_notice',
    'http://www.ghostery.com/apps/dsmm_advantage',
    'http://www.ghostery.com/apps/duckduckgo',
    # The next two URLs were fused into a single string in the original list.
    'http://www.ghostery.com/apps/dynamic_yield',
    'http://www.ghostery.com/apps/facebook_connect',
    'http://www.ghostery.com/apps/facebook_conversion_tracking',
    'http://www.ghostery.com/apps/facebook_exchange_(fbx)',
    'http://www.ghostery.com/apps/facebook_social_plugins',
    'http://www.ghostery.com/apps/feedbackify',
    'http://www.ghostery.com/apps/feedjit',
    'http://www.ghostery.com/apps/fetchback_notice',
    'http://www.ghostery.com/apps/financial_content',
    'http://www.ghostery.com/apps/flag_counter',
    'http://www.ghostery.com/apps/flashtalking',
    'http://www.ghostery.com/apps/flux',
    'http://www.ghostery.com/apps/foodie_blogroll',
    'http://www.ghostery.com/apps/foresee',
    'http://www.ghostery.com/apps/foursquare_widget',
    'http://www.ghostery.com/apps/free_online_users',
    'http://www.ghostery.com/apps/freewheel',
    'http://www.ghostery.com/apps/freexmedia',
    'http://www.ghostery.com/apps/friendfeed',
    'http://www.ghostery.com/apps/fruitflan',
    'http://www.ghostery.com/apps/functionaltrends',
    'http://www.ghostery.com/apps/gdn_notice',
    'http://www.ghostery.com/apps/gemius',
    'http://www.ghostery.com/apps/generic_social_sharing_widgets',
    'http://www.ghostery.com/apps/geoads',
    'http://www.ghostery.com/apps/geovisite',
    'http://www.ghostery.com/apps/getsatisfaction',
    'http://www.ghostery.com/apps/gigya_beacon',
    'http://www.ghostery.com/apps/gigya_social_analytics',
    'http://www.ghostery.com/apps/gigya_socialize',
    'http://www.ghostery.com/apps/github_ribbon',
    'http://www.ghostery.com/apps/gittip',
    'http://www.ghostery.com/apps/glam_media',
    'http://www.ghostery.com/apps/godaddy_site_analytics',
    'http://www.ghostery.com/apps/gomez',
    'http://www.ghostery.com/apps/goodadvert',
    'http://www.ghostery.com/apps/google_%2B1',
    'http://www.ghostery.com/apps/google_adsense',
    'http://www.ghostery.com/apps/google_adwords_conversion',
    'http://www.ghostery.com/apps/google_affiliate_network',
    'http://www.ghostery.com/apps/google_ajax_search_api',
    'http://www.ghostery.com/apps/google_analytics',
    'http://www.ghostery.com/apps/google_custom_search_engine',
    'http://www.ghostery.com/apps/google_friendconnect',
    'http://www.ghostery.com/apps/google_jsapi_stats_collection',
    'http://www.ghostery.com/apps/google_tag_manager',
    'http://www.ghostery.com/apps/google_widgets',
    'http://www.ghostery.com/apps/goooal',
    'http://www.ghostery.com/apps/gosquared',
    'http://www.ghostery.com/apps/gravatar',
    'http://www.ghostery.com/apps/gravity_insights',
    'http://www.ghostery.com/apps/groupm_server',
    'http://www.ghostery.com/apps/gtop',
    'http://www.ghostery.com/apps/hiconversion',
    'http://www.ghostery.com/apps/hitslink',
    'http://www.ghostery.com/apps/hitsniffer',
    'http://www.ghostery.com/apps/hooklogic',
    'http://www.ghostery.com/apps/hotlog',
    'http://www.ghostery.com/apps/hottraffic',
    'http://www.ghostery.com/apps/hubspot_websitegrader',
    'http://www.ghostery.com/apps/hurra_tracker',
    'http://www.ghostery.com/apps/i.ua',
    'http://www.ghostery.com/apps/iadvize',
    'http://www.ghostery.com/apps/i-behavior',
    'http://www.ghostery.com/apps/icerocket_tracker',
    'http://www.ghostery.com/apps/ientry',
    'http://www.ghostery.com/apps/image_space_media',
    'http://www.ghostery.com/apps/impact',
    'http://www.ghostery.com/apps/impact_radius',
    'http://www.ghostery.com/apps/impulse',
    'http://www.ghostery.com/apps/infolinks',
    'http://www.ghostery.com/apps/infonline',
    'http://www.ghostery.com/apps/innity',
    'http://www.ghostery.com/apps/innometrics',
    'http://www.ghostery.com/apps/inpref',
    'http://www.ghostery.com/apps/insightexpress',
    'http://www.ghostery.com/apps/inspectlet',
    'http://www.ghostery.com/apps/intense_debate',
    'http://www.ghostery.com/apps/intent_media',
    'http://www.ghostery.com/apps/intercom',
    'http://www.ghostery.com/apps/iperceptions',
    'http://www.ghostery.com/apps/iwiw_widgets',
    'http://www.ghostery.com/apps/jetpack_digital',
    'http://www.ghostery.com/apps/juicyads',
    'http://www.ghostery.com/apps/just_relevant',
    'http://www.ghostery.com/apps/kenshoo',
    'http://www.ghostery.com/apps/kissmetrics',
    'http://www.ghostery.com/apps/kitara_media',
    'http://www.ghostery.com/apps/klikki',
    'http://www.ghostery.com/apps/knowledge_networks',
    'http://www.ghostery.com/apps/komli_atom',
    'http://www.ghostery.com/apps/kontera_contentlink',
    'http://www.ghostery.com/apps/krux_digital',
    'http://www.ghostery.com/apps/leadforensics',
    'http://www.ghostery.com/apps/leadformix',
    'http://www.ghostery.com/apps/leadlander',
    'http://www.ghostery.com/apps/legolas_media',
    'http://www.ghostery.com/apps/lifestreet_media',
    'http://www.ghostery.com/apps/lijit',
    'http://www.ghostery.com/apps/linkedin_widgets',
    'http://www.ghostery.com/apps/linksalpha',
    'http://www.ghostery.com/apps/linksmart',
    'http://www.ghostery.com/apps/linkz',
    'http://www.ghostery.com/apps/liveball',
    'http://www.ghostery.com/apps/livefyre',
    'http://www.ghostery.com/apps/liveinternet',
    'http://www.ghostery.com/apps/liveperson',
    'http://www.ghostery.com/apps/liverail',
    'http://www.ghostery.com/apps/lockerz_share',
    'http://www.ghostery.com/apps/longtail_video_analytics',
    'http://www.ghostery.com/apps/lotame',
    'http://www.ghostery.com/apps/lucky_orange',
    'http://www.ghostery.com/apps/lxr100',
    'http://www.ghostery.com/apps/magnetic',
    'http://www.ghostery.com/apps/magnetise_group',
    'http://www.ghostery.com/apps/magnify_stats',
    'http://www.ghostery.com/apps/mail.ru_group',
    'http://www.ghostery.com/apps/mailchimp_tracking',
    'http://www.ghostery.com/apps/marin_search_marketer',
    'http://www.ghostery.com/apps/marketgid',
    'http://www.ghostery.com/apps/marketo',
    'http://www.ghostery.com/apps/matchbin',
    'http://www.ghostery.com/apps/matomy_market',
    'http://www.ghostery.com/apps/maxpoint_interactive',
    'http://www.ghostery.com/apps/media_innovation_group',
    'http://www.ghostery.com/apps/media_optimizer_(adobe)',
    'http://www.ghostery.com/apps/media6degrees',
    'http://www.ghostery.com/apps/mediaforge',
    'http://www.ghostery.com/apps/mediagra',
    'http://www.ghostery.com/apps/mediahub',
    'http://www.ghostery.com/apps/mediamath',
    'http://www.ghostery.com/apps/mediamind',
    'http://www.ghostery.com/apps/median',
    'http://www.ghostery.com/apps/meebo_bar',
    'http://www.ghostery.com/apps/meetrics',
    'http://www.ghostery.com/apps/megaindex',
    'http://www.ghostery.com/apps/mercent',
    'http://www.ghostery.com/apps/merchenta',
    'http://www.ghostery.com/apps/metrigo',
    'http://www.ghostery.com/apps/microsoft_analytics',
    'http://www.ghostery.com/apps/microsoft_notice',
    'http://www.ghostery.com/apps/mint',
    'http://www.ghostery.com/apps/mirando',
    'http://www.ghostery.com/apps/mixpanel',
    'http://www.ghostery.com/apps/mln_advertising',
    'http://www.ghostery.com/apps/moat',
    'http://www.ghostery.com/apps/mobile_theory',
    'http://www.ghostery.com/apps/modernus',
    'http://www.ghostery.com/apps/mokono_analytics',
    'http://www.ghostery.com/apps/monetate',
    'http://www.ghostery.com/apps/moonray_autopilot',
    'http://www.ghostery.com/apps/motigo_webstats',
    'http://www.ghostery.com/apps/mouseflow',
    'http://www.ghostery.com/apps/mybloglog',
    'http://www.ghostery.com/apps/mybuys',
    'http://www.ghostery.com/apps/mypagerank',
    'http://www.ghostery.com/apps/mytop_counter',
    'http://www.ghostery.com/apps/nanigans',
    'http://www.ghostery.com/apps/navegg',
    'http://www.ghostery.com/apps/ndn_analytics',
    'http://www.ghostery.com/apps/nebuad',
    'http://www.ghostery.com/apps/netbina',
    'http://www.ghostery.com/apps/netmining',
    'http://www.ghostery.com/apps/netmonitor',
    'http://www.ghostery.com/apps/netratings_sitecensus',
    'http://www.ghostery.com/apps/netupdater',
    'http://www.ghostery.com/apps/new_relic',
    'http://www.ghostery.com/apps/nexage',
    'http://www.ghostery.com/apps/ninja_access_analysis',
    'http://www.ghostery.com/apps/nooked',
    'http://www.ghostery.com/apps/nugg.ad',
    'http://www.ghostery.com/apps/observer',
    'http://www.ghostery.com/apps/olark',
    'http://www.ghostery.com/apps/omniture_(adobe_analytics)',
    'http://www.ghostery.com/apps/on_%7C_ad',
    'http://www.ghostery.com/apps/onetruefan',
    'http://www.ghostery.com/apps/onlinewebstat',
    'http://www.ghostery.com/apps/openx',
    'http://www.ghostery.com/apps/operative_media',
    'http://www.ghostery.com/apps/optify',
    'http://www.ghostery.com/apps/optim.al',
    'http://www.ghostery.com/apps/optimax_media_delivery',
    'http://www.ghostery.com/apps/optimizely',
    'http://www.ghostery.com/apps/outbrain',
    'http://www.ghostery.com/apps/owa',
    'http://www.ghostery.com/apps/oxamedia',
    'http://www.ghostery.com/apps/payclick',
    'http://www.ghostery.com/apps/peerset',
    'http://www.ghostery.com/apps/percentmobile',
    'http://www.ghostery.com/apps/performable',
    'http://www.ghostery.com/apps/persianstat',
    'http://www.ghostery.com/apps/pheedo',
    'http://www.ghostery.com/apps/phonalytics',
    'http://www.ghostery.com/apps/pinterest',
    'http://www.ghostery.com/apps/pirchio',
    'http://www.ghostery.com/apps/piwik_analytics',
    'http://www.ghostery.com/apps/pixfuture',
    'http://www.ghostery.com/apps/platform161',
    'http://www.ghostery.com/apps/plista',
    'http://www.ghostery.com/apps/plugrush',
    'http://www.ghostery.com/apps/pointroll',
    'http://www.ghostery.com/apps/polldaddy',
    'http://www.ghostery.com/apps/press+',
    'http://www.ghostery.com/apps/propeller_ads',
    'http://www.ghostery.com/apps/publicidad.net',
    'http://www.ghostery.com/apps/pulse360_notice',
    'http://www.ghostery.com/apps/punchtab',
    'http://www.ghostery.com/apps/qcri_analytics',
    'http://www.ghostery.com/apps/quantcast',
    'http://www.ghostery.com/apps/quantcast_notice',
    'http://www.ghostery.com/apps/qubit_opentag',
    'http://www.ghostery.com/apps/quisma',
    'http://www.ghostery.com/apps/radarstats',
    'http://www.ghostery.com/apps/radarurl',
    'http://www.ghostery.com/apps/radiumone',
    # NOTE(review): the slug below looks like it was replaced by an e-mail
    # obfuscation placeholder; the real app slug must be restored by hand.
    'http://www.ghostery.com/apps/[email protected]',
    'http://www.ghostery.com/apps/reddit',
    'http://www.ghostery.com/apps/redux_media',
    'http://www.ghostery.com/apps/redvertisment',
    'http://www.ghostery.com/apps/referlocal',
    'http://www.ghostery.com/apps/reklam_store',
    'http://www.ghostery.com/apps/reklamz',
    'http://www.ghostery.com/apps/repost.us',
    'http://www.ghostery.com/apps/reputationmanager',
    'http://www.ghostery.com/apps/resonate_networks',
    'http://www.ghostery.com/apps/retargeter_beacon',
    'http://www.ghostery.com/apps/revenuehits',
    'http://www.ghostery.com/apps/revolver_maps',
    'http://www.ghostery.com/apps/rich',
    'http://www.ghostery.com/apps/rich_media_banner_network',
    'http://www.ghostery.com/apps/richrelevance',
    'http://www.ghostery.com/apps/right_media',
    'http://www.ghostery.com/apps/rocket_fuel',
    'http://www.ghostery.com/apps/rubicon',
    'http://www.ghostery.com/apps/salesforce_live_agent',
    'http://www.ghostery.com/apps/salesfusion',
    'http://www.ghostery.com/apps/say_media',
    'http://www.ghostery.com/apps/scorecard_research_beacon',
    'http://www.ghostery.com/apps/scribol',
    'http://www.ghostery.com/apps/securedvisit',
    'http://www.ghostery.com/apps/segment.io',
    'http://www.ghostery.com/apps/sekindo',
    'http://www.ghostery.com/apps/semasio',
    'http://www.ghostery.com/apps/sexadnetwork',
    'http://www.ghostery.com/apps/sextracker',
    'http://www.ghostery.com/apps/shareaholic',
    'http://www.ghostery.com/apps/sharethis',
    'http://www.ghostery.com/apps/silverpop',
    'http://www.ghostery.com/apps/simplereach',
    'http://www.ghostery.com/apps/simpli.fi',
    'http://www.ghostery.com/apps/singlefeed',
    'http://www.ghostery.com/apps/sitemeter',
    'http://www.ghostery.com/apps/sitescout',
    'http://www.ghostery.com/apps/skylines',
    'http://www.ghostery.com/apps/slice_factory',
    'http://www.ghostery.com/apps/smart_adserver',
    'http://www.ghostery.com/apps/smartcontext',
    'http://www.ghostery.com/apps/sociomantic',
    'http://www.ghostery.com/apps/soundcloud',
    'http://www.ghostery.com/apps/specificclick',
    'http://www.ghostery.com/apps/speedyads',
    'http://www.ghostery.com/apps/spot200',
    'http://www.ghostery.com/apps/spotify_embed',
    'http://www.ghostery.com/apps/stat24',
    'http://www.ghostery.com/apps/stat4u',
    'http://www.ghostery.com/apps/statcounter',
    'http://www.ghostery.com/apps/stathat',
    'http://www.ghostery.com/apps/steel_house_media',
    'http://www.ghostery.com/apps/strands_recommender',
    'http://www.ghostery.com/apps/struq',
    'http://www.ghostery.com/apps/stumbleupon_widgets',
    # The next two URLs were fused into a single string in the original list.
    'http://www.ghostery.com/apps/supercounters',
    'http://www.ghostery.com/apps/turnto',
    'http://www.ghostery.com/apps/twitter_badge',
    # The next two URLs were fused into a single string in the original list.
    'http://www.ghostery.com/apps/twitter_button',
    'http://www.ghostery.com/apps/webtracker',
    'http://www.ghostery.com/apps/webtrekk',
    'http://www.ghostery.com/apps/webtrends',
    'http://www.ghostery.com/apps/webvisor',
    'http://www.ghostery.com/apps/where',
    'http://www.ghostery.com/apps/wibiya_toolbar',
    'http://www.ghostery.com/apps/wordpress_stats',
    'http://www.ghostery.com/apps/wordstream',
    'http://www.ghostery.com/apps/xplosion',
    'http://www.ghostery.com/apps/yabuka',
    'http://www.ghostery.com/apps/yahoo!_overture',
    'http://www.ghostery.com/apps/yahoo_search_marketing_analytics',
    'http://www.ghostery.com/apps/yandex.metrics',
    'http://www.ghostery.com/apps/ybrant_media',
    'http://www.ghostery.com/apps/yieldbot',
    'http://www.ghostery.com/apps/yieldbuild',
]


def _extract_email(text):
    """Return the first e-mail address found in *text*, or "" if there is none."""
    match = _EMAIL_RE.search(text or "")
    return match.group(0) if match else ""


def getDetails(urls=None, output_path="directory.txt"):
    """Scrape privacy-policy and contact details from Ghostery app pages.

    For every URL the page is downloaded with ``scraperwiki.scrape``, the
    first link inside ``div#privacyinfo`` and the first link inside
    ``div#privacycontact`` are read, and one ``url,policy,contact`` line is
    appended to the output file.

    Args:
        urls: Iterable of page URLs to scrape. Defaults to the built-in
            list of Ghostery app pages.
        output_path: File that records are appended to. Defaults to
            "directory.txt" in the current working directory.

    Returns:
        None. Progress is reported on stdout.
    """
    if urls is None:
        urls = _GHOSTERY_APP_URLS

    # Open the output once instead of re-opening it for every record.
    with open(output_path, "a") as directory:
        for url in urls:
            try:
                html = scraperwiki.scrape(url)
            except IOError as err:
                # Presumably download failures surface as IOError subclasses
                # (urllib/requests errors do) -- TODO confirm for the installed
                # scraperwiki version. One dead page should not abort the run.
                print("skipped %s: %s" % (url, err))
                continue

            root = lxml.html.fromstring(html)
            policyLinks = root.cssselect("div#privacyinfo a")
            contactLinks = root.cssselect("div#privacycontact a")
            if not policyLinks or not contactLinks:
                # Indexing [0] on an empty result would raise IndexError.
                print("skipped %s: privacy details not found" % url)
                continue

            privacyPolicy = (policyLinks[0].text or "").strip()
            contactText = (contactLinks[0].text or "").strip()
            # Prefer a clean address; fall back to the raw link text when the
            # contact link does not contain one.
            emailAddress = _extract_email(contactText) or contactText

            # One record per line; the original omitted the newline.
            directory.write("%s,%s,%s\n" % (url, privacyPolicy, emailAddress))
            # Single-argument print() behaves the same on Python 2 and 3.
            print("saved entry %s" % url)
# Start the scrape at module level: this runs whenever the file is executed
# or imported, since there is no __main__ guard.
getDetails()