diff --git a/README.rst b/README.rst index 7c8c7ef..69fdfc0 100644 --- a/README.rst +++ b/README.rst @@ -23,7 +23,7 @@ It might be usefull for SEO and research tasks. Extract these result types -------------------------- -* ads_main - advertisments within regular search results +* ads_main - advertisements within regular search results * image - result from image search * news - news teaser within regular search results * results - standard search result diff --git a/docs/conf.py b/docs/conf.py index c172316..28cc738 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,7 +60,7 @@ # The short X.Y version. version = '0.9' # The full version, including alpha/beta/rc tags. -release = '0.9.1' +release = '0.9.2' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 14616cc..90d86d6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,7 +26,7 @@ It might be usefull for SEO and research tasks. Extract these result types -------------------------- -* ads_main - advertisments within regular search results +* ads_main - advertisements within regular search results * image - result from image search * news - news teaser within regular search results * results - standard search result diff --git a/examples/example_related.py b/examples/example_related.py index 6a73150..1b971e2 100644 --- a/examples/example_related.py +++ b/examples/example_related.py @@ -7,7 +7,7 @@ def scrape_to_csv(config, keywords): scrap = serpscrap.SerpScrap() scrap.init(config=config.get(), keywords=keywords) - return scrap.as_csv('/tmp/planet-earth') + return scrap.as_csv('/tmp/cryptocurrency') def get_related(config, keywords, related): @@ -25,7 +25,7 @@ def get_related(config, keywords, related): config.set('scrape_urls', False) config.set('num_workers', 1) -keywords = ['planet earth'] +keywords = ['cryptocurrency'] related = keywords related = get_related(config, keywords, related) diff --git a/requirements.txt b/requirements.txt index b60d9a3..f0ce883 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ lxml chardet==3.0.4 beautifulsoup4==4.6.0 -html2text==2016.9.19 +html2text==2017.10.4 PySocks==1.6.7 -sqlalchemy==1.1.13 -selenium==3.5.0 +sqlalchemy==1.1.15 +selenium==3.8.0 cssselect==1.0.1 \ No newline at end of file diff --git a/scrapcore/database.py b/scrapcore/database.py index 9bb4dee..177c6da 100644 --- a/scrapcore/database.py +++ b/scrapcore/database.py @@ -111,6 +111,12 @@ def set_values_from_parser(self, parser): for link in value: parsed = urlparse(link['link']) + if link['snippet'] is not None: + # try to remove inline css, which is in some results since 12/2017 + tmp_snipped = link['snippet'].split('}') + if len(tmp_snipped) > 1: + link['snippet'] = tmp_snipped[len(tmp_snipped)-1] + # fill with nones to prevent key errors [link.update({key: None}) for key in ( 'snippet', diff --git a/scrapcore/parser/google_parser.py b/scrapcore/parser/google_parser.py index 63cab48..f048dd0 100644 --- a/scrapcore/parser/google_parser.py +++ b/scrapcore/parser/google_parser.py @@ -104,7 +104,7 @@ class GoogleParser(Parser): 'container': '#center_col', 'result_container': '.ads-ad', 'link': 'h3 > a:nth-child(2)::attr(href)', - 'snippet': '.ads-creative::text', + 'snippet': 'div.ads-creative::text', 'title': 'h3 > a:nth-child(2)::text', 'visible_link': '.ads-visurl cite::text', 'rating': 'div._Ond _Bu span::text', diff --git a/scrapcore/user_agent.py b/scrapcore/user_agent.py index 3debc5f..09739de 100644 --- a/scrapcore/user_agent.py +++ b/scrapcore/user_agent.py @@ -2,69 +2,69 @@ import random user_agents_mobile = [ - 'Mozilla/5.0 (iPhone, CPU iPhone OS 10_2_1 like Mac OS X) AppleWebKit/602.4.6 (KHTML, like Gecko) Version/10.0 Mobile/14D27 Safari/602.1', - 'Mozilla/5.0 (iPhone, CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', - 'Mozilla/5.0 (iPhone, CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1', - 'Mozilla/5.0 (Linux, Android 7.0, SAMSUNG SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.0 Chrome/51.0.2704.106 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SM-G920F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 7.0, SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 7.0, SAMSUNG SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.0 Chrome/51.0.2704.106 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G900F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36', - 'Mozilla/5.0 (iPhone, CPU iPhone OS 10_1_1 like Mac OS X) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0 Mobile/14B100 Safari/602.1', - 'Mozilla/5.0 (iPhone, CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E277 Safari/602.1', - 'Mozilla/5.0 (Linux, Android 7.0, SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36', - 'Mozilla/5.0 (iPhone, CPU iPhone OS 10_2_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) GSA/24.1.151204851 Mobile/14D27 Safari/602.1', - 'Mozilla/5.0 (Linux, Android 6.0.1, SM-G900F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G920F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.0 Chrome/51.0.2704.106 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0, ALE-L21 Build/HuaweiALE-L21) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G920F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SM-G925F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36', - 'Mozilla/5.0 (iPhone, CPU iPhone OS 10_0_2 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Mobile/14A456 Safari/602.1', - 'Mozilla/5.0 (iPhone, CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) GSA/24.1.151204851 Mobile/14E304 Safari/602.1', - 'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-A510F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SM-G920F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Crosswalk/20.50.533.12 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 7.0, SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Crosswalk/20.50.533.12 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SM-A510F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G800F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36', - 'Mozilla/5.0 (iPhone, CPU iPhone OS 9_3_5 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G36 Safari/601.1', - 'Mozilla/5.0 (Linux, Android 7.0, SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Crosswalk/20.50.533.12 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G925F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.0 Chrome/51.0.2704.106 Mobile Safari/537.36', - 'Mozilla/5.0 (Linux, Android 6.0, HUAWEI VNS-L31 Build/HUAWEIVNS-L31) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36', - 'Mozilla/5.0 (Android 6.0.1, Mobile, rv:52.0) Gecko/52.0 Firefox/52.0', - 'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G903F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_2 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B202 Safari/604.1', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_3 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A432 Safari/604.1', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B150 Safari/604.1', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.0 Mobile/14G60 Safari/602.1', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B93 Safari/604.1', + 'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 7.0; SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 7.0; SM-G950F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 7.0; SM-G920F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.0 Mobile/14F89 Safari/602.1', + 'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G950F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_2_1 like Mac OS X) AppleWebKit/602.4.6 (KHTML, like Gecko) Version/10.0 Mobile/14D27 Safari/602.1', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_2 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A421 Safari/604.1', + 'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G920F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1', + 'Mozilla/5.0 (Linux; Android 7.0; SM-G925F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 6.0; ALE-L21 Build/HuaweiALE-L21) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 6.0.1; SM-G900F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G925F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-G900F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1', + 'Mozilla/5.0 (Linux; Android 7.0; HUAWEI VNS-L31 Build/HUAWEIVNS-L31) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_2 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Version/11.0 Mobile/15C114 Safari/604.1', + 'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.98 Mobile Safari/537.36', + 'Mozilla/5.0 (Android 7.0; Mobile; rv:57.0) Gecko/57.0 Firefox/57.0', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', + 'Mozilla/5.0 (Linux; Android 7.0; SM-A510F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', ] user_agents_computer = [ - 'Mozilla/5.0 (Windows NT 6.1, WOW64, rv:52.0) Gecko/20100101 Firefox/52.0', - 'Mozilla/5.0 (Windows NT 10.0, WOW64, rv:52.0) Gecko/20100101 Firefox/52.0', - 'Mozilla/5.0 (Windows NT 6.1, WOW64, Trident/7.0, rv:11.0) like Gecko', - 'Mozilla/5.0 (Windows NT 10.0, Win64, x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1, WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0, WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0, Win64, x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393', - 'Mozilla/5.0 (Windows NT 6.1, Win64, x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', - 'Mozilla/5.0 (X11, Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.101 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1, Trident/7.0, rv:11.0) like Gecko', - 'Mozilla/5.0 (Windows NT 6.3, WOW64, rv:52.0) Gecko/20100101 Firefox/52.0', - 'Mozilla/5.0 (Macintosh, Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30', - 'Mozilla/5.0 (Windows NT 6.1, rv:52.0) Gecko/20100101 Firefox/52.0', - 'Mozilla/5.0 (Windows NT 6.1, WOW64, rv:45.0) Gecko/20100101 Firefox/45.0', - # 'Mozilla/5.0 (Windows NT 10.0, WOW64, Trident/7.0, rv:11.0) like Gecko', - 'Mozilla/5.0 (Windows NT 6.3, WOW64, Trident/7.0, rv:11.0) like Gecko', - 'Mozilla/5.0 (Macintosh, Intel Mac OS X 10_12_3) AppleWebKit/602.4.8 (KHTML, like Gecko) Version/10.0.3 Safari/602.4.8', - 'Mozilla/5.0 (Windows NT 10.0, Win64, x64, rv:52.0) Gecko/20100101 Firefox/52.0', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.3, WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0, rv:52.0) Gecko/20100101 Firefox/52.0', - 'Mozilla/5.0 (Windows NT 6.1, Win64, x64, rv:52.0) Gecko/20100101 Firefox/52.0', - 'Mozilla/5.0 (Macintosh, Intel Mac OS X 10_11_6) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30', - 'Mozilla/5.0 (Windows NT 6.3, Win64, x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1, WOW64, rv:51.0) Gecko/20100101 Firefox/51.0', - 'Mozilla/5.0 (Windows NT 6.1, WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0, WOW64, rv:51.0) Gecko/20100101 Firefox/51.0', - 'Mozilla/5.0 (Windows NT 6.0, rv:52.0) Gecko/20100101 Firefox/52.0', - 'Mozilla/5.0 (Windows NT 6.1, Win64, x64, Trident/7.0, rv:11.0) like Gecko', - 'Mozilla/5.0 (Windows NT 5.1, rv:52.0) Gecko/20100101 Firefox/52.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0', + 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5', + 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko', + 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0', + 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko', + 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0', + 'Mozilla/5.0 (Windows NT 6.1; rv:57.0) Gecko/20100101 Firefox/57.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0', + 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36', + 'Mozilla/5.0 (Windows NT 6.1; rv:56.0) Gecko/20100101 Firefox/56.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5', + 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0', + 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0', ] diff --git a/setup.py b/setup.py index 935a60f..257c6b1 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from setuptools import setup, find_packages -version = '0.9.1' +version = '0.9.2' setup( @@ -25,10 +25,10 @@ 'PySocks==1.6.7', 'chardet==3.0.4', 'beautifulsoup4==4.6.0', - 'html2text==2016.9.19', + 'html2text==2017.10.4', 'lxml', - 'sqlalchemy==1.1.13', - 'selenium==3.5.0', + 'sqlalchemy==1.1.15', + 'selenium==3.8.0', 'cssselect==1.0.1', ], classifiers=[ @@ -40,5 +40,5 @@ 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ], - keywords='serp-scraper url-scraper ad-detection', + keywords='seo scraper ad-detection scraping keywords', )