@@ -15,23 +15,25 @@ Ensure the executing user has read/write permissions for this folder.
Default configuration
---------------------

- * cachedir: '/tmp/.serpscrap/' - path cachefiles
- * clean_cache_after: 24 - clean cached files older then x hours
- * database_name: '/tmp/serpscrap' - path and name sqlite db (stores scrape results)
- * do_caching: True - enable / disable caching
- * headers: - dict to customize request header, see below
- * num_pages_for_keyword: 2 - number of result pages to scrape
- * num_results_per_page: 10 - number results per searchengine page
- * proxy_file: '' - path to proxy file, see below
- * scrape_urls: False - scrape urls of search results
- * search_engines: ['google'] - search engines (google)
- * url_threads: 3 - number of threads if scrape_urls is true
- * use_own_ip: True - if using proxies set to False
- * sleeping_min: 5 - min seconds to sleep between scrapes
- * sleeping_max: 15 - max seconds to sleep between scrapes
- * screenshot: True - enable screenshots for each query
- * dir_screenshot: '/tmp/screenshots' - basedir for saved screenshots
- * chrome_headless: True - run chrome in headless mode, default is True
+ * cachedir: '/tmp/.serpscrap/' - path to cache files
+ * chrome_headless: True - run chrome in headless mode, default is True
+ * clean_cache_after: 24 - clean cached files older than x hours
+ * database_name: '/tmp/serpscrap' - path and name of the sqlite db (stores scrape results)
+ * dir_screenshot: '/tmp/screenshots' - base dir for saved screenshots
+ * do_caching: True - enable / disable caching
+ * executable_path: '/usr/local/bin/chromedriver' - path to chromedriver
+ * google_search_url: 'https://www.google.com/search?' - base search url, modify for other countries
+ * headers: - dict to customize request headers, see below
+ * num_pages_for_keyword: 2 - number of result pages to scrape
+ * num_results_per_page: 10 - number of results per search engine page
+ * proxy_file: '' - path to proxy file, see below
+ * scrape_urls: False - scrape the urls of the search results
+ * screenshot: True - enable screenshots for each query
+ * search_engines: ['google'] - search engines to use (google)
+ * sleeping_max: 15 - max seconds to sleep between scrapes
+ * sleeping_min: 5 - min seconds to sleep between scrapes
+ * url_threads: 3 - number of threads if scrape_urls is True
+ * use_own_ip: True - set to False if you use proxies
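
+ A minimal sketch of overriding one of these defaults before scraping, assuming the ``Config``
+ helper also provides a ``set()`` method for single keys (only ``get()`` and ``apply()`` are
+ shown in the examples below):
+
+ .. code-block:: python
+
+    import serpscrap
+
+    # load the default configuration shown above
+    config = serpscrap.Config()
+    # override a single key; all other keys keep their default values
+    config.set('num_pages_for_keyword', 3)
+    # get() returns the complete config dict, defaults plus overrides
+    print(config.get()['num_pages_for_keyword'])
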
Custom configuration
--------------------
@@ -48,7 +50,9 @@ Change some config params.
scrap = serpscrap.SerpScrap()
scrap.init(config = config.get(), keywords = keywords)

- Using your own configuration
+ You can apply your own config dictionary. It is not required to provide every possible
+ config key: values you provide overwrite the corresponding defaults, and all keys you do
+ not provide keep their default values.

.. code-block:: python
@@ -61,10 +65,10 @@ Using your own configuration
'database_name': '/tmp/serpscrap',
'do_caching': True,
'num_pages_for_keyword': 2,
- 'proxy_file': '',
'scrape_urls': True,
'search_engines': ['google'],
- 'url_threads': 3,
+ 'google_search_url': 'https://www.google.com/search?',
+ 'executable_path': '/usr/local/bin/chromedriver',
}

config.apply(config_new)
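
+ After ``apply()`` merges these values over the defaults, the resulting config can be passed
+ to the scraper as in the example above. A minimal sketch, assuming SerpScrap exposes a
+ ``run()`` method that executes the scrape and returns the results (not shown in this section);
+ the keyword is just a placeholder for illustration:
+
+ .. code-block:: python
+
+    scrap = serpscrap.SerpScrap()
+    scrap.init(config=config.get(), keywords=['my keyword'])
+    results = scrap.run()
+
+    for result in results:
+        print(result)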