Commit
fixes for #23 #31
Ronald Schmidt committed May 8, 2018
1 parent 4df0495 commit 7339ba6
Showing 3 changed files with 28 additions and 24 deletions.
44 changes: 24 additions & 20 deletions docs/configuration.rst
@@ -15,23 +15,25 @@ Ensure the executing user has read/write permissions for this folder.
Default configuration
---------------------

* cachedir: '/tmp/.serpscrap/' - path cachefiles
* clean_cache_after: 24 - clean cached files older then x hours
* database_name: '/tmp/serpscrap' - path and name sqlite db (stores scrape results)
* do_caching: True - enable / disable caching
* headers: - dict to customize request header, see below
* num_pages_for_keyword: 2 - number of result pages to scrape
* num_results_per_page: 10 - number results per searchengine page
* proxy_file: '' - path to proxy file, see below
* scrape_urls: False - scrape urls of search results
* search_engines: ['google'] - search engines (google)
* url_threads: 3 - number of threads if scrape_urls is true
* use_own_ip: True - if using proxies set to False
* sleeping_min: 5 - min seconds to sleep between scrapes
* sleeping_max: 15 - max seconds to sleep between scrapes
* screenshot: True - enable screenshots for each query
* dir_screenshot: '/tmp/screenshots' - basedir for saved screenshots
* chrome_headless: True - run chrome in headless mode, default is True
* cachedir: '/tmp/.serpscrap/' - path to cache files
* chrome_headless: True - run chrome in headless mode, default is True
* clean_cache_after: 24 - clean cached files older than x hours
* database_name: '/tmp/serpscrap' - path and name of the sqlite db (stores scrape results)
* dir_screenshot: '/tmp/screenshots' - basedir for saved screenshots
* do_caching: True - enable / disable caching
* executable_path: '/usr/local/bin/chromedriver' - path to chromedriver
* google_search_url: 'https://www.google.com/search?' - base search url, modify for other countries
* headers: - dict to customize request headers, see below
* num_pages_for_keyword: 2 - number of result pages to scrape
* num_results_per_page: 10 - number of results per search engine page
* proxy_file: '' - path to proxy file, see below
* scrape_urls: False - scrape urls of search results
* screenshot: True - enable screenshots for each query
* search_engines: ['google'] - search engines (google)
* sleeping_max: 15 - max seconds to sleep between scrapes
* sleeping_min: 5 - min seconds to sleep between scrapes
* url_threads: 3 - number of threads if scrape_urls is true
* use_own_ip: True - if using proxies set to False
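The sleeping_min/sleeping_max pair above bounds a randomized pause between scrape requests. A minimal pure-Python sketch of that behaviour (the function name is illustrative, not a serpscrap internal):

```python
import random

def random_delay(sleeping_min=5, sleeping_max=15):
    """Pick a randomized pause bounded by the two config values."""
    return random.uniform(sleeping_min, sleeping_max)

delay = random_delay()
assert 5 <= delay <= 15
```

Randomizing the delay within configured bounds makes the request timing less uniform than a fixed sleep would be.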

Custom configuration
--------------------
@@ -48,7 +50,9 @@ Change some config params.
scrap = serpscrap.SerpScrap()
scrap.init(config=config.get(), keywords=keywords)
Using your own configuration
You can apply your own config dictionary. It is not required to provide every
possible config key: when applied, the given values overwrite the defaults,
while keys you do not provide keep their default values.

.. code-block:: python
@@ -61,10 +65,10 @@ Using your own configuration
'database_name': '/tmp/serpscrap',
'do_caching': True,
'num_pages_for_keyword': 2,
'proxy_file': '',
'scrape_urls': True,
'search_engines': ['google'],
'url_threads': 3,
'google_search_url': 'https://www.google.com/search?',
'executable_path': '/usr/local/bin/chromedriver',
}
config.apply(config_new)
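The apply() call above merges rather than replaces: keys you omit keep their defaults. A standalone sketch of that merge semantics using plain dicts (illustrative values, not the full default set):

```python
defaults = {
    'do_caching': True,
    'num_pages_for_keyword': 2,
    'scrape_urls': False,
}
overrides = {'scrape_urls': True}

# In {**a, **b}, later keys win: overrides replace defaults where present,
# and every key missing from overrides survives from defaults.
merged = {**defaults, **overrides}

assert merged['scrape_urls'] is True          # overridden
assert merged['num_pages_for_keyword'] == 2   # default preserved
```

This is the standard dict-unpacking merge idiom; neither input dict is mutated.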
2 changes: 1 addition & 1 deletion docs/results.rst
@@ -10,7 +10,7 @@ If you prefer to save the results use the as_csv() method.
{
'query': 'example',
'query_num_results total': 'Ungefähr 1.740.000.000 Ergebnisse (0,50 '
'query_num_results_total': 'Ungefähr 1.740.000.000 Ergebnisse (0,50 '
'Sekunden)\xa0',
'query_num_results_page': 10,
'query_page_number': 1,
6 changes: 3 additions & 3 deletions serpscrap/config.py
@@ -88,10 +88,10 @@ def set(self, key, value):
self.config.__setitem__(key, value)

def apply(self, config):
"""apply an individual conig
"""apply an individual config, replacing default config
values with the values of the new config
Args:
config (dict): new configuration
"""

self.config = config
self.config = {**self.config, **config}
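The change above switches apply() from wholesale replacement to a dict merge, so keys absent from the user's config retain their defaults. A minimal standalone model of the fixed behaviour (the class is an illustrative stand-in, not serpscrap's full Config):

```python
class MiniConfig:
    """Illustrative stand-in for serpscrap's Config class."""

    def __init__(self):
        # A tiny subset of defaults, for demonstration only.
        self.config = {'do_caching': True, 'url_threads': 3}

    def apply(self, config):
        # Merge: values from `config` win, untouched defaults survive.
        # The old buggy version (self.config = config) dropped every
        # default the caller did not restate.
        self.config = {**self.config, **config}

c = MiniConfig()
c.apply({'url_threads': 5})
assert c.config == {'do_caching': True, 'url_threads': 5}
```

With the old assignment, `c.config` after `apply({'url_threads': 5})` would have been just `{'url_threads': 5}`, silently losing `do_caching`.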
