|
 __author__ = 'JHao'
 
 from apscheduler.schedulers.blocking import BlockingScheduler
+from apscheduler.executors.pool import ProcessPoolExecutor
 
 from util.six import Queue
-from helper.fetch import Fetcher
-from helper.check import Checker
+from helper.fetch import runFetcher
+from helper.check import runChecker
 from helper.proxy import Proxy
 from handler.logHandler import LogHandler
 from handler.proxyHandler import ProxyHandler
 
 
-def doProxyFetch():
+def runProxyFetch():
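+    """ fetch new proxies and push them through the raw check """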
     proxy_queue = Queue()
 
-    fetcher = Fetcher()
-    for proxy in fetcher.fetch():
+    for proxy in runFetcher():
         proxy_queue.put(Proxy(proxy).to_json)
 
-    thread_list = list()
-    for index in range(20):
-        thread_list.append(Checker("raw", proxy_queue, "thread_%s" % str(index).zfill(2)))
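+    # check every queued proxy as a newly fetched ("raw") candidate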
+    runChecker("raw", proxy_queue)
 
-    for thread in thread_list:
-        thread.start()
 
-    for thread in thread_list:
-        thread.join()
-
-
-def doProxyCheck():
+def runProxyCheck():
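+    """ re-check the proxies already stored in the pool """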
     proxy_queue = Queue()
 
-    proxy_handler = ProxyHandler()
-    for proxy in proxy_handler.getAll():
+    for proxy in ProxyHandler().getAll():
         proxy_queue.put(proxy.to_json)
 
-
-# class DoFetchProxy(ProxyManager):
-#     """ fetch proxy"""
-#
-#     def __init__(self):
-#         ProxyManager.__init__(self)
-#         self.log = LogHandler('fetch_proxy')
-#
-#     def main(self):
-#         self.log.info("start fetch proxy")
-#         self.fetch()
-#         self.log.info("finish fetch proxy")
-#
-#
-# def rawProxyScheduler():
-#     DoFetchProxy().main()
-#     doRawProxyCheck()
-#
-#
-# def usefulProxyScheduler():
-#     doUsefulProxyCheck()
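+    # re-validate the stored proxies with the "use" check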
+    runChecker("use", proxy_queue)
 
 
 def runScheduler():
-    doProxyFetch()
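+    # run one fetch pass at startup so the pool is seeded before the interval jobs fire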
+    runProxyFetch()
 
     scheduler_log = LogHandler("scheduler")
     scheduler = BlockingScheduler(logger=scheduler_log)
 
-    scheduler.add_job(doProxyFetch, 'interval', minutes=5, id="proxy_fetch", name="proxy fetch")
-    # scheduler.add_job(usefulProxyScheduler, 'interval', minutes=1, id="useful_proxy_check", name="periodic useful_proxy check")
+    scheduler.add_job(runProxyFetch, 'interval', minutes=4, id="proxy_fetch", name="proxy fetch")
+    scheduler.add_job(runProxyCheck, 'interval', minutes=2, id="proxy_check", name="proxy check")
+
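+    # jobs run on a 20-thread pool by default; a small process pool is also registered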
+    executors = {
+        'default': {'type': 'threadpool', 'max_workers': 20},
+        'processpool': ProcessPoolExecutor(max_workers=5)
+    }
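+    # coalesce=False runs each missed execution instead of merging them;
+    # max_instances=10 allows up to ten overlapping runs of the same job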
+    job_defaults = {
+        'coalesce': False,
+        'max_instances': 10
+    }
+
+    scheduler.configure(executors=executors, job_defaults=job_defaults)
 
     scheduler.start()
 
|
|