Skip to content

Commit eef8b9b

Browse files
committed
[update] helper
1 parent 2038600 commit eef8b9b

File tree

4 files changed

+51
-43
lines changed

4 files changed

+51
-43
lines changed

helper/check.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ def __proxyCheck(proxy):
5252

5353

5454
class Checker(Thread):
55+
"""
56+
多线程检测代理是否可用
57+
"""
5558

5659
def __init__(self, check_type, queue, thread_name):
5760
Thread.__init__(self, name=thread_name)
@@ -66,7 +69,7 @@ def run(self):
6669
try:
6770
proxy_json = self.queue.get(block=False)
6871
except Empty:
69-
self.log.info("ProxyCheck - {} : exit".format(self.name))
72+
self.log.info("ProxyCheck - {} : complete".format(self.name))
7073
break
7174

7275
proxy = Proxy.createFromJson(proxy_json)
@@ -83,3 +86,21 @@ def run(self):
8386
else:
8487
pass
8588
self.queue.task_done()
89+
90+
91+
def runChecker(tp, queue, thread_num=20):
    """
    Run a group of Checker threads over a proxy queue and wait for them.

    :param tp: check type forwarded to every Checker, raw/use
    :param queue: Proxy Queue handed to every Checker
    :param thread_num: number of Checker threads to start; defaults to 20,
        the value that used to be hard-coded here
    :return: None
    """
    thread_list = [
        Checker(tp, queue, "thread_%s" % str(index).zfill(2))
        for index in range(thread_num)
    ]

    # Start every worker before joining any of them so they run concurrently.
    for thread in thread_list:
        thread.start()

    # Block until every worker has finished.
    for thread in thread_list:
        thread.join()

helper/fetch.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,9 @@ def fetch(self):
5555
except Exception as e:
5656
self.log.error("ProxyFetch - {func}: error".format(func=fetch_name))
5757
self.log.error(str(e))
58+
self.log.info("ProxyFetch - all complete!")
5859
return proxy_set
60+
61+
62+
def runFetcher():
    """
    Create a Fetcher and run a single fetch pass.

    :return: the value returned by Fetcher.fetch()
    """
    fetcher = Fetcher()
    return fetcher.fetch()

helper/scheduler.py

Lines changed: 23 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,71 +13,53 @@
1313
__author__ = 'JHao'
1414

1515
from apscheduler.schedulers.blocking import BlockingScheduler
16+
from apscheduler.executors.pool import ProcessPoolExecutor
1617

1718
from util.six import Queue
18-
from helper.fetch import Fetcher
19-
from helper.check import Checker
19+
from helper.fetch import runFetcher
20+
from helper.check import runChecker
2021
from helper.proxy import Proxy
2122
from handler.logHandler import LogHandler
2223
from handler.proxyHandler import ProxyHandler
2324

2425

25-
def doProxyFetch():
26+
def runProxyFetch():
    """
    Fetch proxies, queue the to_json form of each one, and run the
    "raw" checker over that queue.
    """
    pending = Queue()
    for fetched in runFetcher():
        item = Proxy(fetched).to_json
        pending.put(item)

    runChecker("raw", pending)
3533

36-
for thread in thread_list:
37-
thread.start()
3834

39-
for thread in thread_list:
40-
thread.join()
41-
42-
43-
def doProxyCheck():
35+
def runProxyCheck():
    """
    Queue the to_json form of every proxy returned by
    ProxyHandler().getAll() and run the "use" checker over that queue.
    """
    pending = Queue()
    stored_proxies = ProxyHandler().getAll()
    for stored in stored_proxies:
        pending.put(stored.to_json)

    runChecker("use", pending)
7142

7243

7344
def runScheduler():
    """
    Run one proxy fetch immediately, then start the scheduler with the
    periodic fetch (every 4 minutes) and check (every 2 minutes) jobs.

    The scheduler is a BlockingScheduler, so this function does not return
    while the scheduler is running.
    """
    runProxyFetch()

    scheduler_log = LogHandler("scheduler")

    executors = {
        'default': {'type': 'threadpool', 'max_workers': 20},
        'processpool': ProcessPoolExecutor(max_workers=5)
    }
    job_defaults = {
        'coalesce': False,
        'max_instances': 10
    }

    # Hand every option to the constructor in one go. A later
    # scheduler.configure(...) call without logger= re-applies the general
    # options and swaps the logger given here for the default apscheduler one.
    scheduler = BlockingScheduler(logger=scheduler_log,
                                  executors=executors,
                                  job_defaults=job_defaults)

    scheduler.add_job(runProxyFetch, 'interval', minutes=4, id="proxy_fetch", name="proxy采集")
    scheduler.add_job(runProxyCheck, 'interval', minutes=2, id="proxy_check", name="proxy检查")

    scheduler.start()
8365

proxyPool.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from config.setting import BANNER
1818

19-
from helper.proxyScheduler import runScheduler
19+
from helper.scheduler import runScheduler
2020
from api.proxyApi import runFlask
2121

2222
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])

0 commit comments

Comments
 (0)