Skip to content

Commit 0be2abc

Browse files
committed
Add Sogou Search Engine
Add Sogou Search Engine
1 parent e212c38 commit 0be2abc

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed

searchengine/search_sogou.py

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/env python
2+
# -*- coding:utf-8 -*-
3+
__author__ = 'bit4'
4+
__github__ = 'https://github.com/bit4woo'
5+
6+
from lib import myparser
7+
from lib.log import logger
8+
import time
9+
from lib import myrequests
10+
# Short module-level alias for the project's HTTP helper module; the search
# class below issues its requests through req.get().
req = myrequests
11+
12+
class search_sogou:
    """Collect hostnames and e-mail addresses for a search term via Sogou.

    Result pages are fetched one at a time through the module-level ``req``
    helper, their raw bodies are accumulated in ``totalresults``, and the
    accumulated text is handed to ``myparser.parser`` for extraction.
    """

    # Amount the paging counter advances per request; also used to translate
    # the counter into Sogou's ``page`` query parameter.
    RESULTS_PER_PAGE = 10

    # process() never lets the counter go past this value, whatever the limit.
    MAX_COUNTER = 1000

    def __init__(self, word, limit, proxy=None):
        """Store the search parameters and log the start-up banner.

        :param word: search term placed in the ``query`` URL parameter.
        :param limit: upper bound for the paging counter; converted with
            ``int()`` so numeric strings are accepted.
        :param proxy: optional value forwarded as ``proxies=`` to ``req.get``.
        """
        self.engine_name = "SoGou"
        self.word = word
        self.limit = int(limit)
        self.results = ""       # body of the most recently fetched page
        self.totalresults = ""  # concatenation of every fetched page body
        self.proxies = proxy
        self.server = "www.sogou.com"
        self.counter = 0        # paging counter, advanced by process()
        self.print_banner()

    def print_banner(self):
        """Log which engine is about to be queried."""
        logger.info("Searching now in {0}..".format(self.engine_name))

    def _build_url(self):
        """Return the search URL for the current value of ``self.counter``."""
        # Sample URL: http://www.sogou.com/web?query=xxxx&page=2&ie=utf8
        # ``page`` is a page index, so the counter (0, 10, 20, ...) is mapped
        # to 1, 2, 3, ...; sending the raw counter requested every tenth page.
        page = self.counter // self.RESULTS_PER_PAGE + 1
        return "http://{0}/web?query={1}&page={2}".format(
            self.server, self.word, page)

    def do_search(self):
        """Fetch one result page and append its body to ``totalresults``.

        :return: ``True`` when the page was fetched, ``False`` when any
            exception was raised (the error is logged, not propagated).
        """
        # Function-scope import so the file's import header stays untouched.
        import os

        try:
            r = req.get(self._build_url(), proxies=self.proxies)
            self.results = r.content
            self.totalresults += self.results
            return True
        except Exception as e:
            # Best-effort boundary: a failed request ends paging in process()
            # instead of crashing the caller.
            logger.error("Error in {0}: {1}".format(
                os.path.basename(__file__), e))
            return False

    def process(self):
        """Fetch result pages until the limit is reached or a request fails."""
        ceiling = min(self.limit, self.MAX_COUNTER)
        while self.counter <= ceiling:
            if not self.do_search():
                break
            time.sleep(1)  # one-second pause between consecutive requests
            self.counter += self.RESULTS_PER_PAGE

    def get_emails(self):
        """Return ``emails()`` of a parser built over the accumulated results."""
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.emails()

    def get_hostnames(self):
        """Return ``hostnames()`` of a parser built over the accumulated results."""
        rawres = myparser.parser(self.totalresults, self.word)
        return rawres.hostnames()

    def run(self):
        """Run the full search and return ``(hostnames, emails)``.

        Per the original author's note this method exists so the engine can
        be driven from a thread; it may equally be defined in a child class.
        The two results are also kept on the instance as ``d`` and ``e``.
        """
        self.process()
        self.d = self.get_hostnames()
        self.e = self.get_emails()
        logger.info("{0} found {1} domain(s) and {2} email(s)".format(
            self.engine_name, len(self.d), len(self.e)))
        return self.d, self.e
67+
68+
69+
if __name__ == "__main__":
    # Manual smoke test: search for an example domain and print the
    # (hostnames, emails) tuple returned by run().
    # A proxies mapping such as {"http": "http://127.0.0.1:8080"} can be
    # passed as the third constructor argument.
    search = search_sogou("meizu.com", '100')
    # print(...) with a single argument gives the same output on Python 2
    # and Python 3.
    print(search.run())

teemo.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def install_package():
7373
from searchengine.search_so import search_so
7474
from searchengine.search_yahoo import search_yahoo
7575
from searchengine.search_yandex import search_yandex
76+
from searchengine.search_sogou import search_sogou
7677

7778
reload(sys)
7879
sys.setdefaultencoding('utf-8')
@@ -198,7 +199,7 @@ def main():
198199
Threadlist.append(t)
199200

200201
for engine in [search_ask,search_baidu,search_bing,search_bing_api,search_dogpile,search_duckduckgo,search_exalead,search_fofa,search_google,search_google_cse,
201-
search_shodan,search_so,search_yahoo,search_yandex]:
202+
search_shodan,search_so,search_sogou,search_yahoo,search_yandex]:
202203
if proxy_switch == 1 and engine.__name__ in proxy_default_enabled:
203204
proxy = args.proxy
204205
else:

0 commit comments

Comments
 (0)