Skip to content

Commit 1d57270

Browse files
committed May 18, 2021
update get_history_url flag
1 parent bd0907c commit 1d57270

File tree

3 files changed

+26
-14
lines changed

3 files changed

+26
-14
lines changed
 

‎setup.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
setuptools.setup(
88
name="wechatarticles",
9-
version="0.6.4",
9+
version="0.6.5",
1010
author="wnma3mz",
1111
author_email="wnma3mz@gmail.com",
1212
description="wechat articles scrapy",

‎test/test_GetUrls.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
import pandas as pd
99
from wechatarticles import ArticlesInfo
10-
from wechatarticles.utils import get_history_urls
10+
from wechatarticles.utils import get_history_urls, verify_url
1111

1212
# 快速获取大量文章urls(利用历史文章获取链接)
1313

@@ -68,7 +68,7 @@ def demo(lst):
6868
key = ""
6969

7070
lst = get_history_urls(
71-
biz, uin, key, lst=[], start_timestamp=0, count=0, endcount=10
71+
biz, uin, key, lst=[], start_timestamp=0, start_count=0, end_count=10
7272
)
7373
print("抓取到的文章链接")
7474
print(lst)
@@ -78,9 +78,9 @@ def demo(lst):
7878
# 个人微信号登陆后获取的cookie
7979
cookie = ""
8080
# 获取点赞数、阅读数、评论信息
81-
ai = PublicAccountsWeb(appmsg_token, cookie)
81+
ai = ArticlesInfo(appmsg_token, cookie)
8282

83-
# url:微信文章链接
83+
# url:微信文章链接. lst[0]["app_msg_ext_info"]["content_url"]
8484
read_num, like_num, old_like_num = ai.read_like_nums(url)
8585
item = ai.comments(url)
8686
print("阅读:{}; 在看: {}; 点赞: {}".format(read_num, like_num, old_like_num))

‎wechatarticles/utils.py

+21-9
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,14 @@ def read_nickname(fname):
121121

122122

123123
def get_history_urls(
124-
biz, uin, key, lst=[], start_timestamp=0, count=10, endcount=99999
124+
biz,
125+
uin,
126+
key,
127+
lst=[],
128+
start_timestamp=0,
129+
start_count=10,
130+
end_count=99999,
131+
return_flag=False,
125132
):
126133
"""
127134
获取历史文章链接
@@ -138,37 +145,42 @@ def get_history_urls(
138145
已有的数据列表
139146
start_timestampe: int
140147
截至时间戳
141-
count: int
148+
start_count: int
142149
开始的条数
143-
endcount: int
150+
end_count: int
144151
截至条数
152+
return_flag: bool
153+
是否返回状态信息
145154
146155
Returns
147156
-------
148157
lst:
149158
获取到的历史文章数据
150159
"""
151160
t = PC(biz=biz, uin=uin, cookie="")
161+
flag = True
152162
try:
153163
while True:
154-
res = t.get_urls(key, offset=count)
164+
res = t.get_urls(key, offset=start_count)
155165
if res == []:
156166
break
157-
count += 10
167+
start_count += 10
158168
lst.append(res)
159169
dt = res[-1]["comm_msg_info"]["datetime"]
160-
print(count, timestamp2date(dt))
161-
if dt <= start_timestamp or count >= endcount:
170+
print(start_count, timestamp2date(dt))
171+
if dt <= start_timestamp or start_count >= end_count:
162172
break
163173
time.sleep(5)
164174
except KeyboardInterrupt as e:
175+
flag = False
165176
print("程序手动中断")
166-
return lst
167177
except Exception as e:
168178
print(e)
179+
flag = False
169180
print("获取文章链接失败。。。退出程序")
170-
assert 1 == 2
171181
finally:
182+
if return_flag:
183+
return flag, lst
172184
return lst
173185

174186

0 commit comments

Comments
 (0)