Update get_history_urls: add return_flag option and rename count/endcount to start_count/end_count

wnma3mz · wnma3mz · commit 1d57270fdb04 · 2021-05-18T12:31:08.000+08:00
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 
 setuptools.setup(
     name="wechatarticles",
-    version="0.6.4",
+    version="0.6.5",
     author="wnma3mz",
     author_email="wnma3mz@gmail.com",
     description="wechat articles scrapy",
diff --git a/test/test_GetUrls.py b/test/test_GetUrls.py
@@ -7,7 +7,7 @@
 
 import pandas as pd
 from wechatarticles import ArticlesInfo
-from wechatarticles.utils import get_history_urls
+from wechatarticles.utils import get_history_urls, verify_url
 
 # 快速获取大量文章urls（利用历史文章获取链接）
 
@@ -68,7 +68,7 @@ def demo(lst):
     key = ""
 
     lst = get_history_urls(
-        biz, uin, key, lst=[], start_timestamp=0, count=0, endcount=10
+        biz, uin, key, lst=[], start_timestamp=0, start_count=0, end_count=10
     )
     print("抓取到的文章链接")
     print(lst)
@@ -78,9 +78,9 @@ def demo(lst):
     # 个人微信号登陆后获取的cookie
     cookie = ""
     # 获取点赞数、阅读数、评论信息
-    ai = PublicAccountsWeb(appmsg_token, cookie)
+    ai = ArticlesInfo(appmsg_token, cookie)
 
-    # url：微信文章链接
+    # url：微信文章链接. lst[0]["app_msg_ext_info"]["content_url"]
     read_num, like_num, old_like_num = ai.read_like_nums(url)
     item = ai.comments(url)
     print("阅读：{}; 在看: {}; 点赞: {}".format(read_num, like_num, old_like_num))
diff --git a/wechatarticles/utils.py b/wechatarticles/utils.py
@@ -121,7 +121,14 @@ def read_nickname(fname):
 
 
 def get_history_urls(
-    biz, uin, key, lst=[], start_timestamp=0, count=10, endcount=99999
+    biz,
+    uin,
+    key,
+    lst=[],
+    start_timestamp=0,
+    start_count=10,
+    end_count=99999,
+    return_flag=False,
 ):
     """
     获取历史文章链接
@@ -138,37 +145,42 @@ def get_history_urls(
         已有的数据列表
     start_timestampe: int
         截至时间戳
-    count: int
+    start_count: int
         开始的条数
-    endcount: int
+    end_count: int
         截至条数
+    return_flag: bool
+        是否返回状态信息
 
     Returns
     -------
     lst:
         获取到的历史文章数据
     """
     t = PC(biz=biz, uin=uin, cookie="")
+    flag = True
     try:
         while True:
-            res = t.get_urls(key, offset=count)
+            res = t.get_urls(key, offset=start_count)
             if res == []:
                 break
-            count += 10
+            start_count += 10
             lst.append(res)
             dt = res[-1]["comm_msg_info"]["datetime"]
-            print(count, timestamp2date(dt))
-            if dt <= start_timestamp or count >= endcount:
+            print(start_count, timestamp2date(dt))
+            if dt <= start_timestamp or start_count >= end_count:
                 break
             time.sleep(5)
     except KeyboardInterrupt as e:
+        flag = False
         print("程序手动中断")
-        return lst
     except Exception as e:
         print(e)
+        flag = False
         print("获取文章链接失败。。。退出程序")
-        assert 1 == 2
     finally:
+        if return_flag:
+            return flag, lst
         return lst