From 183596569db96d5760d79c3489d99a36ef2c6091 Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Tue, 26 May 2020 13:06:31 -0400 Subject: [PATCH 01/11] Refactored report related functionalities Refactored report related functionalities to reduce redundancy in code. Also implemented: issue #3712 and issue #2795 . --- chatcommands.py | 336 ++++++++++++++++++++---------------------------- 1 file changed, 140 insertions(+), 196 deletions(-) diff --git a/chatcommands.py b/chatcommands.py index 339b90f487..8382d6fd13 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1601,12 +1601,9 @@ def invite(msg, room_id, roles): @command(str, whole_msg=True, privileged=False, give_name=True, aliases=["scan", "scan-force", "report-force", "report-direct"]) def report(msg, args, alias_used="report"): - """ - Report a post (or posts) - :param msg: - :return: A string (or None) - """ - if not is_privileged(msg.owner, msg.room) and alias_used != "scan": + """ Scan or report posts (wrapper) """ + if alias_used != "scan" and not is_privileged(msg.owner, msg.room): + # Only "scan" is allowed for non-privileged users raise CmdException(GlobalVars.not_privileged_warning) crn, wait = can_report_now(msg.owner.id, msg._client.host) @@ -1617,8 +1614,6 @@ def report(msg, args, alias_used="report"): "wait 30 seconds after you've reported multiple posts in " "one go.".format(alias_used, wait)) - alias_used = alias_used or "report" - argsraw = args.split(' "', 1) urls = argsraw[0].split(' ') @@ -1640,25 +1635,43 @@ def report(msg, args, alias_used="report"): "SmokeDetector's chat messages getting rate-limited too much, " "which would slow down reports.".format(alias_used)) - # report_posts(urls, reported_by_owner, reported_in, blacklist_by, operation="report", custom_reason=None): - output = report_posts(urls, msg.owner, msg.room.name, message_url, alias_used, custom_reason) + output = [] + shortlinks = [] + for url in urls: + shortlink = url_to_shortlink(url) + # Validate shortlink + if not shortlink: + output.append("[Post]({}): Invalid url.".format(url)) + elif shortlink in shortlinks: + output.append("[Post]({}): Duplicate url.".format(url)) + else: + shortlinks.append(shortlink) + # Fetch post data by API + post_data = api_get_post(rebuild_str(shortlink)) + if post_data is None: + return "[Post]({}): Invalid url.".format(url) + if post_data is False: + return "[Post]({}): No data fetched from API. It may have been deleted.".format(url) + # Report that post + current_output = report_post(post_data, msg.owner, msg.room.name, message_url, alias_used, custom_reason) + if current_output: + output.append(current_output) - if output: + if len(output): if 1 < len(urls) > output.count("\n") + 1: add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time()) - return output - + return "\n".join(output) -# noinspection PyIncorrectDocstring,PyUnusedLocal -@command(str, whole_msg=True, privileged=True, aliases=['reportuser']) -def allspam(msg, url): - """ - Reports all of a user's posts as spam - :param msg: - :param url: A user profile URL - :return: - """ +@command(str, whole_msg=True, privileged=True, give_name=True, + aliases=["reportuser", "reportuser-force", "allspam-force"]) +def allspam(msg, url, alias_used="allspam"): + """ Report all posts of a user """ + if alias_used in {"reportuser", "allspam"}: + operation = "report" + else: + operation = "report-force" + custom_reason = None # TODO: allow custom reasons api_key = 'IAkbitmze4B8KpacUfLqkw((' crn, wait = can_report_now(msg.owner.id, msg._client.host) if not crn: @@ -1669,7 +1682,7 @@ def allspam(msg, url): "one go.".format(wait)) user = get_user_from_url(url) if user is None: - raise CmdException("That doesn't look like a valid user URL.") + raise CmdException("[User]({}): Invalid url.".format(url)) user_sites = [] user_posts = [] # Detect whether link is to network profile or site profile @@ -1692,6 +1705,7 @@ def allspam(msg, url): if 'items' not in res or len(res['items']) == 0: raise CmdException("The specified user does not appear to exist.") if res['has_more']: + # Too many posts raise CmdException("The specified user has an abnormally high number of accounts. Please consider flagging " "for moderator attention, otherwise use !!/report on the user's posts individually.") # Add accounts with posts @@ -1700,6 +1714,7 @@ def allspam(msg, url): user_sites.append((site['user_id'], get_api_sitename_from_url(site['site_url']))) else: user_sites.append((user[0], get_api_sitename_from_url(user[1]))) + total_posts = 0 # Tracking total amount of posts by that user # Fetch posts for u_id, u_site in user_sites: # Respect backoffs etc @@ -1719,16 +1734,23 @@ def allspam(msg, url): GlobalVars.api_backoff_time = time.time() + res["backoff"] GlobalVars.api_request_lock.release() if 'items' not in res or len(res['items']) == 0: - raise CmdException("The specified user has no posts on this site.") + # The user has no post on this site. + # Move on to the next site. + continue posts = res['items'] if posts[0]['owner']['reputation'] > 100: + # High reputation on one site indicates that the user + # is unlikely a spammer on other sites. + # Hence raise an exception and quit the process raise CmdException("The specified user's reputation is abnormally high. Please consider flagging for " "moderator attention, otherwise use !!/report on the posts individually.") # Add blacklisted user - use most downvoted post as post URL message_url = "https://chat.{}/transcript/{}?m={}".format(msg._client.host, msg.room.id, msg.id) add_blacklisted_user(user, message_url, sorted(posts, key=lambda x: x['score'])[0]['owner']['link']) + output = [] # TODO: Postdata refactor, figure out a better way to use apigetpost for post in posts: + # Manually construct post_data (need to be changed; see TODO above) post_data = PostData() post_data.post_id = post['post_id'] post_data.post_url = url_to_shortlink(post['link']) @@ -1742,196 +1764,118 @@ def allspam(msg, url): post_data.score = post['score'] post_data.up_vote_count = post['up_vote_count'] post_data.down_vote_count = post['down_vote_count'] - if post_data.post_type == "answer": - # Annoyingly we have to make another request to get the question ID, since it is only returned by the - # /answers route - # Respect backoffs etc - GlobalVars.api_request_lock.acquire() - if GlobalVars.api_backoff_time > time.time(): - time.sleep(GlobalVars.api_backoff_time - time.time() + 2) - # Fetch posts - req_url = "https://api.stackexchange.com/2.2/answers/{}".format(post['post_id']) - params = { - 'filter': '!*Jxb9s5EOrE51WK*', - 'key': api_key, - 'site': u_site - } - answer_res = requests.get(req_url, params=params).json() - if "backoff" in answer_res: - if GlobalVars.api_backoff_time < time.time() + answer_res["backoff"]: - GlobalVars.api_backoff_time = time.time() + answer_res["backoff"] - GlobalVars.api_request_lock.release() - # Finally, set the attribute - post_data.question_id = answer_res['items'][0]['question_id'] - post_data.is_answer = True - user_posts.append(post_data) - if len(user_posts) == 0: - raise CmdException("The specified user hasn't posted anything.") - if len(user_posts) > 15: - raise CmdException("The specified user has an abnormally high number of spam posts. Please consider flagging " - "for moderator attention, otherwise use !!/report on the posts individually.") - why_info = u"User manually reported by *{}* in room *{}*.\n".format(msg.owner.name, msg.room.name) - # Handle all posts - for index, post in enumerate(user_posts, start=1): - batch = "" - if len(user_posts) > 1: - batch = " (batch report: post {} out of {})".format(index, len(user_posts)) - handle_spam(post=Post(api_response=post.as_dict), - reasons=["Manually reported " + post.post_type + batch], - why=why_info) - time.sleep(2) # Should this be implemented differently? - if len(user_posts) > 2: + # Report that post + current_output = report_post(post_data, msg.owner, msg.room.name, message_url, operation, custom_reason) + if current_output: + output.append(current_output) + total_posts += 1 + if total_posts == 0: + raise CmdException("The user has no post yet.") + if total_posts > 2: add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time()) + if len(output): + return "\n".join(output) -def report_posts(urls, reported_by_owner, reported_in=None, blacklist_by=None, operation="report", custom_reason=None): - """ - Reports a list of URLs - :param urls: A list of URLs - :param reported_by_owner: The chatexchange User record for the user that reported the URLs. - :param reported_in: The name of the room in which the URLs were reported, or True if reported by the MS API. - :param blacklist_by: String of the URL for the transcript of the chat message causing the report. - :param operation: String of which operation is being performed (e.g. report, scan, report-force, scan-force) - :param custom_reason: String of the custom reason why the URLs are being reported. - :return: String: the in-chat repsponse - """ - reported_by_name = reported_by_owner.name +def report_post(post_data, reported_by, reported_in=None, blacklist_by=None, operation="report", custom_reason=None): + """ Scan or report a single post """ + reporter_name = reported_by.name operation = operation or "report" is_forced = operation in {"scan-force", "report-force", "report-direct"} - if operation == "scan-force": - operation = "scan" - action_done = "scanned" if operation == "scan" else "reported" + + # Format report_info + if operation in {"scan", "scan-force"}: + action_str = "scanned" + else: + action_str = "reported" + if reported_in is None: - reported_from = " by *{}*".format(reported_by_name) + from_str = " by *{}*".format(reporter_name) elif reported_in is True: - reported_from = " by *{}* from the metasmoke API".format(reported_by_name) + from_str = " by *{}* from the metasmoke API".format(reporter_name) else: - reported_from = " by user *{}* in room *{}*".format(reported_by_name, reported_in) + from_str = " by user *{}* in room *{}*".format(reporter_name, reported_in) if custom_reason: - with_reason = " with reason: *{}*".format(custom_reason) + reason_str = " with reason: *{}*".format(custom_reason) else: - with_reason = "" - - report_info = "Post manually {}{}{}.\n\n".format(action_done, reported_from, with_reason) - - normalized_urls = [] - for url in urls: - t = url_to_shortlink(url) - if not t: - normalized_urls.append("That does not look like a valid post URL.") - elif t not in normalized_urls: - normalized_urls.append(t) - else: - normalized_urls.append("A duplicate URL was provided.") - urls = normalized_urls - - users_to_blacklist = [] - output = [] - - for index, url in enumerate(urls, start=1): - if not url.startswith("http://") and not url.startswith("https://"): - # Return the bad URL directly. - output.append("Post {}: {}".format(index, url)) - continue - - post_data = api_get_post(rebuild_str(url)) - - if post_data is None: - output.append("Post {}: That does not look like a valid post URL.".format(index)) - continue - - if post_data is False: - output.append("Post {}: Could not find data for this post in the API. " - "It may already have been deleted.".format(index)) - continue - - if has_already_been_posted(post_data.site, post_data.post_id, post_data.title) and not is_false_positive( - (post_data.post_id, post_data.site)) and not is_forced: - # Don't re-report if the post wasn't marked as a false positive. If it was marked as a false positive, - # this re-report might be attempting to correct that/fix a mistake/etc. - - if GlobalVars.metasmoke_key is not None: - se_link = to_protocol_relative(post_data.post_url) - ms_link = resolve_ms_link(se_link) or to_metasmoke_link(se_link) - output.append("Post {}: Already recently reported [ [MS]({}) ]".format(index, ms_link)) - continue - else: - output.append("Post {}: Already recently reported".format(index)) - continue - - url = to_protocol_relative(post_data.post_url) - post = Post(api_response=post_data.as_dict) - user = get_user_from_url(post_data.owner_url) - - if fetch_post_id_and_site_from_url(url)[2] == "answer": - parent_data = api_get_post("https://{}/q/{}".format(post.post_site, post_data.question_id)) - post._is_answer = True - post._parent = Post(api_response=parent_data.as_dict) - - # if operation == "report-direct": - # scan_spam, scan_reasons, scan_why = False, [], "" - # else: - if True: - scan_spam, scan_reasons, scan_why = check_if_spam(post) # Scan it first - - if operation in {"report", "report-force"}: # Force blacklist user even if !!/report falls back to scan - if user is not None: - users_to_blacklist.append((user, blacklist_by, post_data.post_url)) - - # scan_spam == False indicates that the post is not spam, but it is also set to False - # when the post is spam, but has been previously reported. In that case, the scan_reasons - # is a tuple with what would be the list of reasons as the first entry and what would - # be the why as the second. This converts that output back into what they would be - # if the post wasn't previously reported for the cases where we want to process it - # as such. - # Expand real scan results from dirty returm value when not "!!/scan" - # Presence of "scan_why" indicates the post IS spam but ignored - if (operation != "scan" or is_forced) and (not scan_spam) and scan_why: - scan_spam = True - scan_reasons, scan_why = scan_reasons - - # Always handle if reported - if scan_spam and operation != "report-direct": - comment = report_info + scan_why.lstrip() - handle_spam(post=post, reasons=scan_reasons, why=comment) - if custom_reason: - Tasks.later(Metasmoke.post_auto_comment, custom_reason, reported_by_owner, url=url, after=15) - continue + reason_str = "" - # scan_spam == False - if operation in {"report", "report-force", "report-direct"}: - batch = "" - if len(urls) > 1: - batch = " (batch report: post {} out of {})".format(index, len(urls)) + report_info = "Post manually {}{}{}.\n\n".format(action_str, from_str, reason_str) - if scan_spam: - why_append = "This post would have also been caught for: " + ", ".join(scan_reasons).capitalize() + \ - '\n' + scan_why - else: - why_append = "This post would not have been caught otherwise." + abs_url = post_data.post_url + url = to_protocol_relative(abs_url) - comment = report_info + why_append - handle_spam(post=post, - reasons=["Manually reported " + post_data.post_type + batch], - why=comment) + if have_already_been_posted(post_data.site, post_data.post_id, post_data.title) and not is_forced: + # Recently reported and is not forced + if GlobalVars.metasmoke_key is not None: + ms_link = resolve_ms_link(url) or to_metasmoke_link(url) + # Post custom_reason as MS comment if exists if custom_reason: - Tasks.later(Metasmoke.post_auto_comment, custom_reason, reported_by_owner, url=url, after=15) - continue - - # scan_spam == False and "scan" + Tasks.later(Metasmoke.post_auto_comment, custom_reason, reported_by, url=url, after=15) + return "[Post]({}): Already recently reported [ [MS]({}) ]".format(abs_url, ms_link) else: - if scan_why: - output.append("Post {}: Looks like spam but not reported: {}".format(index, scan_why.capitalize())) - else: - output.append("Post {}: This does not look like spam".format(index)) + return "[Post]({}): Already recently reported".format(abs_url) + + post = Post(api_response=post_data.as_dict) + user = get_user_from_url(post_data.owner_url) + + if fetch_post_id_and_site_from_url(url)[2] == "answer": + # If the post is an answer, another API call need to be made + # to fetch its parent post + parent_data = api_get_post("https://{}/q/{}".format(post.post_site, post_data.question_id)) + post._is_answer = True + post._parent = Post(api_response=parent_data.as_dict) + + scan_spam, scan_reasons, scan_why = check_if_spam(post) # Scan it first + if operation in {"report", "report-force"}: + # if operation is "report" or "report-force", blacklist the user + if user is not None: + add_blacklisted_user(user, blacklist_by, post_data.post_url) + + if (operation != "scan") and not scan_spam and scan_why: + # The post is spam, but is manually ignored. + # More doc available in check_if_spam() + # In such cases, if operation is not just scan, we will treat the post as spam. + scan_spam = True + scan_reasons, scan_why = scan_reasons + + if scan_spam and operation != "report-direct": + # The post is spam + processed_why = report_info + scan_why.lstrip() # Add additional info + handle_spam(post=post, reasons=scan_reasons, why=processed_why) + if custom_reason: + Tasks.later(Metasmoke.post_auto_comment, custom_reason, reported_by, url=url, after=15) + return None - for item in users_to_blacklist: - add_blacklisted_user(*item) + # From this point on we are sure that either the operation is "report-direct" + # Or the post is not treated as spam by the system (scan_spam is False) + # The latter case may be that the operation is simply scan and the post is spam but ignored + if operation in {"report", "report-force", "report-direct"}: + if scan_spam: + # The operation is "report-direct" + processed_why = report_info + "This post would have also been caught for: " + \ + ", ".join(scan_reasons).capitalize() + '\n' + scan_why + else: + processed_why = report_info + "This post would not have been caught otherwise." + handle_spam(post=post, # Reported posts are always treated as spam. + reasons=["Manually reported" + post_data.post_type], + why=processed_why) + if custom_reason: + Tasks.later(Metasmoke.post_auto_comment, custom_reason, reported_by, url=url, after=15) + return None - if len(output): - return "\n".join(output) - return None + # At this point, either the post is not spam + # Or it is manually ignored + # Also the operation must be either "scan" or "scan-force" + if scan_why: + # Post is ignored + output.append("[Post]({}): Looks like spam but is ignored.".format(abs_url)) + return None + else: + # Is not spam + output.append("[Post]({}): Does not look like spam.".format(abs_url)) + return None @command(str, str, privileged=True, whole_msg=True) From d37ce1448a83155d6becea20cc2df85e675addef Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Tue, 26 May 2020 15:17:16 -0400 Subject: [PATCH 02/11] Minor fixes --- chatcommands.py | 55 ++++++++++++++++++++++++++++----------- test/test_chatcommands.py | 30 ++++++++++++++------- 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/chatcommands.py b/chatcommands.py index 8382d6fd13..a958faac18 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1705,7 +1705,7 @@ def allspam(msg, url, alias_used="allspam"): if 'items' not in res or len(res['items']) == 0: raise CmdException("The specified user does not appear to exist.") if res['has_more']: - # Too many posts + # Too many accounts raise CmdException("The specified user has an abnormally high number of accounts. Please consider flagging " "for moderator attention, otherwise use !!/report on the user's posts individually.") # Add accounts with posts @@ -1714,7 +1714,6 @@ def allspam(msg, url, alias_used="allspam"): user_sites.append((site['user_id'], get_api_sitename_from_url(site['site_url']))) else: user_sites.append((user[0], get_api_sitename_from_url(user[1]))) - total_posts = 0 # Tracking total amount of posts by that user # Fetch posts for u_id, u_site in user_sites: # Respect backoffs etc @@ -1764,14 +1763,42 @@ def allspam(msg, url, alias_used="allspam"): post_data.score = post['score'] post_data.up_vote_count = post['up_vote_count'] post_data.down_vote_count = post['down_vote_count'] - # Report that post - current_output = report_post(post_data, msg.owner, msg.room.name, message_url, operation, custom_reason) - if current_output: - output.append(current_output) - total_posts += 1 - if total_posts == 0: + if post_data.post_type == "answer": + # Annoyingly we have to make another request to get the question ID, since it is only returned by the + # /answers route + # Respect backoffs etc + GlobalVars.api_request_lock.acquire() + if GlobalVars.api_backoff_time > time.time(): + time.sleep(GlobalVars.api_backoff_time - time.time() + 2) + # Fetch posts + req_url = "https://api.stackexchange.com/2.2/answers/{}".format(post['post_id']) + params = { + 'filter': '!*Jxb9s5EOrE51WK*', + 'key': api_key, + 'site': u_site + } + answer_res = requests.get(req_url, params=params).json() + if "backoff" in answer_res: + if GlobalVars.api_backoff_time < time.time() + answer_res["backoff"]: + GlobalVars.api_backoff_time = time.time() + answer_res["backoff"] + GlobalVars.api_request_lock.release() + # Finally, set the attribute + post_data.question_id = answer_res['items'][0]['question_id'] + post_data.is_answer = True + # Add the post to report queue + user_posts.append(post_data) + if len(user_posts) == 0: raise CmdException("The user has no post yet.") - if total_posts > 2: + if len(user_posts) > 15: + raise CmdException("The specified user has an abnormally high number of spam posts. Please consider flagging " + "for moderator attention, otherwise use !!/report on the posts individually.") + # Report posts + for current_post_data in user_posts: + # Report that post + current_output = report_post(current_post_data, msg.owner, msg.room.name, message_url, operation, custom_reason) + if current_output: + output.append(current_output) + if len(user_posts) > 2: add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time()) if len(output): return "\n".join(output) @@ -1806,7 +1833,7 @@ def report_post(post_data, reported_by, reported_in=None, blacklist_by=None, ope abs_url = post_data.post_url url = to_protocol_relative(abs_url) - if have_already_been_posted(post_data.site, post_data.post_id, post_data.title) and not is_forced: + if has_already_been_posted(post_data.site, post_data.post_id, post_data.title) and not is_forced: # Recently reported and is not forced if GlobalVars.metasmoke_key is not None: ms_link = resolve_ms_link(url) or to_metasmoke_link(url) @@ -1859,7 +1886,7 @@ def report_post(post_data, reported_by, reported_in=None, blacklist_by=None, ope else: processed_why = report_info + "This post would not have been caught otherwise." handle_spam(post=post, # Reported posts are always treated as spam. - reasons=["Manually reported" + post_data.post_type], + reasons=["Manually reported " + post_data.post_type], why=processed_why) if custom_reason: Tasks.later(Metasmoke.post_auto_comment, custom_reason, reported_by, url=url, after=15) @@ -1870,12 +1897,10 @@ def report_post(post_data, reported_by, reported_in=None, blacklist_by=None, ope # Also the operation must be either "scan" or "scan-force" if scan_why: # Post is ignored - output.append("[Post]({}): Looks like spam but is ignored.".format(abs_url)) - return None + return "[Post]({}): Looks like spam but is ignored.".format(abs_url) else: # Is not spam - output.append("[Post]({}): Does not look like spam.".format(abs_url)) - return None + return "[Post]({}): Does not look like spam.".format(abs_url) @command(str, str, privileged=True, whole_msg=True) diff --git a/test/test_chatcommands.py b/test/test_chatcommands.py index 2f51ded910..fca9f6fe3e 100644 --- a/test/test_chatcommands.py +++ b/test/test_chatcommands.py @@ -289,7 +289,7 @@ def test_report(handle_spam): "id": 1337 }) - assert chatcommands.report("test", original_msg=msg, alias_used="report") == "Post 1: That does not look like a valid post URL." + assert chatcommands.report("test", original_msg=msg, alias_used="report") == "[Post](test): Invalid url." assert chatcommands.report("one two three four five plus-an-extra", original_msg=msg, alias_used="report") == ( "To avoid SmokeDetector reporting posts too slowly, you can report at most 5 posts at a time. This is to avoid " @@ -300,11 +300,11 @@ def test_report(handle_spam): # .startswith("You cannot provide multiple custom report reasons.") assert chatcommands.report('https://stackoverflow.com/q/1', original_msg=msg) == \ - "Post 1: Could not find data for this post in the API. It may already have been deleted." + "[Post](https://stackoverflow.com/q/1): No data fetched from API. It may have been deleted." # Valid post assert chatcommands.report('https://stackoverflow.com/a/1732454', original_msg=msg, alias_used="scan") == \ - "Post 1: This does not look like spam" + "[Post](https://stackoverflow.com/a/1732454): Does not look like spam." assert chatcommands.report('https://stackoverflow.com/a/1732454 "~o.O~"', original_msg=msg, alias_used="report") is None _, call = handle_spam.call_args_list[-1] @@ -338,7 +338,7 @@ def test_report(handle_spam): # Don't re-report GlobalVars.latest_questions = [('stackoverflow.com', '1732454', 'RegEx match open tags except XHTML self-contained tags')] - assert chatcommands.report('https://stackoverflow.com/a/1732454', original_msg=msg).startswith("Post 1: Already recently reported") + assert chatcommands.report('https://stackoverflow.com/a/1732454', original_msg=msg).startswith("[Post](https://stackoverflow.com/a/1732454): Already recently reported") # Can use report command multiple times in 30s if only one URL was used assert chatcommands.report('https://stackoverflow.com/q/1732348', original_msg=msg, alias_used="report") is None @@ -369,7 +369,7 @@ def test_allspam(handle_spam): "id": 1337 }) - assert chatcommands.allspam("test", original_msg=msg) == "That doesn't look like a valid user URL." + assert chatcommands.allspam("test", original_msg=msg) == "[User](test): Invalid url." # If this code lasts long enough to fail, I'll be happy assert chatcommands.allspam("https://stackexchange.com/users/10000000000", original_msg=msg) == \ @@ -391,10 +391,10 @@ def test_allspam(handle_spam): ) assert chatcommands.allspam("https://stackexchange.com/users/12108751", original_msg=msg) == \ - "The specified user hasn't posted anything." + "The user has no post yet." assert chatcommands.allspam("https://stackoverflow.com/users/8846458", original_msg=msg) == \ - "The specified user has no posts on this site." + "The user has no post yet." # This test is for users with <100rep but >15 posts # If this breaks in the future because the below user eventually gets 100 rep (highly unlikely), use the following @@ -412,16 +412,26 @@ def test_allspam(handle_spam): _, call = handle_spam.call_args_list[0] assert isinstance(call["post"], Post) assert call["reasons"] == ["Manually reported answer"] - assert call["why"] == "User manually reported by *ArtOfCode* in room *Charcoal HQ*.\n" + assert call["why"].startswith( + "Post manually reported by user *ArtOfCode* in room *Charcoal HQ*." + "\n\nThis post would not have been caught otherwise." + ) handle_spam.reset_mock() + assert chatcommands.allspam("https://meta.stackexchange.com/users/373807", original_msg=msg) is None assert handle_spam.call_count == 1 _, call = handle_spam.call_args_list[0] assert isinstance(call["post"], Post) - assert call["reasons"] == ["Manually reported answer"] - assert call["why"] == "User manually reported by *ArtOfCode* in room *Charcoal HQ*.\n" + # We expect "blacklisted user" here, as the blacklist is dumped to a pickle + # Hence when the pickle is loaded again, the user is blacklisted again + assert call["reasons"] == ["blacklisted user"] + # There is no "This post would have also been caught for:" as the reported post + # is not manually ignored or marked as fp + assert call["why"].startswith( + "Post manually reported by user *ArtOfCode* in room *Charcoal HQ*." + ) finally: GlobalVars.blacklisted_users.clear() From 8bdde2eb537c36188024a5117d90d012e558a632 Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Tue, 26 May 2020 16:07:26 -0400 Subject: [PATCH 03/11] Removed redundant code (code vs API request trade off) --- chatcommands.py | 97 ++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 70 deletions(-) diff --git a/chatcommands.py b/chatcommands.py index a958faac18..cfe29146d5 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1636,26 +1636,10 @@ def report(msg, args, alias_used="report"): "which would slow down reports.".format(alias_used)) output = [] - shortlinks = [] for url in urls: - shortlink = url_to_shortlink(url) - # Validate shortlink - if not shortlink: - output.append("[Post]({}): Invalid url.".format(url)) - elif shortlink in shortlinks: - output.append("[Post]({}): Duplicate url.".format(url)) - else: - shortlinks.append(shortlink) - # Fetch post data by API - post_data = api_get_post(rebuild_str(shortlink)) - if post_data is None: - return "[Post]({}): Invalid url.".format(url) - if post_data is False: - return "[Post]({}): No data fetched from API. It may have been deleted.".format(url) - # Report that post - current_output = report_post(post_data, msg.owner, msg.room.name, message_url, alias_used, custom_reason) - if current_output: - output.append(current_output) + current_output = report_post(url, msg.owner, msg.room.name, message_url, alias_used, custom_reason) + if current_output: + output.append(current_output) if len(output): if 1 < len(urls) > output.count("\n") + 1: @@ -1684,7 +1668,7 @@ def allspam(msg, url, alias_used="allspam"): if user is None: raise CmdException("[User]({}): Invalid url.".format(url)) user_sites = [] - user_posts = [] + user_post_urls = [] # Detect whether link is to network profile or site profile if user[1] == 'stackexchange.com': # Respect backoffs etc @@ -1714,7 +1698,7 @@ def allspam(msg, url, alias_used="allspam"): user_sites.append((site['user_id'], get_api_sitename_from_url(site['site_url']))) else: user_sites.append((user[0], get_api_sitename_from_url(user[1]))) - # Fetch posts + # Fetch post urls for u_id, u_site in user_sites: # Respect backoffs etc GlobalVars.api_request_lock.acquire() @@ -1743,68 +1727,29 @@ def allspam(msg, url, alias_used="allspam"): # Hence raise an exception and quit the process raise CmdException("The specified user's reputation is abnormally high. Please consider flagging for " "moderator attention, otherwise use !!/report on the posts individually.") - # Add blacklisted user - use most downvoted post as post URL message_url = "https://chat.{}/transcript/{}?m={}".format(msg._client.host, msg.room.id, msg.id) - add_blacklisted_user(user, message_url, sorted(posts, key=lambda x: x['score'])[0]['owner']['link']) - output = [] - # TODO: Postdata refactor, figure out a better way to use apigetpost + # We would ignore other data given by the API response + # The reduction in duplicated code worth a few more API requests for post in posts: - # Manually construct post_data (need to be changed; see TODO above) - post_data = PostData() - post_data.post_id = post['post_id'] - post_data.post_url = url_to_shortlink(post['link']) - *discard, post_data.site, post_data.post_type = fetch_post_id_and_site_from_url( - url_to_shortlink(post['link'])) - post_data.title = unescape(post['title']) - post_data.owner_name = unescape(post['owner']['display_name']) - post_data.owner_url = post['owner']['link'] - post_data.owner_rep = post['owner']['reputation'] - post_data.body = post['body'] - post_data.score = post['score'] - post_data.up_vote_count = post['up_vote_count'] - post_data.down_vote_count = post['down_vote_count'] - if post_data.post_type == "answer": - # Annoyingly we have to make another request to get the question ID, since it is only returned by the - # /answers route - # Respect backoffs etc - GlobalVars.api_request_lock.acquire() - if GlobalVars.api_backoff_time > time.time(): - time.sleep(GlobalVars.api_backoff_time - time.time() + 2) - # Fetch posts - req_url = "https://api.stackexchange.com/2.2/answers/{}".format(post['post_id']) - params = { - 'filter': '!*Jxb9s5EOrE51WK*', - 'key': api_key, - 'site': u_site - } - answer_res = requests.get(req_url, params=params).json() - if "backoff" in answer_res: - if GlobalVars.api_backoff_time < time.time() + answer_res["backoff"]: - GlobalVars.api_backoff_time = time.time() + answer_res["backoff"] - GlobalVars.api_request_lock.release() - # Finally, set the attribute - post_data.question_id = answer_res['items'][0]['question_id'] - post_data.is_answer = True - # Add the post to report queue - user_posts.append(post_data) - if len(user_posts) == 0: + user_post_urls.append(post['link']) + if len(user_post_urls) == 0: raise CmdException("The user has no post yet.") - if len(user_posts) > 15: + if len(user_post_urls) > 15: raise CmdException("The specified user has an abnormally high number of spam posts. Please consider flagging " "for moderator attention, otherwise use !!/report on the posts individually.") - # Report posts - for current_post_data in user_posts: + + for current_url in user_post_urls: # Report that post - current_output = report_post(current_post_data, msg.owner, msg.room.name, message_url, operation, custom_reason) + current_output = report_post(current_url, msg.owner, msg.room.name, message_url, operation, custom_reason) if current_output: output.append(current_output) - if len(user_posts) > 2: + if len(user_post_urls) > 2: add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time()) if len(output): return "\n".join(output) -def report_post(post_data, reported_by, reported_in=None, blacklist_by=None, operation="report", custom_reason=None): +def report_post(url, reported_by, reported_in=None, blacklist_by=None, operation="report", custom_reason=None): """ Scan or report a single post """ reporter_name = reported_by.name operation = operation or "report" @@ -1830,6 +1775,18 @@ def report_post(post_data, reported_by, reported_in=None, blacklist_by=None, ope report_info = "Post manually {}{}{}.\n\n".format(action_str, from_str, reason_str) + # Generate shortlink from url + shortlink = url_to_shortlink(url) + if not shortlink: + return "[Post]({}): Invalid url.".format(url) + + # Fetch post data by API + post_data = api_get_post(rebuild_str(shortlink)) + if post_data is None: + return "[Post]({}): Invalid url.".format(url) + if post_data is False: + return "[Post]({}): No data fetched from API. It may have been deleted.".format(url) + abs_url = post_data.post_url url = to_protocol_relative(abs_url) From 9f380646e2fd1e38796a0d08276fc660c8ff47b4 Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Tue, 26 May 2020 16:11:45 -0400 Subject: [PATCH 04/11] Minor fixes --- chatcommands.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chatcommands.py b/chatcommands.py index cfe29146d5..28fc4906ba 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1738,6 +1738,7 @@ def allspam(msg, url, alias_used="allspam"): raise CmdException("The specified user has an abnormally high number of spam posts. Please consider flagging " "for moderator attention, otherwise use !!/report on the posts individually.") + output = [] for current_url in user_post_urls: # Report that post current_output = report_post(current_url, msg.owner, msg.room.name, message_url, operation, custom_reason) From dc315ea2d30481e351ca8a15d348158afe57fcfe Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Wed, 10 Jun 2020 09:55:15 -0400 Subject: [PATCH 05/11] Minor changes in chatcommands.py: Improved comments --- chatcommands.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/chatcommands.py b/chatcommands.py index 28fc4906ba..e09af8003b 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1752,6 +1752,18 @@ def allspam(msg, url, alias_used="allspam"): def report_post(url, reported_by, reported_in=None, blacklist_by=None, operation="report", custom_reason=None): """ Scan or report a single post """ + # Arguments: + # - url: url to the post being reported (string) + # - reported_by: user reporting the post (ChatExchange user object) + # - reported_in: room in which the post is reported (integer for chatroom, + # True for MS API, or None) + # - blacklist_by: the chat transcript url of the report chat msg (string) + # - operation: Operation to perform (string, shall be in + # {"scan", "scan-force", "report", "report-force" or "report-direct"}) + # - custom_reason: a custom reason why to report the post (string or None) + # Returns: + # - None or a string containing the message to reply to the reporter + reporter_name = reported_by.name operation = operation or "report" is_forced = operation in {"scan-force", "report-force", "report-direct"} From cec7cf404cf7a4227bb93514599ce85d40739246 Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Wed, 1 Jul 2020 22:31:20 -0400 Subject: [PATCH 06/11] chatcommands.py: Removed confusing links Instead of the non-indicative "[Post](): Error message" reply, use the explicit "Post : Error message". --- chatcommands.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/chatcommands.py b/chatcommands.py index e09af8003b..0850e5e72a 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1666,7 +1666,7 @@ def allspam(msg, url, alias_used="allspam"): "one go.".format(wait)) user = get_user_from_url(url) if user is None: - raise CmdException("[User]({}): Invalid url.".format(url)) + raise CmdException("User {}: Invalid url.".format(url)) user_sites = [] user_post_urls = [] # Detect whether link is to network profile or site profile @@ -1791,14 +1791,14 @@ def report_post(url, reported_by, reported_in=None, blacklist_by=None, operation # Generate shortlink from url shortlink = url_to_shortlink(url) if not shortlink: - return "[Post]({}): Invalid url.".format(url) + return "Post {}: Invalid url.".format(url) # Fetch post data by API post_data = api_get_post(rebuild_str(shortlink)) if post_data is None: - return "[Post]({}): Invalid url.".format(url) + return "Post {}: Invalid url.".format(url) if post_data is False: - return "[Post]({}): No data fetched from API. It may have been deleted.".format(url) + return "Post {}: No data fetched from API. It may have been deleted.".format(url) abs_url = post_data.post_url url = to_protocol_relative(abs_url) @@ -1810,9 +1810,9 @@ def report_post(url, reported_by, reported_in=None, blacklist_by=None, operation # Post custom_reason as MS comment if exists if custom_reason: Tasks.later(Metasmoke.post_auto_comment, custom_reason, reported_by, url=url, after=15) - return "[Post]({}): Already recently reported [ [MS]({}) ]".format(abs_url, ms_link) + return "Post {}: Already recently reported [ [MS]({}) ]".format(abs_url, ms_link) else: - return "[Post]({}): Already recently reported".format(abs_url) + return "Post {}: Already recently reported".format(abs_url) post = Post(api_response=post_data.as_dict) user = get_user_from_url(post_data.owner_url) @@ -1867,10 +1867,10 @@ def report_post(url, reported_by, reported_in=None, blacklist_by=None, operation # Also the operation must be either "scan" or "scan-force" if scan_why: # Post is ignored - return "[Post]({}): Looks like spam but is ignored.".format(abs_url) + return "Post {}: Looks like spam but is ignored.".format(abs_url) else: # Is not spam - return "[Post]({}): Does not look like spam.".format(abs_url) + return "Post {}: Does not look like spam.".format(abs_url) @command(str, str, privileged=True, whole_msg=True) From fbc7d8f380557473fd162579f345b5e56e32b384 Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Wed, 1 Jul 2020 22:41:18 -0400 Subject: [PATCH 07/11] chatcommands.py: Do not blacklist the user until the last post in allspam() --- chatcommands.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/chatcommands.py b/chatcommands.py index 0850e5e72a..6e698bbbe4 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1637,7 +1637,9 @@ def report(msg, args, alias_used="report"): output = [] for url in urls: - current_output = report_post(url, msg.owner, msg.room.name, message_url, alias_used, custom_reason) + current_output = report_post(url, msg.owner, msg.room.name, + message_url, False, # Blacklist user + alias_used, custom_reason) if current_output: output.append(current_output) @@ -1739,18 +1741,30 @@ def allspam(msg, url, alias_used="allspam"): "for moderator attention, otherwise use !!/report on the posts individually.") output = [] - for current_url in user_post_urls: + for current_url in user_post_urls[:-1]: # Report that post - current_output = report_post(current_url, msg.owner, msg.room.name, message_url, operation, custom_reason) + current_output = report_post(current_url, msg.owner, msg.room.name, + message_url, True, # No blacklist user + operation, custom_reason) if current_output: output.append(current_output) + + # Blacklist the user at the last reported post + current_output = report_post(user_post_urls[-1], msg.owner, msg.room.name, + message_url, False, # Blacklist user + operation, custom_reason) + if current_output: + output.append(current_output) + if len(user_post_urls) > 2: add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time()) if len(output): return "\n".join(output) -def report_post(url, reported_by, reported_in=None, blacklist_by=None, operation="report", custom_reason=None): +def report_post(url, reported_by, reported_in=None, + blacklist_by=None, no_blacklist_user=False, + operation="report", custom_reason=None): """ Scan or report a single post """ # Arguments: # - url: url to the post being reported (string) @@ -1827,7 +1841,7 @@ def report_post(url, reported_by, reported_in=None, blacklist_by=None, operation scan_spam, scan_reasons, scan_why = check_if_spam(post) # Scan it first if operation in {"report", "report-force"}: # if operation is "report" or "report-force", blacklist the user - if user is not None: + if user is not None and not no_blacklist_user: add_blacklisted_user(user, blacklist_by, post_data.post_url) if (operation != "scan") and not scan_spam and scan_why: From 2385cebb2e2e19c250b728f56c485f70ebae05b8 Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Wed, 1 Jul 2020 22:44:38 -0400 Subject: [PATCH 08/11] test_chatcommands.py: Adjust test cases accordingly --- test/test_chatcommands.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_chatcommands.py b/test/test_chatcommands.py index fca9f6fe3e..f7a1f041f2 100644 --- a/test/test_chatcommands.py +++ b/test/test_chatcommands.py @@ -289,7 +289,7 @@ def test_report(handle_spam): "id": 1337 }) - assert chatcommands.report("test", original_msg=msg, alias_used="report") == "[Post](test): Invalid url." + assert chatcommands.report("test", original_msg=msg, alias_used="report") == "Post test: Invalid url." assert chatcommands.report("one two three four five plus-an-extra", original_msg=msg, alias_used="report") == ( "To avoid SmokeDetector reporting posts too slowly, you can report at most 5 posts at a time. This is to avoid " @@ -300,11 +300,11 @@ def test_report(handle_spam): # .startswith("You cannot provide multiple custom report reasons.") assert chatcommands.report('https://stackoverflow.com/q/1', original_msg=msg) == \ - "[Post](https://stackoverflow.com/q/1): No data fetched from API. It may have been deleted." + "Post https://stackoverflow.com/q/1: No data fetched from API. It may have been deleted." # Valid post assert chatcommands.report('https://stackoverflow.com/a/1732454', original_msg=msg, alias_used="scan") == \ - "[Post](https://stackoverflow.com/a/1732454): Does not look like spam." + "Post https://stackoverflow.com/a/1732454: Does not look like spam." assert chatcommands.report('https://stackoverflow.com/a/1732454 "~o.O~"', original_msg=msg, alias_used="report") is None _, call = handle_spam.call_args_list[-1] @@ -338,7 +338,7 @@ def test_report(handle_spam): # Don't re-report GlobalVars.latest_questions = [('stackoverflow.com', '1732454', 'RegEx match open tags except XHTML self-contained tags')] - assert chatcommands.report('https://stackoverflow.com/a/1732454', original_msg=msg).startswith("[Post](https://stackoverflow.com/a/1732454): Already recently reported") + assert chatcommands.report('https://stackoverflow.com/a/1732454', original_msg=msg).startswith("Post https://stackoverflow.com/a/1732454: Already recently reported") # Can use report command multiple times in 30s if only one URL was used assert chatcommands.report('https://stackoverflow.com/q/1732348', original_msg=msg, alias_used="report") is None @@ -369,7 +369,7 @@ def test_allspam(handle_spam): "id": 1337 }) - assert chatcommands.allspam("test", original_msg=msg) == "[User](test): Invalid url." + assert chatcommands.allspam("test", original_msg=msg) == "User test: Invalid url." # If this code lasts long enough to fail, I'll be happy assert chatcommands.allspam("https://stackexchange.com/users/10000000000", original_msg=msg) == \ From 23989d1f651d1dd6f1cabe9f177bbbd9b9c7e032 Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Thu, 2 Jul 2020 09:19:09 -0400 Subject: [PATCH 09/11] chatcommands.py: Add batch report prefix --- chatcommands.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/chatcommands.py b/chatcommands.py index 6e698bbbe4..7bc0582056 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1637,7 +1637,7 @@ def report(msg, args, alias_used="report"): output = [] for url in urls: - current_output = report_post(url, msg.owner, msg.room.name, + current_output = report_post(url, msg.owner, msg.room.name, "", message_url, False, # Blacklist user alias_used, custom_reason) if current_output: @@ -1741,16 +1741,17 @@ def allspam(msg, url, alias_used="allspam"): "for moderator attention, otherwise use !!/report on the posts individually.") output = [] + allspam_prefix = "(Batch report by {})".format(msg.owner) for current_url in user_post_urls[:-1]: # Report that post - current_output = report_post(current_url, msg.owner, msg.room.name, + current_output = report_post(current_url, msg.owner, msg.room.name, allspam_prefix, message_url, True, # No blacklist user operation, custom_reason) if current_output: output.append(current_output) # Blacklist the user at the last reported post - current_output = report_post(user_post_urls[-1], msg.owner, msg.room.name, + current_output = report_post(user_post_urls[-1], msg.owner, msg.room.name, allspam_prefix, message_url, False, # Blacklist user operation, custom_reason) if current_output: @@ -1762,7 +1763,7 @@ def allspam(msg, url, alias_used="allspam"): return "\n".join(output) -def report_post(url, reported_by, reported_in=None, +def report_post(url, reported_by, reported_in=None, prefix="", blacklist_by=None, no_blacklist_user=False, operation="report", custom_reason=None): """ Scan or report a single post """ @@ -1870,7 +1871,7 @@ def report_post(url, reported_by, reported_in=None, else: processed_why = report_info + "This post would not have been caught otherwise." handle_spam(post=post, # Reported posts are always treated as spam. - reasons=["Manually reported " + post_data.post_type], + reasons=["Manually reported " + post_data.post_type + prefix], why=processed_why) if custom_reason: Tasks.later(Metasmoke.post_auto_comment, custom_reason, reported_by, url=url, after=15) From 6fd56f8e6e552996be70471f13896bd538786afd Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Thu, 2 Jul 2020 11:07:03 -0400 Subject: [PATCH 10/11] Use .owner.name instead of .owner --- chatcommands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chatcommands.py b/chatcommands.py index 7bc0582056..dcf8552c6a 100644 --- a/chatcommands.py +++ b/chatcommands.py @@ -1741,7 +1741,7 @@ def allspam(msg, url, alias_used="allspam"): "for moderator attention, otherwise use !!/report on the posts individually.") output = [] - allspam_prefix = "(Batch report by {})".format(msg.owner) + allspam_prefix = " (Batch report by {})".format(msg.owner.name) for current_url in user_post_urls[:-1]: # Report that post current_output = report_post(current_url, msg.owner, msg.room.name, allspam_prefix, From d02c9ecc0f9a65ed1c6bc0a957878c1a186a2150 Mon Sep 17 00:00:00 2001 From: user12986714 <65436504+user12986714@users.noreply.github.com> Date: Thu, 2 Jul 2020 11:09:22 -0400 Subject: [PATCH 11/11] Adjust test cases accordingly --- test/test_chatcommands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_chatcommands.py b/test/test_chatcommands.py index f7a1f041f2..d64cdcdb28 100644 --- a/test/test_chatcommands.py +++ b/test/test_chatcommands.py @@ -411,7 +411,7 @@ def test_allspam(handle_spam): assert handle_spam.call_count == 1 _, call = handle_spam.call_args_list[0] assert isinstance(call["post"], Post) - assert call["reasons"] == ["Manually reported answer"] + assert call["reasons"] == ["Manually reported answer (Batch report by ArtOfCode)"] assert call["why"].startswith( "Post manually reported by user *ArtOfCode* in room *Charcoal HQ*." "\n\nThis post would not have been caught otherwise."