From 9addeb7e3d14999ecf6b29ae63533f30ecbd6d9a Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 14 Jan 2025 23:57:07 +0100 Subject: [PATCH] Add Redistribution tag to Teamskeet and MYLF scenes The vast majority of 'Network X Creator' scenes are redistributions; the benefits outweigh the false positives, in my opinion --- scrapers/Teamskeet/TeamskeetAPI.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index 086b9dd4e..13859c22c 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -6,6 +6,7 @@ from datetime import datetime import py_common.log as log +import cloudscraper ### SET MEMBER ACCESS TOKEN HERE ### CAN BE access_token OR refresh_token @@ -13,22 +14,11 @@ MYLF_ACCESS_TOKEN = "" #### +scraper = cloudscraper.create_scraper() -try: - import cloudscraper -except ModuleNotFoundError: - print("You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", file=sys.stderr) - sys.exit() -try: - import requests -except ModuleNotFoundError: - print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr) - sys.exit() def try_url(url): - return requests.head(url).status_code == 200 + return scraper.head(url).status_code == 200 def try_img_replacement(imgurl): # members/full - 1600x900 @@ -196,18 +186,18 @@ def save_json(api_json, url): if IS_MEMBER: headers.update({"Cookie": f"access_token={MEMBER_ACCESS_TOKEN}"}) log.debug(f"Asking the API... 
{api_url}") - scraper = cloudscraper.create_scraper() + # Send to the API r = "" try: r = scraper.get(api_url, headers=headers, timeout=(3, 5)) - except: + except Exception as e: log.error("An error has occurred with the page request") - log.error(f"Request status: `{r.status_code}`") + log.error(e) log.error("Check your TeamskeetAPI.log for more details") with open("TeamskeetAPI.log", 'w', encoding='utf-8') as f: f.write(f"Scene ID: {scene_id}\n") - f.write(f"Request:\n{r.text}") + f.write(f"Request:\n{e}") sys.exit(1) try: scene_api_json_check = r.json().get('found') @@ -220,7 +210,7 @@ def save_json(api_json, url): log.error('Scene not found (Wrong ID?)') sys.exit(1) - except: + except Exception: log.debug(r.status_code) if (r.status_code == 401 and IS_MEMBER): log.error("It's likely that your member access token needs to be replaced") @@ -258,6 +248,8 @@ def save_json(api_json, url): studioApiName = scene_api_json['site'].get('siteName') if IS_MEMBER else scene_api_json['site'].get('name') log.debug("Studio API name is '" + studioApiName + "'") scrape['studio']['name'] = studioMap[studioApiName] if studioApiName in studioMap else studioApiName +if " x " in scrape['studio']['name'].lower(): + tags.append("Redistribution") scrape['tags'] = [{"name": x} for x in tags] scrape['code'] = scene_id if IS_MEMBER else scene_api_json.get('cId', '').split('/')[-1] for tag in studioDefaultTags.get(studioApiName, []):