Skip to content

Commit 5296d53

Browse files
committed
Fix Redgifs identifier detection in URLs and filenames
1 parent 551203b commit 5296d53

File tree

1 file changed

+40
-24
lines changed

1 file changed

+40
-24
lines changed

scrapers/Redgifs/Redgifs.py

+40-24
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def get_token():
2828
session.headers.update({"Authorization": "Bearer " + get_token()})
2929

3030

31-
def scrape_id(gif_id: str):
31+
def scene_by_id(gif_id: str) -> ScrapedScene | None:
3232
api_url = f"https://api.redgifs.com/v2/gifs/{gif_id}?users=yes"
3333

3434
req = session.get(api_url)
@@ -37,16 +37,19 @@ def scrape_id(gif_id: str):
3737
return
3838

3939
data = req.json()
40+
log.debug(f"Data: {json.dumps(data, indent=2)}")
4041
gif = data["gif"]
4142
user = data["user"]
4243

43-
scene = {
44-
"title": gif.get("description"),
44+
scene: ScrapedScene = {
4545
"tags": [{"name": t} for t in gif.get("tags")],
46-
"date": datetime.fromtimestamp(gif["createDate"]).date().strftime("%Y-%m-%d"),
46+
"date": datetime.fromtimestamp(gif["createDate"]).date().isoformat(),
47+
"url": f"https://www.redgifs.com/watch/{gif_id}",
4748
"performers": [],
4849
}
4950

51+
if title := gif.get("title"):
52+
scene["title"] = title
5053
# We cannot return the image URL because you need the token to access it
5154
# and Stash does not have our token: base64 encoding the image instead
5255
if img := dig(gif, "urls", ("poster", "hd", "sd")):
@@ -56,7 +59,10 @@ def scrape_id(gif_id: str):
5659

5760
if name := user.get("name"):
5861
scene["studio"] = {"name": name, "url": user["url"]}
59-
scene["performers"] = [{"name": name}]
62+
urls = [
63+
url for url in [dig(user, f"socialUrl{i}") for i in range(1, 16)] if url
64+
]
65+
scene["performers"] = [{"name": name, "urls": urls}]
6066

6167
if (username := user.get("username")) and username != name:
6268
scene["performers"].append({"name": username})
@@ -65,40 +71,50 @@ def scrape_id(gif_id: str):
6571

6672

6773
def extract_id(string: str):
68-
# Redgifs URLs are in the format https://www.redgifs.com/watch/unique-name
69-
if "redgifs.com/watch" in string:
70-
return string.split("/")[-1].split("#")[0].split("?")[0]
74+
# Redgifs URLs are in the format https://www.redgifs.com/watch/identifier
75+
if match := re.search(r"redgifs.com/watch/(\w+)", string):
76+
return match.group(1)
7177

7278
# Filenames are either 'Redgifs_identifier' or 'Title of Clip [identifier]'
7379
filename = Path(string).stem
74-
if filename.startswith("Redgifs_"):
75-
return filename.split("_")[-1]
76-
elif match := re.match(r"\[(\w+)\]", filename):
80+
if match := re.search(r"Redgifs_(\w+)", filename):
81+
return match.group(1)
82+
elif match := re.search(r"\[(\w+)\]", filename):
7783
return match.group(1)
7884

7985
return None
8086

8187

88+
def scene_by_url(url: str) -> ScrapedScene | None:
    """Scrape the scene that a Redgifs URL points to.

    Logs an error and returns None when no identifier can be extracted
    from ``url``; otherwise returns whatever ``scene_by_id`` returns.
    """
    identifier = extract_id(url)
    if not identifier:
        log.error(f"Could not extract ID from URL: {url}")
        return None

    return scene_by_id(identifier)
93+
94+
95+
def scene_by_fragment(fragment: dict) -> ScrapedScene | None:
    """Scrape a scene using the URL or first file path of a fragment.

    The fragment's "url" is tried first; the path of its first file is
    only consulted when the URL is missing or yields no identifier.
    Logs two errors and returns None when neither source yields one.
    """
    identifier = None

    url = dig(fragment, "url")
    if url:
        identifier = extract_id(url)

    if not identifier:
        # Fall back to the filename of the first file attached to the scene
        filename = dig(fragment, "files", 0, "path")
        if filename:
            identifier = extract_id(filename)

    if identifier:
        return scene_by_id(identifier)

    log.error("Could not extract ID from fragment")
    log.error("Filename must match 'Redgifs_identifier' or 'whatever [identifier]'")
    return None
104+
105+
82106
if __name__ == "__main__":
83107
op, args = scraper_args()
84108
result = None
85109
match op, args:
86-
case "scene-by-url" | "scene-by-query-fragment", {"url": identifier}:
87-
gif_id = extract_id(identifier)
110+
case "scene-by-url" | "scene-by-query-fragment", {"url": url}:
111+
result = scene_by_url(url)
88112
case "scene-by-name", {"name": identifier}:
89-
gif_id = extract_id(identifier)
90-
case "scene-by-fragment", {"title": title, "url": url}:
91-
identifier = title or url
92-
gif_id = extract_id(identifier)
113+
result = [s for s in [scene_by_id(identifier.strip())] if s]
114+
case "scene-by-fragment", fragment:
115+
result = scene_by_fragment(fragment)
93116
case _:
94117
log.error(f"Operation: {op}, arguments: {json.dumps(args)}")
95118
sys.exit(1)
96119

97-
if gif_id:
98-
log.debug(f"Fetching scene with ID '{gif_id}'")
99-
result = scrape_id(gif_id)
100-
else:
101-
log.error(f"Unable to find valid GIF identifier in '{identifier}'")
102-
result = None
103-
104120
print(json.dumps(result))

0 commit comments

Comments
 (0)