Skip to content

Commit

Permalink
Scrape partial dates from scene titles on IAFD
Browse files Browse the repository at this point in the history
This means that an entry such as "Ass Blasters (1977)" will return an approximate date of 1977-01-01
if IAFD does not have a more specific release date for it.
  • Loading branch information
Maista6969 committed Jan 2, 2025
1 parent 536c4c8 commit 6d92627
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions scrapers/IAFD/IAFD.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,14 +277,24 @@ def scene_details(tree):


def scene_date(tree):
    """Return the scene's release date, falling back to the year in its title.

    The nodes selected by ``SHARED_SELECTORS["date"]`` are tried first and
    handed to ``clean_date``. When that produces nothing truthy, the nodes
    selected by ``SHARED_SELECTORS["title"]`` are checked for a parenthesised
    four-digit year, e.g. ``"Some Title (1977)"``, which is turned into the
    approximate date ``"1977-01-01"``.

    Both lookups go through the file's ``maybe`` helper (defined outside this
    block), so the exact "nothing found" value is whatever ``maybe`` returns.

    :param tree: object exposing ``xpath()`` for the scraped page
    :return: the cleaned release date, an approximate ``YYYY-01-01`` string,
        or a falsy value when neither is available
    """
    # If there's no release date we will use the year from the title for an approximate date
    title_pattern = re.compile(r".*\(([0-9]{4})\).*")

    def approximate_date(title):
        found = title_pattern.match(title)
        if not found:
            return None
        # Build the date from the captured year only. Substituting into the
        # title instead would keep any text the pattern did not consume
        # (e.g. further lines of a multi-line title) in the returned value.
        return found.group(1) + "-01-01"

    return maybe(
        tree.xpath(SHARED_SELECTORS["date"]),
        clean_date,
    ) or maybe(
        tree.xpath(SHARED_SELECTORS["title"]),
        approximate_date,
    )


def scene_title(tree):
    """Return the scene title without a trailing ``(YYYY)`` year suffix.

    The title text selected by ``SHARED_SELECTORS["title"]`` is stripped of
    surrounding whitespace, then a parenthesised four-digit number at the very
    end (plus any whitespace before it) is removed, so ``"Some Title (1977)"``
    becomes ``"Some Title"``.

    The lookup goes through the file's ``maybe`` helper (defined outside this
    block), which decides what is returned when no title is found.

    :param tree: object exposing ``xpath()`` for the scraped page
    :return: the cleaned title, or whatever ``maybe`` yields when there is none
    """
    # A single return: the previous body repeated this statement twice, which
    # left the second copy unreachable.
    return maybe(
        tree.xpath(SHARED_SELECTORS["title"]),
        lambda t: re.sub(r"\s*\(\d{4}\)$", "", t.strip()),
    )


def movie_studio(tree):
Expand All @@ -302,7 +312,9 @@ def movie_date(tree):
lambda d: clean_date(d.strip()),
) or maybe(
tree.xpath(SHARED_SELECTORS["title"]),
lambda t: re.sub(title_pattern, r"\1-01-01", t).strip() if re.match(title_pattern, t) else None,
lambda t: re.sub(title_pattern, r"\1-01-01", t).strip()
if re.match(title_pattern, t)
else None,
)


Expand All @@ -328,7 +340,10 @@ def movie_director(tree):


def movie_title(tree):
    """Return the movie title without a trailing parenthesised number.

    The title text selected by ``SHARED_SELECTORS["title"]`` is stripped of
    surrounding whitespace, then a parenthesised run of digits at the very end
    (plus any whitespace before it) is removed, so ``"Some Movie (1977)"``
    becomes ``"Some Movie"``. Unlike ``scene_title`` the digit run is not
    limited to four characters.

    The lookup goes through the file's ``maybe`` helper (defined outside this
    block), which decides what is returned when no title is found.

    :param tree: object exposing ``xpath()`` for the scraped page
    :return: the cleaned title, or whatever ``maybe`` yields when there is none
    """
    # A single return: the previous body repeated this statement twice, which
    # left the second copy unreachable.
    return maybe(
        tree.xpath(SHARED_SELECTORS["title"]),
        lambda t: re.sub(r"\s*\(\d+\)$", "", t.strip()),
    )


def video_url(tree):
Expand Down

0 comments on commit 6d92627

Please sign in to comment.