Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 59 additions & 7 deletions scrapers/Arx/Arx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,37 @@ sceneByURL:
- transmidnight.com
- transroommates.com
scraper: htmlScraper
galleryByURL:
- action: scrapeXPath
url: *urls
scraper: htmlScraper
performerByURL:
- action: scrapeXPath
url: *urls
scraper: htmlScraper
xPathScrapers:
htmlScraper:
scene:
Title: //meta[@property="og:title"]/@content
Date:
Title: &titleSel //meta[@property="og:title"]/@content
Date: &date
selector: //h1/following-sibling::div[contains(@class, "items-center")]//span[@class="block"]/text()
postProcess:
- parseDate: Jan 2, 2006
Details: //span[text()="Description:"]/following-sibling::span
Details: &detailsSel //span[text()="Description:"]/following-sibling::span
Performers:
Name: //span[text()="Models:"]/following-sibling::div//a//span/text()
Name: &performersSel //span[text()="Models:"]/following-sibling::div//a//span/text()
Tags:
Name: &tagsSel //a[contains(@href, "/categories/")]//text()
URL: &urlSel //meta[@property="og:url"]/@content
Image: &imageSel //meta[@property="og:image"]/@content
Code:
Code: &code
selector: *urlSel
postProcess:
- replace:
- regex: &domainAndCodeRegex 'https?://([^/]+)/scenes/(\d+)/.*'
with: $2
Studio:
Name:
Name: &studioName
selector: *imageSel
postProcess:
- replace:
Expand All @@ -51,7 +55,10 @@ xPathScrapers:
- map:
hhdp: Anal Vault
chi: Cuck Hunter
hhsf: Honey Trans
hhsm: Honey Trans
hhsss: Honey Trans
jlo: Japan Lust
jly: Japan Lust
joi: JOI Babes
hhles: Les Worship
Expand All @@ -66,6 +73,51 @@ xPathScrapers:
- replace:
- regex: *domainAndCodeRegex
with: https://www.$1
gallery:
Title:
selector: *titleSel
postProcess:
- replace:
- regex: "Photos for Scene \\| "
with: ""
Code: *code
URL: *urlSel
Date:
selector: *urlSel
postProcess:
- replace: &removePhotos
- regex: /photos$
with: ""
- subScraper: *date
Studio:
Name:
selector: *urlSel
postProcess:
- replace: *removePhotos
- subScraper: *studioName
Performers:
Name:
selector: *urlSel
postProcess:
- replace: *removePhotos
- subScraper:
selector: *performersSel
concat: ","
split: ","
Tags:
Name:
selector: *urlSel
postProcess:
- replace: *removePhotos
- subScraper:
selector: *tagsSel
concat: ","
split: ","
Details:
selector: *urlSel
postProcess:
- replace: *removePhotos
- subScraper: *detailsSel
performer:
Name: //meta[@property="og:image:alt"]/@content
Image:
Expand All @@ -76,4 +128,4 @@ xPathScrapers:
with: "${1}0x0${2}"
Tags:
Name: *tagsSel
# Last Updated July 28, 2025
# Last Updated November 7, 2025