Skip to content

Commit 04c1841

Browse files
authored
Merge pull request #2561 from nrg101/add-gallery-by-url-to-arx
add galleryByURL to Arx
2 parents f117ad3 + 55e8dab commit 04c1841

File tree

1 file changed

+59
-7
lines changed

1 file changed

+59
-7
lines changed

scrapers/Arx/Arx.yml

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,33 +16,37 @@ sceneByURL:
1616
- transmidnight.com
1717
- transroommates.com
1818
scraper: htmlScraper
19+
galleryByURL:
20+
- action: scrapeXPath
21+
url: *urls
22+
scraper: htmlScraper
1923
performerByURL:
2024
- action: scrapeXPath
2125
url: *urls
2226
scraper: htmlScraper
2327
xPathScrapers:
2428
htmlScraper:
2529
scene:
26-
Title: //meta[@property="og:title"]/@content
27-
Date:
30+
Title: &titleSel //meta[@property="og:title"]/@content
31+
Date: &date
2832
selector: //h1/following-sibling::div[contains(@class, "items-center")]//span[@class="block"]/text()
2933
postProcess:
3034
- parseDate: Jan 2, 2006
31-
Details: //span[text()="Description:"]/following-sibling::span
35+
Details: &detailsSel //span[text()="Description:"]/following-sibling::span
3236
Performers:
33-
Name: //span[text()="Models:"]/following-sibling::div//a//span/text()
37+
Name: &performersSel //span[text()="Models:"]/following-sibling::div//a//span/text()
3438
Tags:
3539
Name: &tagsSel //a[contains(@href, "/categories/")]//text()
3640
URL: &urlSel //meta[@property="og:url"]/@content
3741
Image: &imageSel //meta[@property="og:image"]/@content
38-
Code:
42+
Code: &code
3943
selector: *urlSel
4044
postProcess:
4145
- replace:
4246
- regex: &domainAndCodeRegex 'https?://([^/]+)/scenes/(\d+)/.*'
4347
with: $2
4448
Studio:
45-
Name:
49+
Name: &studioName
4650
selector: *imageSel
4751
postProcess:
4852
- replace:
@@ -51,7 +55,10 @@ xPathScrapers:
5155
- map:
5256
hhdp: Anal Vault
5357
chi: Cuck Hunter
58+
hhsf: Honey Trans
5459
hhsm: Honey Trans
60+
hhsss: Honey Trans
61+
jlo: Japan Lust
5562
jly: Japan Lust
5663
joi: JOI Babes
5764
hhles: Les Worship
@@ -66,6 +73,51 @@ xPathScrapers:
6673
- replace:
6774
- regex: *domainAndCodeRegex
6875
with: https://www.$1
76+
gallery:
77+
Title:
78+
selector: *titleSel
79+
postProcess:
80+
- replace:
81+
- regex: "Photos for Scene \\| "
82+
with: ""
83+
Code: *code
84+
URL: *urlSel
85+
Date:
86+
selector: *urlSel
87+
postProcess:
88+
- replace: &removePhotos
89+
- regex: /photos$
90+
with: ""
91+
- subScraper: *date
92+
Studio:
93+
Name:
94+
selector: *urlSel
95+
postProcess:
96+
- replace: *removePhotos
97+
- subScraper: *studioName
98+
Performers:
99+
Name:
100+
selector: *urlSel
101+
postProcess:
102+
- replace: *removePhotos
103+
- subScraper:
104+
selector: *performersSel
105+
concat: ","
106+
split: ","
107+
Tags:
108+
Name:
109+
selector: *urlSel
110+
postProcess:
111+
- replace: *removePhotos
112+
- subScraper:
113+
selector: *tagsSel
114+
concat: ","
115+
split: ","
116+
Details:
117+
selector: *urlSel
118+
postProcess:
119+
- replace: *removePhotos
120+
- subScraper: *detailsSel
69121
performer:
70122
Name: //meta[@property="og:image:alt"]/@content
71123
Image:
@@ -76,4 +128,4 @@ xPathScrapers:
76128
with: "${1}0x0${2}"
77129
Tags:
78130
Name: *tagsSel
79-
# Last Updated July 28, 2025
131+
# Last Updated November 7, 2025

0 commit comments

Comments
 (0)