Skip to content

Commit 512530a

Browse files
committed
ReflectiveDesire: fix scene scraper, add gallery scraper
Signed-off-by: Roman Ondráček <[email protected]>
1 parent d114ed4 commit 512530a

File tree

1 file changed

+52
-12
lines changed

1 file changed

+52
-12
lines changed

scrapers/ReflectiveDesire.yml

+52-12
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
name: ReflectiveDesire
2+
galleryByURL:
3+
- action: scrapeXPath
4+
url:
5+
- reflectivedesire.com/photos/
6+
scraper: galleryScraper
27
sceneByURL:
38
- action: scrapeXPath
49
url:
@@ -10,6 +15,40 @@ performerByURL:
1015
- reflectivedesire.com/performers/
1116
scraper: performerScraper
1217
xPathScrapers:
18+
galleryScraper:
  # XPath scraper used for gallery pages (URLs containing /photos/).
  common:
    # Reusable XPath fragments: $album is the album <section>; $performers
    # are the performer links inside its "text-attributes" paragraph.
    $album: //section[@class="album"]
    $performers: //section[@class="album"]/div[@class="item-text"]/p[@class="text-attributes"]/span/a[contains(@href, "/performers")]
  gallery:
    Title: $album/div[contains(@class, "title-container")]/h1/text()
    Code:
      # The code is the slug captured from the canonical URL.
      selector: //link[@rel="canonical"]/@href
      postProcess:
        - replace:
            # Dots are escaped so they match a literal "." in the hostname.
            - regex: ^https://reflectivedesire\.com/photos/([^/]*)/$
              with: $1
    Details: //meta[@name='description']/@content
    Date:
      # The date is taken from the "Posted <Month> <Year>" tail of the meta description.
      selector: //meta[@name='description']/@content
      postProcess:
        - replace:
            # Trailing period is optional, consistent with the sceneScraper
            # date regex; without a match the full description would be
            # handed to parseDate.
            - regex: ^.*Posted (.* \d{4})\.?$
              with: $1
        - parseDate: "January 2006"
    Performers:
      Name: $performers
      URL:
        selector: $performers/@href
        postProcess:
          - replace:
              # Prefix the site origin onto the scraped href.
              - regex: ^
                with: https://reflectivedesire.com
    URL: //link[@rel="canonical"]/@href
    Studio:
      Name:
        fixed: Reflective Desire
    Tags:
      Name: $album/div[@class="item-text"]/p[@class="text-attributes"]/span/a[contains(@href, "/categories")]
1352
performerScraper:
1453
common:
1554
$socialLinks: //div[@class="title-container"]/div/span[@class="tag-links"]
@@ -20,16 +59,18 @@ xPathScrapers:
2059
Instagram: $socialLinks/a[contains(@href,"https://instagram.com/")]/@href
2160
URL: //link[@rel="canonical"]/@href
2261
sceneScraper:
62+
common:
63+
$performers: //div[@class="item-text"]/p[@class="text-attributes"]/span/a[contains(@href, "/performers")]
64+
$tags: //div[@class="item-text"]/p[@class="text-attributes"]/span/a[contains(@href, "/categories")]
2365
scene:
2466
Title: //div[contains(@class, "title-container")]/h1
2567
Code: //section[@class="single-video"]/article[contains(@class, "video")]/@data-video-id
26-
Details:
27-
selector: //meta[@name='description']/@content
68+
Details: //meta[@name='description']/@content
2869
Date:
29-
selector: //p[contains(@class, "video-text-length")]
70+
selector: //p[contains(@class, "text-length")]
3071
postProcess:
3172
- replace:
32-
- regex: ^.* Published (.* \d{4})\.$
73+
- regex: ^.* Published (.* \d{4})\.?$
3374
with: $1
3475
- parseDate: "January 2006"
3576
Studio:
@@ -38,21 +79,20 @@ xPathScrapers:
3879
Image: //link[@rel="image_src"]/@href
3980
URL: //link[@rel="canonical"]/@href
4081
Performers:
41-
Name:
42-
selector: //span[contains(text(),'Performers')]/a/text()
43-
concat: ","
82+
Name: $performers
83+
URL:
84+
selector: $performers/@href
4485
postProcess:
4586
- replace:
46-
- regex: ',\s+'
47-
with: ","
48-
split: ","
87+
- regex: ^
88+
with: https://reflectivedesire.com
4989
Tags:
5090
Name:
51-
selector: //span[contains(text(),'Categories')]/a/text() | //span[@class='hidden-attributes']/text()
91+
selector: $tags/text() | $tags/parent::span/following-sibling::span[@class="hidden-attributes-toggle"]/following-sibling::span[@class="hidden-attributes"]/text()
5292
concat: ","
5393
postProcess:
5494
- replace:
5595
- regex: ',\s+'
5696
with: ","
5797
split: ","
58-
# Last Updated April 14, 2024
98+
# Last Updated January 11, 2025

0 commit comments

Comments
 (0)