1
1
name : ReflectiveDesire
2
+ galleryByURL :
3
+ - action : scrapeXPath
4
+ url :
5
+ - reflectivedesire.com/photos/
6
+ scraper : galleryScraper
2
7
sceneByURL :
3
8
- action : scrapeXPath
4
9
url :
@@ -10,6 +15,40 @@ performerByURL:
10
15
- reflectivedesire.com/performers/
11
16
scraper : performerScraper
12
17
xPathScrapers :
18
+ galleryScraper :
19
+ common :
20
+ $album : //section[@class="album"]
21
+ $performers : //section[@class="album"]/div[@class="item-text"]/p[@class="text-attributes"]/span/a[contains(@href, "/performers")]
22
+ gallery :
23
+ Title : $album/div[contains(@class, "title-container")]/h1/text()
24
+ Code :
25
+ selector : //link[@rel="canonical"]/@href
26
+ postProcess :
27
+ - replace :
28
+ - regex : ^https://reflectivedesire.com/photos/([^/]*)/$
29
+ with : $1
30
+ Details : //meta[@name='description']/@content
31
+ Date :
32
+ selector : //meta[@name='description']/@content
33
+ postProcess :
34
+ - replace :
35
+ - regex : ^.*Posted (.* \d{4})\.$
36
+ with : $1
37
+ - parseDate : "January 2006"
38
+ Performers :
39
+ Name : $performers
40
+ URL :
41
+ selector : $performers/@href
42
+ postProcess :
43
+ - replace :
44
+ - regex : ^
45
+ with : https://reflectivedesire.com
46
+ URL : //link[@rel="canonical"]/@href
47
+ Studio :
48
+ Name :
49
+ fixed : Reflective Desire
50
+ Tags :
51
+ Name : $album/div[@class="item-text"]/p[@class="text-attributes"]/span/a[contains(@href, "/categories")]
13
52
performerScraper :
14
53
common :
15
54
$socialLinks : //div[@class="title-container"]/div/span[@class="tag-links"]
@@ -20,16 +59,18 @@ xPathScrapers:
20
59
Instagram : $socialLinks/a[contains(@href,"https://instagram.com/")]/@href
21
60
URL : //link[@rel="canonical"]/@href
22
61
sceneScraper :
62
+ common :
63
+ $performers : //div[@class="item-text"]/p[@class="text-attributes"]/span/a[contains(@href, "/performers")]
64
+ $tags : //div[@class="item-text"]/p[@class="text-attributes"]/span/a[contains(@href, "/categories")]
23
65
scene :
24
66
Title : //div[contains(@class, "title-container")]/h1
25
67
Code : //section[@class="single-video"]/article[contains(@class, "video")]/@data-video-id
26
- Details :
27
- selector : //meta[@name='description']/@content
68
+ Details : //meta[@name='description']/@content
28
69
Date :
29
- selector : //p[contains(@class, "video-text-length")]
70
+ selector : //p[contains(@class, "text-length")]
30
71
postProcess :
31
72
- replace :
32
- - regex : ^.* Published (.* \d{4})\.$
73
+ - regex : ^.* Published (.* \d{4})\.?$
33
74
with : $1
34
75
- parseDate : "January 2006"
35
76
Studio :
@@ -38,21 +79,20 @@ xPathScrapers:
38
79
Image : //link[@rel="image_src"]/@href
39
80
URL : //link[@rel="canonical"]/@href
40
81
Performers :
41
- Name :
42
- selector : //span[contains(text(),'Performers')]/a/text()
43
- concat : " , "
82
+ Name : $performers
83
+ URL :
84
+ selector : $performers/@href
44
85
postProcess :
45
86
- replace :
46
- - regex : ' ,\s+'
47
- with : " ,"
48
- split : " ,"
87
+ - regex : ^
88
+ with : https://reflectivedesire.com
49
89
Tags :
50
90
Name :
51
- selector : //span[contains(text(),'Categories')]/a/text() | //span[@class='hidden-attributes']/text()
91
+ selector : $tags/text() | $tags/parent::span/following-sibling::span[@class="hidden-attributes-toggle"]/following-sibling::span[@class="hidden-attributes"]/text()
52
92
concat : ","
53
93
postProcess :
54
94
- replace :
55
95
- regex : ',\s+'
56
96
with : ","
57
97
split : ","
58
- # Last Updated April 14, 2024
98
+ # Last Updated January 11, 2024
0 commit comments