1
+ # yaml-language-server: $schema=../validator/scraper.schema.json
1
2
name : RealJamVR
2
3
sceneByURL : &byURL
3
4
- action : scrapeXPath
@@ -8,6 +9,13 @@ sceneByURL: &byURL
8
9
9
10
galleryByURL : *byURL
10
11
12
# performerByURL (added in this change): uses the scrapeXPath action with the
# "performerScraper" definition for the two "/actor/" URL patterns listed below.
+ performerByURL :
13
+ - action : scrapeXPath
14
+ url :
15
+ - porncornvr.com/actor/
16
+ - realjamvr.com/actor/
17
+ scraper : performerScraper
18
+
11
19
xPathScrapers :
12
20
sceneScraper :
13
21
scene :
@@ -20,16 +28,21 @@ xPathScrapers:
20
28
# Date (anchored as &date): selects the <strong> siblings that follow a div whose
# class is exactly "specs-icon", then applies the postProcess steps below.
Date : &date
21
29
selector : //div[@class="specs-icon"]/following-sibling::strong
22
30
postProcess :
23
# Removed by this change: a replace step that kept the first three letters of the
# month, dropped everything up to the day digits, and emitted "<Mon>. <day>, <year>".
- - replace :
24
- - regex : ^([a-zA-Z]{3})\D*(\d{1,2},\s*\d+)$
25
- with : $1. $2
31
+ # both date formats are used interchangeably
26
32
# Two parseDate layouts are listed back to back: abbreviated month with a period,
# then the full month name.
# NOTE(review): the layouts look like Go reference-time formats, and the second step
# presumably only matters when the first does not match — confirm against the engine.
- parseDate : Jan. 2, 2006
33
+ - parseDate : January 2, 2006
27
34
Performers : &performers
28
35
Name : //div[contains(@class,"scene-view")]/a[contains(@href,"/actor/")]
29
36
# Tags (anchored as &tags): Name changes from a bare XPath string to a
# selector + postProcess mapping.
Tags : &tags
30
# Removed form: a union of the text of a.tag links whose href starts with "/scenes"
# and the "specs-icon" divs (inside a class-less div) that have no <i> child.
- Name : //a[starts-with(@href, "/scenes") and @class="tag"]/text() | //div[not(@class)]/div[@class="specs-icon" and not(i)]
37
+ Name :
38
# Added form: same union, but the "not(i)" restriction on the "specs-icon" div is gone.
+ selector : //a[starts-with(@href, "/scenes") and @class="tag"]/text() | //div[not(@class)]/div[@class="specs-icon"]
39
+ postProcess :
40
+ - replace :
41
+ # use the duration "specs-icon" as a fixed value replacement "hack"
42
# Text matching digits:digits:digits is replaced with the literal "Virtual Reality".
+ - regex : \d+:\d+:\d+
43
+ with : Virtual Reality
31
44
# Details (anchored as &details): the selector is swapped in this change.
# Old: a div whose class attribute is exactly "opacity-75 my-2".
# New: a div whose class contains "collapse-content" that is a direct child of a div
# whose class contains "collapse-content-wrapper".
Details : &details
32
- selector : //div[@class="opacity-75 my-2" ]
45
+ selector : //div[contains( @class, "collapse-content-wrapper")]/div[contains(@class, "collapse-content") ]
33
46
Image :
34
47
selector : //*[@id="video-player"]//@poster
35
48
Studio : &studio
@@ -39,12 +52,54 @@ xPathScrapers:
39
52
- replace :
40
53
- regex : ' (.*)\| ([^\|]+VR)$'
41
54
with : $2
55
# Code (added in this change): reads the src attribute of the first <source> child of
# a <dl8-video> element and reduces it to a run of digits taken from the URL path.
+ Code :
56
+ selector : //dl8-video/source[1]/@src
57
+ postProcess :
58
+ - replace :
59
# First pattern: keep the digits of the path segment that directly follows "/scenes/".
+ - regex : .*/scenes/(\d+)/.*
60
+ with : $1
61
# Second pattern: keep the digits that follow "/videos_app/<word>/" and precede "_".
+ - regex : .*/videos_app/\w+/(\d+)_.*
62
+ with : $1
42
63
# gallery: every field is a YAML alias, so galleries reuse the anchored definitions
# (&date, &performers, &tags, &details, &studio) instead of repeating selectors.
# NOTE(review): the &title anchor is not visible in this view — presumably declared
# in the part of the scene mapping that the diff omits.
gallery :
43
64
Title : *title
44
65
Date : *date
45
66
Performers : *performers
46
67
Tags : *tags
47
68
Details : *details
48
69
Studio : *studio
49
-
50
- # Last Updated October 22, 2023
70
# performerScraper (added in this change): XPath definitions for a performer.
# Most fields share one pattern: take the text nodes of the div that has a <span>
# child whose text is exactly the field label (e.g. "Gender:").
+ performerScraper :
71
+ performer :
72
+ Name : //h1
73
+ Gender : //div[span[text()="Gender:"]]/text()
74
+ Country :
75
+ selector : //div[span[text()="Birth Place:"]]/text()
76
+ postProcess :
77
+ - replace :
78
# Greedy match: everything up to and including the last comma is replaced, leaving
# only the text after it.
+ - regex : .*,
79
+ with : " "
80
+ Birthdate :
81
+ selector : //div[span[text()="Date of Birth:"]]/text()
82
+ postProcess :
83
+ # both date formats are used interchangeably
84
# Same pair of layouts as the scene Date field: abbreviated month with a period,
# then the full month name.
+ - parseDate : Jan. 2, 2006
85
+ - parseDate : January 2, 2006
86
+ Height :
87
+ selector : //div[span[text()="Height:"]]/text()
88
+ postProcess :
89
+ - replace :
90
# Keeps only the digits that sit between a space and " cm".
+ - regex : .*\ (\d+)\ cm.*
91
+ with : $1
92
+ Weight :
93
+ selector : //div[span[text()="Weight:"]]/text()
94
+ postProcess :
95
+ - replace :
96
# Keeps only the digits that sit between a space and " kg".
+ - regex : .*\ (\d+)\ kg.*
97
+ with : $1
98
+ HairColor : //div[span[text()="Hair color:"]]/text()
99
+ EyeColor : //div[span[text()="Eyes color:"]]/text()
100
+ Tags :
101
# Tag names are the text of the links inside the "Tags:" div.
+ Name : //div[span[text()="Tags:"]]/a/text()
102
# Image: src of any <img> beneath a div whose class contains "actor-view".
+ Image : //div[contains(@class, "actor-view")]//img/@src
103
+ Piercings : //div[span[text()="Piercing:"]]/text()
104
+ Tattoos : //div[span[text()="Tattoo:"]]/text()
105
+ # Last Updated January 8, 2025
0 commit comments