Skip to content

Commit a2d243e

Browse files
committed
Fix RealJamVR (stashapp#2158)
* fix and improve scene scraper
* add performerByURL
* clarify duplicate parseDate
* make Code regex work for old video URLs; simplify date parsing in scene scraper
1 parent de03d6c commit a2d243e

File tree

1 file changed

+62
-7
lines changed

1 file changed

+62
-7
lines changed

scrapers/RealJamVR.yml

+62-7
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
# yaml-language-server: $schema=../validator/scraper.schema.json
12
name: RealJamVR
23
sceneByURL: &byURL
34
- action: scrapeXPath
@@ -8,6 +9,13 @@ sceneByURL: &byURL
89

910
galleryByURL: *byURL
1011

12+
performerByURL:
13+
- action: scrapeXPath
14+
url:
15+
- porncornvr.com/actor/
16+
- realjamvr.com/actor/
17+
scraper: performerScraper
18+
1119
xPathScrapers:
1220
sceneScraper:
1321
scene:
@@ -20,16 +28,21 @@ xPathScrapers:
2028
Date: &date
2129
selector: //div[@class="specs-icon"]/following-sibling::strong
2230
postProcess:
23-
- replace:
24-
- regex: ^([a-zA-Z]{3})\D*(\d{1,2},\s*\d+)$
25-
with: $1. $2
31+
# both date formats are used interchangeably
2632
- parseDate: Jan. 2, 2006
33+
- parseDate: January 2, 2006
2734
Performers: &performers
2835
Name: //div[contains(@class,"scene-view")]/a[contains(@href,"/actor/")]
2936
Tags: &tags
30-
Name: //a[starts-with(@href, "/scenes") and @class="tag"]/text() | //div[not(@class)]/div[@class="specs-icon" and not(i)]
37+
Name:
38+
selector: //a[starts-with(@href, "/scenes") and @class="tag"]/text() | //div[not(@class)]/div[@class="specs-icon"]
39+
postProcess:
40+
- replace:
41+
# use the duration "specs-icon" as a fixed value replacement "hack"
42+
- regex: \d+:\d+:\d+
43+
with: Virtual Reality
3144
Details: &details
32-
selector: //div[@class="opacity-75 my-2"]
45+
selector: //div[contains(@class, "collapse-content-wrapper")]/div[contains(@class, "collapse-content")]
3346
Image:
3447
selector: //*[@id="video-player"]//@poster
3548
Studio: &studio
@@ -39,12 +52,54 @@ xPathScrapers:
3952
- replace:
4053
- regex: '(.*)\| ([^\|]+VR)$'
4154
with: $2
55+
Code:
56+
selector: //dl8-video/source[1]/@src
57+
postProcess:
58+
- replace:
59+
- regex: .*/scenes/(\d+)/.*
60+
with: $1
61+
- regex: .*/videos_app/\w+/(\d+)_.*
62+
with: $1
4263
gallery:
4364
Title: *title
4465
Date: *date
4566
Performers: *performers
4667
Tags: *tags
4768
Details: *details
4869
Studio: *studio
49-
50-
# Last Updated October 22, 2023
70+
performerScraper:
71+
performer:
72+
Name: //h1
73+
Gender: //div[span[text()="Gender:"]]/text()
74+
Country:
75+
selector: //div[span[text()="Birth Place:"]]/text()
76+
postProcess:
77+
- replace:
78+
- regex: .*,
79+
with: ""
80+
Birthdate:
81+
selector: //div[span[text()="Date of Birth:"]]/text()
82+
postProcess:
83+
# both date formats are used interchangeably
84+
- parseDate: Jan. 2, 2006
85+
- parseDate: January 2, 2006
86+
Height:
87+
selector: //div[span[text()="Height:"]]/text()
88+
postProcess:
89+
- replace:
90+
- regex: .*\ (\d+)\ cm.*
91+
with: $1
92+
Weight:
93+
selector: //div[span[text()="Weight:"]]/text()
94+
postProcess:
95+
- replace:
96+
- regex: .*\ (\d+)\ kg.*
97+
with: $1
98+
HairColor: //div[span[text()="Hair color:"]]/text()
99+
EyeColor: //div[span[text()="Eyes color:"]]/text()
100+
Tags:
101+
Name: //div[span[text()="Tags:"]]/a/text()
102+
Image: //div[contains(@class, "actor-view")]//img/@src
103+
Piercings: //div[span[text()="Piercing:"]]/text()
104+
Tattoos: //div[span[text()="Tattoo:"]]/text()
105+
# Last Updated January 8, 2025

0 commit comments

Comments (0)