Skip to content

Commit

Permalink
Fix RealJamVR (#2158)
Browse files Browse the repository at this point in the history
* fix and improve scene scraper

* add performerByURL

* clarify duplicate parseDate

* make Code regex work for old video URLs; simplify date parsing in scene scraper
  • Loading branch information
nrg101 authored Jan 10, 2025
1 parent 68033e8 commit ce8fc0c
Showing 1 changed file with 62 additions and 7 deletions.
69 changes: 62 additions & 7 deletions scrapers/RealJamVR.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# yaml-language-server: $schema=../validator/scraper.schema.json
name: RealJamVR
sceneByURL: &byURL
- action: scrapeXPath
Expand All @@ -8,6 +9,13 @@ sceneByURL: &byURL

# Gallery URLs reuse the sceneByURL entries through the &byURL anchor declared on sceneByURL.
galleryByURL: *byURL

# Performer pages on either domain are handled by the performerScraper XPath mapping.
performerByURL:
  - action: scrapeXPath
    scraper: performerScraper
    url:
      - "porncornvr.com/actor/"
      - "realjamvr.com/actor/"

xPathScrapers:
sceneScraper:
scene:
Expand All @@ -20,16 +28,21 @@ xPathScrapers:
# Release date, read from the <strong> element that follows the "specs-icon" div.
# Anchored as &date; the gallery mapping aliases it.
Date: &date
selector: //div[@class="specs-icon"]/following-sibling::strong
postProcess:
- replace:
# Keep the first three letters of the month and add a period,
# e.g. "January 2, 2006" -> "Jan. 2, 2006".
- regex: ^([a-zA-Z]{3})\D*(\d{1,2},\s*\d+)$
with: $1. $2
# both date formats are used interchangeably
# NOTE(review): once the replace above has matched, the value is already in the
# abbreviated form, so the long-month layout looks redundant here — confirm.
- parseDate: Jan. 2, 2006
- parseDate: January 2, 2006
# Performer names: text of the /actor/ links directly inside the scene-view container.
# Anchored as &performers; the gallery mapping aliases it.
Performers: &performers
  Name:
    selector: //div[contains(@class,"scene-view")]/a[contains(@href,"/actor/")]
# Scene tags, anchored as &tags; the gallery mapping aliases it.
# `Name` is defined exactly once: a second `Name` key in this mapping would be a
# duplicate key (rejected by strict YAML parsers, silently last-wins in others).
Tags: &tags
  Name:
    # Union of the tag link texts and the "specs-icon" div nested in an
    # un-classed div. The folded scalar joins the two lines with one space.
    selector: >-
      //a[starts-with(@href, "/scenes") and @class="tag"]/text() |
      //div[not(@class)]/div[@class="specs-icon"]
    postProcess:
      - replace:
          # use the duration "specs-icon" as a fixed value replacement "hack":
          # a digits:digits:digits value is rewritten to the constant tag name.
          - regex: '\d+:\d+:\d+'
            with: Virtual Reality
# Scene description, anchored as &details; the gallery mapping aliases it.
# Exactly one `selector` key is kept: two `selector` keys in the same mapping
# are a duplicate key, and only the last one would ever take effect.
Details: &details
  selector: //div[contains(@class, "collapse-content-wrapper")]/div[contains(@class, "collapse-content")]
# Cover image: the poster attribute found beneath the element with id "video-player".
Image: //*[@id="video-player"]//@poster
Studio: &studio
Expand All @@ -39,12 +52,54 @@ xPathScrapers:
- replace:
- regex: '(.*)\| ([^\|]+VR)$'
with: $2
# Scene code: the numeric id embedded in the src of the first <source> under <dl8-video>.
Code:
  selector: //dl8-video/source[1]/@src
  postProcess:
    - replace:
        # URL shape: .../scenes/<digits>/...
        - regex: '.*/scenes/(\d+)/.*'
          with: '$1'
        # URL shape: .../videos_app/<word>/<digits>_...
        - regex: '.*/videos_app/\w+/(\d+)_.*'
          with: '$1'
# Gallery mapping: every field is a YAML alias (*title, *date, ...) that reuses
# an anchored definition instead of repeating its selector.
# No Image or Code entry is defined for galleries in this block.
gallery:
Title: *title
Date: *date
Performers: *performers
Tags: *tags
Details: *details
Studio: *studio

# Last Updated October 22, 2023
# Performer profile mapping. Apart from Name, Tags and Image, each field reads
# the text node(s) of the <div> whose child <span> carries the quoted label.
performerScraper:
  performer:
    Name: //h1
    Gender: //div[span[text()="Gender:"]]/text()
    Country:
      selector: //div[span[text()="Birth Place:"]]/text()
      postProcess:
        - replace:
            # Drop everything up to and including the last comma. The trailing
            # \s* also removes the whitespace after that comma, so the result
            # does not start with a space.
            - regex: '.*,\s*'
              with: ""
    Birthdate:
      selector: //div[span[text()="Date of Birth:"]]/text()
      postProcess:
        # both date formats are used interchangeably
        - parseDate: Jan. 2, 2006
        - parseDate: January 2, 2006
    Height:
      selector: //div[span[text()="Height:"]]/text()
      postProcess:
        - replace:
            # Reduce the value to the digits that precede " cm".
            - regex: '.*\ (\d+)\ cm.*'
              with: $1
    Weight:
      selector: //div[span[text()="Weight:"]]/text()
      postProcess:
        - replace:
            # Reduce the value to the digits that precede " kg".
            - regex: '.*\ (\d+)\ kg.*'
              with: $1
    HairColor: //div[span[text()="Hair color:"]]/text()
    EyeColor: //div[span[text()="Eyes color:"]]/text()
    Tags:
      # Link texts inside the "Tags:" row.
      Name: //div[span[text()="Tags:"]]/a/text()
    # First-level lookup: any <img> beneath the actor-view container.
    Image: //div[contains(@class, "actor-view")]//img/@src
    Piercings: //div[span[text()="Piercing:"]]/text()
    Tattoos: //div[span[text()="Tattoo:"]]/text()
# Last Updated January 8, 2025

0 comments on commit ce8fc0c

Please sign in to comment.