Skip to content

Commit 70556f5

Browse files
authored
Merge pull request #2302 from MortonBridges/Add-Hamezo-to-Baberotica
Add Hamezo to Baberotica.yml
2 parents 8157c78 + 12cf4a0 commit 70556f5

File tree

1 file changed: +40 additions, -10 deletions

scrapers/Baberotica.yml

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,18 @@ sceneByURL:
1010
- teenthais.com
1111
- vrpornpass.com
1212
scraper: sceneScraper
13+
- action: scrapeXPath
14+
url:
15+
- hamezo.com
16+
scraper: altSceneScraper
1317
performerByURL:
1418
- action: scrapeXPath
1519
url: *urls
1620
scraper: performerScraper
21+
- action: scrapeXPath
22+
url:
23+
- hamezo.com
24+
scraper: performerScraper
1725
xPathScrapers:
1826
sceneScraper:
1927
scene:
@@ -22,7 +30,7 @@ xPathScrapers:
2230
Performers:
2331
Name: //p/span[@itemprop="actor"]/a/span | //div[@class="cat"][1]/a/text()
2432
URL: //div[@class="cat"][1]/a/@href | //span[@itemprop="actor"]/a/@href
25-
Date:
33+
Date: &Date
2634
# vrpornpass.com does not currently provide the scene published date on the non-paid site
2735
selector: //div[contains(@class, "pure-u-1")]//meta[@itemprop="datePublished"]/@content | //div[@class="video-date"]
2836
postProcess:
@@ -38,36 +46,52 @@ xPathScrapers:
3846
- parseDate: 2006-01-02T15:04:05-07:00
3947
Tags:
4048
Name: //a[@itemprop="genre"] | //div[@class="cat"][2]/a
41-
Details:
49+
Details: &Details
4250
selector: //div[@itemprop="description"] | //p[span[@class="readmore"]]
4351
postProcess:
4452
- replace:
4553
- regex: ... Read More
4654
with:
47-
Image:
55+
Image: &Image
4856
selector: //div[contains(@class,"pure-u-1")]/meta[@itemprop="thumbnailUrl"]/@content | //div[@id="videohtml5tour"]//img[@class="pure-img"]/@src | //div[@id="videohtml5tour"]/video/@poster | //div[@class="rel"]//img/@src
4957
postProcess:
5058
- replace:
5159
- regex: ^//
5260
with: https://
53-
URL:
61+
URL: &URL
5462
selector: //link[@rel="canonical"][1]/@href | //link[@rel="alternate"][1]/@href | //div[@class="title"]/h5/a/@href
5563
postProcess:
5664
- replace:
5765
- regex: \s*(.*)\s*feed/$
5866
with: $1
59-
Studio:
67+
Studio: &Studio
6068
Name:
6169
selector: //meta[@itemprop="url"]/@content
6270
postProcess:
6371
- map:
6472
https://avidolz.com/: AvIdolz
6573
https://baberotica.com/: Baberotica
6674
https://baberoticavr.com/: BaberoticaVR
75+
https://hamezo.com/: Hamezo
6776
https://nucosplay.com/: Nu Cosplay
6877
https://suckmevr.com/: SuckMeVR
6978
https://teenthais.com/: TeenThais
7079
https://vrpornpass.com/: VR PornPass
80+
altSceneScraper:
81+
scene:
82+
Title:
83+
selector: //h1
84+
Performers:
85+
Name: //div[@class="model-thumb"]//h5/a/text()
86+
URL: //div[@class="model-thumb"]//h5/a/@href
87+
Tags:
88+
Name: //div[@class="cat"]/a
89+
Studio:
90+
Name: //symbol[@id="logosvg"]/title
91+
Details: *Details
92+
Date: *Date
93+
Image: *Image
94+
URL: *URL
7195
performerScraper:
7296
common:
7397
$profileBE: //div[@class="model-profile"]
@@ -83,11 +107,11 @@ xPathScrapers:
83107
- regex: None
84108
with:
85109
Aliases:
86-
selector: $profileBE[contains(strong, "Alias name:")]//text()
110+
selector: $profileBE[contains(strong, "Alias name:")]//text() | $profileBE[contains(strong, "Japanese name:")]//text()
87111
postProcess:
88112
- replace:
89-
- regex: .*Alias name:\s*(.*)\s*$
90-
with: $1
113+
- regex: .*(Alias|Japanese) name:\s*(.*)\s*$
114+
with: $2
91115
- regex: None
92116
with:
93117
Gender:
@@ -181,6 +205,12 @@ xPathScrapers:
181205
with:
182206
Instagram:
183207
selector: $profileBE[contains(strong, "Instagram:")]/a/@href
208+
Measurements:
209+
selector: $profileBE[contains(strong, "Body:")]//text()
210+
postProcess:
211+
- replace:
212+
- regex: .*Body:\s*(.*)\s*$
213+
with: $1
184214
Twitter:
185215
selector: $profileBE[contains(strong, "Twitter:")]/a/@href
186216
Details:
@@ -196,7 +226,7 @@ xPathScrapers:
196226
- regex: \s*(.*)\s*feed/$
197227
with: $1
198228
Image:
199-
selector: //div[@class="model-photo"]/img[@class="rounded"]/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1-2"]//img/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1"]//img/@src
229+
selector: //div[@class="model-photo"]/img[@class="rounded"]/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1-2"]//img/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1"]//img/@src | //img[@class="smallroundedthumbs"]/@src
200230
postProcess:
201231
- replace:
202232
- regex: ^//
@@ -205,4 +235,4 @@ xPathScrapers:
205235
with: 690x960
206236
- regex: 270x480
207237
with: 405x720
208-
# Last Updated October 10, 2023
238+
# Last Updated April 27, 2025

0 commit comments

Comments
 (0)