@@ -10,10 +10,18 @@ sceneByURL:
1010 - teenthais.com
1111 - vrpornpass.com
1212 scraper : sceneScraper
13+ - action : scrapeXPath
14+ url :
15+ - hamezo.com
16+ scraper : altSceneScraper
1317performerByURL :
1418 - action : scrapeXPath
1519 url : *urls
1620 scraper : performerScraper
21+ - action : scrapeXPath
22+ url :
23+ - hamezo.com
24+ scraper : performerScraper
1725xPathScrapers :
1826 sceneScraper :
1927 scene :
@@ -22,7 +30,7 @@ xPathScrapers:
2230 Performers :
2331 Name : //p/span[@itemprop="actor"]/a/span | //div[@class="cat"][1]/a/text()
2432 URL : //div[@class="cat"][1]/a/@href | //span[@itemprop="actor"]/a/@href
25- Date :
33+ Date : &Date
2634 # vrpornpass.com does not currently provide the scene published date on the non-paid site
2735 selector : //div[contains(@class, "pure-u-1")]//meta[@itemprop="datePublished"]/@content | //div[@class="video-date"]
2836 postProcess :
@@ -38,36 +46,52 @@ xPathScrapers:
3846 - parseDate : 2006-01-02T15:04:05-07:00
3947 Tags :
4048 Name : //a[@itemprop="genre"] | //div[@class="cat"][2]/a
41- Details :
49+ Details : &Details
4250 selector : //div[@itemprop="description"] | //p[span[@class="readmore"]]
4351 postProcess :
4452 - replace :
4553 - regex : ... Read More
4654 with :
47- Image :
55+ Image : &Image
4856 selector : //div[contains(@class,"pure-u-1")]/meta[@itemprop="thumbnailUrl"]/@content | //div[@id="videohtml5tour"]//img[@class="pure-img"]/@src | //div[@id="videohtml5tour"]/video/@poster | //div[@class="rel"]//img/@src
4957 postProcess :
5058 - replace :
5159 - regex : ^//
5260 with : https://
53- URL :
61+ URL : &URL
5462 selector : //link[@rel="canonical"][1]/@href | //link[@rel="alternate"][1]/@href | //div[@class="title"]/h5/a/@href
5563 postProcess :
5664 - replace :
5765 - regex : \s*(.*)\s*feed/$
5866 with : $1
59- Studio :
67+ Studio : &Studio
6068 Name :
6169 selector : //meta[@itemprop="url"]/@content
6270 postProcess :
6371 - map :
6472 https://avidolz.com/ : AvIdolz
6573 https://baberotica.com/ : Baberotica
6674 https://baberoticavr.com/ : BaberoticaVR
75+ https://hamezo.com/ : Hamezo
6776 https://nucosplay.com/ : Nu Cosplay
6877 https://suckmevr.com/ : SuckMeVR
6978 https://teenthais.com/ : TeenThais
7079 https://vrpornpass.com/ : VR PornPass
80+ altSceneScraper :
81+ scene :
82+ Title :
83+ selector : //h1
84+ Performers :
85+ Name : //div[@class="model-thumb"]//h5/a/text()
86+ URL : //div[@class="model-thumb"]//h5/a/@href
87+ Tags :
88+ Name : //div[@class="cat"]/a
89+ Studio :
90+ Name : //symbol[@id="logosvg"]/title
91+ Details : *Details
92+ Date : *Date
93+ Image : *Image
94+ URL : *URL
7195 performerScraper :
7296 common :
7397 $profileBE : //div[@class="model-profile"]
@@ -83,11 +107,11 @@ xPathScrapers:
83107 - regex : None
84108 with :
85109 Aliases :
86- selector : $profileBE[contains(strong, "Alias name:")]//text()
110+ selector : $profileBE[contains(strong, "Alias name:")]//text() | $profileBE[contains(strong, "Japanese name:")]//text()
87111 postProcess :
88112 - replace :
89- - regex : .*Alias name:\s*(.*)\s*$
90- with : $1
113+ - regex : .*( Alias|Japanese) name:\s*(.*)\s*$
114+ with : $2
91115 - regex : None
92116 with :
93117 Gender :
@@ -181,6 +205,12 @@ xPathScrapers:
181205 with :
182206 Instagram :
183207 selector : $profileBE[contains(strong, "Instagram:")]/a/@href
208+ Measurements :
209+ selector : $profileBE[contains(strong, "Body:")]//text()
210+ postProcess :
211+ - replace :
212+ - regex : .*Body:\s*(.*)\s*$
213+ with : $1
184214 Twitter :
185215 selector : $profileBE[contains(strong, "Twitter:")]/a/@href
186216 Details :
@@ -196,7 +226,7 @@ xPathScrapers:
196226 - regex : \s*(.*)\s*feed/$
197227 with : $1
198228 Image :
199- selector : //div[@class="model-photo"]/img[@class="rounded"]/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1-2"]//img/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1"]//img/@src
229+ selector : //div[@class="model-photo"]/img[@class="rounded"]/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1-2"]//img/@src | //div[@class="pure-u-1-4 pure-u-sm-1-3 pure-u-xs-1"]//img/@src | //img[@class="smallroundedthumbs"]/@src
200230 postProcess :
201231 - replace :
202232 - regex : ^//
@@ -205,4 +235,4 @@ xPathScrapers:
205235 with : 690x960
206236 - regex : 270x480
207237 with : 405x720
208- # Last Updated October 10, 2023
238+ # Last Updated April 27, 2025
0 commit comments