@@ -7,73 +7,67 @@ sceneByURL:
77 - sexlikereal.com
88 scraper : sceneScraper
99
10- sceneByFragment :
11- action : scrapeXPath
12- # url format: https://www.sexlikereal.com/scenes/{title}-{code}
13- # However, the url:
14- # https://www.sexlikereal.com/{code}
15- # will redirect to the full url so that is what we will use for scrapping
16- queryURL : https://www.sexlikereal.com/{filename}
17- queryURLReplace :
18- # filename format:
19- # SLR_{stufio:[^_]+}_{title:[^_]+}_{res:\d+p}_{code:\d+}_{vrtype}.{ext}
20- # vrtype: stuff we do not care about but could contain '_'
21- filename :
22- - regex : (?i)^SLR_.+(?:_\d+p)?_(\d+)_.*$
23- with : $1
24- - regex : .*\.[^\.]+$ # if no id is found in the filename
25- with : # clear the filename so that it doesn't leak
26- scraper : sceneScraper
10+ # As of 2025-10-24 the short code URL no longer resolves, either under /scenes/ or at the site root, so sceneByFragment is disabled below.
11+
12+ # sceneByFragment:
13+ # action: scrapeXPath
14+ # # url format: https://www.sexlikereal.com/scenes/{title}-{code}
15+ # # However, the url:
16+ # # https://www.sexlikereal.com/{code}
17+ # # will redirect to the full url so that is what we will use for scraping
18+ # queryURL: https://www.sexlikereal.com/{filename}
19+ # queryURLReplace:
20+ # # filename format:
21+ # # SLR_{studio:[^_]+}_{title:[^_]+}_{res:\d+p}_{code:\d+}_{vrtype}.{ext}
22+ # # vrtype: stuff we do not care about but could contain '_'
23+ # filename:
24+ # - regex: (?i)^SLR_.+(?:_\d+p)?_(\d+)_.*$
25+ # with: $1
26+ # - regex: .*\.[^\.]+$ # if no id is found in the filename
27+ # with: # clear the filename so that it doesn't leak
28+ # scraper: sceneScraper
2729
2830xPathScrapers :
2931 sceneScraper :
32+ common :
33+ $ldjson : //script[@type="application/ld+json"][contains(text(), '"@type":"VideoObject"')]/text()
3034 scene :
3135 Title :
32- selector : //script[@type="text/javascript"][contains(.,"videoData:")]/text()
36+ selector : //h1
37+ Date :
38+ selector : //p/time[@datetime][not(@data-hk)]/@datetime
3339 postProcess :
3440 - replace :
35- - regex : ' .+videoData:\s{[^{]+title":"([^"]+)",.+'
36- with : $1
37- - regex : ' \\u2019'
38- with : " ’"
39- - regex : ' \\u2013'
40- with : " –"
41- Date : //time/@datetime
41+ - regex : " T.+"
42+ with : " "
43+ - parseDate : 2006-01-02
4244 Details :
43- selector : //div[@data-qa="scene-about-tab-text"]/text()
45+ selector : $ldjson
4446 postProcess :
4547 - replace :
46- - regex : ' ^\.\s*'
47- with :
48- concat : " \n "
48+ - regex : .*"description":\s?"([^"]+).+
49+ with : $1
50+ - regex : " ^'|'\\ r\\ n"
51+ with : " "
52+ - regex : ' \\r\\n\\r\\n'
53+ with : " \n\n "
54+ # fragile but it works
4955 Tags :
50- Name : >-
51- //meta[@property="video:tag"]/@content
52- |
53- //a[@data-qa="scene-tags-list-item-link"]/text()
56+ Name : //div/ul/li/a/span
5457 Performers :
55- Name : //a[contains(@data-qa, "scene-model-list-item-name ")]/text()
58+ Name : //a[starts-with(@href,"/pornstars/ ")]/text()
5659 Studio :
5760 Name :
58- selector : //a[contains (@href,"/studios/")]/div[last( )]/text()
61+ selector : //h3/a[starts-with (@href,"/studios/")]/text()
5962 postProcess :
6063 - map :
6164 DDFNetworkVR : " DDF Network VR"
6265 KinkyGirlsBerlin : " Kinky Girls Berlin"
6366 LethalHardcoreVR : " Lethal Hardcore VR"
6467 LittleCapriceVR : " Little Caprice Dreams Virtual Reality"
65- LustReality : " LustReality"
6668 POVcentralVR : " POV Central"
67- RealHotVR : " RealHotVR"
6869 SinsVR : " XSinsVR"
6970 VirtualXPorn : " Virtual X Porn"
7071 WankitnowVR : " Wank It Now VR"
71- Image : //div[@id="webvr"]//img/@src
72- URL : &sceneUrl //link[@rel="canonical"]/@href
73- Code :
74- selector : *sceneUrl
75- postProcess :
76- - replace :
77- - regex : ' ^(.+)-(\d+)/?$'
78- with : $2
79- # Last Updated May 22, 2025
72+ Image : /html/head/meta[@property="og:image"]/@content
73+ # Last Updated October 24, 2025
0 commit comments