Skip to content

Commit cc05b01

Browse files
authored
Merge branch 'stashapp:master' into wallhaven-imageByFragtment
2 parents 92fe765 + e30f8d0 commit cc05b01

File tree

14 files changed

+193
-110
lines changed

14 files changed

+193
-110
lines changed

scrapers/MadBrosX.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/stashapp/CommunityScrapers/refs/heads/master/validator/scraper.schema.json
2+
name: "MadBrosX"
3+
# scrapes: MadBros
4+
5+
sceneByURL:
6+
- action: scrapeXPath
7+
url:
8+
- madbrosx.com
9+
scraper: madbrosx
10+
11+
xPathScrapers:
12+
madbrosx:
13+
scene:
14+
Title: //h3[@class = "page-heading"]/span/@title
15+
Date:
16+
selector: //div[@class = "video_page-sub-head__DqmKK"]//span[@aria-label = "calendar"]/../text()
17+
postProcess:
18+
- parseDate: January 2, 2006
19+
Details:
20+
selector: //div[@id = "rc-tabs-1-panel-description"]/p/text()
21+
postProcess:
22+
- replace:
23+
- regex: "^No description$"
24+
with: ""
25+
Performers:
26+
Name: //h4[@class = "ant-list-item-meta-title"]//a[contains(@href, "model")]/text()
27+
Studio:
28+
Name:
29+
fixed: Madbros
30+
Image: //meta[@property = "og:image"]/@content
31+
Tags:
32+
Name: //span[@class = "ant-tag"]/a/text()

scrapers/MilkyPeru.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/stashapp/CommunityScrapers/refs/heads/master/validator/scraper.schema.json
2+
name: "Milky Peru"
3+
sceneByURL:
4+
- action: scrapeXPath
5+
url:
6+
- milkyperu.com
7+
scraper: milkyperu
8+
9+
xPathScrapers:
10+
milkyperu:
11+
scene:
12+
Title:
13+
selector: //meta[@property = "og:title"]/@content
14+
postProcess:
15+
- replace:
16+
- regex: "[. -]+$"
17+
with: ""
18+
URLs: //meta[@property = "og:url"]/@content
19+
Date:
20+
selector: //meta[@property = "article:published_time"]/@content
21+
postProcess:
22+
- parseDate: 2006-01-02T15:04:05-07:00
23+
Details: //div[@data-elementor-type="wp-post"]//div[@class="elementor-widget-container"]/h2[contains(@class, "elementor-heading-title")]//text()
24+
Studio:
25+
Name: //meta[@property = "og:site_name"]/@content
26+
Image: //meta[@property = "og:image"]/@content

scrapers/MissAV_en.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ name: MissAV (en)
22
sceneByURL:
33
- action: scrapeXPath
44
url:
5-
- missav.com
5+
- missav.ai
6+
- missav.ws
7+
- missav.live
68
scraper: sceneScraper
79
sceneByName:
810
action: scrapeXPath

scrapers/MissAV_jp.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ name: MissAV (jp)
22
sceneByURL:
33
- action: scrapeXPath
44
url:
5-
- missav.com
5+
- missav.ai
6+
- missav.ws
7+
- missav.live
68
scraper: sceneScraper
79
sceneByName:
810
action: scrapeXPath

scrapers/Motherless.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ xPathScrapers:
3636
sceneScraper:
3737
common:
3838
$meta: //div[@class='media-meta']
39+
$error: //div[contains(@class,'error-page view-page')]
3940
scene:
40-
Title: $meta//h1/text()
41+
Title: $error/h1 | $meta//h1/text()
4142
Date:
4243
selector: $meta//div[@class='media-meta-stats']/span[not(contains(.,'Views')) and not(contains(.,'Favorites'))]/text()
4344
postProcess:
@@ -66,4 +67,4 @@ xPathScrapers:
6667
- replace:
6768
- regex: \#
6869
with:
69-
# Last Updated April 01, 2025
70+
# Last Updated October 17, 2025

scrapers/OopsFamily/OopsFamily.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ xPathScrapers:
1212
Name:
1313
fixed: "Oops Family"
1414
Title:
15-
selector: //h1[@class = 'video-detail__title']//text()
15+
selector: //div[@class="video-detail__title"]/text()
1616
Details:
1717
selector: //div[@data-id="description" and @class="hidden"]//text()
1818
Tags:
@@ -32,4 +32,4 @@ xPathScrapers:
3232
- regex: (.*) • (.*)
3333
with: $2
3434
- parseDate: "2 January, 2006"
35-
# Last Updated December 29, 2023
35+
# Last Updated October 17, 2025

scrapers/Perverformer.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ sceneByURL:
77
xPathScrapers:
88
sceneScraper:
99
common:
10-
$tags: //main/div[@class="columns"]/div[@class="secondary"]/div[@class="area"]/p/span[@class="nowrap"]
10+
$tags: //main/div[@class="columns"]/div[@class="secondary"]/div[@class="area"]/div/div[@class="tag-cloud"]/div
1111
scene:
1212
Title:
1313
selector: //main/div[@class="heading"]/h1
@@ -32,4 +32,4 @@ xPathScrapers:
3232
Studio:
3333
Name:
3434
fixed: Perverformer
35-
# Last Updated September 21, 2025
35+
# Last Updated October 04, 2025

scrapers/ScrapeWithURL/ScrapeWithURL.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,20 @@ def scrape_scene(url):
7373

7474

7575
FRAGMENT = json.loads(sys.stdin.read())
76-
url = FRAGMENT.get("url")
76+
urls = FRAGMENT.get("urls")
7777

78-
if url:
79-
result = scrape_scene(url)
80-
result = filter_nones(result)
81-
log.debug(f"result {result}")
82-
print(json.dumps(result))
78+
for url in urls:
79+
if not url.startswith('http'):
80+
continue # skip urls that don't start with http
81+
else:
82+
try:
83+
result = scrape_scene(url)
84+
result = filter_nones(result)
85+
log.debug(f"result {result}")
86+
print(json.dumps(result))
87+
if result:
88+
break
89+
except Exception:
90+
continue
8391
else:
8492
print("null")

scrapers/ScrapeWithURL/ScrapeWithURL.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ sceneByFragment:
66
script:
77
- python
88
- ScrapeWithURL.py
9-
# Last Updated December 20, 2024
9+
# Last Updated October 19, 2025

scrapers/SexLikeReal.yml

Lines changed: 40 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,73 +7,67 @@ sceneByURL:
77
- sexlikereal.com
88
scraper: sceneScraper
99

10-
sceneByFragment:
11-
action: scrapeXPath
12-
# url format: https://www.sexlikereal.com/scenes/{title}-{code}
13-
# However, the url:
14-
# https://www.sexlikereal.com/{code}
15-
# will redirect to the full url so that is what we will use for scrapping
16-
queryURL: https://www.sexlikereal.com/{filename}
17-
queryURLReplace:
18-
# filename format:
19-
# SLR_{stufio:[^_]+}_{title:[^_]+}_{res:\d+p}_{code:\d+}_{vrtype}.{ext}
20-
# vrtype: stuff we do not care about but could contain '_'
21-
filename:
22-
- regex: (?i)^SLR_.+(?:_\d+p)?_(\d+)_.*$
23-
with: $1
24-
- regex: .*\.[^\.]+$ # if no id is found in the filename
25-
with: # clear the filename so that it doesn't leak
26-
scraper: sceneScraper
10+
# as of 2025-10-24, code link does not work, either at /scenes or /
11+
12+
# sceneByFragment:
13+
# action: scrapeXPath
14+
# # url format: https://www.sexlikereal.com/scenes/{title}-{code}
15+
# # However, the url:
16+
# # https://www.sexlikereal.com/{code}
17+
# # will redirect to the full url so that is what we will use for scraping
18+
# queryURL: https://www.sexlikereal.com/{filename}
19+
# queryURLReplace:
20+
# # filename format:
21+
# # SLR_{studio:[^_]+}_{title:[^_]+}_{res:\d+p}_{code:\d+}_{vrtype}.{ext}
22+
# # vrtype: stuff we do not care about but could contain '_'
23+
# filename:
24+
# - regex: (?i)^SLR_.+(?:_\d+p)?_(\d+)_.*$
25+
# with: $1
26+
# - regex: .*\.[^\.]+$ # if no id is found in the filename
27+
# with: # clear the filename so that it doesn't leak
28+
# scraper: sceneScraper
2729

2830
xPathScrapers:
2931
sceneScraper:
32+
common:
33+
$ldjson: //script[@type="application/ld+json"][contains(text(), '"@type":"VideoObject"')]/text()
3034
scene:
3135
Title:
32-
selector: //script[@type="text/javascript"][contains(.,"videoData:")]/text()
36+
selector: //h1
37+
Date:
38+
selector: //p/time[@datetime][not(@data-hk)]/@datetime
3339
postProcess:
3440
- replace:
35-
- regex: '.+videoData:\s{[^{]+title":"([^"]+)",.+'
36-
with: $1
37-
- regex: '\\u2019'
38-
with: ""
39-
- regex: '\\u2013'
40-
with: ""
41-
Date: //time/@datetime
41+
- regex: "T.+"
42+
with: ""
43+
- parseDate: 2006-01-02
4244
Details:
43-
selector: //div[@data-qa="scene-about-tab-text"]/text()
45+
selector: $ldjson
4446
postProcess:
4547
- replace:
46-
- regex: '^\.\s*'
47-
with:
48-
concat: "\n"
48+
- regex: .*"description":\s?"([^"]+).+
49+
with: $1
50+
- regex: "^'|'\\r\\n"
51+
with: ""
52+
- regex: '\\r\\n\\r\\n'
53+
with: "\n\n"
54+
# fragile but it works
4955
Tags:
50-
Name: >-
51-
//meta[@property="video:tag"]/@content
52-
|
53-
//a[@data-qa="scene-tags-list-item-link"]/text()
56+
Name: //div/ul/li/a/span
5457
Performers:
55-
Name: //a[contains(@data-qa, "scene-model-list-item-name")]/text()
58+
Name: //a[starts-with(@href,"/pornstars/")]/text()
5659
Studio:
5760
Name:
58-
selector: //a[contains(@href,"/studios/")]/div[last()]/text()
61+
selector: //h3/a[starts-with(@href,"/studios/")]/text()
5962
postProcess:
6063
- map:
6164
DDFNetworkVR: "DDF Network VR"
6265
KinkyGirlsBerlin: "Kinky Girls Berlin"
6366
LethalHardcoreVR: "Lethal Hardcore VR"
6467
LittleCapriceVR: "Little Caprice Dreams Virtual Reality"
65-
LustReality: "LustReality"
6668
POVcentralVR: "POV Central"
67-
RealHotVR: "RealHotVR"
6869
SinsVR: "XSinsVR"
6970
VirtualXPorn: "Virtual X Porn"
7071
WankitnowVR: "Wank It Now VR"
71-
Image: //div[@id="webvr"]//img/@src
72-
URL: &sceneUrl //link[@rel="canonical"]/@href
73-
Code:
74-
selector: *sceneUrl
75-
postProcess:
76-
- replace:
77-
- regex: '^(.+)-(\d+)/?$'
78-
with: $2
79-
# Last Updated May 22, 2025
72+
Image: /html/head/meta[@property="og:image"]/@content
73+
# Last Updated October 24, 2025

0 commit comments

Comments
 (0)