1
1
import json
2
2
import sys
3
3
from typing import Never
4
- import requests
5
4
import re
6
5
7
6
import py_common .log as log
7
+ from py_common .util import dig
8
+ from py_common .deps import ensure_requirements
9
+
10
+ ensure_requirements ("requests" )
11
+ import requests # noqa: E402
8
12
9
13
10
14
def fail (message : str ) -> Never :
@@ -51,60 +55,59 @@ def getIMG(video):
51
55
return item
52
56
return ""
53
57
54
def getVideoById(sceneId):
    """
    Fetch a video through getData and map it onto a scraped-scene dict.

    sceneId is passed to getData unchanged. The first entry under the
    "video" key of the response is used; if dig finds nothing truthy there,
    fail() is called with the whole response in the message (fail is
    annotated as never returning).

    The result always carries "title", "url", "image", "date", "performers"
    and "tags". "details" and "studio" are only added when the video has
    the corresponding data.
    """
    data = getData(sceneId)

    if not (video := dig(data, "video", 0)):
        fail(f"Video data not found in API response: {data}")

    # The site URL uses the title with spaces replaced by dashes
    urlTitle = video["title"].replace(" ", "-")

    scraped = {
        "title": video["title"],
        "url": f"https://pmvhaven.com/video/{urlTitle}_{video['_id']}",
        "image": getIMG(video),
        # keep only the date part of the ISO timestamp
        "date": video["isoDate"].split("T")[0],
        "performers": [{"name": x.strip()} for x in dig(video, "stars", default=[])],
    }

    # Description first, then the music list, separated by a newline.
    # The scene field is "details" (as in the previous revision of this
    # function), not "description".
    detail_parts = []
    if description := dig(video, "description"):
        detail_parts.append(description)
    if songs := dig(video, "music"):
        detail_parts.append("Music:\n" + "\n".join(songs))
    if detail_parts:
        scraped["details"] = "\n".join(detail_parts)

    if creator := dig(video, "creator"):
        scraped["studio"] = {"name": creator}

    tags = dig(video, "tags", default=[]) + dig(video, "categories", default=[])
    # Remove case-insensitive duplicates (the last spelling seen wins) and
    # sort. Tags are emitted as {"name": ...} objects, the same shape used
    # for performers and studio, rather than as bare strings.
    unique_tags = {tag.strip().lower(): tag.strip() for tag in tags}
    scraped["tags"] = [{"name": name} for name in sorted(unique_tags.values())]

    return scraped
95
+
89
96
90
- """
97
+ def sceneByFragment (params ):
98
+ """
91
99
Assumes the video ID or the download hash is in the title of the Stash scene.
92
100
The default file name when downloading from PMVHaven includes the download hash,
93
101
so this will first assume the parameter is the download hash. If no results are
94
102
returned then it will assume the parameter is the video ID and attempt data fetch.
95
- """
96
-
97
-
98
- def sceneByFragment (params ):
99
- if not params ["title" ]:
103
+ """
104
+ if not (title := dig (params , "title" )):
100
105
fail ("JSON blob did not contain title property" )
101
106
102
- regex = re .search (r"([a-z0-9]{24})" , params ["title" ])
107
+ if not (match := re .search (r"([a-z0-9]{24})" , title )):
108
+ fail (f"Did not find ID from video title '{ title } '" )
103
109
104
- if not regex :
105
- fail (f"Did not find ID from video title { params ['title' ]} " )
106
-
107
- inputParam = regex .group (1 )
110
+ inputParam = match .group (1 )
108
111
videoId = getVideoIdFromDownloadHash (inputParam )
109
112
110
113
if videoId is None :
@@ -113,24 +116,23 @@ def sceneByFragment(params):
113
116
return getVideoById (videoId )
114
117
115
118
116
- """
119
def sceneByURL(params):
    """
    Scrape a scene from its PMVHaven video URL.

    This assumes a URL of https://pmvhaven.com/video/{title}_{alphanumericVideoId}
    As of 2024-01-01, this is the only valid video URL format. If this changes in
    the future (i.e. more than one valid URL type, or ID not present in URL) and
    requires falling back to the old cloudscraper method, an xpath of
        //meta[@property="video-id"]/@content
    can be used to pass into the PMVHaven API

    Only the "url" entry of params is read. The text after the last
    underscore of the URL is taken as the video ID and handed to
    getVideoById, whose result is returned. fail() is called (it is
    annotated as never returning) when no URL is given or when the
    extracted ID is empty or not alphanumeric.
    """
    if not (url := dig(params, "url")):
        fail("No URL entered")

    # Ignore a query string, fragment or trailing slash so that an otherwise
    # valid URL still ends in the video ID
    path = url.split("#", 1)[0].split("?", 1)[0].rstrip("/")
    sceneId = path.split("_")[-1]

    if not (sceneId and sceneId.isalnum()):
        fail(f"Did not find scene ID from PMVHaven video URL {url}")

    return getVideoById(sceneId)
0 commit comments