Skip to content

Commit eacfe06

Browse files
mhdzumair authored and dreulavelle committed
Improve MediaFusion scraping configs
1 parent 42829a2 commit eacfe06

File tree

1 file changed

+53
-37
lines changed

1 file changed

+53
-37
lines changed

src/program/services/scrapers/mediafusion.py

+53 -37
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
""" Mediafusion scraper module """
2-
import json
3-
import re
2+
43
from typing import Dict
54

65
from loguru import logger
@@ -35,7 +34,11 @@ def __init__(self):
3534
self.timeout = self.settings.timeout
3635
self.encrypted_string = None
3736
# https://github.com/elfhosted/infra/blob/ci/mediafusion/middleware-ratelimit-stream.yaml
38-
rate_limit_params = get_rate_limit_params(max_calls=1, period=10) if self.settings.ratelimit else None
37+
rate_limit_params = (
38+
get_rate_limit_params(max_calls=1, period=10)
39+
if self.settings.ratelimit
40+
else None
41+
)
3942
session = create_service_session(rate_limit_params=rate_limit_params)
4043
self.request_handler = ScraperRequestHandler(session)
4144
self.initialized = self.validate()
@@ -57,39 +60,36 @@ def validate(self) -> bool:
5760
logger.error("Mediafusion ratelimit must be a valid boolean.")
5861
return False
5962

60-
if self.app_settings.downloaders.real_debrid.enabled:
61-
self.api_key = self.app_settings.downloaders.real_debrid.api_key
62-
self.downloader = "realdebrid"
63-
elif self.app_settings.downloaders.torbox.enabled:
64-
self.api_key = self.app_settings.downloaders.torbox.api_key
65-
self.downloader = "torbox"
66-
elif self.app_settings.downloaders.all_debrid.enabled:
67-
self.api_key = self.app_settings.downloaders.all_debrid.api_key
68-
self.downloader = "alldebrid"
69-
else:
70-
logger.error("No downloader enabled, please enable at least one.")
71-
return False
72-
7363
payload = {
74-
"sp": {
75-
"sv": self.downloader,
76-
"tk": self.api_key,
77-
"ewc": False
78-
},
79-
"sr": ["4k", "2160p", "1440p", "1080p", "720p", "480p", None],
80-
"ec": False,
81-
"eim": False,
82-
"sftn": True,
83-
"tsp": ["cached"], # sort order, but this doesnt matter as we sort later
84-
"nf": ["Disable"], # nudity filter
85-
"cf": ["Disable"] # certification filter
64+
"selected_resolutions": [
65+
"4k",
66+
"2160p",
67+
"1440p",
68+
"1080p",
69+
"720p",
70+
"480p",
71+
None,
72+
],
73+
"max_streams_per_resolution": 100,
74+
"live_search_streams": True,
75+
"show_full_torrent_name": True,
76+
"torrent_sorting_priority": [], # disable sort order, but this doesnt matter as we sort later
77+
"language_sorting": [],
78+
"nudity_filter": ["Disable"],
79+
"certification_filter": ["Disable"],
8680
}
8781

8882
url = f"{self.settings.url}/encrypt-user-data"
8983
headers = {"Content-Type": "application/json"}
9084

9185
try:
92-
response = self.request_handler.execute(HttpMethod.POST, url, overriden_response_type=ResponseType.DICT, json=payload, headers=headers)
86+
response = self.request_handler.execute(
87+
HttpMethod.POST,
88+
url,
89+
overriden_response_type=ResponseType.DICT,
90+
json=payload,
91+
headers=headers,
92+
)
9393
if not response.data or response.data["status"] != "success":
9494
logger.error(f"Failed to encrypt user data: {response.data['message']}")
9595
return False
@@ -100,7 +100,9 @@ def validate(self) -> bool:
100100

101101
try:
102102
url = f"{self.settings.url}/manifest.json"
103-
response = self.request_handler.execute(HttpMethod.GET, url, timeout=self.timeout)
103+
response = self.request_handler.execute(
104+
HttpMethod.GET, url, timeout=self.timeout
105+
)
104106
return response.is_ok
105107
except Exception as e:
106108
logger.error(f"Mediafusion failed to initialize: {e}")
@@ -117,7 +119,9 @@ def run(self, item: MediaItem) -> Dict[str, str]:
117119
except RateLimitExceeded:
118120
logger.debug(f"Mediafusion ratelimit exceeded for item: {item.log_string}")
119121
except ConnectTimeout:
120-
logger.warning(f"Mediafusion connection timeout for item: {item.log_string}")
122+
logger.warning(
123+
f"Mediafusion connection timeout for item: {item.log_string}"
124+
)
121125
except ReadTimeout:
122126
logger.warning(f"Mediafusion read timeout for item: {item.log_string}")
123127
except RequestException as e:
@@ -134,25 +138,37 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
134138
if identifier:
135139
url += identifier
136140

137-
response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout)
141+
response = self.request_handler.execute(
142+
HttpMethod.GET, f"{url}.json", timeout=self.timeout
143+
)
138144
if not response.is_ok or len(response.data.streams) <= 0:
139145
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
140146
return {}
141147

142148
torrents: Dict[str, str] = {}
143149

144150
for stream in response.data.streams:
145-
if not hasattr(stream, "description") and hasattr(stream, "title") and "rate-limit exceeded" in stream.title:
146-
raise RateLimitExceeded(f"Mediafusion rate-limit exceeded for item: {item.log_string}")
151+
if (
152+
not hasattr(stream, "description")
153+
and hasattr(stream, "title")
154+
and "rate-limit exceeded" in stream.title
155+
):
156+
raise RateLimitExceeded(
157+
f"Mediafusion rate-limit exceeded for item: {item.log_string}"
158+
)
147159
description_split = stream.description.replace("📂 ", "")
148160
raw_title = description_split.split("\n")[0]
149-
info_hash = re.search(r"info_hash=([A-Za-z0-9]+)", stream.url).group(1)
161+
if scrape_type == "series":
162+
raw_title = raw_title.split("/")[0]
163+
info_hash = stream.infoHash
150164
if info_hash and info_hash not in torrents:
151165
torrents[info_hash] = raw_title
152166

153167
if torrents:
154-
logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}")
168+
logger.log(
169+
"SCRAPER", f"Found {len(torrents)} streams for {item.log_string}"
170+
)
155171
else:
156172
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
157173

158-
return torrents
174+
return torrents

0 commit comments

Comments
 (0)