-
-
Notifications
You must be signed in to change notification settings - Fork 21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New Extractor! #33
base: main
Are you sure you want to change the base?
New Extractor! #33
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import re | ||
from typing import Dict, Any | ||
from bs4 import BeautifulSoup, SoupStrainer | ||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError | ||
import json | ||
from urllib.parse import urlparse, parse_qs | ||
|
||
|
||
class VixCloudExtractor(BaseExtractor): | ||
"""VixCloud URL extractor.""" | ||
|
||
async def version(self, domain: str) -> str:
    """Get the version string published by the VixCloud parent site.

    Requests ``https://streamingcommunity.{domain}/richiedi-un-titolo`` and
    reads the ``version`` key from the JSON document stored in the
    ``data-page`` attribute of the page's ``<div id="app">`` element.

    Args:
        domain: The host-name suffix placed after ``streamingcommunity.``
            when building the request URL and the Referer/Origin headers.

    Returns:
        The value of the ``version`` key from the embedded page data.

    Raises:
        ExtractorError: If the site does not answer with HTTP 200, if the
            ``<div id="app">`` element is missing, or if its ``data-page``
            attribute is absent, is not valid JSON, or has no ``version`` key.
    """
    site = f"https://streamingcommunity.{domain}"
    response = await self._make_request(
        f"{site}/richiedi-un-titolo",
        headers={
            "Referer": f"{site}/",
            "Origin": site,
        },
    )
    if response.status_code != 200:
        raise ExtractorError("Outdated Domain")

    # Only parse the single element we care about.
    soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
    app_div = soup.find("div", {"id": "app"})
    if app_div is None:
        # A truthiness check on the soup object would not catch this case;
        # test the lookup result itself so we fail with a domain error
        # rather than an AttributeError on None.
        raise ExtractorError("Failed to locate the app container in the page")

    try:
        # json.loads(None) raises TypeError, malformed JSON raises ValueError
        # (JSONDecodeError), and a missing key raises KeyError.
        return json.loads(app_div.get("data-page"))["version"]
    except (TypeError, ValueError, KeyError) as err:
        raise ExtractorError("Failed to parse version from page data") from err
|
||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]: | ||
"""Extract Vixcloud URL.""" | ||
domain = url.split("://")[1].split("/")[0].split(".")[1] | ||
version = await self.version(domain) | ||
response = await self._make_request(url, headers={"x-inertia": "true", "x-inertia-version": version}) | ||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("iframe")) | ||
iframe = soup.find("iframe").get("src") | ||
parsed_url = urlparse(iframe) | ||
query_params = parse_qs(parsed_url.query) | ||
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version}) | ||
|
||
if response.status_code != 200: | ||
raise ExtractorError("Failed to extract URL components, Invalid Request") | ||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body")) | ||
if soup: | ||
script = soup.find("body").find("script").text | ||
token = re.search(r"'token':\s*'(\w+)'", script).group(1) | ||
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1) | ||
quality = re.search(r'"quality":(\d+)', script).group(1) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Remove unused variables 'quality', 'canPlayFHD', and 'b'.
- quality = re.search(r'"quality":(\d+)', script).group(1)
...
- canPlayFHD = "h=1"
...
- b = "b=1"
Also applies to: 55-55, 58-58. 🧰 Tools: 🪛 Ruff (0.8.2) 50-50: Local variable `quality` is assigned to but never used. Remove assignment to unused variable `quality`. (F841)
Comment on lines
+46
to
+50
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 💡 Codebase verification: Based on the code review, I can now generate the final response: Add error handling for regex matches to prevent NoneType errors:
token_match = re.search(r"'token':\s*'(\w+)'", script)
if not token_match:
raise ExtractorError("Failed to extract token from script")
token = token_match.group(1)
🔗 Analysis chain: Confirm presence of 'token', 'expires', and 'quality' in the script. Let me verify the presence of error handling for these regex matches by searching through the codebase. 🏁 Scripts executed: The following scripts were executed for the analysis: Error: No shell scripts found. Script:
#!/bin/bash
# Search for the file content to understand the context and error handling
cat mediaflow_proxy/extractors/vixcloud.py
Length of output: 3011. 🧰 Tools: 🪛 Ruff (0.8.2) 50-50: Local variable `quality` is assigned to but never used. Remove assignment to unused variable `quality`. (F841) |
||
vixid = iframe.split("/embed/")[1].split("?")[0] | ||
base_url = iframe.split("://")[1].split("/")[0] | ||
final_url = f"https://{base_url}/playlist/{vixid}.m3u8?token={token}&expires={expires}" | ||
if "canPlayFHD" in query_params: | ||
canPlayFHD = "h=1" | ||
final_url += "&h=1" | ||
if "b" in query_params: | ||
b = "b=1" | ||
final_url += "&b=1" | ||
self.base_headers["referer"] = url | ||
return { | ||
"destination_url": final_url, | ||
"request_headers": self.base_headers, | ||
"mediaflow_endpoint": self.mediaflow_endpoint, | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Improve error handling in 'version' method.
Currently, if the JSON structure or the "version" key is missing, the code will raise an unhandled exception. Consider adding a try-except block around the JSON parsing and key access. This ensures the method fails gracefully if the response format changes.
📝 Committable suggestion