From 90360ccfa6a547b43b69d3f83e579c49d87643c5 Mon Sep 17 00:00:00 2001 From: kqlio67 <166700875+kqlio67@users.noreply.github.com> Date: Mon, 30 Dec 2024 02:04:06 +0000 Subject: [PATCH] fix: Update deprecated DuckDuckGo search backend from 'api' to 'auto' (#2516) * fix: Update deprecated DuckDuckGo search backend from 'api' to 'auto' Fixes UserWarning: 'api' backend is deprecated, using backend='auto' - Updated default backend parameter from 'api' to 'auto' in search function - Aligns with latest duckduckgo-search library recommendations * Update g4f/Provider/Blackbox.py * Fix response_format=b64_json (g4f/client/__init__.py) * Update g4f/Provider/Blackbox2.py g4f/Provider/BlackboxCreateAgent.py --------- Co-authored-by: kqlio67 <> --- g4f/Provider/Blackbox.py | 26 ++-- g4f/Provider/Blackbox2.py | 197 ---------------------------- g4f/Provider/BlackboxCreateAgent.py | 2 +- g4f/client/__init__.py | 21 +-- g4f/web_search.py | 6 +- 5 files changed, 35 insertions(+), 217 deletions(-) delete mode 100644 g4f/Provider/Blackbox2.py diff --git a/g4f/Provider/Blackbox.py b/g4f/Provider/Blackbox.py index e4d4cb96d49..c4b14e20f60 100644 --- a/g4f/Provider/Blackbox.py +++ b/g4f/Provider/Blackbox.py @@ -8,6 +8,7 @@ import aiohttp import asyncio from pathlib import Path +import concurrent.futures from ..typing import AsyncResult, Messages, ImagesType from .base_provider import AsyncGeneratorProvider, ProviderModelMixin @@ -220,15 +221,24 @@ async def create_async_generator( use_internal_search = web_search and model in cls.web_search_models if web_search and not use_internal_search: - - def run_search(): - return get_search_message(messages[-1]["content"]) + try: + # Create a timeout for web search + async def run_search(): + with concurrent.futures.ThreadPoolExecutor() as executor: + return await asyncio.get_event_loop().run_in_executor( + executor, + lambda: get_search_message(messages[-1]["content"]) + ) + + # Set a timeout of 10 seconds for web search + search_result = await asyncio.wait_for(run_search(), timeout=10.0) + messages[-1]["content"] = search_result - import concurrent.futures - with concurrent.futures.ThreadPoolExecutor() as executor: - messages[-1]["content"] = await asyncio.get_event_loop().run_in_executor( - executor, run_search - ) + except asyncio.TimeoutError: + debug.log("Web search timed out, proceeding with original message") + except Exception as e: + debug.log(f"Web search failed: {str(e)}, proceeding with original message") + web_search = False async def process_request(): diff --git a/g4f/Provider/Blackbox2.py b/g4f/Provider/Blackbox2.py deleted file mode 100644 index 374d5c04746..00000000000 --- a/g4f/Provider/Blackbox2.py +++ /dev/null @@ -1,197 +0,0 @@ -from __future__ import annotations - -import random -import asyncio -import re -import json -from pathlib import Path -from aiohttp import ClientSession -from typing import AsyncIterator - -from ..typing import AsyncResult, Messages -from ..image import ImageResponse -from .base_provider import AsyncGeneratorProvider, ProviderModelMixin -from ..cookies import get_cookies_dir - -from .. import debug - -class Blackbox2(AsyncGeneratorProvider, ProviderModelMixin): - url = "https://www.blackbox.ai" - api_endpoints = { - "llama-3.1-70b": "https://www.blackbox.ai/api/improve-prompt", - "flux": "https://www.blackbox.ai/api/image-generator" - } - - working = True - supports_system_message = True - supports_message_history = True - supports_stream = False - - default_model = 'llama-3.1-70b' - chat_models = ['llama-3.1-70b'] - image_models = ['flux'] - models = [*chat_models, *image_models] - - @classmethod - def _get_cache_file(cls) -> Path: - """Returns the path to the cache file.""" - dir = Path(get_cookies_dir()) - dir.mkdir(exist_ok=True) - return dir / 'blackbox2.json' - - @classmethod - def _load_cached_license(cls) -> str | None: - """Loads the license key from the cache.""" - cache_file = cls._get_cache_file() - if cache_file.exists(): - try: - with open(cache_file, 'r') as f: - data = json.load(f) - return data.get('license_key') - except Exception as e: - debug.log(f"Error reading cache file: {e}") - return None - - @classmethod - def _save_cached_license(cls, license_key: str): - """Saves the license key to the cache.""" - cache_file = cls._get_cache_file() - try: - with open(cache_file, 'w') as f: - json.dump({'license_key': license_key}, f) - except Exception as e: - debug.log(f"Error writing to cache file: {e}") - - @classmethod - async def _get_license_key(cls, session: ClientSession) -> str: - cached_license = cls._load_cached_license() - if cached_license: - return cached_license - - try: - async with session.get(cls.url) as response: - html = await response.text() - js_files = re.findall(r'static/chunks/\d{4}-[a-fA-F0-9]+\.js', html) - - license_format = r'["\'](\d{6}-\d{6}-\d{6}-\d{6}-\d{6})["\']' - - def is_valid_context(text_around): - return any(char + '=' in text_around for char in 'abcdefghijklmnopqrstuvwxyz') - - for js_file in js_files: - js_url = f"{cls.url}/_next/{js_file}" - async with session.get(js_url) as js_response: - js_content = await js_response.text() - for match in re.finditer(license_format, js_content): - start = max(0, match.start() - 10) - end = min(len(js_content), match.end() + 10) - context = js_content[start:end] - - if is_valid_context(context): - license_key = match.group(1) - cls._save_cached_license(license_key) - return license_key - - raise ValueError("License key not found") - except Exception as e: - debug.log(f"Error getting license key: {str(e)}") - raise - - @classmethod - async def create_async_generator( - cls, - model: str, - messages: Messages, - prompt: str = None, - proxy: str = None, - max_retries: int = 3, - delay: int = 1, - max_tokens: int = None, - **kwargs - ) -> AsyncResult: - if not model: - model = cls.default_model - - if model in cls.chat_models: - async for result in cls._generate_text(model, messages, proxy, max_retries, delay, max_tokens): - yield result - elif model in cls.image_models: - prompt = messages[-1]["content"] - async for result in cls._generate_image(model, prompt, proxy): - yield result - else: - raise ValueError(f"Unsupported model: {model}") - - @classmethod - async def _generate_text( - cls, - model: str, - messages: Messages, - proxy: str = None, - max_retries: int = 3, - delay: int = 1, - max_tokens: int = None, - ) -> AsyncIterator[str]: - headers = cls._get_headers() - - async with ClientSession(headers=headers) as session: - license_key = await cls._get_license_key(session) - api_endpoint = cls.api_endpoints[model] - - data = { - "messages": messages, - "max_tokens": max_tokens, - "validated": license_key - } - - for attempt in range(max_retries): - try: - async with session.post(api_endpoint, json=data, proxy=proxy) as response: - response.raise_for_status() - response_data = await response.json() - if 'prompt' in response_data: - yield response_data['prompt'] - return - else: - raise KeyError("'prompt' key not found in the response") - except Exception as e: - if attempt == max_retries - 1: - raise RuntimeError(f"Error after {max_retries} attempts: {str(e)}") - else: - wait_time = delay * (2 ** attempt) + random.uniform(0, 1) - debug.log(f"Attempt {attempt + 1} failed. Retrying in {wait_time:.2f} seconds...") - await asyncio.sleep(wait_time) - - @classmethod - async def _generate_image( - cls, - model: str, - prompt: str, - proxy: str = None - ) -> AsyncIterator[ImageResponse]: - headers = cls._get_headers() - api_endpoint = cls.api_endpoints[model] - - async with ClientSession(headers=headers) as session: - data = { - "query": prompt - } - - async with session.post(api_endpoint, headers=headers, json=data, proxy=proxy) as response: - response.raise_for_status() - response_data = await response.json() - - if 'markdown' in response_data: - image_url = response_data['markdown'].split('(')[1].split(')')[0] - yield ImageResponse(images=image_url, alt=prompt) - - @staticmethod - def _get_headers() -> dict: - return { - 'accept': '*/*', - 'accept-language': 'en-US,en;q=0.9', - 'content-type': 'text/plain;charset=UTF-8', - 'origin': 'https://www.blackbox.ai', - 'referer': 'https://www.blackbox.ai', - 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36' - } diff --git a/g4f/Provider/BlackboxCreateAgent.py b/g4f/Provider/BlackboxCreateAgent.py index d329ea0e22f..89bdc43c123 100644 --- a/g4f/Provider/BlackboxCreateAgent.py +++ b/g4f/Provider/BlackboxCreateAgent.py @@ -37,7 +37,7 @@ def _get_cache_file(cls) -> Path: """Returns the path to the cache file.""" dir = Path(get_cookies_dir()) dir.mkdir(exist_ok=True) - return dir / 'blackbox2.json' + return dir / 'blackbox_create_agent.json' @classmethod def _load_cached_value(cls) -> str | None: diff --git a/g4f/client/__init__.py b/g4f/client/__init__.py index 502535ed8a6..19e27619cf8 100644 --- a/g4f/client/__init__.py +++ b/g4f/client/__init__.py @@ -5,6 +5,7 @@ import random import string import asyncio +import aiohttp import base64 import json from typing import Union, AsyncIterator, Iterator, Coroutine, Optional @@ -486,17 +487,21 @@ async def _process_image_response( if response_format == "url": # Return original URLs without saving locally images = [Image.model_construct(url=image, revised_prompt=response.alt) for image in response.get_list()] + elif response_format == "b64_json": + # Convert URLs directly to base64 without saving + async def get_b64_from_url(url: str) -> Image: + async with aiohttp.ClientSession() as session: + async with session.get(url, proxy=proxy) as resp: + if resp.status == 200: + image_data = await resp.read() + b64_data = base64.b64encode(image_data).decode() + return Image.model_construct(b64_json=b64_data, revised_prompt=response.alt) + images = await asyncio.gather(*[get_b64_from_url(image) for image in response.get_list()]) else: # Save locally for None (default) case images = await copy_images(response.get_list(), response.get("cookies"), proxy) - if response_format == "b64_json": - async def process_image_item(image_file: str) -> Image: - with open(os.path.join(images_dir, os.path.basename(image_file)), "rb") as file: - image_data = base64.b64encode(file.read()).decode() - return Image.model_construct(b64_json=image_data, revised_prompt=response.alt) - images = await asyncio.gather(*[process_image_item(image) for image in images]) - else: - images = [Image.model_construct(url=f"/images/{os.path.basename(image)}", revised_prompt=response.alt) for image in images] + images = [Image.model_construct(url=f"/images/{os.path.basename(image)}", revised_prompt=response.alt) for image in images] + return ImagesResponse.model_construct( created=int(time.time()), data=images, diff --git a/g4f/web_search.py b/g4f/web_search.py index 652555b639e..5d3c5659b5f 100644 --- a/g4f/web_search.py +++ b/g4f/web_search.py @@ -102,7 +102,7 @@ async def fetch_and_scrape(session: ClientSession, url: str, max_words: int = No except ClientError: return -async def search(query: str, max_results: int = 5, max_words: int = 2500, backend: str = "api", add_text: bool = True, timeout: int = 5, region: str = "wt-wt") -> SearchResults: +async def search(query: str, max_results: int = 5, max_words: int = 2500, backend: str = "auto", add_text: bool = True, timeout: int = 5, region: str = "wt-wt") -> SearchResults: if not has_requirements: raise MissingRequirementsError('Install "duckduckgo-search" and "beautifulsoup4" package | pip install -U g4f[search]') with DDGS() as ddgs: @@ -113,7 +113,7 @@ async def search(query: str, max_results: int = 5, max_words: int = 2500, backen safesearch="moderate", timelimit="y", max_results=max_results, - backend=backend, + backend=backend, # Changed from 'api' to 'auto' ): results.append(SearchResultEntry( result["title"], @@ -169,4 +169,4 @@ def get_search_message(prompt: str, raise_search_exceptions=False, **kwargs) -> if raise_search_exceptions: raise e debug.log(f"Couldn't do web search: {e.__class__.__name__}: {e}") - return prompt \ No newline at end of file + return prompt