Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 9 additions & 31 deletions crawl4ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1656,44 +1656,22 @@ def perform_completion_with_backoff(

for attempt in range(max_attempts):
try:
response = completion(
return completion(
model=provider,
messages=[{"role": "user", "content": prompt_with_variables}],
**extra_args,
)
return response # Return the successful response
except RateLimitError as e:
print("Rate limit error:", str(e))
if attempt == max_attempts - 1:
# Last attempt failed, raise the error.
raise

# Check if we have exhausted our max attempts
if attempt < max_attempts - 1:
# Calculate the delay and wait
delay = base_delay * (2**attempt) # Exponential backoff formula
print(f"Waiting for {delay} seconds before retrying...")
time.sleep(delay)
else:
# Return an error response after exhausting all retries
return [
{
"index": 0,
"tags": ["error"],
"content": ["Rate limit error. Please try again later."],
}
]
except Exception as e:
raise e # Raise any other exceptions immediately
# print("Error during completion request:", str(e))
# error_message = e.message
# return [
# {
# "index": 0,
# "tags": ["error"],
# "content": [
# f"Error during LLM completion request. {error_message}"
# ],
# }
# ]
print("Rate limit error:", str(e))

# Otherwise, calculate delay and wait before the next attempt.
delay = base_delay * (2**attempt) # Exponential backoff formula
print(f"Waiting for {delay} seconds before retrying...")
time.sleep(delay)

def extract_blocks(url, html, provider=DEFAULT_PROVIDER, api_token=None, base_url=None):
"""
Expand Down
20 changes: 20 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
from litellm.exceptions import RateLimitError

from crawl4ai.utils import perform_completion_with_backoff


def test_perform_completion_with_backoff_rate_limit():
    """Verify the backoff helper raises RateLimitError when every attempt is rate-limited."""
    # Build the error once; litellm's mock_response hook makes each
    # completion() call raise it, so all retry attempts fail.
    rate_limit_error = RateLimitError(
        message="Rate limit exceeded",
        llm_provider="openai",
        model="gpt-4o",
    )
    forced_failure_args = {"mock_response": rate_limit_error}  # Force the rate limit error.

    with pytest.raises(RateLimitError):
        perform_completion_with_backoff(
            provider="openai/gpt-4o",
            prompt_with_variables="Test prompt",
            api_token="test_token",
            extra_args=forced_failure_args,
        )