Skip to content

Commit 27af4cc

Browse files
committed
Fix "raw://" URL parsing logic
Closes unclecode#686
1 parent: dde14eb · commit: 27af4cc

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

crawl4ai/async_crawler_strategy.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -1231,9 +1231,9 @@ async def crawl(
1231 1231
get_delayed_content=None,
1232 1232
)
1233 1233

1234-
elif url.startswith("raw:") or url.startswith("raw://"):
1234+
elif url.startswith("raw:"):
1235 1235
# Process raw HTML content
1236-
raw_html = url[4:] if url[:4] == "raw:" else url[7:]
1236+
raw_html = url[6:] if url.startswith("raw://") else url[4:]
1237 1237
html = raw_html
1238 1238
if config.screenshot:
1239 1239
screenshot_data = await self._generate_screenshot_from_html(html)

tests/20241401/test_async_crawler_strategy.py

+25
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,24 @@
15 15
if not CRAWL4AI_HOME_DIR.joinpath("profiles", "test_profile").exists():
16 16
CRAWL4AI_HOME_DIR.joinpath("profiles", "test_profile").mkdir(parents=True)
17 17

18+
@pytest.fixture
19+
def basic_html():
20+
return """
21+
<html lang="en">
22+
<head>
23+
<title>Basic HTML</title>
24+
</head>
25+
<body>
26+
<h1>Main Heading</h1>
27+
<main>
28+
<div class="container">
29+
<p>Basic HTML document for testing purposes.</p>
30+
</div>
31+
</main>
32+
</body>
33+
</html>
34+
"""
35+
18 36
# Test Config Files
19 37
@pytest.fixture
20 38
def basic_browser_config():
@@ -325,6 +343,13 @@ async def test_stealth_mode(crawler_strategy):
325 343
)
326 344
assert response.status_code == 200
327 345

346+
@pytest.mark.asyncio
347+
@pytest.mark.parametrize("prefix", ("raw:", "raw://"))
348+
async def test_raw_urls(crawler_strategy, basic_html, prefix):
349+
url = f"{prefix}{basic_html}"
350+
response = await crawler_strategy.crawl(url, CrawlerRunConfig())
351+
assert response.html == basic_html
352+
328 353
# Error Handling Tests
329 354
@pytest.mark.asyncio
330 355
async def test_invalid_url():

0 commit comments

Comments (0)