|
1 | 1 | # __init__.py
|
2 | 2 | import warnings
|
| 3 | +from logging import Logger |
3 | 4 |
|
4 |
| -from .async_webcrawler import AsyncWebCrawler, CacheMode |
5 | 5 | from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig
|
6 |
| - |
7 |
| -from .content_scraping_strategy import ( |
8 |
| - ContentScrapingStrategy, |
9 |
| - WebScrapingStrategy, |
10 |
| - LXMLWebScrapingStrategy, |
| 6 | +from .async_dispatcher import ( |
| 7 | + BaseDispatcher, |
| 8 | + CrawlerMonitor, |
| 9 | + DisplayMode, |
| 10 | + MemoryAdaptiveDispatcher, |
| 11 | + RateLimiter, |
| 12 | + SemaphoreDispatcher, |
11 | 13 | )
|
12 | 14 | from .async_logger import (
|
13 |
| - AsyncLoggerBase, |
14 | 15 | AsyncLogger,
|
| 16 | + AsyncLoggerBase, |
15 | 17 | )
|
16 |
| -from .proxy_strategy import ( |
17 |
| - ProxyRotationStrategy, |
18 |
| - RoundRobinProxyStrategy, |
19 |
| -) |
20 |
| -from .extraction_strategy import ( |
21 |
| - ExtractionStrategy, |
22 |
| - LLMExtractionStrategy, |
23 |
| - CosineStrategy, |
24 |
| - JsonCssExtractionStrategy, |
25 |
| - JsonXPathExtractionStrategy, |
26 |
| -) |
| 18 | +from .async_webcrawler import AsyncWebCrawler, CacheMode |
| 19 | +from .browser_profiler import BrowserProfiler |
27 | 20 | from .chunking_strategy import ChunkingStrategy, RegexChunking
|
28 |
| -from .markdown_generation_strategy import DefaultMarkdownGenerator |
29 | 21 | from .content_filter_strategy import (
|
30 |
| - PruningContentFilter, |
31 | 22 | BM25ContentFilter,
|
32 | 23 | LLMContentFilter,
|
| 24 | + PruningContentFilter, |
33 | 25 | RelevantContentFilter,
|
34 | 26 | )
|
35 |
| -from .models import CrawlResult, MarkdownGenerationResult |
36 |
| -from .async_dispatcher import ( |
37 |
| - MemoryAdaptiveDispatcher, |
38 |
| - SemaphoreDispatcher, |
39 |
| - RateLimiter, |
40 |
| - CrawlerMonitor, |
41 |
| - DisplayMode, |
42 |
| - BaseDispatcher, |
| 27 | +from .content_scraping_strategy import ( |
| 28 | + ContentScrapingStrategy, |
| 29 | + LXMLWebScrapingStrategy, |
| 30 | + WebScrapingStrategy, |
43 | 31 | )
|
44 |
| -from .docker_client import Crawl4aiDockerClient |
45 |
| -from .hub import CrawlerHub |
46 |
| -from .browser_profiler import BrowserProfiler |
47 | 32 | from .deep_crawling import (
|
48 |
| - DeepCrawlStrategy, |
| 33 | + BestFirstCrawlingStrategy, |
49 | 34 | BFSDeepCrawlStrategy,
|
50 |
| - FilterChain, |
51 |
| - URLPatternFilter, |
52 |
| - DomainFilter, |
53 |
| - ContentTypeFilter, |
54 |
| - URLFilter, |
55 |
| - FilterStats, |
56 |
| - SEOFilter, |
57 |
| - KeywordRelevanceScorer, |
58 |
| - URLScorer, |
59 | 35 | CompositeScorer,
|
| 36 | + ContentTypeFilter, |
| 37 | + DeepCrawlDecorator, |
| 38 | + DeepCrawlStrategy, |
| 39 | + DFSDeepCrawlStrategy, |
60 | 40 | DomainAuthorityScorer,
|
| 41 | + DomainFilter, |
| 42 | + FilterChain, |
| 43 | + FilterStats, |
61 | 44 | FreshnessScorer,
|
| 45 | + KeywordRelevanceScorer, |
62 | 46 | PathDepthScorer,
|
63 |
| - BestFirstCrawlingStrategy, |
64 |
| - DFSDeepCrawlStrategy, |
65 |
| - DeepCrawlDecorator, |
| 47 | + SEOFilter, |
| 48 | + URLFilter, |
| 49 | + URLPatternFilter, |
| 50 | + URLScorer, |
| 51 | +) |
| 52 | +from .deep_crawling.scorers import ( |
| 53 | + ScoringStats, |
| 54 | +) |
| 55 | +from .docker_client import Crawl4aiDockerClient |
| 56 | +from .extraction_strategy import ( |
| 57 | + CosineStrategy, |
| 58 | + ExtractionStrategy, |
| 59 | + JsonCssExtractionStrategy, |
| 60 | + JsonXPathExtractionStrategy, |
| 61 | + LLMExtractionStrategy, |
| 62 | +) |
| 63 | +from .hub import CrawlerHub |
| 64 | +from .markdown_generation_strategy import DefaultMarkdownGenerator |
| 65 | +from .models import CrawlResult, MarkdownGenerationResult |
| 66 | +from .proxy_strategy import ( |
| 67 | + ProxyRotationStrategy, |
| 68 | + RoundRobinProxyStrategy, |
66 | 69 | )
|
67 | 70 |
|
68 | 71 | __all__ = [
|
|
120 | 123 | "Crawl4aiDockerClient",
|
121 | 124 | "ProxyRotationStrategy",
|
122 | 125 | "RoundRobinProxyStrategy",
|
| 126 | + "ScoringStats", |
| 127 | + "Logger", |
123 | 128 | ]
|
124 | 129 |
|
125 | 130 |
|
|
0 commit comments