Skip to content

Commit 32c38fa

Browse files
committed
refactor: exports
1 parent 4385d9e commit 32c38fa

File tree

4 files changed

+60
-20
lines changed

4 files changed

+60
-20
lines changed

README.md

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,52 @@ bun add recipe-scrapers-js
3737
import { getScraper } from 'recipe-scrapers-js'
3838

3939
const html = `<html>The html to scrape...</html>`
40+
const url = 'https://allrecipes.com/recipe/example'
4041

4142
// Get a scraper for a specific URL
4243
// This function will throw if a scraper does not exist.
43-
const scraper = getScraper(html, 'https://allrecipes.com/recipe/example')
44-
44+
const MyScraper = getScraper(url)
45+
const scraper = new MyScraper(html, url, /* { ...options } */)
4546
const recipe = await scraper.toObject()
46-
47+
4748
console.log(recipe)
4849
```
4950

51+
### Options
52+
53+
```typescript
54+
interface ScraperOptions {
55+
/**
56+
* Additional extractors to be used by the scraper.
57+
* These extractors will be added to the default set of extractors.
58+
* Extractors are applied according to their priority.
59+
* Higher priority extractors will run first.
60+
* @default []
61+
*/
62+
extraExtractors?: ExtractorPlugin[]
63+
/**
64+
* Additional post-processors to be used by the scraper.
65+
* These post-processors will be added to the default set of post-processors.
66+
* Post-processors are applied after all extractors have run.
67+
* Post-processors are also applied according to their priority.
68+
* Higher priority post-processors will run first.
69+
* @default []
70+
*/
71+
extraPostProcessors?: PostProcessorPlugin[]
72+
/**
73+
* Whether link scraping is enabled.
74+
* @default false
75+
*/
76+
linksEnabled?: boolean
77+
/**
78+
* Logging level for the scraper.
79+
* This controls the verbosity of logs produced by the scraper.
80+
* @default LogLevel.Warn
81+
*/
82+
logLevel?: LogLevel
83+
}
84+
```
85+
5086
## Supported Sites
5187

5288
This library supports recipe extraction from various popular cooking websites. The scraper automatically detects the appropriate parser based on the URL.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "recipe-scrapers-js",
3-
"version": "0.1.0-alpha.2",
3+
"version": "0.1.0-alpha.3",
44
"license": "MIT",
55
"description": "A recipe scrapers library",
66
"author": {

src/index.ts

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,21 @@
11
import { scrapers } from './scrapers/_index'
2+
import { getHostName } from './utils'
23

34
export * from '@/types/recipe.interface'
45
export * from '@/types/scraper.interface'
6+
export * from './abstract-extractor-plugin'
7+
export * from './abstract-postprocessor-plugin'
8+
export * from './logger'
9+
export { scrapers }
510

611
/**
7-
* Extracts the host name from a URL string.
12+
* Returns a scraper class for the given URL, if implemented.
813
*/
9-
function getHostName(urlString: string) {
10-
try {
11-
const url = new URL(urlString)
12-
return url.host
13-
} catch {
14-
throw new Error(`Invalid URL: ${urlString}`)
15-
}
16-
}
17-
18-
/**
19-
* Returns a scraper instance for the given URL, if implemented.
20-
*/
21-
export function getScraper(html: string, url: string) {
14+
export function getScraper(url: string) {
2215
const hostName = getHostName(url)
2316

2417
if (scrapers[hostName]) {
25-
const ScraperClass = scrapers[hostName]
26-
return new ScraperClass(html, url)
18+
return scrapers[hostName]
2719
}
2820

2921
throw new Error(

src/utils/index.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,15 @@ export function isPlainObject(
2424
/**
 * Type guard that narrows an unknown value to `string`.
 * Only string primitives pass; boxed `String` objects do not.
 */
export function isString(value: unknown): value is string {
  const kind = typeof value
  return kind === 'string'
}
27+
28+
/**
29+
* Extracts the host name from a URL string.
30+
*/
31+
export function getHostName(value: string) {
32+
try {
33+
const url = new URL(value)
34+
return url.host
35+
} catch {
36+
throw new Error(`Invalid URL: ${value}`)
37+
}
38+
}

0 commit comments

Comments
 (0)