Skip to content

Commit 4a17c2c

Browse files
committed
fix(SimplyRecipes): instruction extraction
1 parent cddcc85 commit 4a17c2c

File tree

2 files changed

+29
-3
lines changed

2 files changed

+29
-3
lines changed

src/plugins/__tests__/html-stripper.processor.test.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ describe('HtmlStripperPlugin', () => {
99
expect(plugin.shouldProcess('title')).toBe(true)
1010
expect(plugin.shouldProcess('instructions')).toBe(true)
1111
expect(plugin.shouldProcess('ingredients')).toBe(true)
12-
expect(plugin.shouldProcess('description')).toBe(false)
1312
expect(plugin.shouldProcess('category')).toBe(false)
1413
})
1514

@@ -54,7 +53,6 @@ describe('HtmlStripperPlugin', () => {
5453
})
5554

5655
it('returns value unchanged for non-target fields', () => {
57-
expect(plugin.process('description', '<b>desc</b>')).toBe('<b>desc</b>')
5856
expect(plugin.process('category', new Set(['<b>cat</b>']))).toEqual(
5957
new Set(['<b>cat</b>']),
6058
)

src/scrapers/simplyrecipes.ts

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,37 @@
11
import { AbstractScraper } from '@/abstract-scraper'
2+
import type { RecipeFields } from '@/types/recipe.interface'
3+
import { normalizeString } from '@/utils/parsing'
24

35
/**
 * Scraper for simplyrecipes.com.
 *
 * The only site-specific extractor defined here is `instructions`.
 * NOTE(review): how AbstractScraper consumes `extractors` is not visible in
 * this file — presumably it prefers these over its generic extraction.
 */
export class SimplyRecipes extends AbstractScraper {
  /** Hostname this scraper is associated with. */
  static host() {
    return 'simplyrecipes.com'
  }

  // Bound so the method keeps its `this` when it is invoked through this map.
  extractors = {
    instructions: this.instructions.bind(this),
  }

  /**
   * Collects the text of every top-level step `<li>` found in an `<ol>` under
   * `div.structured-project__steps`.
   *
   * List items nested inside another step (sub-lists) are not returned as
   * separate entries: their text is already part of the enclosing step's
   * text, so emitting them again would duplicate content.
   *
   * @returns The step texts in document order, after `normalizeString` and
   *   with empty results dropped. Identical step texts collapse into one
   *   entry because the result is a Set. The Set is empty when no steps are
   *   found.
   */
  protected instructions(): RecipeFields['instructions'] {
    const container = 'div.structured-project__steps'

    const steps = this.$(`${container} ol li`)
      // Keep only <li> elements that have no <li> ancestor inside the steps
      // container, i.e. the steps themselves rather than their sub-items.
      .filter((_, el) => this.$(el).parentsUntil(container, 'li').length === 0)
      .toArray()
      .map((el) => {
        // Work on a clone so stripping media does not mutate the loaded document.
        const $clone = this.$(el).clone()
        $clone.find('img, picture, figure').remove()
        return normalizeString($clone.text())
      })
      .filter((text) => text.length > 0)

    return new Set(steps)
  }
}

0 commit comments

Comments
 (0)