Skip to content

Commit ccf3153

Browse files
committed
test: add tests
1 parent b8d04ef commit ccf3153

File tree

4 files changed

+279
-2
lines changed

4 files changed

+279
-2
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test'
2+
import { AbstractScraper } from '@/abstract-scraper'
3+
import { NotImplementedException } from '@/exceptions'
4+
import { Logger } from '@/logger'
5+
6+
// Minimal concrete subclass of AbstractScraper used as the fixture for the
// AbstractScraper tests in this file.
class DummyScraper extends AbstractScraper {
7+
// Override the static host(); the base-class version throws
// NotImplementedException (asserted in the 'static host()' tests).
8+
static host(): string {
9+
return 'dummy.com'
10+
}
11+
// No site-specific extractors: the tests only call methods that DummyScraper
// does not override (canonicalUrl, language, links).
12+
extractors = {}
13+
}
14+
15+
describe('AbstractScraper utility methods', () => {
16+
let scraper: DummyScraper
17+
18+
// host() is called on the class itself: the base class is expected to throw,
// while a subclass that overrides it returns its own host string.
describe('static host()', () => {
19+
it('throws by default on base class', () => {
20+
// Wrapped in a closure so expect() can observe the throw.
expect(() => AbstractScraper.host()).toThrow(NotImplementedException)
21+
})
22+
23+
it('returns host for subclass', () => {
24+
expect(DummyScraper.host()).toBe('dummy.com')
25+
})
26+
})
27+
28+
// canonicalUrl(): each case builds a scraper from (html, url, options) and
// checks the URL that comes back.
describe('canonicalUrl()', () => {
29+
it('returns absolute canonical URL when provided', () => {
30+
// Relative canonical href; the expectation is that it is resolved against
// the URL passed as the second constructor argument.
const html = '<link rel="canonical" href="/foo/bar"/>'
31+
scraper = new DummyScraper(html, 'http://example.com/page', {})
32+
expect(scraper.canonicalUrl()).toBe('http://example.com/foo/bar')
33+
})
34+
35+
it('returns base URL when no canonical link', () => {
36+
// No <link rel="canonical"> at all: the constructor URL is expected back
// unchanged, query string included.
const html = '<html></html>'
37+
scraper = new DummyScraper(html, 'https://site.org/path?x=1', {})
38+
expect(scraper.canonicalUrl()).toBe('https://site.org/path?x=1')
39+
})
40+
41+
it('prefixes URL with https when missing protocol', () => {
42+
// Empty document and a scheme-less URL: the result is expected to gain an
// "https://" prefix.
const html = ''
43+
scraper = new DummyScraper(html, 'site.org/abc', {})
44+
expect(scraper.canonicalUrl()).toBe('https://site.org/abc')
45+
})
46+
})
47+
48+
// language(): checks the lookup order the assertions pin down — <html lang>,
// then <meta http-equiv="content-language">, then a default of "en" with a
// logged warning.
describe('language()', () => {
49+
// Spy on Logger.prototype.warn so tests can assert whether a warning was logged.
let warnSpy: ReturnType<typeof spyOn>
50+
51+
beforeEach(() => {
52+
// Replace warn with a no-op for the duration of each test.
warnSpy = spyOn(Logger.prototype, 'warn').mockImplementation(() => {})
53+
})
54+
afterEach(() => {
55+
// Put the original warn back so the stub does not leak into other tests.
warnSpy.mockRestore()
56+
})
57+
58+
it('reads html lang attribute', () => {
59+
const html = '<html lang="fr"><body></body></html>'
60+
scraper = new DummyScraper(html, 'url', {})
61+
expect(scraper.language()).toBe('fr')
62+
expect(warnSpy).not.toHaveBeenCalled()
63+
})
64+
65+
it('falls back to meta http-equiv content-language', () => {
66+
// The meta content lists two languages ("de, en"); only the first is expected.
const html =
67+
'<html><head>' +
68+
'<meta http-equiv="content-language" content="de, en"/>' +
69+
'</head></html>'
70+
scraper = new DummyScraper(html, 'url', {})
71+
expect(scraper.language()).toBe('de')
72+
expect(warnSpy).not.toHaveBeenCalled()
73+
})
74+
75+
it('defaults to "en" and logs warning when none found', () => {
76+
scraper = new DummyScraper('<html></html>', 'url', {})
77+
expect(scraper.language()).toBe('en')
78+
// The exact warning text is part of the asserted contract.
expect(warnSpy).toHaveBeenCalledWith('Could not determine language')
79+
})
80+
})
81+
82+
// links(): driven by the linksEnabled option passed as the third constructor argument.
describe('links()', () => {
83+
// Fixture markup: one absolute link, one relative link, and one anchor with no href.
const html = `
84+
<a href="http://foo.com/page">Foo</a>
85+
<a href="/local">Local</a>
86+
<a>No href</a>
87+
`
88+
it('returns empty list when linksEnabled is false', () => {
89+
scraper = new DummyScraper(html, 'url', { linksEnabled: false })
90+
expect(scraper.links()).toEqual([])
91+
})
92+
93+
it('returns only absolute links when linksEnabled is true', () => {
94+
scraper = new DummyScraper(html, 'url', { linksEnabled: true })
95+
const links = scraper.links()
96+
// Only the absolute link is expected back, as an { href, text } record; the
// relative link and the href-less anchor are expected to be dropped.
expect(links).toEqual([{ href: 'http://foo.com/page', text: 'Foo' }])
97+
})
98+
})
99+
})
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import { describe, expect, it } from 'bun:test'
2+
import type { ExtractorPlugin } from '../abstract-extractor-plugin'
3+
import type { PostProcessorPlugin } from '../abstract-postprocessor-plugin'
4+
import { PluginManager } from '../plugin-manager'
5+
6+
// PluginManager: verifies that getExtractors() and getPostProcessors() return
// their plugins ordered by descending priority, with "extra" plugins merged in.
// NOTE(review): the constructor argument order used throughout appears to be
// (base extractors, base post-processors, extra extractors, extra
// post-processors) — inferred from the test names; confirm against
// plugin-manager.ts.
describe('PluginManager', () => {
7+
// Builds a stub extractor that carries only `name` and `priority`; the `as`
// cast skips the rest of the ExtractorPlugin interface.
const makeExtractor = (name: string, priority: number): ExtractorPlugin =>
8+
({ name, priority }) as ExtractorPlugin
9+
10+
// Same stub approach for post-processors: only `name` and `priority` are set.
const makePostProcessor = (
11+
name: string,
12+
priority: number,
13+
): PostProcessorPlugin => ({ name, priority }) as PostProcessorPlugin
14+
15+
it('sorts base extractors by priority descending', () => {
16+
const low = makeExtractor('low', 1)
17+
const high = makeExtractor('high', 10)
18+
// Passed in ascending order on purpose so the sort has to reorder them.
const manager = new PluginManager([low, high], [], [], [])
19+
const names = manager.getExtractors().map((p) => p.name)
20+
expect(names).toEqual(['high', 'low'])
21+
})
22+
23+
it('includes extra extractors and sorts them with base', () => {
24+
const base = makeExtractor('base', 5)
25+
const extra = makeExtractor('extra', 15)
26+
// The extra plugin has the higher priority, so it is expected first.
const manager = new PluginManager([base], [], [extra], [])
27+
const names = manager.getExtractors().map((p) => p.name)
28+
expect(names).toEqual(['extra', 'base'])
29+
})
30+
31+
it('returns empty array when no extractors provided', () => {
32+
const manager = new PluginManager([], [], [], [])
33+
expect(manager.getExtractors()).toEqual([])
34+
})
35+
36+
it('sorts base post-processors by priority descending', () => {
37+
const a = makePostProcessor('A', 2)
38+
const b = makePostProcessor('B', 8)
39+
const manager = new PluginManager([], [a, b], [], [])
40+
const names = manager.getPostProcessors().map((p) => p.name)
41+
expect(names).toEqual(['B', 'A'])
42+
})
43+
44+
it('includes extra post-processors and sorts them with base', () => {
45+
const base = makePostProcessor('basePP', 3)
46+
const extra = makePostProcessor('extraPP', 20)
47+
const manager = new PluginManager([], [base], [], [extra])
48+
const names = manager.getPostProcessors().map((p) => p.name)
49+
expect(names).toEqual(['extraPP', 'basePP'])
50+
})
51+
52+
it('returns empty array when no post-processors provided', () => {
53+
const manager = new PluginManager([], [], [], [])
54+
expect(manager.getPostProcessors()).toEqual([])
55+
})
56+
57+
it('maintains separate extractor and post-processor lists', () => {
58+
const ext = makeExtractor('ext', 7)
59+
const pp = makePostProcessor('pp', 4)
60+
const manager = new PluginManager([ext], [pp], [], [])
61+
// Each getter is expected to return only its own kind of plugin.
expect(manager.getExtractors().map((p) => p.name)).toEqual(['ext'])
62+
expect(manager.getPostProcessors().map((p) => p.name)).toEqual(['pp'])
63+
})
64+
})
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import { describe, expect, it } from 'bun:test'
2+
import {
3+
hasId,
4+
isAggregateRating,
5+
isBaseType,
6+
isGraphType,
7+
isHowToSection,
8+
isHowToStep,
9+
isOrganization,
10+
isPerson,
11+
isRecipe,
12+
isRestrictedDiet,
13+
isSchemaOrgData,
14+
isThingType,
15+
isWebPage,
16+
isWebSite,
17+
} from '../type-predicates'
18+
19+
describe('type-predicates', () => {
20+
// hasId: expected to be true only when '@id' is present and is a string.
// NOTE(review): the bare @ts-expect-error directives below suppress a compile
// error on each hasId(...) call; the reason is not visible here (presumably
// the literals do not satisfy hasId's declared parameter type — confirm in
// type-predicates.ts) and each directive would benefit from a short description.
describe('hasId', () => {
21+
it('returns true when @id is present and a string', () => {
22+
const obj = { '@id': 'http://example.com' }
23+
// @ts-expect-error
24+
expect(hasId(obj)).toBe(true)
25+
})
26+
it('returns false when @id is missing or not a string', () => {
27+
// @ts-expect-error
28+
expect(hasId({})).toBe(false)
29+
// @ts-expect-error
30+
expect(hasId({ '@id': 123 })).toBe(false)
31+
})
32+
})
33+
34+
// isGraphType: expected to accept only an object whose '@graph' property is an array.
describe('isGraphType', () => {
35+
it('returns true for object with @graph array', () => {
36+
const graph = { '@graph': [{ '@type': 'Thing' }] }
37+
expect(isGraphType(graph)).toBe(true)
38+
})
39+
it('returns false for invalid graph shapes', () => {
40+
// Rejected shapes: null, a non-array '@graph' value, and a bare array.
expect(isGraphType(null)).toBe(false)
41+
expect(isGraphType({ '@graph': 'not-array' })).toBe(false)
42+
expect(isGraphType([])).toBe(false)
43+
})
44+
})
45+
46+
// isBaseType: expected to accept only an object whose '@type' is a string.
describe('isBaseType', () => {
47+
it('returns true for object with @type string', () => {
48+
const base = { '@type': 'TestType' }
49+
expect(isBaseType(base)).toBe(true)
50+
})
51+
it('returns false for missing or non-string @type', () => {
52+
expect(isBaseType({})).toBe(false)
53+
// Numeric '@type': exercises the isString(obj['@type']) check that this
// same commit adds to isBaseType in type-predicates.ts.
expect(isBaseType({ '@type': 5 })).toBe(false)
54+
})
55+
})
56+
57+
// isSchemaOrgData: expected to accept either a graph-shaped or a typed object.
describe('isSchemaOrgData', () => {
58+
it('is true for graph or base types', () => {
59+
// An empty '@graph' array is still expected to count as a graph.
const graph = { '@graph': [] }
60+
const base = { '@type': 'Thing' }
61+
expect(isSchemaOrgData(graph)).toBe(true)
62+
expect(isSchemaOrgData(base)).toBe(true)
63+
})
64+
it('is false otherwise', () => {
65+
// Neither '@graph' nor '@type' present, and a non-object input.
expect(isSchemaOrgData({})).toBe(false)
66+
expect(isSchemaOrgData('string')).toBe(false)
67+
})
68+
})
69+
70+
// isThingType and the per-type guards: each guard gets one positive case (its
// own '@type' value) and one negative case (a different '@type' value).
describe('isThingType and specific type guards', () => {
71+
// Builds the smallest input the guards inspect: an object with only '@type'.
const make = (type: string) => ({ '@type': type })
72+
it('isThingType matches given type', () => {
73+
// The second argument is the '@type' value to match against.
expect(isThingType(make('Custom'), 'Custom')).toBe(true)
74+
expect(isThingType(make('Other'), 'Custom')).toBe(false)
75+
})
76+
77+
it('isAggregateRating recognizes type', () => {
78+
expect(isAggregateRating(make('AggregateRating'))).toBe(true)
79+
expect(isAggregateRating(make('Recipe'))).toBe(false)
80+
})
81+
it('isHowToSection recognizes type', () => {
82+
expect(isHowToSection(make('HowToSection'))).toBe(true)
83+
expect(isHowToSection(make('HowToStep'))).toBe(false)
84+
})
85+
it('isHowToStep recognizes type', () => {
86+
expect(isHowToStep(make('HowToStep'))).toBe(true)
87+
expect(isHowToStep(make('HowToSection'))).toBe(false)
88+
})
89+
it('isOrganization recognizes type', () => {
90+
expect(isOrganization(make('Organization'))).toBe(true)
91+
expect(isOrganization(make('Person'))).toBe(false)
92+
})
93+
it('isPerson recognizes type', () => {
94+
expect(isPerson(make('Person'))).toBe(true)
95+
expect(isPerson(make('Recipe'))).toBe(false)
96+
})
97+
it('isRecipe recognizes type', () => {
98+
expect(isRecipe(make('Recipe'))).toBe(true)
99+
expect(isRecipe(make('AggregateRating'))).toBe(false)
100+
})
101+
it('isRestrictedDiet recognizes type', () => {
102+
expect(isRestrictedDiet(make('RestrictedDiet'))).toBe(true)
103+
expect(isRestrictedDiet(make('WebPage'))).toBe(false)
104+
})
105+
it('isWebPage recognizes type', () => {
106+
expect(isWebPage(make('WebPage'))).toBe(true)
107+
expect(isWebPage(make('WebSite'))).toBe(false)
108+
})
109+
it('isWebSite recognizes type', () => {
110+
expect(isWebSite(make('WebSite'))).toBe(true)
111+
expect(isWebSite(make('WebPage'))).toBe(false)
112+
})
113+
})
114+
})

src/plugins/schema-org.extractor/type-predicates.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { isPlainObject } from '@/utils'
1+
import { isPlainObject, isString } from '@/utils'
22
import type {
33
AggregateRating,
44
HowToSection,
@@ -20,7 +20,7 @@ export function isGraphType(obj: unknown): obj is Graph {
2020
}
2121

2222
export function isBaseType(obj: unknown): obj is { '@type': string } {
23-
return isPlainObject(obj) && '@type' in obj
23+
return isPlainObject(obj) && '@type' in obj && isString(obj['@type'])
2424
}
2525

2626
export function isSchemaOrgData(obj: unknown): obj is Graph | Thing {

0 commit comments

Comments
 (0)