Skip to content

Commit 4d85a48

Browse files
fb55jmbpwtw
authored and committed
feat(parser-stream): Support parsing fragments (inikulin#487)
1 parent 15ab3c3 commit 4d85a48

File tree

9 files changed

+67
-86
lines changed

9 files changed

+67
-86
lines changed

packages/parse5-parser-stream/lib/index.ts

+17 -5
Original file line number | Diff line number | Diff line change
@@ -30,18 +30,31 @@ import type { DefaultTreeAdapterMap } from 'parse5/dist/tree-adapters/default.js
3030
*
3131
*/
3232
export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap> extends Writable {
33+
static getFragmentStream<T extends TreeAdapterTypeMap>(
34+
fragmentContext?: T['parentNode'] | null,
35+
options?: ParserOptions<T>
36+
): ParserStream<T> {
37+
const parser = Parser.getFragmentParser(fragmentContext, options);
38+
const stream = new ParserStream(options, parser);
39+
return stream;
40+
}
41+
3342
private lastChunkWritten = false;
3443
private writeCallback: undefined | (() => void) = undefined;
3544

36-
public parser: Parser<T>;
3745
private pendingHtmlInsertions: string[] = [];
3846
/** The resulting document node. */
39-
public document: T['document'];
47+
public get document(): T['document'] {
48+
return this.parser.document;
49+
}
50+
public getFragment(): T['documentFragment'] {
51+
return this.parser.getFragment();
52+
}
4053

4154
/**
4255
* @param options Parsing options.
4356
*/
44-
constructor(options?: ParserOptions<T>) {
57+
constructor(options?: ParserOptions<T>, public parser: Parser<T> = new Parser(options)) {
4558
super({ decodeStrings: false });
4659

4760
const resume = (): void => {
@@ -68,8 +81,7 @@ export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>
6881
}
6982
};
7083

71-
this.parser = new Parser(options, undefined, undefined, scriptHandler);
72-
this.document = this.parser.document;
84+
this.parser.scriptHandler = scriptHandler;
7385
}
7486

7587
//WritableStream implementation

packages/parse5-parser-stream/test/location-info.test.ts

+2 -2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ import { parseChunked } from './utils/parse-chunked.js';
55

66
generateLocationInfoParserTests('location-info', (input, opts) =>
77
// NOTE: because of performance use bigger chunks here
8-
parseChunked(input, opts, 100, 400)
8+
parseChunked({ input }, opts, 100, 400)
99
);
1010

1111
generateTestsForEachTreeAdapter('location-info', (treeAdapter) => {
@@ -17,7 +17,7 @@ generateTestsForEachTreeAdapter('location-info', (treeAdapter) => {
1717
sourceCodeLocationInfo: true,
1818
};
1919

20-
const document = parseChunked(html, opts).node;
20+
const document = parseChunked({ input: html }, opts).node;
2121
const htmlEl = treeAdapter.getChildNodes(document)[0];
2222
const headEl = treeAdapter.getChildNodes(htmlEl)[0];
2323
const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];

packages/parse5-parser-stream/test/parser-stream.test.ts

+16 -2
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,22 @@ import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-t
44
import { parseChunked } from './utils/parse-chunked.js';
55
import { finished } from 'parse5-test-utils/utils/common.js';
66

7-
generateParsingTests('ParserStream', 'ParserStream', { skipFragments: true }, (test, opts) =>
8-
parseChunked(test.input, opts)
7+
generateParsingTests(
8+
'ParserStream',
9+
'ParserStream',
10+
{
11+
expectErrors: [
12+
//TODO(GH-448): Foreign content behaviour was updated in the HTML spec.
13+
//The old test suite still tests the old behaviour.
14+
'269.foreign-fragment',
15+
'270.foreign-fragment',
16+
'307.foreign-fragment',
17+
'309.foreign-fragment',
18+
'316.foreign-fragment',
19+
'317.foreign-fragment',
20+
],
21+
},
22+
(test, opts) => parseChunked(test, opts)
923
);
1024

1125
describe('ParserStream', () => {

packages/parse5-parser-stream/test/scripting.test.ts

+7 -4
Original file line number | Diff line number | Diff line change
@@ -14,14 +14,14 @@ generateParsingTests(
1414
'ParserStream - Scripting',
1515
'ParserStream - Scripting',
1616
{
17-
skipFragments: true,
1817
withoutErrors: true,
1918
suitePath,
2019
},
2120
async (test, opts) => {
2221
const chunks = makeChunks(test.input);
23-
const parser = new ParserStream(opts);
24-
const { document } = parser;
22+
const parser = test.fragmentContext
23+
? ParserStream.getFragmentStream(test.fragmentContext, opts)
24+
: new ParserStream(opts);
2525

2626
parser.on('script', async (scriptElement, documentWrite, resume) => {
2727
const scriptTextNode = opts.treeAdapter.getChildNodes(scriptElement)[0];
@@ -48,7 +48,10 @@ generateParsingTests(
4848

4949
await finished(parser);
5050

51-
return { node: document };
51+
return {
52+
node: test.fragmentContext ? parser.getFragment() : parser.document,
53+
chunks,
54+
};
5255
}
5356
);
5457

packages/parse5-parser-stream/test/utils/parse-chunked.ts

+8 -6
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,16 @@ import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'
33
import { ParserStream } from '../../lib/index.js';
44
import { makeChunks } from 'parse5-test-utils/utils/common.js';
55

6-
export function parseChunked(
7-
html: string,
8-
opts: ParserOptions<TreeAdapterTypeMap>,
6+
export function parseChunked<T extends TreeAdapterTypeMap>(
7+
test: { input: string; fragmentContext?: T['parentNode'] },
8+
opts: ParserOptions<T>,
99
minChunkSize?: number,
1010
maxChunkSize?: number
1111
): { node: TreeAdapterTypeMap['document']; chunks: string[] } {
12-
const parserStream = new ParserStream(opts);
13-
const chunks = makeChunks(html, minChunkSize, maxChunkSize);
12+
const parserStream = test.fragmentContext
13+
? ParserStream.getFragmentStream(test.fragmentContext, opts)
14+
: new ParserStream(opts);
15+
const chunks = makeChunks(test.input, minChunkSize, maxChunkSize);
1416

1517
// NOTE: set small waterline for testing purposes
1618
parserStream.parser.tokenizer.preprocessor.bufferWaterline = 8;
@@ -25,7 +27,7 @@ export function parseChunked(
2527
parserStream.end(chunks[chunks.length - 1]);
2628

2729
return {
28-
node: parserStream.document,
30+
node: test.fragmentContext ? parserStream.getFragment() : parserStream.document,
2931
chunks,
3032
};
3133
}

packages/parse5/lib/index.ts

+5 -1
Original file line number | Diff line number | Diff line change
@@ -75,5 +75,9 @@ export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterM
7575
fragmentContext = null;
7676
}
7777

78-
return Parser.parseFragment(html as string, fragmentContext, options);
78+
const parser = Parser.getFragmentParser(fragmentContext, options);
79+
80+
parser.tokenizer.write(html as string, true);
81+
82+
return parser.getFragment();
7983
}

packages/parse5/lib/parser/index.test.ts

+1 -54
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,15 @@
11
import * as assert from 'node:assert';
22
import * as parse5 from 'parse5';
33
import { jest } from '@jest/globals';
4-
import { Parser, ParserOptions } from './index.js';
5-
import type { TreeAdapterTypeMap } from './../tree-adapters/interface.js';
64
import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js';
75
import { treeAdapters } from 'parse5-test-utils/utils/common.js';
8-
import { NAMESPACES as NS } from '../common/html.js';
9-
10-
const origParseFragment = Parser.parseFragment;
116

127
generateParsingTests(
138
'parser',
149
'Parser',
1510
{
1611
expectErrors: [
17-
//NOTE: Foreign content behaviour was updated in the HTML spec.
12+
//TODO(GH-448): Foreign content behaviour was updated in the HTML spec.
1813
//The old test suite still tests the old behaviour.
1914
'269.foreign-fragment',
2015
'270.foreign-fragment',
@@ -55,54 +50,6 @@ describe('parser', () => {
5550
assert.strictEqual(document.childNodes[0].data, '!DOCTYPE html SYSTEM "about:legacy-compat"');
5651
});
5752

58-
describe('Regression - Incorrect arguments fallback for the parser.parseFragment (GH-82, GH-83)', () => {
59-
beforeEach(() => {
60-
Parser.parseFragment = function <T extends TreeAdapterTypeMap>(
61-
html: string,
62-
fragmentContext?: T['element'],
63-
options?: ParserOptions<T>
64-
): {
65-
html: string;
66-
fragmentContext: T['element'] | null | undefined;
67-
options: ParserOptions<T> | undefined;
68-
} {
69-
return {
70-
html,
71-
fragmentContext,
72-
options,
73-
};
74-
};
75-
});
76-
77-
afterEach(() => {
78-
Parser.parseFragment = origParseFragment;
79-
});
80-
81-
it('parses correctly', () => {
82-
const fragmentContext = treeAdapters.default.createElement('div', NS.HTML, []);
83-
const html = '<script></script>';
84-
const opts = { sourceCodeLocationInfo: true };
85-
86-
let args: any = parse5.parseFragment(fragmentContext, html, opts);
87-
88-
expect(args).toHaveProperty('fragmentContext', fragmentContext);
89-
expect(args).toHaveProperty('html', html);
90-
assert.ok(args.options.sourceCodeLocationInfo);
91-
92-
args = parse5.parseFragment(html, opts);
93-
94-
assert.ok(!args.fragmentContext);
95-
expect(args).toHaveProperty('html', html);
96-
assert.ok(args.options.sourceCodeLocationInfo);
97-
98-
args = parse5.parseFragment(html);
99-
100-
assert.ok(!args.fragmentContext);
101-
expect(args).toHaveProperty('html', html);
102-
assert.ok(!args.options);
103-
});
104-
});
105-
10653
describe("Regression - Don't inherit from Object when creating collections (GH-119)", () => {
10754
beforeEach(() => {
10855
/*eslint-disable no-extend-native*/

packages/parse5/lib/parser/index.ts

+9 -7
Original file line number | Diff line number | Diff line change
@@ -165,11 +165,10 @@ export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler, Stack
165165
return parser.document;
166166
}
167167

168-
public static parseFragment<T extends TreeAdapterTypeMap>(
169-
html: string,
168+
public static getFragmentParser<T extends TreeAdapterTypeMap>(
170169
fragmentContext?: T['parentNode'] | null,
171170
options?: ParserOptions<T>
172-
): T['documentFragment'] {
171+
): Parser<T> {
173172
const opts: Required<ParserOptions<T>> = {
174173
...defaultParserOptions,
175174
...options,
@@ -194,12 +193,15 @@ export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler, Stack
194193
parser._insertFakeRootElement();
195194
parser._resetInsertionMode();
196195
parser._findFormInFragmentContext();
197-
parser.tokenizer.write(html, true);
198196

199-
const rootElement = opts.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
200-
const fragment = opts.treeAdapter.createDocumentFragment();
197+
return parser;
198+
}
199+
200+
public getFragment(): T['documentFragment'] {
201+
const rootElement = this.treeAdapter.getFirstChild(this.document) as T['parentNode'];
202+
const fragment = this.treeAdapter.createDocumentFragment();
201203

202-
parser._adoptNodes(rootElement, fragment);
204+
this._adoptNodes(rootElement, fragment);
203205

204206
return fragment;
205207
}

test/utils/generate-parsing-tests.ts

+2 -5
Original file line number | Diff line number | Diff line change
@@ -137,19 +137,16 @@ export function generateParsingTests(
137137
name: string,
138138
prefix: string,
139139
{
140-
skipFragments,
141140
withoutErrors,
142141
expectErrors: expectError = [],
143142
suitePath = treePath,
144-
}: { skipFragments?: boolean; withoutErrors?: boolean; expectErrors?: string[]; suitePath?: URL },
143+
}: { withoutErrors?: boolean; expectErrors?: string[]; suitePath?: URL },
145144
parse: ParseMethod<TreeAdapterTypeMap>
146145
): void {
147146
generateTestsForEachTreeAdapter(name, (treeAdapter) => {
148147
const errorsToExpect = new Set(expectError);
149148

150-
for (const test of loadTreeConstructionTestData(suitePath, treeAdapter).filter(
151-
(test) => !skipFragments || !test.fragmentContext
152-
)) {
149+
for (const test of loadTreeConstructionTestData(suitePath, treeAdapter)) {
153150
const expectError = errorsToExpect.delete(`${test.idx}.${test.setName}`);
154151

155152
it(

0 commit comments

Comments
 (0)