Skip to content

Commit b8b196e

Browse files
authored
ezqms-1171: drop h4-h6 during import of controlled doc (hcengineering#6487)
Signed-off-by: Alexey Zinoviev <[email protected]>
1 parent 3eb7d6e commit b8b196e

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

dev/doc-import-tool/src/extract/extract.ts

+8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { parseDocument } from 'htmlparser2'
22
import { AnyNode, Document } from 'domhandler'
3+
import { findAll } from 'domutils'
34

45
import { FileSpec, FileSpecType, TocFileSpec } from './types'
56
import { createMetadataExtractor } from './meta'
@@ -62,6 +63,13 @@ class TocContentExtractor implements ContentExtractor {
6263
/**
 * Parses `contents` with `parseDocument`, renames every h4-h6 element found in
 * the parsed document to `p`, and then delegates to `TocContentExtractor`.
 *
 * @param contents - source text handed to `parseDocument`
 * @param spec - file spec passed to the `TocContentExtractor` constructor
 * @param headerRoot - optional node forwarded unchanged to `extractor.extract`
 * @returns the result of `extractor.extract(doc, headerRoot)`
 */
export async function extract (contents: string, spec: FileSpec, headerRoot?: AnyNode): Promise<ExtractedFile> {
6364
const extractor = new TocContentExtractor(spec)
6465
const doc = parseDocument(contents)
66+
67+
// Headers deeper than h3 are not supported, so
68+
// look through the Document's children and turn every h4-h6 into a paragraph.
// The rename mutates the parsed tree in place, before the extractor sees it.
// NOTE(review): the predicate reads `tagName` while the rename writes `name`;
// presumably both refer to the same field on domhandler elements - confirm.
69+
findAll((n) => ['h4', 'h5', 'h6'].includes(n.tagName), doc.childNodes).forEach((node) => {
70+
node.name = 'p'
71+
})
72+
6573
return extractor.extract(doc, headerRoot)
6674
}
6775

0 commit comments

Comments
 (0)