@@ -8,54 +8,52 @@ import {
8
8
rewriteTopLevel ,
9
9
stripJunkTags ,
10
10
makeLinksAbsolute ,
11
- } from 'utils/dom'
12
-
13
- import { convertNodeTo } from 'utils/dom'
11
+ } from 'utils/dom' ;
14
12
15
13
// Clean our article content, returning a new, cleaned node.
16
14
export default function extractCleanNode (
17
15
article ,
18
16
{
19
17
$,
20
- cleanConditionally= true ,
21
- title= '' ,
22
- url= '' ,
18
+ cleanConditionally = true ,
19
+ title = '' ,
20
+ url = '' ,
23
21
}
24
22
) {
25
23
// Rewrite the tag name to div if it's a top level node like body or
26
24
// html to avoid later complications with multiple body tags.
27
- rewriteTopLevel ( article , $ )
25
+ rewriteTopLevel ( article , $ ) ;
28
26
29
27
// Drop small images and spacer images
30
- cleanImages ( article , $ )
28
+ cleanImages ( article , $ ) ;
31
29
32
30
// Drop certain tags like <title>, etc
33
31
// This is -mostly- for cleanliness, not security.
34
- stripJunkTags ( article , $ )
32
+ stripJunkTags ( article , $ ) ;
35
33
36
34
// H1 tags are typically the article title, which should be extracted
37
35
// by the title extractor instead. If there's less than 3 of them (<3),
38
36
// strip them. Otherwise, turn 'em into H2s.
39
- cleanHOnes ( article , $ )
37
+ cleanHOnes ( article , $ ) ;
40
38
41
39
// Clean headers
42
- cleanHeaders ( article , $ , title )
40
+ cleanHeaders ( article , $ , title ) ;
43
41
44
42
// Make links absolute
45
- makeLinksAbsolute ( article , $ , url )
43
+ makeLinksAbsolute ( article , $ , url ) ;
46
44
47
45
// Remove style or align attributes
48
- cleanAttributes ( article , $ )
46
+ cleanAttributes ( article ) ;
49
47
50
48
// We used to clean UL's and OL's here, but it was leading to
51
49
// too many in-article lists being removed. Consider a better
52
50
// way to detect menus particularly and remove them.
53
- cleanTags ( article , $ , cleanConditionally )
51
+ cleanTags ( article , $ , cleanConditionally ) ;
54
52
55
53
// Remove empty paragraph nodes
56
- removeEmpty ( article , $ )
54
+ removeEmpty ( article , $ ) ;
57
55
58
- return article
56
+ return article ;
59
57
}
60
58
// headers = doc.xpath('.//h2 | .//h3 | .//h4 | .//h5 | .//h6')
61
59
// for header in headers:
0 commit comments