Skip to content

Commit 7e2a349

Browse files
committed
chore: refactored and linted
1 parent 9906bd3 commit 7e2a349

File tree

193 files changed

+4177
-4315
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

193 files changed

+4177
-4315
lines changed

.eslintignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
**/fixtures/*

.eslintrc

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Use this file as a starting point for your project's .eslintrc.
2+
// Copy this file, and add rule overrides as needed.
3+
{
4+
"parser": "babel-eslint",
5+
"extends": "airbnb",
6+
"plugins": [
7+
"babel"
8+
],
9+
"globals": {
10+
/* mocha */
11+
"describe",
12+
"it"
13+
},
14+
"rules": {
15+
"no-param-reassign": 0,
16+
/* TODO: fix this. The two import rules disabled below should work with the import/resolver setting further down, but currently don't. */
17+
"import/no-extraneous-dependencies": 0,
18+
"import/no-unresolved": 0,
19+
"no-control-regex": 0,
20+
"import/prefer-default-export": 0,
21+
"generator-star-spacing": 0,
22+
"babel/generator-star-spacing": 0,
23+
"func-names": 0,
24+
"no-useless-escape": 0,
25+
"no-confusing-arrow": 0,
26+
},
27+
"settings": {
28+
"import/resolver": {
29+
"babel-module": {
30+
"extensions": [".js"]
31+
}
32+
}
33+
},
34+
"parserOptions":{
35+
"ecmaFeatures": {
36+
"experimentalObjectRestSpread": true
37+
}
38+
}
39+
}

package.json

+12-1
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,17 @@
55
"main": "index.js",
66
"scripts": {
77
"start": "node ./build",
8-
"build": "rollup -c",
8+
"lint": "eslint src/**",
9+
"build": "eslint src/** && rollup -c",
910
"test": "./test-runner"
1011
},
1112
"author": "",
1213
"license": "ISC",
1314
"devDependencies": {
15+
"babel-eslint": "^6.1.2",
1416
"babel-plugin-external-helpers": "^6.8.0",
1517
"babel-plugin-module-alias": "^1.6.0",
18+
"babel-plugin-module-resolver": "^2.2.0",
1619
"babel-plugin-transform-async-to-generator": "^6.8.0",
1720
"babel-plugin-transform-es2015-destructuring": "^6.9.0",
1821
"babel-plugin-transform-object-rest-spread": "^6.8.0",
@@ -21,6 +24,14 @@
2124
"babel-preset-es2015-rollup": "^1.2.0",
2225
"babel-register": "^6.11.6",
2326
"babelrc-rollup": "^3.0.0",
27+
"eslint": "^3.5.0",
28+
"eslint-config-airbnb": "^11.1.0",
29+
"eslint-import-resolver-babel-module": "^2.0.1",
30+
"eslint-plugin-async": "^0.1.1",
31+
"eslint-plugin-babel": "^3.3.0",
32+
"eslint-plugin-import": "^1.15.0",
33+
"eslint-plugin-jsx-a11y": "^2.2.2",
34+
"eslint-plugin-react": "^6.2.1",
2435
"mocha": "^3.0.2",
2536
"rollup": "^0.34.13",
2637
"rollup-plugin-babel": "^2.6.1",

score-move

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/local/bin/fish
2+
3+
set file $argv[1]
4+
set function $argv[2]
5+
6+
touch src/extractors/generic/next-page-url/scoring/utils/index.js
7+
touch src/extractors/generic/next-page-url/scoring/utils/$file.js
8+
touch src/extractors/generic/next-page-url/scoring/utils/$file.test.js
9+
10+
echo "import assert from 'assert';" > src/extractors/generic/next-page-url/scoring/utils/$file.test.js
11+
echo "" >> src/extractors/generic/next-page-url/scoring/utils/$file.test.js
12+
echo "import $function from './$file';" >> src/extractors/generic/next-page-url/scoring/utils/$file.test.js
13+
echo "" >> src/extractors/generic/next-page-url/scoring/utils/$file.test.js
14+
echo "export { default as $function } from './$file'" >> src/extractors/generic/next-page-url/scoring/utils/index.js
15+
16+
echo "Now make it a default export"
17+
echo "Move it to its file"
18+
echo "Move its tests to its test file"
19+
echo "import in score-links"
20+
echo "Test it."
21+

src/cleaners/author.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import { CLEAN_AUTHOR_RE } from './constants'
1+
import { CLEAN_AUTHOR_RE } from './constants';
22

33
// Take an author string (like 'By David Smith ') and clean it to
44
// just the name(s): 'David Smith'.
55
export default function cleanAuthor(author) {
6-
return author.replace(CLEAN_AUTHOR_RE, '$2').trim()
6+
return author.replace(CLEAN_AUTHOR_RE, '$2').trim();
77
}

src/cleaners/author.test.js

+10-10
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
1-
import assert from 'assert'
1+
import assert from 'assert';
22

3-
import cleanAuthor from './author'
3+
import cleanAuthor from './author';
44

55
describe('cleanAuthor(author)', () => {
66
it('removes the By from an author string', () => {
7-
const author = cleanAuthor('By Bob Dylan')
7+
const author = cleanAuthor('By Bob Dylan');
88

9-
assert.equal(author, 'Bob Dylan')
10-
})
9+
assert.equal(author, 'Bob Dylan');
10+
});
1111

1212
it('trims trailing whitespace and line breaks', () => {
1313
const text = `
1414
written by
1515
Bob Dylan
16-
`
17-
const author = cleanAuthor(text)
16+
`;
17+
const author = cleanAuthor(text);
1818

19-
assert.equal(author, 'Bob Dylan')
20-
})
21-
})
19+
assert.equal(author, 'Bob Dylan');
20+
});
21+
});

src/cleaners/constants.js

+28-10
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// CLEAN AUTHOR CONSTANTS
2-
export const CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i
2+
export const CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i;
33
// author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',
44

55
// CLEAN DEK CONSTANTS
6-
export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i')
6+
export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i');
77
// An ordered list of meta tag names that denote likely article deks.
88
// From most distinct to least distinct.
99
//
@@ -14,7 +14,7 @@ export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i')
1414
// However, these tags often have SEO-specific junk in them that's not
1515
// header-worthy like a dek is. Excerpt material at best.
1616
export const DEK_META_TAGS = [
17-
]
17+
];
1818

1919
// An ordered list of Selectors to find likely article deks. From
2020
// most explicit to least explicit.
@@ -23,18 +23,36 @@ export const DEK_META_TAGS = [
2323
// detrimental to the aesthetics of an article.
2424
export const DEK_SELECTORS = [
2525
'.entry-summary',
26-
]
26+
];
2727

2828
// CLEAN DATE PUBLISHED CONSTANTS
29-
export const CLEAN_DATE_STRING_RE = /^\s*published\s*:?\s*(.*)/i
30-
export const TIME_MERIDIAN_SPACE_RE = /(.*\d)(am|pm)(.*)/i
31-
export const TIME_MERIDIAN_DOTS_RE = /\.m\./i
32-
export const SPLIT_DATE_STRING = /(\d{1,2}:\d{2,2}(\s?[ap]\.?m\.?)?)|(\d{1,2}[\/-]\d{1,2}[\/-]\d{2,4})|(\d{1,4})|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/ig
29+
export const CLEAN_DATE_STRING_RE = /^\s*published\s*:?\s*(.*)/i;
30+
export const TIME_MERIDIAN_SPACE_RE = /(.*\d)(am|pm)(.*)/i;
31+
export const TIME_MERIDIAN_DOTS_RE = /\.m\./i;
32+
const months = [
33+
'jan',
34+
'feb',
35+
'mar',
36+
'apr',
37+
'may',
38+
'jun',
39+
'jul',
40+
'aug',
41+
'sep',
42+
'oct',
43+
'nov',
44+
'dec',
45+
];
46+
const allMonths = months.join('|');
47+
const timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';
48+
const timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';
49+
export const SPLIT_DATE_STRING =
50+
new RegExp(`(${timestamp1})|(${timestamp2})|([0-9]{1,4})|(${allMonths})`, 'ig');
3351

3452
// CLEAN TITLE CONSTANTS
3553
// A regular expression that will match separating characters on a
3654
// title, that usually denote breadcrumbs or something similar.
37-
export const TITLE_SPLITTERS_RE = /(: | - | \| )/g
55+
export const TITLE_SPLITTERS_RE = /(: | - | \| )/g;
3856

3957
export const DOMAIN_ENDINGS_RE =
40-
new RegExp('\.com$|\.net$|\.org$|\.co\.uk$', 'g')
58+
new RegExp('\.com$|\.net$|\.org$|\.co\.uk$', 'g');

src/cleaners/content.js

+14-16
Original file line numberDiff line numberDiff line change
@@ -8,54 +8,52 @@ import {
88
rewriteTopLevel,
99
stripJunkTags,
1010
makeLinksAbsolute,
11-
} from 'utils/dom'
12-
13-
import { convertNodeTo } from 'utils/dom'
11+
} from 'utils/dom';
1412

1513
// Clean our article content, returning a new, cleaned node.
1614
export default function extractCleanNode(
1715
article,
1816
{
1917
$,
20-
cleanConditionally=true,
21-
title='',
22-
url='',
18+
cleanConditionally = true,
19+
title = '',
20+
url = '',
2321
}
2422
) {
2523
// Rewrite the tag name to div if it's a top level node like body or
2624
// html to avoid later complications with multiple body tags.
27-
rewriteTopLevel(article, $)
25+
rewriteTopLevel(article, $);
2826

2927
// Drop small images and spacer images
30-
cleanImages(article, $)
28+
cleanImages(article, $);
3129

3230
// Drop certain tags like <title>, etc
3331
// This is -mostly- for cleanliness, not security.
34-
stripJunkTags(article, $)
32+
stripJunkTags(article, $);
3533

3634
// H1 tags are typically the article title, which should be extracted
3735
// by the title extractor instead. If there are fewer than 3 of them (<3),
3836
// strip them. Otherwise, turn 'em into H2s.
39-
cleanHOnes(article, $)
37+
cleanHOnes(article, $);
4038

4139
// Clean headers
42-
cleanHeaders(article, $, title)
40+
cleanHeaders(article, $, title);
4341

4442
// Make links absolute
45-
makeLinksAbsolute(article, $, url)
43+
makeLinksAbsolute(article, $, url);
4644

4745
// Remove style or align attributes
48-
cleanAttributes(article, $)
46+
cleanAttributes(article);
4947

5048
// We used to clean UL's and OL's here, but it was leading to
5149
// too many in-article lists being removed. Consider a better
5250
// way to detect menus specifically and remove them.
53-
cleanTags(article, $, cleanConditionally)
51+
cleanTags(article, $, cleanConditionally);
5452

5553
// Remove empty paragraph nodes
56-
removeEmpty(article, $)
54+
removeEmpty(article, $);
5755

58-
return article
56+
return article;
5957
}
6058
// headers = doc.xpath('.//h2 | .//h3 | .//h4 | .//h5 | .//h6')
6159
// for header in headers:

src/cleaners/content.test.js

+24-24
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,32 @@
1-
import assert from 'assert'
2-
import cheerio from 'cheerio'
3-
import fs from 'fs'
1+
import assert from 'assert';
2+
import cheerio from 'cheerio';
3+
import fs from 'fs';
44

5-
import extractCleanNode from './content'
6-
import extractBestNode from 'extractors/generic/content/extract-best-node'
5+
import extractBestNode from 'extractors/generic/content/extract-best-node';
6+
import extractCleanNode from './content';
77

88
describe('extractCleanNode(article, { $, cleanConditionally, title } })', () => {
9-
it("cleans cruft out of a DOM node", () => {
10-
const html = fs.readFileSync('./fixtures/wired.html', 'utf-8')
11-
let $ = cheerio.load(html)
9+
it('cleans cruft out of a DOM node', () => {
10+
const html = fs.readFileSync('./fixtures/wired.html', 'utf-8');
11+
const $ = cheerio.load(html);
1212

1313
const opts = {
14-
stripUnlikelyCandidates: true,
15-
weightNodes: true,
16-
cleanConditionally: true,
17-
}
14+
stripUnlikelyCandidates: true,
15+
weightNodes: true,
16+
cleanConditionally: true,
17+
};
1818

19-
const bestNode = extractBestNode($, opts)
20-
let result = $.html(bestNode)
21-
// console.log(result)
22-
// console.log(result.length)
23-
const cleanNode = extractCleanNode(bestNode, { $, opts })
24-
result = $.html(cleanNode)
25-
// console.log(result.length)
26-
// console.log(result)
27-
// console.log(bestNode.html())
19+
const bestNode = extractBestNode($, opts);
20+
// let result = $.html(bestNode);
21+
// // console.log(result)
22+
// // console.log(result.length)
23+
const cleanNode = extractCleanNode(bestNode, { $, opts });
24+
// result = $.html(cleanNode);
25+
// // console.log(result.length)
26+
// // console.log(result)
27+
// // console.log(bestNode.html())
2828

29-
assert.equal($(bestNode).text().length, 2687)
30-
})
31-
})
29+
assert.equal($(cleanNode).text().length, 2687);
30+
});
31+
});
3232

src/cleaners/date-published.js

+16-16
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import moment from 'moment'
1+
import moment from 'moment';
22
// Is there a compelling reason to use moment here?
33
// It is mostly used only for its isValid() method,
44
// but we could just check for the 'Invalid Date' string instead.
@@ -7,27 +7,27 @@ import {
77
CLEAN_DATE_STRING_RE,
88
SPLIT_DATE_STRING,
99
TIME_MERIDIAN_SPACE_RE,
10-
TIME_MERIDIAN_DOTS_RE
11-
} from './constants'
10+
TIME_MERIDIAN_DOTS_RE,
11+
} from './constants';
12+
13+
export function cleanDateString(dateString) {
14+
return (dateString.match(SPLIT_DATE_STRING) || [])
15+
.join(' ')
16+
.replace(TIME_MERIDIAN_DOTS_RE, 'm')
17+
.replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')
18+
.replace(CLEAN_DATE_STRING_RE, '$1')
19+
.trim();
20+
}
1221

1322
// Take a date published string, and hopefully return a date out of
1423
// it (as an ISO 8601 string). Return null if we fail.
1524
export default function cleanDatePublished(dateString) {
16-
let date = moment(new Date(dateString))
25+
let date = moment(new Date(dateString));
1726

1827
if (!date.isValid()) {
19-
dateString = cleanDateString(dateString)
20-
date = moment(new Date(dateString))
28+
dateString = cleanDateString(dateString);
29+
date = moment(new Date(dateString));
2130
}
2231

23-
return date.isValid() ? date.toISOString() : null
24-
}
25-
26-
export function cleanDateString(dateString) {
27-
return (dateString.match(SPLIT_DATE_STRING) || [])
28-
.join(' ')
29-
.replace(TIME_MERIDIAN_DOTS_RE, 'm')
30-
.replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')
31-
.replace(CLEAN_DATE_STRING_RE, '$1')
32-
.trim()
32+
return date.isValid() ? date.toISOString() : null;
3333
}

0 commit comments

Comments
 (0)