-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
executable file
·99 lines (95 loc) · 3.79 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env node
const os = require('os');
const puppeteer = require('puppeteer');
const yargs = require('yargs');
const { Readability } = require('@mozilla/readability');
const JSDOM = require('jsdom').JSDOM;
const turndown = require('turndown');
// Shared HTML -> markdown converter: `*emphasis*` delimiters and fenced code blocks.
const turndownService = new turndown({ emDelimiter: '*', codeBlockStyle: 'fenced' });
const gfmPlugin = require('turndown-plugin-gfm').gfm;
const fs = require('fs');
const ora = require('ora');
const chalk = require('chalk');
// Directory the generated markdown files are written to: MARKDOWNIFY_DIR when
// set, otherwise the user's home directory. os.homedir() is used instead of
// reading $HOME directly so a home directory is still found on platforms where
// HOME is not defined (e.g. Windows); on POSIX it still honours $HOME.
const defaultDirectory = process.env.MARKDOWNIFY_DIR || os.homedir();
// Single terminal spinner shared by the command handler and writeMarkdown().
const spinner = ora();
/**
 * Turn an article title into a file name (without extension).
 *
 * The title comes from the fetched page, so it is untrusted: path separators
 * are replaced with `-` so it can neither point into a missing sub-directory
 * nor climb out of the output directory via `..` segments.
 *
 * @param {?string} title - Title reported by Readability; may be empty or null.
 * @returns {string} The cleaned title, or the current timestamp when no usable title exists.
 */
function toSafeFilename(title) {
  const cleaned = typeof title === 'string' ? title.replace(/[\\/]/g, '-').trim() : '';
  return cleaned === '' ? String(Date.now()) : cleaned;
}

/**
 * Convert a comma-separated tag list into a single line of hashtags.
 *
 * @param {string} tags - Raw value of the --tags option, e.g. "news, tech".
 * @returns {string} Space-separated hashtags ("#news #tech"); empty when no tag remains after trimming.
 */
function toHashtagLine(tags) {
  return tags
    .split(',')
    .map((tag) => tag.trim())
    .filter((tag) => tag !== '')
    .map((tag) => `#${tag}`)
    .join(' ');
}

// CLI definition: `markdownify convert [urls...]` renders each url in a headless
// browser, extracts the readable article and stores it as a markdown file.
yargs
  .scriptName('markdownify')
  .showHelpOnFail(true)
  .command('convert [urls...]', 'Convert url(s) to markdown', (args) => {
    args.positional('urls', {
      type: 'array',
      describe: 'the urls to be converted'
    });
  }, async (argv) => {
    if (argv.urls.length === 0) {
      console.error('No url(s) supplied');
      yargs.showHelp();
      return;
    }

    // Register the GFM plugin once rather than once per url.
    if (argv.gfm) {
      turndownService.use(gfmPlugin);
    }

    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      if (argv['js-disabled']) {
        await page.setJavaScriptEnabled(false);
      }

      for (const url of argv.urls) {
        spinner.start(`Generating markdown from ${url}`);
        try {
          // Navigation lives inside the try so that one unreachable url is
          // reported and skipped instead of aborting the whole run.
          // When --timeout is omitted this is undefined, which leaves the
          // navigation timeout to Puppeteer's own default.
          await page.goto(url, { waitUntil: 'networkidle0', timeout: argv.timeout });

          const data = await page.evaluate(() => document.querySelector('*').outerHTML);
          const doc = new JSDOM(data, { url });
          const readerDoc = new Readability(doc.window.document).parse();
          if (readerDoc == null) {
            // parse() yields null when no article could be extracted.
            throw new Error(`No readable content found at ${url}`);
          }

          let markdownData = `*[View Original](${url})*\n\n` + turndownService.turndown(readerDoc.content);
          if (argv.tags) {
            const hashtagLine = toHashtagLine(argv.tags);
            if (hashtagLine !== '') {
              markdownData = hashtagLine + '\n\n' + markdownData;
            }
          }

          const filename = toSafeFilename(readerDoc.title);
          try {
            writeMarkdown(`${defaultDirectory}/${filename}.md`, markdownData, url);
          } catch (fe) {
            // The title may still contain characters the file system rejects;
            // retry once with a timestamp-based name.
            spinner.warn(`${chalk.yellow('Filename is invalid, using timestamp as a fallback filename.')}`);
            writeMarkdown(`${defaultDirectory}/${Date.now()}.md`, markdownData, url);
          }
        } catch (e) {
          const errorMessage = chalk.red(`Error extracting markdown from ${url}`);
          spinner.fail(errorMessage);
          console.error(e);
        }
      }
    } finally {
      // Always shut the headless browser down, even on an unexpected error,
      // so the CLI process can exit.
      await browser.close();
    }
  })
  .options({
    'tags': {
      alias: 't',
      type: 'string',
      description: 'Add tags to markdown',
    },
    'js-disabled': {
      alias: 'j',
      type: 'boolean',
      description: 'Disable javascript on page',
    },
    'timeout': {
      alias: 'T',
      type: 'number',
      description: 'Timeout for loading the page in milliseconds'
    },
    'gfm': {
      alias: 'g',
      type: 'boolean',
      description: 'Enable gfm element parsing (e.g. task list, strikethrough, tables)'
    },
  })
  .help()
  .alias('help', 'h')
  .demandCommand()
  .argv;
/**
 * Write markdown to disk and report success on the shared spinner.
 *
 * @param {string} dir - Full path of the file to create (despite the name, a file path).
 * @param {string} data - Markdown content to write.
 * @param {string} url - Source url, used only in the success message.
 * @throws Whatever fs.writeFileSync throws when the file cannot be written.
 */
function writeMarkdown(dir, data, url) {
  fs.writeFileSync(dir, data);
  // Only reached when the write succeeded.
  spinner.succeed(chalk.green(`${dir} has been created for the url: ${url}.`));
}