diff --git a/.eslintignore b/.eslintignore index 75bc9d280..bfa9e7227 100644 --- a/.eslintignore +++ b/.eslintignore @@ -1,4 +1,5 @@ -packages/parse5/lib/tokenizer/named-entity-data.js test/benchmark/node_modules/**/*.js test/memory_benchmark/node_modules/**/*.js +packages/test-utils/data/html5lib-tests +packages/*/dist/ node_modules diff --git a/.eslintrc.js b/.eslintrc.js deleted file mode 100644 index c3774cdab..000000000 --- a/.eslintrc.js +++ /dev/null @@ -1,20 +0,0 @@ -module.exports = { - env: { - es6: true, - node: true - }, - extends: ['eslint:recommended', 'prettier'], - plugins: ['prettier'], - rules: { - 'prettier/prettier': 'error', - 'no-console': 'error', - curly: ['error', 'all'], - 'prefer-arrow-callback': 'error', - 'one-var': ['error', 'never'], - 'no-var': 'error', - 'prefer-const': 'error' - }, - parserOptions: { - ecmaVersion: 6 - } -}; diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 000000000..426bcd905 --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,55 @@ +{ + "env": { + "es2020": true, + "node": true, + "jest": true + }, + "extends": ["eslint:recommended", "prettier", "plugin:unicorn/recommended"], + "rules": { + "no-console": "error", + "curly": ["error", "all"], + "prefer-arrow-callback": "error", + "one-var": ["error", "never"], + "no-var": "error", + "prefer-const": "error", + "object-shorthand": "error", + "prefer-destructuring": [ + "error", + { + "object": true, + "array": false + } + ], + "prefer-template": "error", + + "unicorn/no-null": "off", + "unicorn/prevent-abbreviations": "off", + "unicorn/prefer-string-slice": "off", + "unicorn/prefer-code-point": "off", + "unicorn/no-array-push-push": "off", + "unicorn/no-array-reduce": "off", + "unicorn/no-for-loop": "off", + "unicorn/consistent-destructuring": "off", + "unicorn/prefer-switch": ["error", { "emptyDefaultCase": "do-nothing-comment" }] + }, + "parserOptions": { + "sourceType": "module" + }, + "overrides": [ + { + "files": "*.ts", + "extends": [ + 
"plugin:@typescript-eslint/eslint-recommended", + "plugin:@typescript-eslint/recommended", + "prettier" + ], + "rules": { + "@typescript-eslint/no-non-null-assertion": "warn", + "@typescript-eslint/no-explicit-any": "warn", + "@typescript-eslint/explicit-function-return-type": "error", + + "@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_" }] + } + } + ] +} diff --git a/.gitattributes b/.gitattributes index 09948ef45..c91c5234d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,3 @@ # Exclude the HTML files from GitHub's language statistics # https://github.com/github/linguist#using-gitattributes -test/data/* linguist-vendored +packages/test-utils/data/* linguist-vendored diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..c9bf32ac9 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +version: 2 +updates: + - package-ecosystem: npm + directory: '/' + schedule: + interval: daily + open-pull-requests-limit: 10 + versioning-strategy: increase + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: daily + - package-ecosystem: gitsubmodule + directory: '/' + schedule: + interval: daily diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 000000000..6c441444c --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,33 @@ +name: 'CodeQL' + +on: + push: + branches: [master] + pull_request: + # The branches below must be a subset of the branches above + branches: [master] + schedule: + - cron: '0 0 * * 0' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: 'javascript' + + - name: Perform CodeQL Analysis + uses: 
github/codeql-action/analyze@v1 diff --git a/.github/workflows/dependabot-automerge.yml b/.github/workflows/dependabot-automerge.yml new file mode 100644 index 000000000..7b7fced56 --- /dev/null +++ b/.github/workflows/dependabot-automerge.yml @@ -0,0 +1,28 @@ +# Based on https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/automating-dependabot-with-github-actions#enable-auto-merge-on-a-pull-request +name: Dependabot auto-merge +on: pull_request_target + +permissions: + pull-requests: write + contents: write + +jobs: + dependabot: + runs-on: ubuntu-latest + if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - name: Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@v1.1.1 + with: + github-token: '${{ secrets.GITHUB_TOKEN }}' + - name: Enable auto-merge for Dependabot PRs + # Automatically merge semver-patch and semver-minor PRs + if: "${{ steps.metadata.outputs.update-type == + 'version-update:semver-minor' || + steps.metadata.outputs.update-type == + 'version-update:semver-patch' }}" + run: gh pr merge --auto --squash "$PR_URL" + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} diff --git a/.github/workflows/nodejs-test.yml b/.github/workflows/nodejs-test.yml new file mode 100644 index 000000000..448138d19 --- /dev/null +++ b/.github/workflows/nodejs-test.yml @@ -0,0 +1,54 @@ +name: Node.js CI + +on: + push: + branches-ignore: + - 'dependabot/**' + pull_request: + +env: + CI: true + FORCE_COLOR: 2 + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions/setup-node@v2 + with: + node-version: lts/* + cache: npm + - run: npm ci + - run: npm run lint + + test: + name: Node ${{ matrix.node }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + node: + - 12 + - 14 + - 16 + - lts/* + + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive 
+ - name: Use Node.js ${{ matrix.node }} + uses: actions/setup-node@v2 + with: + node-version: ${{ matrix.node }} + cache: npm + - run: npm install -g npm@8 + - run: npm ci + - run: npm run build --if-present + + - name: Run Tests + run: npm run unit-tests diff --git a/.gitignore b/.gitignore index f08726031..47cf1da04 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,5 @@ node_modules docs/build docs/05_api_reference.md -package-lock.json -bench/package-lock.json +packages/*/dist/ .DS_Store diff --git a/.gitmodules b/.gitmodules index ccf6a3518..7d9a51e2d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "test/data/html5lib-tests"] - path = test/data/html5lib-tests + path = packages/test-utils/data/html5lib-tests url = https://github.com/HTMLParseErrorWG/html5lib-tests diff --git a/.husky/pre-commit b/.husky/pre-commit new file mode 100755 index 000000000..d0612ad37 --- /dev/null +++ b/.husky/pre-commit @@ -0,0 +1,4 @@ +#!/bin/sh +. "$(dirname "$0")/_/husky.sh" + +npm run pre-commit diff --git a/.prettierignore b/.prettierignore index f93620ded..3e133c3d5 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,2 +1,3 @@ -packages/parse5/lib/tokenizer/named-entity-data.js +packages/*/dist/ docs +packages/test-utils/data/html5lib-tests \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 68bfbf776..000000000 --- a/.travis.yml +++ /dev/null @@ -1,4 +0,0 @@ -language: node_js -sudo: false -node_js: - - stable diff --git a/README.md b/README.md index c91e9abed..fe48051f3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
@@ -8,8 +8,15 @@ HTML parsing/serialization toolset for Node.js. WHATWG HTML Living Standard (aka HTML5)-compliant. +--- + +This is a fork of the original parse5 package. We want this to become the next version of the package, pending +@inikulin's go-ahead. + +--- +
-
+
@@ -24,7 +31,7 @@ as jsdom,
- List of parse5 toolset packages
+ List of parse5 toolset packages
@@ -32,6 +39,6 @@ as jsdom, - Version history + Version history
diff --git a/bench/.eslintrc.js b/bench/.eslintrc.js deleted file mode 100644 index 401a667b5..000000000 --- a/bench/.eslintrc.js +++ /dev/null @@ -1,9 +0,0 @@ -module.exports = { - extends: ['../.eslintrc.js'], - rules: { - 'no-console': 'off' - }, - parserOptions: { - ecmaVersion: 8 - } -}; diff --git a/bench/.eslintrc.json b/bench/.eslintrc.json new file mode 100644 index 000000000..1cb404c3c --- /dev/null +++ b/bench/.eslintrc.json @@ -0,0 +1,6 @@ +{ + "extends": ["../.eslintrc.json"], + "rules": { + "no-console": "off" + } +} diff --git a/bench/memory/named-entity-data.js b/bench/memory/named-entity-data.js deleted file mode 100644 index bca60f005..000000000 --- a/bench/memory/named-entity-data.js +++ /dev/null @@ -1,15 +0,0 @@ -'use strict'; - -const format = require('human-format'); - -main(); - -function main() { - const before = process.memoryUsage().rss; - - require('../../packages/parse5/lib/tokenizer/named-entity-data'); - - const after = process.memoryUsage().rss; - - console.log('Initial memory consumption: ', format(after - before, { unit: 'B' })); -} diff --git a/bench/memory/sax-parser.js b/bench/memory/sax-parser.js index d1e893cf4..be026bb61 100644 --- a/bench/memory/sax-parser.js +++ b/bench/memory/sax-parser.js @@ -1,10 +1,8 @@ -'use strict'; - -const fs = require('fs'); -const format = require('human-format'); -const promisifyEvent = require('promisify-event'); -const memwatch = require('node-memwatch'); -const SAXParser = require('../../packages/parse5-sax-parser/lib'); +import { readFile } from 'node:fs/promises'; +import format from 'human-format'; +import promisifyEvent from 'promisify-event'; +import memwatch from '@airbnb/node-memwatch'; +import { SAXParser } from '../../packages/sax-parser/lib/index.js'; main(); @@ -16,13 +14,13 @@ async function main() { const heapDiffMeasurement = new memwatch.HeapDiff(); let heapDiff = null; - memwatch.on('stats', stats => { - maxMemUsage = Math.max(maxMemUsage, stats['current_base']); + 
memwatch.on('stats', (stats) => { + maxMemUsage = Math.max(maxMemUsage, stats.used_heap_size); }); startDate = new Date(); - const parserPromise = parse().then(dataSize => { + const parserPromise = parse().then((dataSize) => { parsedDataSize = dataSize; endDate = new Date(); heapDiff = heapDiffMeasurement.end(); @@ -30,14 +28,14 @@ async function main() { await Promise.all([ parserPromise, - promisifyEvent(memwatch, 'stats') // NOTE: we need at least one `stats` result + promisifyEvent(memwatch, 'stats'), // NOTE: we need at least one `stats` result ]); printResults(parsedDataSize, startDate, endDate, heapDiff, maxMemUsage); } async function parse() { - const data = fs.readFileSync('../test/data/huge-page/huge-page.html', 'utf8'); + const data = await readFile('../packages/test-utils/data/huge-page/huge-page.html', 'utf8'); let parsedDataSize = 0; const stream = new SAXParser(); @@ -57,16 +55,16 @@ function getDuration(startDate, endDate) { const scale = new format.Scale({ seconds: 1, minutes: 60, - hours: 3600 + hours: 3600, }); - return format((endDate - startDate) / 1000, { scale: scale }); + return format((endDate - startDate) / 1000, { scale }); } function printResults(parsedDataSize, startDate, endDate, heapDiff, maxMemUsage) { console.log('Input data size:', format(parsedDataSize, { unit: 'B' })); - console.log('Duration: ', getDuration(startDate, endDate)); - console.log('Memory before: ', heapDiff.before.size); - console.log('Memory after: ', heapDiff.after.size); - console.log('Memory max: ', format(maxMemUsage, { unit: 'B' })); + console.log('Duration:', getDuration(startDate, endDate)); + console.log('Memory before:', heapDiff.before.size); + console.log('Memory after:', heapDiff.after.size); + console.log('Memory max:', format(maxMemUsage, { unit: 'B' })); } diff --git a/bench/package.json b/bench/package.json index dbb9794b2..b4e9efe01 100644 --- a/bench/package.json +++ b/bench/package.json @@ -1,14 +1,16 @@ { - "name": "parse5-benchmarks", + "name":
"@parse5/benchmarks", + "private": "true", + "type": "module", "version": "1.0.0", "description": "parse5 regression benchmarks", "author": "Ivan Nikulin