From 6a539603d0a3e15e3f902412fea2aabf49d7ffce Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Sat, 2 Nov 2024 16:16:18 +0100 Subject: [PATCH] feat: update iterator implementation for browser compatibility BREAKING CHANGE: The stream must now be piped through a TextDecoderStream, for example: const textDecoder = new TextDecoderStream(); for await (const entry of iterator(file.stream().pipeThrough(textDecoder))) --- README.md | 14 ++++++--- package.json | 4 +-- src/__tests__/iterator.test.js | 45 ++++++++++++++++----------- src/iterator.browser.js | 3 -- src/iterator.js | 56 +++++++++++++++++++++++++--------- src/stream.browser.js | 3 -- 6 files changed, 80 insertions(+), 45 deletions(-) delete mode 100644 src/iterator.browser.js delete mode 100644 src/stream.browser.js diff --git a/README.md b/README.md index 2d3da66..aade191 100644 --- a/README.md +++ b/README.md @@ -59,13 +59,19 @@ var result = parse(sdf, { ## Iterator -This API is only available on Node.js. - ```js const { iterator } = require('sdf-parser'); -const readStream = createReadStream(join(__dirname, 'test.sdf.gz')); -const stream = readStream.pipe(createGunzip()); +const file = await openAsBlob(join(__dirname, 'test.sdf.gz')); + +const decompressionStream = new DecompressionStream('gzip'); +const textDecoder = new TextDecoderStream(); + +const stream = file + .stream() + .pipeThrough(decompressionStream) + .pipeThrough(textDecoder); const results = []; + for await (const entry of iterator(stream)) { results.push(entry); } diff --git a/package.json b/package.json index da77090..c22a380 100644 --- a/package.json +++ b/package.json @@ -8,9 +8,6 @@ "lib", "src" ], - "browser": { - "./src/iterator.js": "./src/iterator.browser.js" - }, "sideEffects": false, "scripts": { "build": "cheminfo-build --entry src/index.js --root SDFParser", @@ -45,6 +42,7 @@ "devDependencies": { "@babel/plugin-transform-modules-commonjs": "^7.25.9", "@types/jest": "^29.5.14", + "@types/node": "^22.8.6", "@vitest/coverage-v8": 
"^2.1.4", "babel-eslint": "^10.1.0", "callback-stream": "^1.1.0", diff --git a/src/__tests__/iterator.test.js b/src/__tests__/iterator.test.js index 192f157..989d4c5 100644 --- a/src/__tests__/iterator.test.js +++ b/src/__tests__/iterator.test.js @@ -1,6 +1,5 @@ -import { createReadStream, ReadStream } from 'fs'; +import { openAsBlob } from 'fs'; import { join } from 'path'; -import { createGunzip } from 'zlib'; import { fileCollectionFromPath } from 'filelist-utils'; import { test, expect } from 'vitest'; @@ -13,12 +12,15 @@ test('iterator', async () => { ).files.filter((file) => file.name === 'test.sdf'); const results = []; - if (parseInt(process.versions.node, 10) >= 18) { - for await (const entry of iterator(ReadStream.fromWeb(files[0].stream()))) { - results.push(entry); - } - expect(results).toHaveLength(128); - expect(results[0]).toMatchInlineSnapshot(` + const textDecoder = new TextDecoderStream(); + for await (const entry of iterator( + files[0].stream().pipeThrough(textDecoder), + )) { + results.push(entry); + } + + expect(results).toHaveLength(128); + expect(results[0]).toMatchInlineSnapshot(` { "CLogP": 2.7, "Code": 100380824, @@ -64,16 +66,24 @@ test('iterator', async () => { ", } `); - } }); test('iterator on stream', async () => { - const readStream = createReadStream(join(__dirname, 'test.sdf.gz')); - const stream = readStream.pipe(createGunzip()); + const file = await openAsBlob(join(__dirname, 'test.sdf.gz')); + + const decompressionStream = new DecompressionStream('gzip'); + const textDecoder = new TextDecoderStream(); + + const stream = file + .stream() + .pipeThrough(decompressionStream) + .pipeThrough(textDecoder); const results = []; + for await (const entry of iterator(stream)) { results.push(entry); } + expect(results).toHaveLength(128); expect(results[0]).toMatchInlineSnapshot(` { @@ -129,12 +139,12 @@ test('iterator on fileCollection stream', async () => { ).files[0]; const results = []; - if (parseInt(process.versions.node, 10) >= 18) { 
- for await (const entry of iterator(ReadStream.fromWeb(file.stream()))) { - results.push(entry); - } - expect(results).toHaveLength(128); - expect(results[0]).toMatchInlineSnapshot(` + const textDecoder = new TextDecoderStream(); + for await (const entry of iterator(file.stream().pipeThrough(textDecoder))) { + results.push(entry); + } + expect(results).toHaveLength(128); + expect(results[0]).toMatchInlineSnapshot(` { "CLogP": 2.7, "Code": 100380824, @@ -180,5 +190,4 @@ test('iterator on fileCollection stream', async () => { ", } `); - } }); diff --git a/src/iterator.browser.js b/src/iterator.browser.js deleted file mode 100644 index db55c6a..0000000 --- a/src/iterator.browser.js +++ /dev/null @@ -1,3 +0,0 @@ -export function iterator() { - throw new Error('Iterator not implemented in the browser'); -} diff --git a/src/iterator.js b/src/iterator.js index 6c80aaf..0d94528 100644 --- a/src/iterator.js +++ b/src/iterator.js @@ -1,24 +1,26 @@ -import { createInterface } from 'readline'; - import { parseString } from 'dynamic-typing'; + /** * Parse a SDF file - * @param {NodeJS.ReadableStream} readStream SDF file to parse + * @param {ReadableStream} readStream SDF file to parse * @param {object} [options={}] * @param {Function} [options.filter] Callback allowing to filter the molecules + * @param {string} [options.eol='\n'] End of line character * @param {boolean} [options.dynamicTyping] Dynamically type the data */ - export async function* iterator(readStream, options = {}) { - const lines = createInterface(readStream); + const { eol = '\n', dynamicTyping = true } = options; + + const lineStream = readStream.pipeThrough(createLineStream()); const currentLines = []; - options = { ...options }; if (options.dynamicTyping === undefined) options.dynamicTyping = true; - options.eol = '\n'; - for await (let line of lines) { + for await (let line of lineStream) { if (line.startsWith('$$$$')) { - const molecule = getMolecule(currentLines.join(options.eol), options); + const 
molecule = getMolecule(currentLines.join(eol), { + eol, + dynamicTyping, + }); if (!options.filter || options.filter(molecule)) { yield molecule; } @@ -29,26 +31,52 @@ export async function* iterator(readStream, options = {}) { } } +/** + * Convert a SDF part to an object + * @param {string} sdfPart + * @param {object} options + * @param {string} options.eol + * @param {boolean} options.dynamicTyping + * @returns + */ function getMolecule(sdfPart, options) { - let parts = sdfPart.split(`${options.eol}>`); + const { eol, dynamicTyping } = options; + let parts = sdfPart.split(`${eol}>`); if (parts.length === 0 || parts[0].length <= 5) return; let molecule = {}; - molecule.molfile = parts[0] + options.eol; + molecule.molfile = parts[0] + eol; for (let j = 1; j < parts.length; j++) { - let lines = parts[j].split(options.eol); + let lines = parts[j].split(eol); let from = lines[0].indexOf('<'); let to = lines[0].indexOf('>'); let label = lines[0].substring(from + 1, to); for (let k = 1; k < lines.length - 1; k++) { if (molecule[label]) { - molecule[label] += options.eol + lines[k]; + molecule[label] += eol + lines[k]; } else { molecule[label] = lines[k]; } } - if (options.dynamicTyping) { + if (dynamicTyping) { molecule[label] = parseString(molecule[label]); } } return molecule; } + +function createLineStream() { + let buffer = ''; + return new TransformStream({ + async transform(chunk, controller) { + buffer += chunk; + let lines = buffer.split('\n'); + for (let i = 0; i < lines.length - 1; i++) { + controller.enqueue(lines[i]); + } + buffer = lines[lines.length - 1]; + }, + flush(controller) { + if (buffer) controller.enqueue(buffer); + }, + }); +} diff --git a/src/stream.browser.js b/src/stream.browser.js deleted file mode 100644 index 231918f..0000000 --- a/src/stream.browser.js +++ /dev/null @@ -1,3 +0,0 @@ -const empty = {}; - -export default empty;