Skip to content

Commit

Permalink
feat: update iterator implementation for browser compatibility
Browse files Browse the repository at this point in the history
BREAKING CHANGE: The stream should be piped through a TextDecoderStream like for example:
const textDecoder = new TextDecoderStream();
for await (const entry of iterator(file.stream().pipeThrough(textDecoder)))
  • Loading branch information
lpatiny committed Nov 2, 2024
1 parent ab74e7e commit 6a53960
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 45 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,19 @@ var result = parse(sdf, {

## Iterator

This API is only available on Node.js.

```js
const { iterator } = require('sdf-parser');
const readStream = createReadStream(join(__dirname, 'test.sdf.gz'));
const stream = readStream.pipe(createGunzip());
const file = await openAsBlob(join(__dirname, 'test.sdf.gz'));

const decompressionStream = new DecompressionStream('gzip');
const textDecoder = new TextDecoderStream();

const stream = file
.stream()
.pipeThrough(decompressionStream)
.pipeThrough(textDecoder);
const results = [];

for await (const entry of iterator(stream)) {
results.push(entry);
}
Expand Down
4 changes: 1 addition & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
"lib",
"src"
],
"browser": {
"./src/iterator.js": "./src/iterator.browser.js"
},
"sideEffects": false,
"scripts": {
"build": "cheminfo-build --entry src/index.js --root SDFParser",
Expand Down Expand Up @@ -45,6 +42,7 @@
"devDependencies": {
"@babel/plugin-transform-modules-commonjs": "^7.25.9",
"@types/jest": "^29.5.14",
"@types/node": "^22.8.6",
"@vitest/coverage-v8": "^2.1.4",
"babel-eslint": "^10.1.0",
"callback-stream": "^1.1.0",
Expand Down
45 changes: 27 additions & 18 deletions src/__tests__/iterator.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { createReadStream, ReadStream } from 'fs';
import { openAsBlob } from 'fs';
import { join } from 'path';
import { createGunzip } from 'zlib';

import { fileCollectionFromPath } from 'filelist-utils';
import { test, expect } from 'vitest';
Expand All @@ -13,12 +12,15 @@ test('iterator', async () => {
).files.filter((file) => file.name === 'test.sdf');
const results = [];

if (parseInt(process.versions.node, 10) >= 18) {
for await (const entry of iterator(ReadStream.fromWeb(files[0].stream()))) {
results.push(entry);
}
expect(results).toHaveLength(128);
expect(results[0]).toMatchInlineSnapshot(`
const textDecoder = new TextDecoderStream();

Check failure on line 15 in src/__tests__/iterator.test.js

View workflow job for this annotation

GitHub Actions / nodejs / test (16)

src/__tests__/iterator.test.js > iterator

ReferenceError: TextDecoderStream is not defined ❯ src/__tests__/iterator.test.js:15:23
for await (const entry of iterator(
files[0].stream().pipeThrough(textDecoder),
)) {
results.push(entry);
}

expect(results).toHaveLength(128);
expect(results[0]).toMatchInlineSnapshot(`
{
"CLogP": 2.7,
"Code": 100380824,
Expand Down Expand Up @@ -64,16 +66,24 @@ test('iterator', async () => {
",
}
`);
}
});

test('iterator on stream', async () => {
const readStream = createReadStream(join(__dirname, 'test.sdf.gz'));
const stream = readStream.pipe(createGunzip());
const file = await openAsBlob(join(__dirname, 'test.sdf.gz'));

Check failure on line 72 in src/__tests__/iterator.test.js

View workflow job for this annotation

GitHub Actions / nodejs / test (16)

src/__tests__/iterator.test.js > iterator on stream

TypeError: openAsBlob is not a function ❯ src/__tests__/iterator.test.js:72:22

Check failure on line 72 in src/__tests__/iterator.test.js

View workflow job for this annotation

GitHub Actions / nodejs / test (18)

src/__tests__/iterator.test.js > iterator on stream

TypeError: openAsBlob is not a function ❯ src/__tests__/iterator.test.js:72:22

const decompressionStream = new DecompressionStream('gzip');
const textDecoder = new TextDecoderStream();

const stream = file
.stream()
.pipeThrough(decompressionStream)
.pipeThrough(textDecoder);
const results = [];

for await (const entry of iterator(stream)) {
results.push(entry);
}

expect(results).toHaveLength(128);
expect(results[0]).toMatchInlineSnapshot(`
{
Expand Down Expand Up @@ -129,12 +139,12 @@ test('iterator on fileCollection stream', async () => {
).files[0];
const results = [];

if (parseInt(process.versions.node, 10) >= 18) {
for await (const entry of iterator(ReadStream.fromWeb(file.stream()))) {
results.push(entry);
}
expect(results).toHaveLength(128);
expect(results[0]).toMatchInlineSnapshot(`
const textDecoder = new TextDecoderStream();

Check failure on line 142 in src/__tests__/iterator.test.js

View workflow job for this annotation

GitHub Actions / nodejs / test (16)

src/__tests__/iterator.test.js > iterator on fileCollection stream

ReferenceError: TextDecoderStream is not defined ❯ src/__tests__/iterator.test.js:142:23
for await (const entry of iterator(file.stream().pipeThrough(textDecoder))) {
results.push(entry);
}
expect(results).toHaveLength(128);
expect(results[0]).toMatchInlineSnapshot(`
{
"CLogP": 2.7,
"Code": 100380824,
Expand Down Expand Up @@ -180,5 +190,4 @@ test('iterator on fileCollection stream', async () => {
",
}
`);
}
});
3 changes: 0 additions & 3 deletions src/iterator.browser.js

This file was deleted.

56 changes: 42 additions & 14 deletions src/iterator.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
import { createInterface } from 'readline';

import { parseString } from 'dynamic-typing';

/**
* Parse a SDF file
* @param {NodeJS.ReadableStream} readStream SDF file to parse
* @param {ReadableStream} readStream SDF file to parse
* @param {object} [options={}]
* @param {Function} [options.filter] Callback allowing to filter the molecules
* @param {string} [options.eol='\n'] End of line character
* @param {boolean} [options.dynamicTyping] Dynamically type the data
*/

export async function* iterator(readStream, options = {}) {
const lines = createInterface(readStream);
const { eol = '\n', dynamicTyping = true } = options;

const lineStream = readStream.pipeThrough(createLineStream());
const currentLines = [];
options = { ...options };
if (options.dynamicTyping === undefined) options.dynamicTyping = true;

options.eol = '\n';
for await (let line of lines) {
for await (let line of lineStream) {
if (line.startsWith('$$$$')) {
const molecule = getMolecule(currentLines.join(options.eol), options);
const molecule = getMolecule(currentLines.join(eol), {
eol,
dynamicTyping,
});
if (!options.filter || options.filter(molecule)) {
yield molecule;
}
Expand All @@ -29,26 +31,52 @@ export async function* iterator(readStream, options = {}) {
}
}

/**
 * Convert a single SDF record (one molecule's text, without the trailing
 * `$$$$` delimiter) into a plain object.
 * @param {string} sdfPart - Raw text of one SDF record.
 * @param {object} options
 * @param {string} options.eol - End-of-line sequence used inside `sdfPart`.
 * @param {boolean} options.dynamicTyping - If true, each property value is
 *   converted with `parseString` (numbers, booleans, …); otherwise values
 *   stay raw strings.
 * @returns {object|undefined} `{ molfile, ...properties }`, or `undefined`
 *   when the record is empty or too short to contain a molfile.
 */
function getMolecule(sdfPart, options) {
  const { eol, dynamicTyping } = options;
  // Each `> <Label>` data header starts at the beginning of its own line,
  // so `${eol}>` cleanly separates the molfile from the property blocks.
  const parts = sdfPart.split(`${eol}>`);
  if (parts.length === 0 || parts[0].length <= 5) return;
  const molecule = {};
  molecule.molfile = parts[0] + eol;
  for (let j = 1; j < parts.length; j++) {
    const lines = parts[j].split(eol);
    const from = lines[0].indexOf('<');
    const to = lines[0].indexOf('>');
    const label = lines[0].substring(from + 1, to);
    // Skip the header line (k = 0) and the trailing blank line; multi-line
    // values are rejoined with the original eol.
    for (let k = 1; k < lines.length - 1; k++) {
      if (molecule[label]) {
        molecule[label] += eol + lines[k];
      } else {
        molecule[label] = lines[k];
      }
    }
    if (dynamicTyping) {
      molecule[label] = parseString(molecule[label]);
    }
  }
  return molecule;
}

/**
 * Create a TransformStream that splits incoming text chunks into individual
 * lines (separated by '\n'). Chunk boundaries may fall anywhere: a partial
 * line is buffered until its terminating newline (or the end of the stream)
 * arrives. The '\n' separators are not included in the emitted lines.
 * NOTE: splitting is on '\n' only; with CRLF input the '\r' remains at the
 * end of each emitted line.
 * @returns {TransformStream} string-in / line-out transform stream.
 */
function createLineStream() {
  let buffer = '';
  return new TransformStream({
    // Synchronous work only — the needless `async` was dropped.
    transform(chunk, controller) {
      buffer += chunk;
      const lines = buffer.split('\n');
      // Every element except the last is a complete line.
      for (let i = 0; i < lines.length - 1; i++) {
        controller.enqueue(lines[i]);
      }
      // Last element is '' (chunk ended on '\n') or a partial line to keep.
      buffer = lines[lines.length - 1];
    },
    flush(controller) {
      // Emit a final unterminated line, if any.
      if (buffer) controller.enqueue(buffer);
    },
  });
}
3 changes: 0 additions & 3 deletions src/stream.browser.js

This file was deleted.

0 comments on commit 6a53960

Please sign in to comment.