From 6a539603d0a3e15e3f902412fea2aabf49d7ffce Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Sat, 2 Nov 2024 16:16:18 +0100
Subject: [PATCH] feat: update iterator implementation for browser
 compatibility

BREAKING CHANGE: The stream must now be piped through a TextDecoderStream, for example:
const textDecoder = new TextDecoderStream();
for await (const entry of iterator(file.stream().pipeThrough(textDecoder)))
---
 README.md                      | 14 ++++++---
 package.json                   |  4 +--
 src/__tests__/iterator.test.js | 45 ++++++++++++++++-----------
 src/iterator.browser.js        |  3 --
 src/iterator.js                | 56 +++++++++++++++++++++++++---------
 src/stream.browser.js          |  3 --
 6 files changed, 80 insertions(+), 45 deletions(-)
 delete mode 100644 src/iterator.browser.js
 delete mode 100644 src/stream.browser.js

diff --git a/README.md b/README.md
index 2d3da66..aade191 100644
--- a/README.md
+++ b/README.md
@@ -59,13 +59,19 @@ var result = parse(sdf, {
 
 ## Iterator
 
-This API is only available on Node.js.
-
 ```js
 const { iterator } = require('sdf-parser');
-const readStream = createReadStream(join(__dirname, 'test.sdf.gz'));
-const stream = readStream.pipe(createGunzip());
+const file = await openAsBlob(join(__dirname, 'test.sdf.gz'));
+
+const decompressionStream = new DecompressionStream('gzip');
+const textDecoder = new TextDecoderStream();
+
+const stream = file
+  .stream()
+  .pipeThrough(decompressionStream)
+  .pipeThrough(textDecoder);
 const results = [];
+
 for await (const entry of iterator(stream)) {
   results.push(entry);
 }
diff --git a/package.json b/package.json
index da77090..c22a380 100644
--- a/package.json
+++ b/package.json
@@ -8,9 +8,6 @@
     "lib",
     "src"
   ],
-  "browser": {
-    "./src/iterator.js": "./src/iterator.browser.js"
-  },
   "sideEffects": false,
   "scripts": {
     "build": "cheminfo-build  --entry src/index.js  --root SDFParser",
@@ -45,6 +42,7 @@
   "devDependencies": {
     "@babel/plugin-transform-modules-commonjs": "^7.25.9",
     "@types/jest": "^29.5.14",
+    "@types/node": "^22.8.6",
     "@vitest/coverage-v8": "^2.1.4",
     "babel-eslint": "^10.1.0",
     "callback-stream": "^1.1.0",
diff --git a/src/__tests__/iterator.test.js b/src/__tests__/iterator.test.js
index 192f157..989d4c5 100644
--- a/src/__tests__/iterator.test.js
+++ b/src/__tests__/iterator.test.js
@@ -1,6 +1,5 @@
-import { createReadStream, ReadStream } from 'fs';
+import { openAsBlob } from 'fs';
 import { join } from 'path';
-import { createGunzip } from 'zlib';
 
 import { fileCollectionFromPath } from 'filelist-utils';
 import { test, expect } from 'vitest';
@@ -13,12 +12,15 @@ test('iterator', async () => {
   ).files.filter((file) => file.name === 'test.sdf');
   const results = [];
 
-  if (parseInt(process.versions.node, 10) >= 18) {
-    for await (const entry of iterator(ReadStream.fromWeb(files[0].stream()))) {
-      results.push(entry);
-    }
-    expect(results).toHaveLength(128);
-    expect(results[0]).toMatchInlineSnapshot(`
+  const textDecoder = new TextDecoderStream();
+  for await (const entry of iterator(
+    files[0].stream().pipeThrough(textDecoder),
+  )) {
+    results.push(entry);
+  }
+
+  expect(results).toHaveLength(128);
+  expect(results[0]).toMatchInlineSnapshot(`
     {
       "CLogP": 2.7,
       "Code": 100380824,
@@ -64,16 +66,24 @@ test('iterator', async () => {
     ",
     }
   `);
-  }
 });
 
 test('iterator on stream', async () => {
-  const readStream = createReadStream(join(__dirname, 'test.sdf.gz'));
-  const stream = readStream.pipe(createGunzip());
+  const file = await openAsBlob(join(__dirname, 'test.sdf.gz'));
+
+  const decompressionStream = new DecompressionStream('gzip');
+  const textDecoder = new TextDecoderStream();
+
+  const stream = file
+    .stream()
+    .pipeThrough(decompressionStream)
+    .pipeThrough(textDecoder);
   const results = [];
+
   for await (const entry of iterator(stream)) {
     results.push(entry);
   }
+
   expect(results).toHaveLength(128);
   expect(results[0]).toMatchInlineSnapshot(`
     {
@@ -129,12 +139,12 @@ test('iterator on fileCollection stream', async () => {
   ).files[0];
   const results = [];
 
-  if (parseInt(process.versions.node, 10) >= 18) {
-    for await (const entry of iterator(ReadStream.fromWeb(file.stream()))) {
-      results.push(entry);
-    }
-    expect(results).toHaveLength(128);
-    expect(results[0]).toMatchInlineSnapshot(`
+  const textDecoder = new TextDecoderStream();
+  for await (const entry of iterator(file.stream().pipeThrough(textDecoder))) {
+    results.push(entry);
+  }
+  expect(results).toHaveLength(128);
+  expect(results[0]).toMatchInlineSnapshot(`
     {
       "CLogP": 2.7,
       "Code": 100380824,
@@ -180,5 +190,4 @@ test('iterator on fileCollection stream', async () => {
     ",
     }
   `);
-  }
 });
diff --git a/src/iterator.browser.js b/src/iterator.browser.js
deleted file mode 100644
index db55c6a..0000000
--- a/src/iterator.browser.js
+++ /dev/null
@@ -1,3 +0,0 @@
-export function iterator() {
-  throw new Error('Iterator not implemented in the browser');
-}
diff --git a/src/iterator.js b/src/iterator.js
index 6c80aaf..0d94528 100644
--- a/src/iterator.js
+++ b/src/iterator.js
@@ -1,24 +1,26 @@
-import { createInterface } from 'readline';
-
 import { parseString } from 'dynamic-typing';
+
 /**
  *  Parse a SDF file
- * @param {NodeJS.ReadableStream} readStream SDF file to parse
+ * @param {ReadableStream} readStream SDF file to parse
  * @param {object} [options={}]
  * @param {Function} [options.filter] Callback allowing to filter the molecules
+ * @param {string} [options.eol='\n'] End-of-line string used to join and split the lines of each entry
  * @param {boolean} [options.dynamicTyping] Dynamically type the data
  */
-
 export async function* iterator(readStream, options = {}) {
-  const lines = createInterface(readStream);
+  const { eol = '\n', dynamicTyping = true } = options;
+
+  const lineStream = readStream.pipeThrough(createLineStream());
   const currentLines = [];
-  options = { ...options };
   if (options.dynamicTyping === undefined) options.dynamicTyping = true;
 
-  options.eol = '\n';
-  for await (let line of lines) {
+  for await (let line of lineStream) {
     if (line.startsWith('$$$$')) {
-      const molecule = getMolecule(currentLines.join(options.eol), options);
+      const molecule = getMolecule(currentLines.join(eol), {
+        eol,
+        dynamicTyping,
+      });
       if (!options.filter || options.filter(molecule)) {
         yield molecule;
       }
@@ -29,26 +31,52 @@ export async function* iterator(readStream, options = {}) {
   }
 }
 
+/**
+ * Convert a SDF part to an object
+ * @param {string} sdfPart
+ * @param {object} options
+ * @param {string} options.eol
+ * @param {boolean} options.dynamicTyping
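+ * @returns {object|undefined} The parsed molecule, or undefined when the molfile part is 5 characters or shorter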
+ */
 function getMolecule(sdfPart, options) {
-  let parts = sdfPart.split(`${options.eol}>`);
+  const { eol, dynamicTyping } = options;
+  let parts = sdfPart.split(`${eol}>`);
   if (parts.length === 0 || parts[0].length <= 5) return;
   let molecule = {};
-  molecule.molfile = parts[0] + options.eol;
+  molecule.molfile = parts[0] + eol;
   for (let j = 1; j < parts.length; j++) {
-    let lines = parts[j].split(options.eol);
+    let lines = parts[j].split(eol);
     let from = lines[0].indexOf('<');
     let to = lines[0].indexOf('>');
     let label = lines[0].substring(from + 1, to);
     for (let k = 1; k < lines.length - 1; k++) {
       if (molecule[label]) {
-        molecule[label] += options.eol + lines[k];
+        molecule[label] += eol + lines[k];
       } else {
         molecule[label] = lines[k];
       }
     }
-    if (options.dynamicTyping) {
+    if (dynamicTyping) {
       molecule[label] = parseString(molecule[label]);
     }
   }
   return molecule;
 }
+
+function createLineStream() {
+  let buffer = '';
+  return new TransformStream({
+    async transform(chunk, controller) {
+      buffer += chunk;
+      let lines = buffer.split('\n');
+      for (let i = 0; i < lines.length - 1; i++) {
+        controller.enqueue(lines[i]);
+      }
+      buffer = lines[lines.length - 1];
+    },
+    flush(controller) {
+      if (buffer) controller.enqueue(buffer);
+    },
+  });
+}
diff --git a/src/stream.browser.js b/src/stream.browser.js
deleted file mode 100644
index 231918f..0000000
--- a/src/stream.browser.js
+++ /dev/null
@@ -1,3 +0,0 @@
-const empty = {};
-
-export default empty;