Fix infinite loop in unbuffered bytes reading

smheidrich · smheidrich · commit 1862a808bd78 · 2025-03-24T02:13:24.000+01:00
diff --git a/src/suitable_unbuffered_bytes_stream.rs b/src/suitable_unbuffered_bytes_stream.rs
@@ -47,7 +47,11 @@ impl Utf8CharSource for SuitableUnbufferedBytesStream {
         // if we're inside a unicode char, we try and read its remaining bytes
         // (or until EOF, in which case from_utf8 below will return an error):
         while n_bytes_read < n_bytes_in_char {
-            n_bytes_read += self.inner.read(&mut buf[n_bytes_read..n_bytes_in_char])?;
+            let n_bytes_read_cur = self.inner.read(&mut buf[n_bytes_read..n_bytes_in_char])?;
+            if n_bytes_read_cur < 1 {
+                break; // EOF
+            }
+            n_bytes_read += n_bytes_read_cur;
         }
         Ok(std::str::from_utf8(&buf[..n_bytes_read])
             .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{}", e)))?
diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py
@@ -48,6 +48,11 @@ def test_malformed_utf8(bytes_to_bytes_buf):
     buf = bytes_to_bytes_buf(bytes([129]))
     with pytest.raises(
         OSError,
-        match=re.escape("malformed UTF-8 of 1 bytes at line 1 char 1"),
+        # TODO: Unify these exception messages at some point (the code under
+        #   test uses two different Rust functions for Unicode conversion,
+        #   which return different error messages).
+        match=re.compile(
+            "(invalid|malformed) UTF-8 (sequence )?of .* bytes", re.IGNORECASE
+        ),
     ):
         list(load(buf))