Skip to content

Commit 1862a80

Browse files
committed
Fix infinite loop in unbuffered bytes reading
1 parent f4a48fe commit 1862a80

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

src/suitable_unbuffered_bytes_stream.rs

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,11 @@ impl Utf8CharSource for SuitableUnbufferedBytesStream {
47 | 47 |   // if we're inside a unicode char, we try and read its remaining bytes
48 | 48 |   // (or until EOF, in which case from_utf8 below will return an error):
49 | 49 |   while n_bytes_read < n_bytes_in_char {
50 |    | -     n_bytes_read += self.inner.read(&mut buf[n_bytes_read..n_bytes_in_char])?;
   | 50 | +     let n_bytes_read_cur = self.inner.read(&mut buf[n_bytes_read..n_bytes_in_char])?;
   | 51 | +     if n_bytes_read_cur < 1 {
   | 52 | +         break; // EOF
   | 53 | +     }
   | 54 | +     n_bytes_read += n_bytes_read_cur;
51 | 55 |   }
52 | 56 |   Ok(std::str::from_utf8(&buf[..n_bytes_read])
53 | 57 |       .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{}", e)))?

tests/test_exceptions.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,11 @@ def test_malformed_utf8(bytes_to_bytes_buf):
48 | 48 |   buf = bytes_to_bytes_buf(bytes([129]))
49 | 49 |   with pytest.raises(
50 | 50 |       OSError,
51 |    | -     match=re.escape("malformed UTF-8 of 1 bytes at line 1 char 1"),
   | 51 | +     # TODO: Unify these exception messages at some point (uses two
   | 52 | +     # different Rust functions for Unicode conversion which return
   | 53 | +     # different error messages...)
   | 54 | +     match=re.compile(
   | 55 | +         "(invalid|malformed) UTF-8 (sequence )?of .* bytes", re.IGNORECASE
   | 56 | +     ),
52 | 57 |   ):
53 | 58 |       list(load(buf))

0 commit comments

Comments
 (0)