Skip to content

Commit d419f3b

Browse files
committed
Fix UTF-8 characters split across read() calls (unbuffered bytes stream)
1 parent 18e5ba1 commit d419f3b

File tree

1 file changed

+8
-11
lines changed

1 file changed

+8
-11
lines changed

src/suitable_unbuffered_bytes_stream.rs

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ impl SuitableUnbufferedBytesStream {
2525
impl Utf8CharSource for SuitableUnbufferedBytesStream {
2626
fn read_char(&mut self) -> io::Result<Option<char>> {
2727
let mut buf: [u8; 4] = [0; 4];
28-
let n_bytes_read = self.inner.read(&mut buf[..1])?;
28+
let mut n_bytes_read = self.inner.read(&mut buf[..1])?;
2929
if n_bytes_read < 1 {
3030
// EOF
3131
return Ok(None);
@@ -36,23 +36,20 @@ impl Utf8CharSource for SuitableUnbufferedBytesStream {
3636
"broken stream: returns more bytes than requested",
3737
));
3838
}
39+
// check whether the byte we just read is a valid start byte of a Unicode char:
3940
let n_bytes_in_char = get_width(buf[0]);
4041
if n_bytes_in_char == 0 {
4142
return Err(io::Error::new(
4243
io::ErrorKind::Other,
4344
format!("invalid UTF-8 start byte: {:x}", buf[0]),
4445
));
4546
}
46-
let n_bytes_actual = {
47-
if n_bytes_in_char > 1 {
48-
// this should only return fewer bytes than requested if it's cut short by EOF
49-
// => will evaluate to invalid UTF-8 at the end and return an error
50-
self.inner.read(&mut buf[1..n_bytes_in_char])? + 1
51-
} else {
52-
1
53-
}
54-
};
55-
Ok(std::str::from_utf8(&buf[..n_bytes_actual])
47+
// if the start byte announces a multi-byte Unicode char, keep reading its remaining bytes
48+
// (or until EOF, which is meant to make from_utf8 below return an error — NOTE(review): the loop has no exit when read() returns 0, confirm it cannot spin at EOF):
49+
while n_bytes_read < n_bytes_in_char {
50+
n_bytes_read += self.inner.read(&mut buf[n_bytes_read..n_bytes_in_char])?;
51+
}
52+
Ok(std::str::from_utf8(&buf[..n_bytes_read])
5653
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{}", e)))?
5754
.chars()
5855
.next())

0 commit comments

Comments
 (0)