@@ -25,7 +25,7 @@ impl SuitableUnbufferedBytesStream {
2525impl Utf8CharSource for SuitableUnbufferedBytesStream {
2626 fn read_char ( & mut self ) -> io:: Result < Option < char > > {
2727 let mut buf: [ u8 ; 4 ] = [ 0 ; 4 ] ;
28- let n_bytes_read = self . inner . read ( & mut buf[ ..1 ] ) ?;
28+ let mut n_bytes_read = self . inner . read ( & mut buf[ ..1 ] ) ?;
2929 if n_bytes_read < 1 {
3030 // EOF
3131 return Ok ( None ) ;
@@ -36,23 +36,20 @@ impl Utf8CharSource for SuitableUnbufferedBytesStream {
3636 "broken stream: returns more bytes than requested" ,
3737 ) ) ;
3838 }
39+ // try to see if we're at the start of a unicode char:
3940 let n_bytes_in_char = get_width ( buf[ 0 ] ) ;
4041 if n_bytes_in_char == 0 {
4142 return Err ( io:: Error :: new (
4243 io:: ErrorKind :: Other ,
4344 format ! ( "invalid UTF-8 start byte: {:x}" , buf[ 0 ] ) ,
4445 ) ) ;
4546 }
46- let n_bytes_actual = {
47- if n_bytes_in_char > 1 {
48- // this should only return fewer bytes than requested if it's cut short by EOF
49- // => will evaluate to invalid UTF-8 at the end and return an error
50- self . inner . read ( & mut buf[ 1 ..n_bytes_in_char] ) ? + 1
51- } else {
52- 1
53- }
54- } ;
55- Ok ( std:: str:: from_utf8 ( & buf[ ..n_bytes_actual] )
47+ // if we're inside a unicode char, we try and read its remaining bytes
48+ // (or until EOF, in which case from_utf8 below will return an error):
49+ while n_bytes_read < n_bytes_in_char {
50+ n_bytes_read += self . inner . read ( & mut buf[ n_bytes_read..n_bytes_in_char] ) ?;
51+ }
52+ Ok ( std:: str:: from_utf8 ( & buf[ ..n_bytes_read] )
5653 . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: Other , format ! ( "{}" , e) ) ) ?
5754 . chars ( )
5855 . next ( ) )
0 commit comments