Skip to content

Commit fa7d340

Browse files
committed
WIP
1 parent 252d5a7 commit fa7d340

File tree

2 files changed

+103
-0
lines changed

2 files changed

+103
-0
lines changed

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ use CharOrEof::{Char, Eof};
3939
mod unicode_utils;
4040
use crate::unicode_utils::{is_surrogate, decode_surrogate_pair, UnicodeError};
4141

42+
mod strings;
43+
use crate::strings::JsonStringReader;
44+
4245
#[derive(Clone)]
4346
enum TokenType {
4447
Operator = 0,

src/strings.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
//! Streamable strings support.
//!
//! Adapted almost verbatim from json-stream's `strings.py` module.
4+
5+
use compact_str::CompactString;
6+
use pyo3::exceptions::{PyIOError, PyValueError};
7+
use pyo3::prelude::*;
8+
use std::borrow::BorrowMut;
9+
use std::io::BufRead;
10+
11+
const DEFAULT_BUFFER_SIZE: usize = 8192; // TODO get from Python's io module somehow?
12+
13+
/// Parser state of [`JsonStringReader`] while it walks through a JSON string.
///
/// A new reader starts in [`State::Char`], and only in that state is chunk text
/// copied to the output.
///
/// NOTE(review): the per-variant descriptions below are inferred from the
/// variant names; the transitions between states are not implemented in this
/// file yet, so confirm them against the upstream `strings.py` state machine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Ordinary string content.
    Char,
    /// Presumably: a backslash has been seen and the escape character is next.
    StringEscape,
    /// Presumably: reading the hex digits of a `\u` escape.
    Unicode,
    /// Presumably: a leading surrogate was decoded and a trailing one is expected.
    UnicodeSurrogateStart,
    /// Presumably: the backslash that starts the trailing surrogate's escape was seen.
    UnicodeSurrogateStringEscape,
    /// Presumably: reading the hex digits of the trailing surrogate.
    UnicodeSurrogate,
}
21+
22+
#[pyclass]
23+
pub struct JsonStringReader {
24+
stream: Box<dyn SuitableStream + Send>,
25+
buffer: String,
26+
readline_buffer: String,
27+
unicode_buffer: CompactString,
28+
state: State,
29+
end_of_string: bool,
30+
index: usize,
31+
}
32+
33+
#[pymethods]
34+
impl JsonStringReader {
35+
pub fn complete(slf: PyRef<'_, Self>) -> bool {
36+
Self::_complete(&slf)
37+
}
38+
39+
pub fn read(mut slf: PyRefMut<'_, Self>, size: Option<usize>) -> PyResult<String> {
40+
let mut result = String::new();
41+
let mut length = DEFAULT_BUFFER_SIZE;
42+
while !Self::_complete(&slf) && (size == None || result.is_empty()) {
43+
if let Some(_size) = size {
44+
length = _size - result.len()
45+
}
46+
// TODO performance will be trash here:
47+
result.push_str(Self::read_chunk(slf.borrow_mut(), length)?.as_str())
48+
}
49+
return Ok(result);
50+
}
51+
}
52+
53+
impl JsonStringReader {
54+
pub fn new(stream: Box<dyn BufRead + Send>, initial_buffer: String) -> JsonStringReader {
55+
JsonStringReader {
56+
stream,
57+
buffer: initial_buffer,
58+
readline_buffer: String::new(),
59+
unicode_buffer: CompactString::with_capacity(4),
60+
state: State::Char,
61+
end_of_string: false,
62+
index: 0,
63+
}
64+
}
65+
66+
pub fn _complete(slf: &Self) -> bool {
67+
slf.end_of_string && slf.readline_buffer.is_empty()
68+
}
69+
70+
pub fn read_chunk(slf: &mut Self, size: usize) -> PyResult<String> {
71+
if !slf.readline_buffer.is_empty() {
72+
let result = slf.readline_buffer[..size].to_string();
73+
slf.readline_buffer = slf.readline_buffer[size..].to_string();
74+
return Ok(result.to_string());
75+
}
76+
let chunk = if slf.buffer.len() > 0 {
77+
slf.buffer
78+
} else {
79+
let newbuf = String::with_capacity(4*size);
80+
slf.stream.read(&mut newbuf.as_bytes());
81+
newbuf
82+
};
83+
if chunk.is_empty() {
84+
return Err(PyValueError::new_err(format!("Unterminated string at end of file")));
85+
}
86+
let mut result = String::new();
87+
let start = 0;
88+
for (i, c) in chunk.chars().enumerate() {
89+
slf.index += 1;
90+
if i == size {
91+
if let State::Char = slf.state {
92+
result.push_str(&chunk[start..i]);
93+
}
94+
slf.buffer = chunk[i..].to_string();
95+
break
96+
}
97+
}
98+
Ok(String::new())
99+
}
100+
}

0 commit comments

Comments
 (0)