Skip to content

Commit 18e5ba1

Browse files
committed
Add test for load() iterable split in UTF-8 char
1 parent 163a036 commit 18e5ba1

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

tests/test_load_iterable.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
"""
2+
Test compatibility with json-stream's support for giving iterables to `load()`.
3+
"""
4+
import json_stream
5+
import pytest
6+
7+
8+
@pytest.mark.parametrize("chunk_size", [1, 2, 3, 4, 10])
def test_chunk_boundary_inside_utf8_char(chunk_size: int) -> None:
    """
    Check that `load()` reassembles characters split across input chunks.

    A JSON string document holding two non-ASCII characters is encoded as
    UTF-8 and handed to `json_stream.load()` as an iterable of byte slices,
    each at most `chunk_size` bytes long. The parsed value must equal the
    original text.

    Regression test for https://github.com/daggaz/json-stream/issues/59.
    """
    expected = "——"
    encoded = f'"{expected}"'.encode("utf-8")
    offsets = range(0, len(encoded), chunk_size)

    # Kept lazy on purpose: the parser is handed a generator, not a list.
    chunks = (encoded[start : start + chunk_size] for start in offsets)

    assert json_stream.load(chunks) == expected

0 commit comments

Comments
 (0)