Skip to content

Commit cd7ee0d

Browse files
committed
fix: fuzz serde struct and fix bugs
1 parent 314dc0b commit cd7ee0d

File tree

11 files changed

+224
-142
lines changed

11 files changed

+224
-142
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "sonic-rs"
3-
version = "0.2.4"
3+
version = "0.2.5"
44
authors = ["Volo Team <[email protected]>"]
55
edition = "2021"
66
description = "Sonic-rs is a fast Rust JSON library based on SIMD"

fuzz/Cargo.lock

Lines changed: 21 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ members = ["."]
1515
libfuzzer-sys = "0.4"
1616
sonic-rs = { path = ".." }
1717
serde_json = { version = "1.0", features = ["float_roundtrip"] }
18+
faststr = "0.2"
19+
serde = { version = "1.0", features = ["derive"] }
1820

1921
[[bin]]
2022
name = "fuzz_value"

fuzz/fuzz_targets/from_slice.rs

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,35 @@ use sonic_rs::JsonNumberTrait;
88
use sonic_rs::JsonValue;
99
use sonic_rs::{to_array_iter, to_array_iter_unchecked, to_object_iter, to_object_iter_unchecked};
1010

11+
macro_rules! test_struct {
12+
($ty:ty, $data:expr) => {
13+
match serde_json::from_slice::<$ty>($data) {
14+
Ok(jv) => {
15+
let sv = sonic_rs::from_slice::<$ty>($data).expect(&format!(
16+
"parse valid json {:?} failed for type {}",
17+
$data,
18+
stringify!($ty)
19+
));
20+
assert_eq!(sv, jv);
21+
22+
// fuzz the struct to_string
23+
let sout = sonic_rs::to_string(&sv).unwrap();
24+
let jout = serde_json::to_string(&jv).unwrap();
25+
let sv = sonic_rs::from_str::<$ty>(&sout).unwrap();
26+
let jv = serde_json::from_str::<$ty>(&jout).unwrap();
27+
assert_eq!(sv, jv);
28+
}
29+
Err(_) => {
30+
let _ = sonic_rs::from_slice::<$ty>($data).expect_err(&format!(
31+
"parse invalid json {:?} wrong for type {}",
32+
$data,
33+
stringify!($ty)
34+
));
35+
}
36+
}
37+
};
38+
}
39+
1140
fuzz_target!(|data: &[u8]| {
1241
match serde_json::from_slice::<JValue>(data) {
1342
Ok(jv) => {
@@ -66,6 +95,12 @@ fuzz_target!(|data: &[u8]| {
6695
let _ = dom_from_slice(data).unwrap_err();
6796
}
6897
}
98+
99+
test_struct!(TestStruct, data);
100+
test_struct!(Foo, data);
101+
test_struct!(Enum, data);
102+
test_struct!(String, data);
103+
test_struct!(f64, data);
69104
});
70105

71106
fn compare_lazyvalue(jv: &JValue, sv: &sonic_rs::LazyValue) {
@@ -120,3 +155,97 @@ fn compare_value(jv: &JValue, sv: &sonic_rs::Value) -> bool {
120155
}
121156
true
122157
}
158+
159+
use faststr::FastStr;
160+
use serde::{Deserialize, Serialize};
161+
use std::borrow::Cow;
162+
use std::{collections::HashMap, hash::Hash, marker::PhantomData};
163+
164+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
165+
struct Foo {
166+
name: FastStr,
167+
id: u64,
168+
}
169+
170+
#[derive(Debug, Deserialize, Serialize, Hash, Eq, PartialEq)]
171+
enum Enum {
172+
Zero = 0,
173+
One = 1,
174+
Two = 2,
175+
}
176+
177+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
178+
enum FieldEnum {
179+
Integer(i8),
180+
Tuple((FastStr, i32)),
181+
Struct(Foo),
182+
Unit,
183+
}
184+
185+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
186+
enum FieldlessEnum {
187+
Tuple(),
188+
Struct {},
189+
Unit,
190+
}
191+
192+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
193+
struct Wrapper<'a>(&'a str);
194+
195+
// A unit struct
196+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
197+
struct Unit;
198+
199+
// A uint struct
200+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
201+
struct Phan<T> {
202+
phan: String,
203+
_data: PhantomData<T>,
204+
}
205+
206+
// A tuple struct
207+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
208+
struct Pair(i32, f32);
209+
210+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
211+
struct TestStruct<'a> {
212+
fieldless: FieldlessEnum,
213+
enummap: HashMap<Enum, FieldlessEnum>,
214+
enum_: Enum,
215+
216+
// basic types
217+
boolean: bool,
218+
integer: i32,
219+
float: f64,
220+
int128: i128,
221+
uint128: u128,
222+
char_: char,
223+
224+
// string or bytes
225+
str_: &'a str,
226+
// bytes_: &'a [u8],
227+
string: String,
228+
faststr: FastStr,
229+
#[serde(borrow)]
230+
cow: Cow<'a, str>,
231+
232+
// containers
233+
vector: Vec<u32>,
234+
array: [u32; 1],
235+
empty_array: [u8; 0],
236+
map: HashMap<FastStr, f64>,
237+
map_opkey: HashMap<Option<FastStr>, f64>,
238+
239+
// enum types
240+
option: Option<String>,
241+
fieldenum: FieldEnum,
242+
243+
// tuple or struct
244+
tuple: (u64, String),
245+
tuple_struct: Pair,
246+
unit_struct: Unit,
247+
248+
#[serde(borrow)]
249+
wrapper: Wrapper<'a>,
250+
phan_struct: Phan<()>,
251+
}

src/error.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ impl Error {
8989
| ErrorCode::ExpectedArrayCommaOrEnd
9090
| ErrorCode::ExpectedArrayStart
9191
| ErrorCode::ExpectedObjectStart
92+
| ErrorCode::InvalidSurrogateUnicodeCodePoint
9293
| ErrorCode::RecursionLimitExceeded => Category::Syntax,
9394
}
9495
}
@@ -252,6 +253,9 @@ pub(crate) enum ErrorCode {
252253

253254
#[error("Unexpected visited type in JSON visitor")]
254255
UnexpectedVisitType,
256+
257+
#[error("Invalid surrogate Unicode code point")]
258+
InvalidSurrogateUnicodeCodePoint,
255259
}
256260

257261
impl Error {

src/parser.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ where
156156
fn error_index(&self) -> usize {
157157
// when parsing strings , we need record the error postion.
158158
// it must be smaller than reader.index().
159-
std::cmp::min(self.error_index, self.read.index() - 1)
159+
std::cmp::min(self.error_index, self.read.index().saturating_sub(1))
160160
}
161161

162162
/// Error caused by a byte from next_char().
@@ -711,11 +711,12 @@ where
711711
return perr!(self, EofWhileParsing);
712712
};
713713

714+
// only check surrogate here, and we will check the code pointer later when use `codepoint_to_utf8`
714715
if (0xD800..0xDC00).contains(&point1) {
715716
// parse the second utf8 code point of surrogate
716717
let point2 = if let Some(asc) = self.read.next_n(6) {
717718
if asc[0] != b'\\' || asc[1] != b'u' {
718-
return perr!(self, InvalidUnicodeCodePoint);
719+
return perr!(self, InvalidSurrogateUnicodeCodePoint);
719720
}
720721
unsafe { hex_to_u32_nocheck(&*(asc.as_ptr().add(2) as *const _ as *const [u8; 4])) }
721722
} else {
@@ -726,13 +727,13 @@ where
726727
let low_bit = point2.wrapping_sub(0xdc00);
727728
if (low_bit >> 10) != 0 {
728729
// invalid surrogate
729-
return perr!(self, InvalidUnicodeCodePoint);
730+
return perr!(self, InvalidSurrogateUnicodeCodePoint);
730731
}
731732

732733
Ok((((point1 - 0xd800) << 10) | low_bit).wrapping_add(0x10000))
733734
} else if (0xDC00..0xE000).contains(&point1) {
734735
// invalid surrogate
735-
perr!(self, InvalidUnicodeCodePoint)
736+
perr!(self, InvalidSurrogateUnicodeCodePoint)
736737
} else {
737738
Ok(point1)
738739
}
@@ -746,6 +747,9 @@ where
746747
buf.reserve(4);
747748
let ptr = buf.as_mut_ptr().add(buf.len());
748749
let cnt = codepoint_to_utf8(code, ptr);
750+
if cnt == 0 {
751+
return perr!(self, InvalidUnicodeCodePoint);
752+
}
749753
buf.set_len(buf.len() + cnt);
750754
}
751755
Some(c) if ESCAPED_TAB[c as usize] != 0 => {
@@ -825,6 +829,7 @@ where
825829
self.read.eat(1);
826830
self.parse_escaped_char(buf)?;
827831
}
832+
b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
828833
_ => {
829834
buf.push(c);
830835
self.read.eat(1);
@@ -897,6 +902,7 @@ where
897902
self.read.eat(1);
898903
return unsafe { self.parse_string_escaped(buf) };
899904
}
905+
b'\x00'..=b'\x1f' => return perr!(self, ControlCharacterWhileParsingString),
900906
_ => self.read.eat(1),
901907
}
902908
}

src/serde/mod.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,4 +347,45 @@ mod test {
347347
"Invalid UTF-8 characters in json at line 1 column 4\n\n\t\"\0\0\0��\"\n\t....^..\n"
348348
);
349349
}
350+
351+
macro_rules! test_struct {
352+
($ty:ty, $data:expr) => {
353+
match serde_json::from_slice::<$ty>($data) {
354+
Ok(jv) => {
355+
let sv = crate::from_slice::<$ty>($data).expect(&format!(
356+
"parse valid json {:?} failed for type {}",
357+
$data,
358+
stringify!($ty)
359+
));
360+
assert_eq!(sv, jv);
361+
362+
// fuzz the struct to_string
363+
let sout = crate::to_string(&sv).unwrap();
364+
let jout = serde_json::to_string(&jv).unwrap();
365+
let sv = crate::from_str::<$ty>(&sout).unwrap();
366+
let jv = serde_json::from_str::<$ty>(&jout).unwrap();
367+
assert_eq!(sv, jv);
368+
}
369+
Err(err) => {
370+
let _ = crate::from_slice::<$ty>($data).expect_err(&format!(
371+
"parse invalid json {:?} wrong for type {}, should error: {}",
372+
$data,
373+
stringify!($ty),
374+
err
375+
));
376+
}
377+
}
378+
};
379+
}
380+
381+
// the testcase is found by fuzzing tests
382+
#[test]
383+
fn test_more_structs() {
384+
// invalid json: has control chars
385+
test_struct!(String, &[34, 58, 55, 10, 0, 34, 32, 10]);
386+
test_struct!(String, &[34, b'\\', b't', 9, 34]);
387+
test_struct!(String, &[34, 92, 34, 34]);
388+
test_struct!(String, b"\"\\umap9map009\"");
389+
test_struct!(Foo, &b"[\"5XXXXXXZX:XXZX:[\",-0]"[..]);
390+
}
350391
}

src/util/num/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,10 @@ pub(crate) fn parse_number(
171171
*index += 1;
172172

173173
if *index >= data.len() || !matches!(data[*index], b'.' | b'e' | b'E') {
174+
// view -0 as float number
175+
if negative {
176+
return Ok(ParserNumber::Float(0.0));
177+
}
174178
return Ok(ParserNumber::Unsigned(0));
175179
}
176180

0 commit comments

Comments
 (0)