Skip to content

Commit 580d77b

Browse files
fix(parser,compiler): fix string serialization bugs found by fuzzing (#774)
* fuzz: add a fuzz target for string encoding/parsing * Update to use compiler serialization * Fix serializing single-line strings with leading whitespace When parsing a block string, the first line does not affect the common indent, as it would normally have no indent at all. But when serializing a string to a block string, the first line *is* important, namely for a single-line string that starts with whitespace. Assume a text like this: ``` Starts with "spaces" ``` If we skip the first line, the common indent is detected as 0, producing this output: ```graphql """ Starts with "spaces" """ ``` Now when reparsing, the common indent is detected as 4, and you get back: ``` Starts with "spaces" ``` which is wrong. * Add failing test for \\""" in string * enable debug log in fuzz test * fix(lexer): lex \\""" as \ + \""" in block strings Backslash is not special in block strings except in the exact sequence `\"""`. So if you write `\\"""`, that sequence should still be treated as the escape sequence for `"""`. This patch fixes that. Previously, we would return to the BlockStringLiteral state on the second `\`, and interpret the `"""` as the end of the string. * fix rebase mistake * changelogs * pr number
1 parent d048d53 commit 580d77b

File tree

11 files changed

+163
-12
lines changed

11 files changed

+163
-12
lines changed

crates/apollo-compiler/CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,13 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
2222
## Features
2323
- **Add `NodeStr::from(Name)` - [goto-bus-stop], [pull/773]**
2424

25+
## Fixes
26+
- **Fix serializing single-line strings with leading whitespace - [goto-bus-stop], [pull/774]**
27+
Previously, the leading whitespace would get lost.
28+
2529
[goto-bus-stop]: https://github.com/goto-bus-stop]
2630
[pull/773]: https://github.com/apollographql/apollo-rs/pull/773
31+
[pull/774]: https://github.com/apollographql/apollo-rs/pull/774
2732

2833
# [1.0.0-beta.10](https://crates.io/crates/apollo-compiler/1.0.0-beta.10) - 2023-12-04
2934

crates/apollo-compiler/src/ast/serialize.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -918,10 +918,7 @@ fn can_be_block_string(value: &str) -> bool {
918918
}
919919

920920
let common_indent = {
921-
let mut lines = value.split('\n');
922-
// Skip the first line, the one following """
923-
lines.next();
924-
921+
let lines = value.split('\n');
925922
let each_line_indent_utf8_len = lines.filter_map(|line| {
926923
let after_indent = trim_start_graphql_whitespace(line);
927924
if !after_indent.is_empty() {

crates/apollo-compiler/test_data/ok/0039_string_literals.graphql

+12
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,16 @@ type Query {
4848

4949
"Trailing quote\""
5050
f15: Int
51+
52+
" Leading whitespace on a single line to preserve"
53+
f16: Int
54+
55+
" Leading whitespace in multi-line string to preserve\nNo leading whitespace on second line"
56+
f17: Int
57+
58+
"\n Leading empty line + indent to preserve"
59+
f18: Int
60+
61+
"When serialized as a block string, \\\"\"\" outputs \\ in front of the escaped triple quote"
62+
f19: Int
5163
}

crates/apollo-compiler/test_data/ok/0039_string_literals.txt

+59-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Schema {
66
},
77
38: SourceFile {
88
path: "0039_string_literals.graphql",
9-
source_text: "type Query {\n \"LF: a\\nb\"\n f1: Int\n\n \"CRLF: a\\r\\nb\"\n f2: Int\n\n \"\"\"\n a\n\n b\n \n\n \"\"\"\n f3: Int\n\n \"a \\\"b\\\" c\"\n f4: Int\n\n \"\"\"a \\\"\"\"b\\\"\"\" c\"\"\"\n f5: Int\n\n \"\"\"\n regex: \\d+\n \"\"\"\n f6: Int\n\n \"\\nLeading empty line to preserve\"\n f7: Int\n\n \" \\nLeading whitespace-only line to preserve\"\n f8: Int\n\n \"Trailing empty line to preserve\\n\"\n f9: Int\n\n \"Trailing whitespace-only line to preserve\\n\\t\"\n f10: Int\n\n f11(arg: String = \"a\\nb\"): Int\n\n f12(arg: String = \"a \\\"b\\\" c\"): Int\n\n f13(arg: String = \"regex: \\\\d+\"): Int\n\n \"Trailing backslash \\\\\"\n f14: Int\n\n \"Trailing quote\\\"\"\n f15: Int\n}\n",
9+
source_text: "type Query {\n \"LF: a\\nb\"\n f1: Int\n\n \"CRLF: a\\r\\nb\"\n f2: Int\n\n \"\"\"\n a\n\n b\n \n\n \"\"\"\n f3: Int\n\n \"a \\\"b\\\" c\"\n f4: Int\n\n \"\"\"a \\\"\"\"b\\\"\"\" c\"\"\"\n f5: Int\n\n \"\"\"\n regex: \\d+\n \"\"\"\n f6: Int\n\n \"\\nLeading empty line to preserve\"\n f7: Int\n\n \" \\nLeading whitespace-only line to preserve\"\n f8: Int\n\n \"Trailing empty line to preserve\\n\"\n f9: Int\n\n \"Trailing whitespace-only line to preserve\\n\\t\"\n f10: Int\n\n f11(arg: String = \"a\\nb\"): Int\n\n f12(arg: String = \"a \\\"b\\\" c\"): Int\n\n f13(arg: String = \"regex: \\\\d+\"): Int\n\n \"Trailing backslash \\\\\"\n f14: Int\n\n \"Trailing quote\\\"\"\n f15: Int\n\n \" Leading whitespace on a single line to preserve\"\n f16: Int\n\n \" Leading whitespace in multi-line string to preserve\\nNo leading whitespace on second line\"\n f17: Int\n\n \"\\n Leading empty line + indent to preserve\"\n f18: Int\n\n \"When serialized as a block string, \\\\\\\"\\\"\\\" outputs \\\\ in front of the escaped triple quote\"\n f19: Int\n}\n",
1010
},
1111
},
1212
schema_definition: SchemaDefinition {
@@ -42,7 +42,7 @@ Schema {
4242
"Boolean": built_in_type!("Boolean"),
4343
"ID": built_in_type!("ID"),
4444
"Query": Object(
45-
0..602 @38 ObjectType {
45+
0..947 @38 ObjectType {
4646
description: None,
4747
name: "Query",
4848
implements_interfaces: {},
@@ -294,6 +294,62 @@ Schema {
294294
directives: [],
295295
},
296296
},
297+
"f16": Component {
298+
origin: Definition,
299+
node: 604..667 @38 FieldDefinition {
300+
description: Some(
301+
" Leading whitespace on a single line to preserve",
302+
),
303+
name: "f16",
304+
arguments: [],
305+
ty: Named(
306+
"Int",
307+
),
308+
directives: [],
309+
},
310+
},
311+
"f17": Component {
312+
origin: Definition,
313+
node: 671..776 @38 FieldDefinition {
314+
description: Some(
315+
" Leading whitespace in multi-line string to preserve\nNo leading whitespace on second line",
316+
),
317+
name: "f17",
318+
arguments: [],
319+
ty: Named(
320+
"Int",
321+
),
322+
directives: [],
323+
},
324+
},
325+
"f18": Component {
326+
origin: Definition,
327+
node: 780..837 @38 FieldDefinition {
328+
description: Some(
329+
"\n Leading empty line + indent to preserve",
330+
),
331+
name: "f18",
332+
arguments: [],
333+
ty: Named(
334+
"Int",
335+
),
336+
directives: [],
337+
},
338+
},
339+
"f19": Component {
340+
origin: Definition,
341+
node: 841..945 @38 FieldDefinition {
342+
description: Some(
343+
"When serialized as a block string, \\\"\"\" outputs \\ in front of the escaped triple quote",
344+
),
345+
name: "f19",
346+
arguments: [],
347+
ty: Named(
348+
"Int",
349+
),
350+
directives: [],
351+
},
352+
},
297353
},
298354
},
299355
),
@@ -307,7 +363,7 @@ ExecutableDocument {
307363
},
308364
38: SourceFile {
309365
path: "0039_string_literals.graphql",
310-
source_text: "type Query {\n \"LF: a\\nb\"\n f1: Int\n\n \"CRLF: a\\r\\nb\"\n f2: Int\n\n \"\"\"\n a\n\n b\n \n\n \"\"\"\n f3: Int\n\n \"a \\\"b\\\" c\"\n f4: Int\n\n \"\"\"a \\\"\"\"b\\\"\"\" c\"\"\"\n f5: Int\n\n \"\"\"\n regex: \\d+\n \"\"\"\n f6: Int\n\n \"\\nLeading empty line to preserve\"\n f7: Int\n\n \" \\nLeading whitespace-only line to preserve\"\n f8: Int\n\n \"Trailing empty line to preserve\\n\"\n f9: Int\n\n \"Trailing whitespace-only line to preserve\\n\\t\"\n f10: Int\n\n f11(arg: String = \"a\\nb\"): Int\n\n f12(arg: String = \"a \\\"b\\\" c\"): Int\n\n f13(arg: String = \"regex: \\\\d+\"): Int\n\n \"Trailing backslash \\\\\"\n f14: Int\n\n \"Trailing quote\\\"\"\n f15: Int\n}\n",
366+
source_text: "type Query {\n \"LF: a\\nb\"\n f1: Int\n\n \"CRLF: a\\r\\nb\"\n f2: Int\n\n \"\"\"\n a\n\n b\n \n\n \"\"\"\n f3: Int\n\n \"a \\\"b\\\" c\"\n f4: Int\n\n \"\"\"a \\\"\"\"b\\\"\"\" c\"\"\"\n f5: Int\n\n \"\"\"\n regex: \\d+\n \"\"\"\n f6: Int\n\n \"\\nLeading empty line to preserve\"\n f7: Int\n\n \" \\nLeading whitespace-only line to preserve\"\n f8: Int\n\n \"Trailing empty line to preserve\\n\"\n f9: Int\n\n \"Trailing whitespace-only line to preserve\\n\\t\"\n f10: Int\n\n f11(arg: String = \"a\\nb\"): Int\n\n f12(arg: String = \"a \\\"b\\\" c\"): Int\n\n f13(arg: String = \"regex: \\\\d+\"): Int\n\n \"Trailing backslash \\\\\"\n f14: Int\n\n \"Trailing quote\\\"\"\n f15: Int\n\n \" Leading whitespace on a single line to preserve\"\n f16: Int\n\n \" Leading whitespace in multi-line string to preserve\\nNo leading whitespace on second line\"\n f17: Int\n\n \"\\n Leading empty line + indent to preserve\"\n f18: Int\n\n \"When serialized as a block string, \\\\\\\"\\\"\\\" outputs \\\\ in front of the escaped triple quote\"\n f19: Int\n}\n",
311367
},
312368
},
313369
anonymous_operation: None,

crates/apollo-compiler/test_data/serializer/ok/0039_string_literals.graphql

+13
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,17 @@ type Query {
3737
Trailing quote"
3838
"""
3939
f15: Int
40+
" Leading whitespace on a single line to preserve"
41+
f16: Int
42+
"""
43+
Leading whitespace in multi-line string to preserve
44+
No leading whitespace on second line
45+
"""
46+
f17: Int
47+
"\n Leading empty line + indent to preserve"
48+
f18: Int
49+
"""
50+
When serialized as a block string, \\""" outputs \ in front of the escaped triple quote
51+
"""
52+
f19: Int
4053
}

crates/apollo-parser/CHANGELOG.md

+11
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,17 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
1616
## Maintenance
1717
1818
## Documentation -->
19+
20+
# [x.x.x] (unreleased) - 2023-xx-xx
21+
22+
## Fixes
23+
- **fix parsing `\\"""` in block string - [goto-bus-stop], [pull/774]**
24+
Previously this was parsed as `\` followed by the end of the string,
25+
now it's correctly parsed as `\` followed by an escaped `"""`.
26+
27+
[goto-bus-stop]: https://github.com/goto-bus-stop
28+
[pull/774]: https://github.com/apollographql/apollo-rs/pull/774
29+
1930
# [0.7.4](https://crates.io/crates/apollo-parser/0.7.4) - 2023-11-17
2031

2132
## Features

crates/apollo-parser/src/lexer/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,11 @@ impl<'a> Cursor<'a> {
375375

376376
state = State::BlockStringLiteral;
377377
}
378+
'\\' => {
379+
// We need to stay in the backslash state:
380+
// it's legal to write \\\""" with two literal backslashes
381+
// and then the escape sequence.
382+
}
378383
_ => {
379384
state = State::BlockStringLiteral;
380385
}

crates/apollo-parser/test_data/lexer/ok/0008_block_string.graphql

+6-1
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,9 @@ input Filter {
1111
"""
1212
input Filter {
1313
title: String
14-
}
14+
}
15+
16+
"""
17+
\\"""
18+
"""
19+
scalar LiteralBackslashThenEscape

crates/apollo-parser/test_data/lexer/ok/0008_block_string.txt

+8-1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,11 @@ WHITESPACE@162:163 " "
2929
NAME@163:169 "String"
3030
WHITESPACE@169:170 "\n"
3131
R_CURLY@170:171 "}"
32-
EOF@171:171
32+
WHITESPACE@171:173 "\n\n"
33+
STRING_VALUE@173:186 "\"\"\"\n\\\\\"\"\"\n\"\"\""
34+
WHITESPACE@186:187 "\n"
35+
NAME@187:193 "scalar"
36+
WHITESPACE@193:194 " "
37+
NAME@194:220 "LiteralBackslashThenEscape"
38+
WHITESPACE@220:221 "\n"
39+
EOF@221:221

fuzz/Cargo.toml

+7-3
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,12 @@ cargo-fuzz = true
1717
[dependencies]
1818
libfuzzer-sys = "0.4"
1919
apollo-compiler = { path = "../crates/apollo-compiler" }
20+
apollo-parser = { path = "../crates/apollo-parser" }
2021
apollo-smith = { path = "../crates/apollo-smith" }
2122
env_logger = "0.10.0"
2223
log = "0.4.14"
2324
similar-asserts = "1.5.0"
2425

25-
[dependencies.apollo-parser]
26-
path = "../crates/apollo-parser"
27-
2826
[[bin]]
2927
name = "parser"
3028
path = "fuzz_targets/parser.rs"
@@ -42,3 +40,9 @@ name = "reparse"
4240
path = "fuzz_targets/reparse.rs"
4341
test = false
4442
doc = false
43+
44+
[[bin]]
45+
name = "strings"
46+
path = "fuzz_targets/strings.rs"
47+
test = false
48+
doc = false

fuzz/fuzz_targets/strings.rs

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#![no_main]
2+
use apollo_compiler::{name, Schema};
3+
use libfuzzer_sys::{
4+
arbitrary::{Arbitrary, Unstructured},
5+
fuzz_target,
6+
};
7+
use log::debug;
8+
9+
fuzz_target!(|data: &[u8]| {
10+
let _ = env_logger::try_init();
11+
12+
let mut u = Unstructured::new(data);
13+
let string = String::arbitrary(&mut u).unwrap();
14+
let mut input = Schema::new();
15+
let def = input.schema_definition.make_mut();
16+
def.description = Some(string.into());
17+
// We can refer to a type that doesn't exist as we won't run validation
18+
def.query = Some(name!("Dangling").into());
19+
let doc_generated = input.to_string();
20+
21+
debug!("INPUT STRING: {:?}", input.schema_definition.description);
22+
debug!("==== WHOLE DOCUMENT ====");
23+
debug!("{doc_generated}");
24+
debug!("========================");
25+
26+
let reparse = Schema::parse(doc_generated, "").unwrap();
27+
debug!(
28+
"REPARSED STRING: {:?}",
29+
reparse.schema_definition.description
30+
);
31+
32+
assert_eq!(
33+
reparse.schema_definition.description,
34+
input.schema_definition.description
35+
);
36+
});

0 commit comments

Comments
 (0)