Skip to content

Commit 8c344bc

Browse files
abesto authored and facebook-github-bot committed
Explode loudly on missing Markdown code block language specifier
Summary:

# What

Fix this foot-gun:

    # Test something important

    ```
    $ echo foo
    foo
    ```

This is *not a test* because the `scrut` language specifier is missing. That is, it should be:

    # Test something important

    ```scrut
    $ echo foo
    foo
    ```

To fix this: `scrut` will now explode when any code block in a Markdown test file is missing a language specifier. `plaintext` and `not-scrut-leave-me-alone` are perfectly valid language specifiers!

# How

* Updated markdown parsing to recognise code blocks without a language specifier as a markdown block - until now, they were treated as plain text lines.
* Updated markdown parsing and generation with a `MarkdownToken::VerbatimCodeBlock` token that represents any Markdown code block that's *not* a Scrut test. Renamed `MarkdownToken::CodeBlock` to `MarkdownToken::TestCodeBlock` for clarity and symmetry.
* Made the parser explode when a `VerbatimCodeBlock` has no language specified.
* Updated the Markdown generator to correctly handle `VerbatimCodeBlock`.

# Alternatives Considered

* Implementing a linter would be a similar amount of work, and require additional integration steps in OSS.
* We could make this feature opt-in (or opt-out) with a flag for easier migration; however, `scrut` in OSS has little adoption (and no stable release), and so we don't expect this to be an issue.

Reviewed By: AndreasBackx

Differential Revision: D64597015

fbshipit-source-id: 3f3c7b2a36ee429e64f74d5d1a896cd6f5518c89
1 parent f905070 commit 8c344bc

File tree

5 files changed

+127
-9
lines changed

5 files changed

+127
-9
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Example EMPTY code block without a language specified
2+
3+
```
4+
```
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Error on code block with no language specified
2+
3+
```scrut
4+
$ $SCRUT_BIN test --match-markdown "*.mdtest" "$TESTDIR"/missing-language.mdtest 2>&1
5+
* parse test from "*missing-language.mdtest" with markdown parser (glob)
6+
7+
Caused by:
8+
Code block starting at line 2 is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block.
9+
* (glob?)
10+
[1]
11+
```
12+
13+
# Error on EMPTY code block with no language specified
14+
15+
```scrut
16+
$ $SCRUT_BIN test --match-markdown "*.mdtest" "$TESTDIR"/missing-language-empty-block.mdtest 2>&1
17+
* parse test from "*missing-language-empty-block.mdtest" with markdown parser (glob)
18+
19+
Caused by:
20+
Code block starting at line 2 is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block.
21+
* (glob?)
22+
[1]
23+
```
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Example code block without a language specified
2+
3+
```
4+
$ echo foo
5+
foo
6+
```

src/generators/markdown.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,16 @@ impl UpdateGenerator for MarkdownUpdateGenerator {
6464
updated.push_str(&config);
6565
updated.push_str("\n---\n");
6666
}
67-
MarkdownToken::CodeBlock {
67+
MarkdownToken::VerbatimCodeBlock {
68+
starting_line_number: _,
69+
language: _,
70+
lines,
71+
} => {
72+
for line in lines {
73+
updated.push_str(&line.assure_newline());
74+
}
75+
}
76+
MarkdownToken::TestCodeBlock {
6877
language,
6978
config_lines,
7079
comment_lines,

src/parsers/markdown.rs

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,19 @@ impl Parser for MarkdownParser {
9696
title_paragraph.clear();
9797
}
9898
}
99-
MarkdownToken::CodeBlock {
99+
MarkdownToken::VerbatimCodeBlock {
100+
starting_line_number,
101+
language,
102+
lines: _,
103+
} => {
104+
if language.is_empty() {
105+
anyhow::bail!(
106+
"Code block starting at line {} is missing language specifier. Use ```scrut to make this block a Scrut test, or any other language to make Scrut skip this block.",
107+
starting_line_number
108+
);
109+
}
110+
}
111+
MarkdownToken::TestCodeBlock {
100112
language: _,
101113
config_lines,
102114
comment_lines: _,
@@ -140,14 +152,14 @@ pub(crate) enum MarkdownToken {
140152
/// Raw configuration that is prepending the document
141153
DocumentConfig(Vec<(usize, String)>),
142154

143-
/// The parsed contents of a code block within backticks:
155+
/// The parsed contents of a code block within backticks, representing a Scrut test:
144156
///
145157
/// ```scrut { ... config ..}
146158
/// # comment
147159
/// $ shell expression
148160
/// output expectations
149161
/// ```
150-
CodeBlock {
162+
TestCodeBlock {
151163
/// The used language token of the test (i.e. `scrut`)
152164
language: String,
153165

@@ -160,6 +172,18 @@ pub(crate) enum MarkdownToken {
160172
/// The code that makes up the test (shell expression & output expectations)
161173
code_lines: Vec<(usize, String)>,
162174
},
175+
176+
/// A code block that is not a test
177+
VerbatimCodeBlock {
178+
/// Index of the line containing opening backticks
179+
starting_line_number: usize,
180+
181+
/// Language specifier (e.g. `scrut`), possibly an empty string
182+
language: String,
183+
184+
/// All the lines of the code block, including opening and closing backtick lines
185+
lines: Vec<String>,
186+
},
163187
}
164188

165189
/// An iterator that parses Markdown documents in lines and code-blocks
@@ -202,11 +226,34 @@ impl<'a> Iterator for MarkdownIterator<'a> {
202226
}
203227
Some(MarkdownToken::DocumentConfig(config_content))
204228

205-
// found the start of a code block (=testcase)?
229+
// found the start of a code block (possibly a testcase)?
206230
} else if let Some((backticks, language, config)) = extract_code_block_start(line) {
207231
self.content_start = true;
208-
if language.is_empty() || !self.languages.contains(&language) {
209-
return Some(MarkdownToken::Line(self.line_index - 1, line.into()));
232+
233+
// report verbatim code block if this is not a test block
234+
if !self.languages.contains(&language) {
235+
// Record the opening line (i.e. the opening backticks)
236+
let starting_line_number = self.line_index - 1;
237+
let mut lines = vec![line.to_string()];
238+
let mut line = self.document_lines.next()?;
239+
self.line_index += 1;
240+
241+
// Record all lines until the closing backticks
242+
while !line.starts_with(backticks) {
243+
lines.push(line.to_string());
244+
line = self.document_lines.next()?;
245+
self.line_index += 1;
246+
}
247+
248+
// Record the closing backticks
249+
lines.push(line.to_string());
250+
251+
// Return the verbatim code block
252+
return Some(MarkdownToken::VerbatimCodeBlock {
253+
starting_line_number,
254+
language: language.into(),
255+
lines,
256+
});
210257
}
211258

212259
// gather optional per-test config
@@ -220,7 +267,6 @@ impl<'a> Iterator for MarkdownIterator<'a> {
220267
vec![]
221268
};
222269

223-
// gather optional comments
224270
let mut line = self.document_lines.next()?;
225271
self.line_index += 1;
226272
let mut comment_lines = vec![];
@@ -238,7 +284,7 @@ impl<'a> Iterator for MarkdownIterator<'a> {
238284
self.line_index += 1;
239285
}
240286

241-
Some(MarkdownToken::CodeBlock {
287+
Some(MarkdownToken::TestCodeBlock {
242288
language: language.into(),
243289
config_lines,
244290
comment_lines,
@@ -294,6 +340,10 @@ pub(crate) fn extract_title(line: &str) -> Option<(String, String)> {
294340
/// On the first line ending in foo, this function returns the backticks and
295341
/// the language. On all other lines it returns None.
296342
pub(crate) fn extract_code_block_start(line: &str) -> Option<(&str, &str, &str)> {
343+
if line == "```" {
344+
return Some((line, "", ""));
345+
}
346+
297347
let mut language_start = None;
298348
for (index, ch) in line.chars().enumerate() {
299349
if let Some(language_start) = language_start {
@@ -339,6 +389,7 @@ mod tests {
339389
use crate::config::TestCaseConfig;
340390
use crate::config::TestCaseWait;
341391
use crate::expectation::tests::expectation_maker;
392+
use crate::parsers::markdown::extract_code_block_start;
342393
use crate::parsers::markdown::DEFAULT_MARKDOWN_LANGUAGES;
343394
use crate::parsers::parser::Parser;
344395
use crate::test_expectation;
@@ -761,4 +812,29 @@ world
761812
testcases
762813
);
763814
}
815+
816+
#[test]
817+
fn test_extract_code_block_start() {
818+
assert_eq!(
819+
Some(("```", "scrut", "")),
820+
extract_code_block_start("```scrut")
821+
);
822+
assert_eq!(
823+
Some(("```", "bash", "")),
824+
extract_code_block_start("```bash")
825+
);
826+
}
827+
828+
#[test]
829+
fn test_extract_code_block_start_with_config() {
830+
assert_eq!(
831+
Some(("```", "scrut", "{timeout: 3m 3s, wait: 4m 4s}")),
832+
extract_code_block_start("```scrut {timeout: 3m 3s, wait: 4m 4s}")
833+
);
834+
}
835+
836+
#[test]
837+
fn test_extract_code_block_start_without_language() {
838+
assert_eq!(Some(("```", "", "")), extract_code_block_start("```"));
839+
}
764840
}

0 commit comments

Comments
 (0)