Skip to content

Commit 247c487

Browse files
committed
more error recovery
1 parent b347689 commit 247c487

File tree

4 files changed

+179
-144
lines changed

4 files changed

+179
-144
lines changed

baml_language/crates/baml_compiler_parser/src/parser.rs

Lines changed: 173 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ pub(crate) struct Parser<'a> {
165165
tokens: &'a [Token],
166166
current: usize,
167167
events: Vec<Event>,
168+
/// Track pending '>' tokens from split '>>' (for nested generics like `map<K, map<K2, V>>`).
169+
pending_greaters: u8,
168170
}
169171

170172
impl<'a> Parser<'a> {
@@ -173,6 +175,7 @@ impl<'a> Parser<'a> {
173175
tokens,
174176
current: 0,
175177
events: Vec::new(),
178+
pending_greaters: 0,
176179
}
177180
}
178181

@@ -575,6 +578,84 @@ impl<'a> Parser<'a> {
575578
)
576579
}
577580

581+
/// Expect a '>' token, but also accept '>>' and consume only one '>'.
582+
/// This handles nested generics like `map<K, map<K2, V>>` where the lexer
583+
/// tokenizes '>>' as a single token.
584+
///
585+
/// Returns true if a '>' was consumed (either standalone or as part of '>>').
586+
fn expect_greater(&mut self) -> bool {
587+
// First check if we have a pending '>' from a previous '>>' split
588+
if self.pending_greaters > 0 {
589+
self.pending_greaters -= 1;
590+
// Emit a synthetic '>' token for the syntax tree
591+
self.events.push(Event::Token {
592+
kind: SyntaxKind::GREATER,
593+
text: ">".to_string(),
594+
});
595+
return true;
596+
}
597+
598+
if self.at(TokenKind::Greater) {
599+
self.bump();
600+
true
601+
} else if self.at(TokenKind::GreaterGreater) {
602+
// Split '>>' into two '>':
603+
// - Emit one '>' token now for the current/inner generic
604+
// - Leave the second '>' pending for the outer generic
605+
self.events.push(Event::Token {
606+
kind: SyntaxKind::GREATER,
607+
text: ">".to_string(),
608+
});
609+
self.pending_greaters += 1;
610+
// Consume the '>>' token from the input
611+
self.bump();
612+
true
613+
} else {
614+
self.error_unexpected_token("'>'".to_string());
615+
false
616+
}
617+
}
618+
619+
/// Skip tokens until we find a balanced closing parenthesis.
620+
/// Used for error recovery in tuple/parenthesized type expressions.
621+
fn skip_to_balanced_paren(&mut self) {
622+
let mut paren_depth = 1;
623+
let mut bracket_depth = 0;
624+
while !self.at_end() && paren_depth > 0 {
625+
match self.current().map(|t| t.kind) {
626+
Some(TokenKind::LParen) => {
627+
paren_depth += 1;
628+
self.bump();
629+
}
630+
Some(TokenKind::RParen) => {
631+
paren_depth -= 1;
632+
if paren_depth > 0 {
633+
self.bump();
634+
}
635+
// Don't bump the final ')' - let the caller consume it
636+
}
637+
Some(TokenKind::LBracket) => {
638+
bracket_depth += 1;
639+
self.bump();
640+
}
641+
Some(TokenKind::RBracket) => {
642+
if bracket_depth > 0 {
643+
bracket_depth -= 1;
644+
self.bump();
645+
} else {
646+
// Unbalanced ] - stop here
647+
break;
648+
}
649+
}
650+
Some(TokenKind::RBrace) => {
651+
// Hit a closing brace - likely at a higher level, stop here
652+
break;
653+
}
654+
_ => self.bump(),
655+
}
656+
}
657+
}
658+
578659
/// Try to recover from an invalid top-level block like `classs Foo { ... }`.
579660
///
580661
/// Recognizes the pattern: identifier identifier { ... } (where the first identifier
@@ -635,6 +716,64 @@ impl<'a> Parser<'a> {
635716
true
636717
}
637718

719+
/// Try to recover from an invalid type alias declaration like "typpe Name = expr".
720+
/// Returns true if recovery was performed.
721+
fn try_recover_invalid_type_alias(&mut self) -> bool {
722+
// Check pattern: Word Word Equals
723+
let is_word = self.at(TokenKind::Word);
724+
let next_is_word = self.peek(1).map(|t| t.kind == TokenKind::Word).unwrap_or(false);
725+
let then_equals = self.peek(2).map(|t| t.kind == TokenKind::Equals).unwrap_or(false);
726+
727+
if !is_word || !next_is_word || !then_equals {
728+
return false;
729+
}
730+
731+
// Get the invalid keyword text for the error message
732+
let invalid_keyword = self.current().map(|t| t.text.clone()).unwrap_or_default();
733+
let span = self.current().map(|t| t.span).unwrap_or_default();
734+
735+
// Emit a helpful error message
736+
self.error(
737+
format!(
738+
"Unknown keyword '{}'. Did you mean 'type'? Usage: type Name = expression",
739+
invalid_keyword
740+
),
741+
span,
742+
);
743+
744+
// Wrap the invalid type alias in an ERROR node
745+
self.start_node(SyntaxKind::ERROR);
746+
747+
// Skip the invalid keyword, name, and = sign
748+
self.bump(); // invalid keyword (e.g., "typpe")
749+
self.bump(); // name (e.g., "Two")
750+
self.bump(); // =
751+
752+
// Skip to end of line (type alias expressions are typically one line)
753+
while !self.at_end()
754+
&& !self.at(TokenKind::Newline)
755+
&& !self.at(TokenKind::LBrace)
756+
&& !self.at(TokenKind::RBrace)
757+
{
758+
// Stop at keywords that would start a new declaration
759+
if matches!(
760+
self.current().map(|t| t.kind),
761+
Some(TokenKind::Class)
762+
| Some(TokenKind::Enum)
763+
| Some(TokenKind::Function)
764+
| Some(TokenKind::Client)
765+
| Some(TokenKind::Generator)
766+
| Some(TokenKind::Test)
767+
) {
768+
break;
769+
}
770+
self.bump();
771+
}
772+
773+
self.finish_node();
774+
true
775+
}
776+
638777
// ============ Consumption ============
639778

640779
/// Consume current token, including all trivia before it (whitespace, newlines, comments).
@@ -1216,7 +1355,7 @@ impl<'a> Parser<'a> {
12161355
p.parse_type();
12171356
}
12181357

1219-
p.expect(TokenKind::Greater);
1358+
p.expect_greater();
12201359
});
12211360
}
12221361
} else if self.at(TokenKind::LParen) {
@@ -1226,6 +1365,18 @@ impl<'a> Parser<'a> {
12261365
while self.eat(TokenKind::Comma) {
12271366
self.parse_type();
12281367
}
1368+
// Error recovery: if we're not at ')' yet, skip tokens until we find ')' or reach a recovery point
1369+
if !self.at(TokenKind::RParen) {
1370+
if let Some(token) = self.current() {
1371+
let message = if token.kind == TokenKind::Dot {
1372+
"Path identifiers (e.g., 'a.b') are not supported in type expressions".to_string()
1373+
} else {
1374+
format!("Unexpected '{}' in type expression", token.text)
1375+
};
1376+
self.error(message, token.span);
1377+
}
1378+
self.skip_to_balanced_paren();
1379+
}
12291380
self.expect(TokenKind::RParen);
12301381
} else {
12311382
self.error_unexpected_token("type".to_string());
@@ -1392,6 +1543,22 @@ impl<'a> Parser<'a> {
13921543
}
13931544
}
13941545

1546+
// Check for old-style function syntax: `function Name {` (without parens and return type)
1547+
// If we see '{' directly after the name, emit a single helpful error and skip to body
1548+
if p.at(TokenKind::LBrace) {
1549+
let span = p.current().map(|t| t.span).unwrap_or_default();
1550+
p.error(
1551+
"Old-style function syntax. Use: function Name(params...) -> ReturnType { ... }".to_string(),
1552+
span,
1553+
);
1554+
// Create empty parameter list node for AST consistency
1555+
p.start_node(SyntaxKind::PARAMETER_LIST);
1556+
p.finish_node();
1557+
// Parse the body
1558+
p.parse_function_body();
1559+
return;
1560+
}
1561+
13951562
// Parameters
13961563
p.parse_parameter_list();
13971564

@@ -2502,7 +2669,7 @@ impl<'a> Parser<'a> {
25022669
}
25032670
}
25042671

2505-
p.expect(TokenKind::Greater);
2672+
p.expect_greater();
25062673
});
25072674
}
25082675

@@ -2841,7 +3008,7 @@ impl<'a> Parser<'a> {
28413008
if p.at(TokenKind::Word) {
28423009
p.bump(); // type name
28433010
}
2844-
p.expect(TokenKind::Greater); // >
3011+
p.expect_greater(); // >
28453012
});
28463013
}
28473014

@@ -3297,6 +3464,9 @@ fn parse_impl(tokens: &[Token], cache: Option<&mut NodeCache>) -> (GreenNode, Ve
32973464
} else if parser.try_recover_invalid_block() {
32983465
// Successfully recovered from invalid block like "classs Foo { ... }"
32993466
// Continue parsing
3467+
} else if parser.try_recover_invalid_type_alias() {
3468+
// Successfully recovered from invalid type alias like "typpe Foo = int"
3469+
// Continue parsing
33003470
} else {
33013471
parser.error_unexpected_token("top-level declaration".to_string());
33023472
parser.bump(); // Skip unknown token

baml_language/crates/baml_ide_tests/test_files/syntax/class/invalid_type_aliases.baml

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -57,39 +57,12 @@ type Four = int | string | b
5757

5858
//----
5959
//- diagnostics
60-
// Error: Expected top-level declaration, found identifier
60+
// Error: Unknown keyword 'typpe'. Did you mean 'type'? Usage: type Name = expression
6161
// ╭─[ class_invalid_type_aliases.baml:9:1 ]
6262
// │
6363
// 9 │ typpe Two = float
6464
// │ ──┬──
65-
// │ ╰──── Expected top-level declaration, found identifier
66-
// │
67-
// │ Note: Error code: E0010
68-
// ───╯
69-
// Error: Expected top-level declaration, found identifier
70-
// ╭─[ class_invalid_type_aliases.baml:9:7 ]
71-
// │
72-
// 9 │ typpe Two = float
73-
// │ ─┬─
74-
// │ ╰─── Expected top-level declaration, found identifier
75-
// │
76-
// │ Note: Error code: E0010
77-
// ───╯
78-
// Error: Expected top-level declaration, found '='
79-
// ╭─[ class_invalid_type_aliases.baml:9:11 ]
80-
// │
81-
// 9 │ typpe Two = float
82-
// │ ┬
83-
// │ ╰── Expected top-level declaration, found '='
84-
// │
85-
// │ Note: Error code: E0010
86-
// ───╯
87-
// Error: Expected top-level declaration, found identifier
88-
// ╭─[ class_invalid_type_aliases.baml:9:13 ]
89-
// │
90-
// 9 │ typpe Two = float
91-
// │ ──┬──
92-
// │ ╰──── Expected top-level declaration, found identifier
65+
// │ ╰──── Unknown keyword 'typpe'. Did you mean 'type'? Usage: type Name = expression
9366
// │
9467
// │ Note: Error code: E0010
9568
// ───╯

baml_language/crates/baml_ide_tests/test_files/syntax/class/secure_types.baml

Lines changed: 2 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -28,84 +28,12 @@ class ComplexTypes {
2828

2929
//----
3030
//- diagnostics
31-
// Error: Expected ')', found '.'
31+
// Error: Path identifiers (e.g., 'a.b') are not supported in type expressions
3232
// ╭─[ class_secure_types.baml:4:65 ]
3333
// │
3434
// 4 │ b (int, map<bool, string?>, (char | float)[][] | long_word_123.foobar[])
3535
// │ ┬
36-
// │ ╰── Expected ')', found '.'
36+
// │ ╰── Path identifiers (e.g., 'a.b') are not supported in type expressions
3737
// │
3838
// │ Note: Error code: E0010
3939
// ───╯
40-
// Error: Expected Unexpected token in class body, found '.'
41-
// ╭─[ class_secure_types.baml:4:65 ]
42-
// │
43-
// 4 │ b (int, map<bool, string?>, (char | float)[][] | long_word_123.foobar[])
44-
// │ ┬
45-
// │ ╰── Expected Unexpected token in class body, found '.'
46-
// │
47-
// │ Note: Error code: E0010
48-
// ───╯
49-
// Error: field 'foobar' is missing a type annotation
50-
// ╭─[ class_secure_types.baml:4:66 ]
51-
// │
52-
// 4 │ b (int, map<bool, string?>, (char | float)[][] | long_word_123.foobar[])
53-
// │ ───┬──
54-
// │ ╰──── field 'foobar' is missing a type annotation
55-
// │
56-
// │ Note: Error code: E0010
57-
// ───╯
58-
// Error: Expected Unexpected token in class body, found '['
59-
// ╭─[ class_secure_types.baml:4:72 ]
60-
// │
61-
// 4 │ b (int, map<bool, string?>, (char | float)[][] | long_word_123.foobar[])
62-
// │ ┬
63-
// │ ╰── Expected Unexpected token in class body, found '['
64-
// │
65-
// │ Note: Error code: E0010
66-
// ───╯
67-
// Error: Expected Unexpected token in class body, found ']'
68-
// ╭─[ class_secure_types.baml:4:73 ]
69-
// │
70-
// 4 │ b (int, map<bool, string?>, (char | float)[][] | long_word_123.foobar[])
71-
// │ ┬
72-
// │ ╰── Expected Unexpected token in class body, found ']'
73-
// │
74-
// │ Note: Error code: E0010
75-
// ───╯
76-
// Error: Expected Unexpected token in class body, found ')'
77-
// ╭─[ class_secure_types.baml:4:74 ]
78-
// │
79-
// 4 │ b (int, map<bool, string?>, (char | float)[][] | long_word_123.foobar[])
80-
// │ ┬
81-
// │ ╰── Expected Unexpected token in class body, found ')'
82-
// │
83-
// │ Note: Error code: E0010
84-
// ───╯
85-
// Error: Expected '>', found '>>'
86-
// ╭─[ class_secure_types.baml:16:62 ]
87-
// │
88-
// 16 │ n map<complex_key_type[], map<another_key, (int | string[])>>
89-
// │ ─┬
90-
// │ ╰── Expected '>', found '>>'
91-
// │
92-
// │ Note: Error code: E0010
93-
// ────╯
94-
// Error: Expected '>', found '>>'
95-
// ╭─[ class_secure_types.baml:16:62 ]
96-
// │
97-
// 16 │ n map<complex_key_type[], map<another_key, (int | string[])>>
98-
// │ ─┬
99-
// │ ╰── Expected '>', found '>>'
100-
// │
101-
// │ Note: Error code: E0010
102-
// ────╯
103-
// Error: Expected Unexpected token in class body, found '>>'
104-
// ╭─[ class_secure_types.baml:16:62 ]
105-
// │
106-
// 16 │ n map<complex_key_type[], map<another_key, (int | string[])>>
107-
// │ ─┬
108-
// │ ╰── Expected Unexpected token in class body, found '>>'
109-
// │
110-
// │ Note: Error code: E0010
111-
// ────╯

0 commit comments

Comments
 (0)