From c6b0b169ce69b0a83473828740d214dada6e4f26 Mon Sep 17 00:00:00 2001 From: Julian Scheid Date: Mon, 25 Aug 2025 16:31:22 +0200 Subject: [PATCH] Fix unexpected empty source map stacks This fixes two unrelated issues that both previously presented with a similar symptom, namely the following error: RangeError: cannot get the first element of beginless range The first issue is that some constructs, such as `{} => {}` and `foo in bar` get parsed by Ripper as a `case` AST node. However, without a matching `case` keyword, the YARD parser doesn't know where it starts. The second issue is that tokens following the `def` keyword are (correctly) never treated as keywords themselves, because they are used as the method name instead. However, this logic erroneously also applied to keywords following the `:def` _symbol_ - again making the YARD parser unable to determine where the keyword (following the `:def` symbol) starts. In both cases, the problematic constructs by themselves did not cause a crash but merely provided wrong source ranges. However, in specific parser states, the corresponding `@map` entry is the empty array rather than nil (because a similar, valid, construct was encountered before, which initializes the map entry). This was causing crashes since the beginning of `line_range` and `source_range` becomes nil as a result. Here we're fixing the underlying issue and we're also adding specs to ensure that source ranges are correct, and that crashes in the presence of pre-initializing constructs are eliminated. Note that another fix would have been to treat empty `@map` entries the same as nil `@map` entries in `visit_event`. Doing so would also prevent the crashes, and improve robustness more generally, however it would mask potential similar problems with other constructs. Therefore I have refrained from making that change -- a debatable decision that might be revisited in a separate change. However, I've added a better error message in this case. DISCLOSURE: Claude wrote large parts of the specs and helped me understand the problems and create the fixes, but I'm fully adopting its output as my own (after polishing it manually and reviewing it.) Fixes #1603. Co-Authored-By: Claude --- lib/yard/parser/ruby/ruby_parser.rb | 18 +++++- spec/parser/ruby/ruby_parser_spec.rb | 96 ++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 2 deletions(-) diff --git a/lib/yard/parser/ruby/ruby_parser.rb b/lib/yard/parser/ruby/ruby_parser.rb index a7ba5fabf..38b543032 100644 --- a/lib/yard/parser/ruby/ruby_parser.rb +++ b/lib/yard/parser/ruby/ruby_parser.rb @@ -236,7 +236,18 @@ def on_sp(tok) def visit_event(node) map = @map[MAPPINGS[node.type]] - lstart, sstart = *(map ? map.pop : [lineno, @ns_charno - 1]) + + # Pattern matching and `in` syntax creates :case nodes without 'case' tokens, + # fall back to the first child node. + if node.type == :case && (!map || map.empty?) && (child_node = node[0]) + lstart = child_node.line_range.first + sstart = child_node.source_range.first + else + lstart, sstart = *(map ? map.pop : [lineno, @ns_charno - 1]) + end + + raise "Cannot determine start of node #{node} around #{file}:#{lineno}" if lstart.nil? || sstart.nil? + node.source_range = Range.new(sstart, @ns_charno - 1) node.line_range = Range.new(lstart, lineno) if node.respond_to?(:block) @@ -259,7 +270,10 @@ def visit_event_arr(node) def visit_ns_token(token, data, ast_token = false) add_token(token, data) ch = charno - @last_ns_token = [token, data] + + # For purposes of tracking parsing state, don't treat keywords as such + # where used as a symbol identifier. + @last_ns_token = [@last_ns_token && @last_ns_token.first == :symbeg ? :symbol : token, data] @charno += data.length @ns_charno = charno @newline = [:semicolon, :comment, :kw, :op, :lparen, :lbrace].include?(token) diff --git a/spec/parser/ruby/ruby_parser_spec.rb b/spec/parser/ruby/ruby_parser_spec.rb index 62cba766a..e17704ffa 100644 --- a/spec/parser/ruby/ruby_parser_spec.rb +++ b/spec/parser/ruby/ruby_parser_spec.rb @@ -553,5 +553,101 @@ def add(x) = x + 1 expect(Registry.at('A#add').docstring).to eq('Adds two numbers') end if RUBY_VERSION >= '3.' + + it "doesn't crash with pattern matching following a case statement" do + code = <<-RUBY +case value +when 1 + "number" +end + +{} => {} + RUBY + + expect { + YARD::Parser::Ruby::RubyParser.new(code, nil).parse + }.not_to raise_error + end if RUBY_VERSION >= '3.' + + it "doesn't crash with `next` following `:def` symbol after initial `next`" do + code = <<-RUBY +foo do + next + if :def + next + end +end + RUBY + + expect { + YARD::Parser::Ruby::RubyParser.new(code, nil).parse + }.not_to raise_error + end + + it "doesn't crash with mixed pattern matching syntaxes" do + code = <<-RUBY +case foo +in bar + return +end +return if foo && foo in [] + RUBY + + expect { + parser = YARD::Parser::Ruby::RubyParser.new(code, nil) + parser.parse + }.not_to raise_error + end if RUBY_VERSION >= '2.7' + + it "provides correct range for various pattern matching statements" do + patterns = [ + "{} => {}", + "{a: 1} => {b: 2}", + "{x: 'test'} => result", + "foo in []" + ] + + patterns.each do |pattern| + parser = YARD::Parser::Ruby::RubyParser.new(pattern, nil) + ast = parser.parse.root + + case_node = nil + ast.traverse do |node| + if node.type == :case + case_node = node + break + end + end + + expect(case_node).not_to be_nil, "Pattern #{pattern} should create a case node" + actual_text = pattern[case_node.source_range] + expect(actual_text).to eq(pattern), + "Pattern #{pattern} should have source range covering the full expression, got #{actual_text.inspect}" + end + end if RUBY_VERSION >= '3.' + + it "provides correct range for `next` statement following `def` _symbol_" do + code = <<-RUBY +foo do + if :def + next + end +end + RUBY + + parser = YARD::Parser::Ruby::RubyParser.new(code, nil) + ast = parser.parse.root + + next_node = nil + ast.traverse do |node| + if node.type == :next + next_node = node + break + end + end + + expect(next_node.line_range).to eq(3..3) + expect(code[next_node.source_range]).to eq('next') + end end end if HAVE_RIPPER