Skip to content

Commit

Permalink
[ruby] Fix Regex Lowering Bug (#5316)
Browse files Browse the repository at this point in the history
Fixed a bug introduced by converting all regex match methods by the `~=` binary statements. The bug is introduced is that this replaces ordinary calls (expressions) with binary statements leading to unexpected structures during AST creation.

This was done to prevent infinite lowering whenever a regex method is created (as a `match` call is part of this lowering, which in of itself is a regex method).

This change adds a "tag" to show when a call needs this lowering.
  • Loading branch information
DavidBakerEffendi authored Feb 19, 2025
1 parent aaf0bb4 commit 7986135
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,11 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
}

protected def astForHereDoc(node: HereDocNode): Ast = {
Ast(literalNode(node, code(node), prefixAsKernelDefined("String")))
Ast(literalNode(node, code(node), prefixAsCoreType(Defines.String)))
}

// Helper for nil literals to put in empty clauses
protected def astForNilLiteral: Ast = Ast(
NewLiteral().code("nil").typeFullName(prefixAsKernelDefined(Defines.NilClass))
)
protected def astForNilLiteral: Ast = Ast(NewLiteral().code("nil").typeFullName(prefixAsCoreType(Defines.NilClass)))

protected def astForNilBlock: Ast = blockAst(NewBlock(), List(astForNilLiteral))

Expand Down Expand Up @@ -225,6 +223,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
code(n)
}
val call = if (n.isRegexMatch || RubyOperators.regexMethods(n.methodName)) {
// TODO: We need to do the lowering
callNode(n, callCode, n.methodName, s"${prefixAsCoreType(Defines.Regexp)}.match", dispatchType)
} else {
callNode(n, callCode, n.methodName, XDefines.DynamicCallUnknownFullName, dispatchType)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,8 @@ object RubyIntermediateAst {
) extends RubyExpression(span)
with RubyCall {

def isRegexMatch: Boolean = methodName == RubyOperators.regexpMatch
def isRegexMatch: Boolean =
methodName == RubyOperators.regexpMatch || methodName.endsWith(Defines.NeedsRegexLowering)

override def withBlock(block: Block): RubyCallWithBlock[?] =
MemberCallWithBlock(target, op, methodName, arguments, block)(span)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -613,8 +613,8 @@ class RubyJsonToNodeCreator(
if (isRegexMatch) {
// For regex match that looks like "hello"[/h(el)lo/]
val newProps = obj.value
newProps.put(ParserKeys.Name, RubyOperators.regexpMatch)
visitBinaryExpression(obj.copy(value = newProps))
newProps.put(ParserKeys.Name, s"match${Defines.NeedsRegexLowering}")
visitSend(obj.copy(value = newProps))
} else {
IndexAccess(target, indices)(obj.toTextSpan)
}
Expand Down Expand Up @@ -960,12 +960,12 @@ class RubyJsonToNodeCreator(
private def visitSelf(obj: Obj): RubyExpression = SelfIdentifier()(obj.toTextSpan)

private def visitBinaryExpression(obj: Obj): RubyExpression = {
val callName = obj(ParserKeys.Name).str
val lhs = visit(obj(ParserKeys.Receiver))
val rhs = obj.visitArray(ParserKeys.Arguments).head
// Transform `match` to `=~` so that it is lowered later
val op = if RubyOperators.regexMethods.contains(callName) then RubyOperators.regexpMatch else callName
BinaryExpression(lhs, op, rhs)(obj.toTextSpan)
val op = obj(ParserKeys.Name).str
val lhs = visit(obj(ParserKeys.Receiver))
obj.visitArray(ParserKeys.Arguments).headOption match {
case Some(rhs) => BinaryExpression(lhs, op, rhs)(obj.toTextSpan)
case None => MemberCall(lhs, ".", op, Nil)(obj.toTextSpan)
}
}

private def visitSend(obj: Obj, isConditional: Boolean = false): RubyExpression = {
Expand All @@ -983,12 +983,13 @@ class RubyJsonToNodeCreator(
case "private" | "public" | "protected" => visitAccessModifier(obj)
case "private_class_method" | "public_class_method" => visitMethodAccessModifier(obj)
case requireLike if ImportCallNames.contains(requireLike) && !hasReceiver => visitRequireLike(obj)
case x
if BinaryOperators.isBinaryOperatorName(callName)
|| RubyOperators.regexMethods.contains(x) => // assert `match`, `sub`, or `gsub` is always for regex
visitBinaryExpression(obj)
case _ if BinaryOperators.isBinaryOperatorName(callName) => visitBinaryExpression(obj)
case _ if UnaryOperators.isUnaryOperatorName(callName) =>
UnaryExpression(callName, visit(obj(ParserKeys.Receiver)))(obj.toTextSpan)
case _ if RubyOperators.regexMethods.contains(callName) =>
val newProps = obj.value
newProps.put(ParserKeys.Name, s"$callName${Defines.NeedsRegexLowering}")
visitSend(obj.copy(value = newProps))
case s"$name=" if hasReceiver => visitFieldAssignmentSend(obj, name)
case _ =>
val target = SimpleIdentifier()(obj.toTextSpan.spanStart(callName))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ object Defines {
val Initialize: String = "initialize"
val TypeDeclBody: String = "<body>"

// A tag attached to the suffix of 'match' calls that have not
// yet been lowered. Prevents infinite recursion.
val NeedsRegexLowering: String = "<needsRegexLowering>"

val Main: String = "<main>"

val Resolver: String = "<dependency-resolver>"
Expand All @@ -56,7 +60,7 @@ object Defines {
val regexpMatch = "=~"
val regexpNotMatch = "!~"

val regexMethods = Set("match", "sub", "gsub")
val regexMethods = Set("match") // TODO: Figure out how to model these, "sub", "gsub")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ class RegexTests extends RubyCode2CpgFixture(withPostProcessing = false) {

/** Checks for the presence of the lowered regex match which assigns the match results to the respective global
* variables.
*
* TODO: Check for matching of match group ($1, $2, etc.) variables.
*/
def assertLoweredStructure(cpg: Cpg, tmpNo: String = "0", expectedSubject: String = "\"hello\""): Unit = {
// We lower =~ to the `match` equivalent
Expand Down Expand Up @@ -117,7 +115,8 @@ class RegexTests extends RubyCode2CpgFixture(withPostProcessing = false) {
assertLoweredStructure(cpg)
}

"be assigned to the match using `sub` (or `gsub`) calls" in {
"be assigned to the match using `sub` (or `gsub`) calls" ignore {
// fixme: This cannot simply be lowered as it returns a string result not a MatchData object
val cpg = code("""
|"hello".sub(/h(el)lo/)
|""".stripMargin)
Expand Down

0 comments on commit 7986135

Please sign in to comment.