From 7986135bd4e211ed63b5c455b4e16a6295988205 Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Wed, 19 Feb 2025 17:29:28 +0200 Subject: [PATCH] [ruby] Fix Regex Lowering Bug (#5316) Fixed a bug introduced by converting all regex match methods by the `~=` binary statements. The bug is introduced is that this replaces ordinary calls (expressions) with binary statements leading to unexpected structures during AST creation. This was done to prevent infinite lowering whenever a regex method is created (as a `match` call is part of this lowering, which in of itself is a regex method). This change adds a "tag" to show when a call needs this lowering. --- .../AstForExpressionsCreator.scala | 7 +++--- .../astcreation/RubyIntermediateAst.scala | 3 ++- .../parser/RubyJsonToNodeCreator.scala | 25 ++++++++++--------- .../io/joern/rubysrc2cpg/passes/Defines.scala | 6 ++++- .../rubysrc2cpg/querying/RegexTests.scala | 5 ++-- 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala index cfd199305cb2..704c4949ffac 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala @@ -74,13 +74,11 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) { } protected def astForHereDoc(node: HereDocNode): Ast = { - Ast(literalNode(node, code(node), prefixAsKernelDefined("String"))) + Ast(literalNode(node, code(node), prefixAsCoreType(Defines.String))) } // Helper for nil literals to put in empty clauses - protected def astForNilLiteral: Ast = Ast( - NewLiteral().code("nil").typeFullName(prefixAsKernelDefined(Defines.NilClass)) - ) + protected def astForNilLiteral: Ast = Ast(NewLiteral().code("nil").typeFullName(prefixAsCoreType(Defines.NilClass))) protected def astForNilBlock: Ast = blockAst(NewBlock(), List(astForNilLiteral)) @@ -225,6 +223,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) { code(n) } val call = if (n.isRegexMatch || RubyOperators.regexMethods(n.methodName)) { + // TODO: We need to do the lowering callNode(n, callCode, n.methodName, s"${prefixAsCoreType(Defines.Regexp)}.match", dispatchType) } else { callNode(n, callCode, n.methodName, XDefines.DynamicCallUnknownFullName, dispatchType) diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/RubyIntermediateAst.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/RubyIntermediateAst.scala index 97cfeb272291..e39752a2e753 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/RubyIntermediateAst.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/RubyIntermediateAst.scala @@ -569,7 +569,8 @@ object RubyIntermediateAst { ) extends RubyExpression(span) with RubyCall { - def isRegexMatch: Boolean = methodName == RubyOperators.regexpMatch + def isRegexMatch: Boolean = + methodName == RubyOperators.regexpMatch || methodName.endsWith(Defines.NeedsRegexLowering) override def withBlock(block: Block): RubyCallWithBlock[?] = MemberCallWithBlock(target, op, methodName, arguments, block)(span) diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyJsonToNodeCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyJsonToNodeCreator.scala index c1ee35acf80d..b4d40863de5f 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyJsonToNodeCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyJsonToNodeCreator.scala @@ -613,8 +613,8 @@ class RubyJsonToNodeCreator( if (isRegexMatch) { // For regex match that looks like "hello"[/h(el)lo/] val newProps = obj.value - newProps.put(ParserKeys.Name, RubyOperators.regexpMatch) - visitBinaryExpression(obj.copy(value = newProps)) + newProps.put(ParserKeys.Name, s"match${Defines.NeedsRegexLowering}") + visitSend(obj.copy(value = newProps)) } else { IndexAccess(target, indices)(obj.toTextSpan) } @@ -960,12 +960,12 @@ class RubyJsonToNodeCreator( private def visitSelf(obj: Obj): RubyExpression = SelfIdentifier()(obj.toTextSpan) private def visitBinaryExpression(obj: Obj): RubyExpression = { - val callName = obj(ParserKeys.Name).str - val lhs = visit(obj(ParserKeys.Receiver)) - val rhs = obj.visitArray(ParserKeys.Arguments).head - // Transform `match` to `=~` so that it is lowered later - val op = if RubyOperators.regexMethods.contains(callName) then RubyOperators.regexpMatch else callName - BinaryExpression(lhs, op, rhs)(obj.toTextSpan) + val op = obj(ParserKeys.Name).str + val lhs = visit(obj(ParserKeys.Receiver)) + obj.visitArray(ParserKeys.Arguments).headOption match { + case Some(rhs) => BinaryExpression(lhs, op, rhs)(obj.toTextSpan) + case None => MemberCall(lhs, ".", op, Nil)(obj.toTextSpan) + } } private def visitSend(obj: Obj, isConditional: Boolean = false): RubyExpression = { @@ -983,12 +983,13 @@ class RubyJsonToNodeCreator( case "private" | "public" | "protected" => visitAccessModifier(obj) case "private_class_method" | "public_class_method" => visitMethodAccessModifier(obj) case requireLike if ImportCallNames.contains(requireLike) && !hasReceiver => visitRequireLike(obj) - case x - if BinaryOperators.isBinaryOperatorName(callName) - || RubyOperators.regexMethods.contains(x) => // assert `match`, `sub`, or `gsub` is always for regex - visitBinaryExpression(obj) + case _ if BinaryOperators.isBinaryOperatorName(callName) => visitBinaryExpression(obj) case _ if UnaryOperators.isUnaryOperatorName(callName) => UnaryExpression(callName, visit(obj(ParserKeys.Receiver)))(obj.toTextSpan) + case _ if RubyOperators.regexMethods.contains(callName) => + val newProps = obj.value + newProps.put(ParserKeys.Name, s"$callName${Defines.NeedsRegexLowering}") + visitSend(obj.copy(value = newProps)) case s"$name=" if hasReceiver => visitFieldAssignmentSend(obj, name) case _ => val target = SimpleIdentifier()(obj.toTextSpan.spanStart(callName)) diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/Defines.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/Defines.scala index 0e56fe443606..49cc44ee38f9 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/Defines.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/Defines.scala @@ -32,6 +32,10 @@ object Defines { val Initialize: String = "initialize" val TypeDeclBody: String = "" + // A tag attached to the suffix of 'match' calls that have not + // yet been lowered. Prevents infinite recursion. + val NeedsRegexLowering: String = "" + val Main: String = "
" val Resolver: String = "" @@ -56,7 +60,7 @@ object Defines { val regexpMatch = "=~" val regexpNotMatch = "!~" - val regexMethods = Set("match", "sub", "gsub") + val regexMethods = Set("match") // TODO: Figure out how to model these, "sub", "gsub") } } diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/RegexTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/RegexTests.scala index 63bb16c2f9c0..ca65a0078885 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/RegexTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/RegexTests.scala @@ -11,8 +11,6 @@ class RegexTests extends RubyCode2CpgFixture(withPostProcessing = false) { /** Checks for the presence of the lowered regex match which assigns the match results to the respective global * variables. - * - * TODO: Check for matching of match group ($1, $2, etc.) variables. */ def assertLoweredStructure(cpg: Cpg, tmpNo: String = "0", expectedSubject: String = "\"hello\""): Unit = { // We lower =~ to the `match` equivalent @@ -117,7 +115,8 @@ class RegexTests extends RubyCode2CpgFixture(withPostProcessing = false) { assertLoweredStructure(cpg) } - "be assigned to the match using `sub` (or `gsub`) calls" in { + "be assigned to the match using `sub` (or `gsub`) calls" ignore { + // fixme: This cannot simply be lowered as it returns a string result not a MatchData object val cpg = code(""" |"hello".sub(/h(el)lo/) |""".stripMargin)