diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreatorHelper.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreatorHelper.scala index 857df52c7eef..6a6da3070a6d 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreatorHelper.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreatorHelper.scala @@ -199,8 +199,8 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As def tmp = SimpleIdentifier()(originSpan.spanStart(tmpName)) val matchCall = { - val code = s"${target.text}.match(${regex.text})" - MemberCall(target, ".", "match", regex :: Nil)(originSpan.spanStart(code)) + val code = s"${regex.text}.match(${target.text})" + MemberCall(regex, ".", "match", target :: Nil)(originSpan.spanStart(code)) } val tmpAssignment = { val code = s"$tmpName = ${matchCall.text}" @@ -217,14 +217,26 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As val tildeCode = s"$$~ = $tmpName" val tildeAssign = SingleAssignment(globalTilde, "=", tmp)(originSpan.spanStart(tildeCode)) - def zero = StaticLiteral(getBuiltInType(Defines.Integer))(originSpan.spanStart("0")) - val tmpIndex0 = IndexAccess(tmp, zero :: Nil)(originSpan.spanStart(s"$tmpName[0]")) + def intLiteral(n: Int) = StaticLiteral(getBuiltInType(Defines.Integer))(originSpan.spanStart(s"$n")) + val tmpIndex0 = IndexAccess(tmp, intLiteral(0) :: Nil)(originSpan.spanStart(s"$tmpName[0]")) val ampersandCode = s"$$& = $tmpName[0]" val ampersandAssign = SingleAssignment(globalAmpersand, "=", tmpIndex0)(originSpan.spanStart(ampersandCode)) + + // Heuristic for the N matched groups: count unescaped `(` that open a plain or named capture group. + // `(?:...)`, look-arounds, inline flags and escaped `\(` do not capture and are skipped. + val capturingGroupStart = """(?<!\\)(?:\\\\)*\((?:(?!\?)|(?=\?<[^=!])|(?=\?'))""".r + val matchGroups = (1 to capturingGroupStart.findAllIn(regex.text).size).map { idx => + val matchGroupAsgnCode = s"$$$idx = $tmpName[$idx]" + val matchGroup = MemberAccess(self, ".", "$")(originSpan.spanStart("$")) + val 
matchGroupIndexN = IndexAccess(matchGroup, intLiteral(idx) :: Nil)(originSpan.spanStart(s"$$[$idx]")) + val tmpIndexN = IndexAccess(tmp, intLiteral(idx) :: Nil)(originSpan.spanStart(s"$tmpName[$idx]")) + SingleAssignment(matchGroupIndexN, "=", tmpIndexN)(originSpan.spanStart(matchGroupAsgnCode)) + }.toList + // tmp.begin(0) is the lowered return value of `~=` - val beginCall = MemberCall(tmp, ".", "begin", zero :: Nil)(originSpan.spanStart(s"$tmpName.begin(0)")) - StatementList(tildeAssign :: ampersandAssign :: beginCall :: Nil)( + val beginCall = MemberCall(tmp, ".", "begin", intLiteral(0) :: Nil)(originSpan.spanStart(s"$tmpName.begin(0)")) + StatementList((tildeAssign :: ampersandAssign :: matchGroups) :+ beginCall)( originSpan.spanStart(s"$tildeCode; $ampersandCode") ) }, diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala index 74de144b8cb6..9c4916a76917 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala @@ -222,7 +222,11 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) { } else { code(n) } - val call = callNode(n, callCode, n.methodName, XDefines.DynamicCallUnknownFullName, dispatchType) + val call = if (n.isRegexMatch || RubyOperators.regexMethods(n.methodName)) { + callNode(n, callCode, n.methodName, s"${getBuiltInType(Defines.Regexp)}.match", dispatchType) + } else { + callNode(n, callCode, n.methodName, XDefines.DynamicCallUnknownFullName, dispatchType) + } if methodFullName != XDefines.DynamicCallUnknownFullName then call.possibleTypes(Seq(methodFullName)) if (isStatic) { callAst(call, argumentAsts, base = Option(baseAst)).copy(receiverEdges = Nil) 
diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyJsonToNodeCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyJsonToNodeCreator.scala index 29a51e1e109a..92e4a4957197 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyJsonToNodeCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyJsonToNodeCreator.scala @@ -766,10 +766,14 @@ class RubyJsonToNodeCreator( private def visitNil(obj: Obj): RubyExpression = StaticLiteral(getBuiltInType(Defines.NilClass))(obj.toTextSpan) private def visitNthRef(obj: Obj): RubyExpression = { - val span = obj.toTextSpan - val name = obj(ParserKeys.Value).num.toInt - val selfBase = SelfIdentifier()(span.spanStart("self")) - MemberAccess(selfBase, ".", s"$$$name")(span) + // We represent $N as $[N], an index access on the `self.$` member, in order to track these arbitrary numeric + // accesses in a way the data-flow engine understands + val span = obj.toTextSpan + val groupIndex = obj(ParserKeys.Value).num.toInt + val selfBase = SelfIdentifier()(span.spanStart("self")) + val dollarMemberAccess = MemberAccess(selfBase, ".", "$")(span.spanStart("$")) + val indexPos = StaticLiteral(getBuiltInType(Defines.Integer))(span.spanStart(groupIndex.toString)) + IndexAccess(dollarMemberAccess, indexPos :: Nil)(span.spanStart(s"$$[$groupIndex]")) } private def visitObjectInstantiation(obj: Obj): RubyExpression = { diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/RegexTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/RegexTests.scala index f717dda05b23..ed016be9c583 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/RegexTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/RegexTests.scala @@ -7,7 +7,7 @@ import io.shiftleft.codepropertygraph.generated.{Cpg, Operators} import 
io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier, Literal} import io.shiftleft.semanticcpg.language.* -class RegexTests extends RubyCode2CpgFixture(withPostProcessing = true) { +class RegexTests extends RubyCode2CpgFixture(withPostProcessing = false) { "Global regex related variables" should { @@ -23,12 +23,13 @@ class RegexTests extends RubyCode2CpgFixture(withPostProcessing = true) { val tmpTarget = tmpInit.target.asInstanceOf[Identifier] tmpTarget.name shouldBe s"" val tmpSource = tmpInit.source.asInstanceOf[Call] - tmpSource.code shouldBe s"$expectedSubject.match(/h(el)lo/)" + tmpSource.code shouldBe s"/h(el)lo/.match($expectedSubject)" tmpSource.name shouldBe "match" + tmpSource.methodFullName shouldBe "__core.Kernel.Regexp.match" // Now test for the lowered global variable assignments val ifStmt = cpg.controlStructure.last - inside(ifStmt.whenTrue.assignment.l) { case tildeAsgn :: amperAsgn :: Nil => + inside(ifStmt.whenTrue.assignment.l) { case tildeAsgn :: amperAsgn :: match1Asgn :: Nil => tildeAsgn.code shouldBe s"$$~ = " val taSource = tildeAsgn.source.asInstanceOf[Identifier] taSource.name shouldBe s"" @@ -46,6 +47,15 @@ class RegexTests extends RubyCode2CpgFixture(withPostProcessing = true) { val aaTarget = amperAsgn.target.asInstanceOf[Call] aaTarget.methodFullName shouldBe Operators.fieldAccess aaTarget.code shouldBe "self.$&" + + match1Asgn.code shouldBe s"$$1 = [1]" + val match1AsgnSource = match1Asgn.source.asInstanceOf[Call] + match1AsgnSource.methodFullName shouldBe Operators.indexAccess + match1AsgnSource.code shouldBe s"[1]" + + val match1AsgnTarget = match1Asgn.target.asInstanceOf[Call] + match1AsgnTarget.methodFullName shouldBe Operators.indexAccess + match1AsgnTarget.code shouldBe "$[1]" } inside(ifStmt.whenFalse.assignment.l) { case tildeAsgn :: amperAsgn :: Nil => tildeAsgn.code shouldBe "$~ = nil"