Skip to content

Commit

Permalink
[ruby] Regex Match Defines $N Vars (#5303)
Browse files Browse the repository at this point in the history
This PR implements the other component of regex matching: defining the numbered global variables. In Ruby, `$1`, `$2`, etc. correspond to the groups captured by the last match. This is analogous to how a `MatchData` object can be indexed to refer to these groups.

This PR models these `nref` objects as `$[N]` and, during match lowering, assigns each one the corresponding index position of the lowered temp match object, i.e., `$[N] = <tmp-0>[N]` for each `N` from 1 up to the number of opening parentheses in the regex (a simple heuristic).

Additionally, the lowered `match` calls have their `methodFullName` defined for convenient policy/semantic definition creation.
  • Loading branch information
DavidBakerEffendi authored Feb 12, 2025
1 parent cf8498a commit c72d37d
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
def tmp = SimpleIdentifier()(originSpan.spanStart(tmpName))

val matchCall = {
val code = s"${target.text}.match(${regex.text})"
MemberCall(target, ".", "match", regex :: Nil)(originSpan.spanStart(code))
val code = s"${regex.text}.match(${target.text})"
MemberCall(regex, ".", "match", target :: Nil)(originSpan.spanStart(code))
}
val tmpAssignment = {
val code = s"$tmpName = ${matchCall.text}"
Expand All @@ -217,14 +217,24 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
val tildeCode = s"$$~ = $tmpName"
val tildeAssign = SingleAssignment(globalTilde, "=", tmp)(originSpan.spanStart(tildeCode))

def zero = StaticLiteral(getBuiltInType(Defines.Integer))(originSpan.spanStart("0"))
val tmpIndex0 = IndexAccess(tmp, zero :: Nil)(originSpan.spanStart(s"$tmpName[0]"))
def intLiteral(n: Int) = StaticLiteral(getBuiltInType(Defines.Integer))(originSpan.spanStart(s"$n"))
val tmpIndex0 = IndexAccess(tmp, intLiteral(0) :: Nil)(originSpan.spanStart(s"$tmpName[0]"))

val ampersandCode = s"$$& = $tmpName[0]"
val ampersandAssign = SingleAssignment(globalAmpersand, "=", tmpIndex0)(originSpan.spanStart(ampersandCode))

// use a simple heuristic to determine the N matched groups
val matchGroups = (1 to regex.text.count(_ == '(')).map { idx =>
val matchGroupAsgnCode = s"$$$idx = $tmpName[$idx]"
val matchGroup = MemberAccess(self, ".", "$")(originSpan.spanStart("$"))
val matchGroupIndexN = IndexAccess(matchGroup, intLiteral(idx) :: Nil)(originSpan.spanStart(s"$$[$idx]"))
val tmpIndexN = IndexAccess(tmp, intLiteral(idx) :: Nil)(originSpan.spanStart(s"$tmpName[$idx]"))
SingleAssignment(matchGroupIndexN, "=", tmpIndexN)(originSpan.spanStart(matchGroupAsgnCode))
}.toList

// tmp.begin(0) is the lowered return value of `=~`
val beginCall = MemberCall(tmp, ".", "begin", zero :: Nil)(originSpan.spanStart(s"$tmpName.begin(0)"))
StatementList(tildeAssign :: ampersandAssign :: beginCall :: Nil)(
val beginCall = MemberCall(tmp, ".", "begin", intLiteral(0) :: Nil)(originSpan.spanStart(s"$tmpName.begin(0)"))
StatementList(tildeAssign :: ampersandAssign :: Nil ++ matchGroups :+ beginCall)(
originSpan.spanStart(s"$tildeCode; $ampersandCode")
)
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,11 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
} else {
code(n)
}
val call = callNode(n, callCode, n.methodName, XDefines.DynamicCallUnknownFullName, dispatchType)
val call = if (n.isRegexMatch || RubyOperators.regexMethods(n.methodName)) {
callNode(n, callCode, n.methodName, s"${getBuiltInType(Defines.Regexp)}.match", dispatchType)
} else {
callNode(n, callCode, n.methodName, XDefines.DynamicCallUnknownFullName, dispatchType)
}
if methodFullName != XDefines.DynamicCallUnknownFullName then call.possibleTypes(Seq(methodFullName))
if (isStatic) {
callAst(call, argumentAsts, base = Option(baseAst)).copy(receiverEdges = Nil)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -766,10 +766,14 @@ class RubyJsonToNodeCreator(
private def visitNil(obj: Obj): RubyExpression = StaticLiteral(getBuiltInType(Defines.NilClass))(obj.toTextSpan)

private def visitNthRef(obj: Obj): RubyExpression = {
val span = obj.toTextSpan
val name = obj(ParserKeys.Value).num.toInt
val selfBase = SelfIdentifier()(span.spanStart("self"))
MemberAccess(selfBase, ".", s"$$$name")(span)
// We represent $1 as $[1] in order to track these arbitrary numeric accesses in a way the data-flow engine
// understands
val span = obj.toTextSpan
val name = obj(ParserKeys.Value).num.toInt
val selfBase = SelfIdentifier()(span.spanStart("self"))
val amperMemberAccess = MemberAccess(selfBase, ".", "$")(span)
val indexPos = StaticLiteral(getBuiltInType(Defines.Integer))(obj.toTextSpan.spanStart(name.toString))
IndexAccess(amperMemberAccess, indexPos :: Nil)(obj.toTextSpan.spanStart(s"$$[$name]"))
}

private def visitObjectInstantiation(obj: Obj): RubyExpression = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import io.shiftleft.codepropertygraph.generated.{Cpg, Operators}
import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier, Literal}
import io.shiftleft.semanticcpg.language.*

class RegexTests extends RubyCode2CpgFixture(withPostProcessing = true) {
class RegexTests extends RubyCode2CpgFixture(withPostProcessing = false) {

"Global regex related variables" should {

Expand All @@ -23,12 +23,13 @@ class RegexTests extends RubyCode2CpgFixture(withPostProcessing = true) {
val tmpTarget = tmpInit.target.asInstanceOf[Identifier]
tmpTarget.name shouldBe s"<tmp-$tmpNo>"
val tmpSource = tmpInit.source.asInstanceOf[Call]
tmpSource.code shouldBe s"$expectedSubject.match(/h(el)lo/)"
tmpSource.code shouldBe s"/h(el)lo/.match($expectedSubject)"
tmpSource.name shouldBe "match"
tmpSource.methodFullName shouldBe "__core.Kernel.Regexp.match"

// Now test for the lowered global variable assignments
val ifStmt = cpg.controlStructure.last
inside(ifStmt.whenTrue.assignment.l) { case tildeAsgn :: amperAsgn :: Nil =>
inside(ifStmt.whenTrue.assignment.l) { case tildeAsgn :: amperAsgn :: match1Asgn :: Nil =>
tildeAsgn.code shouldBe s"$$~ = <tmp-$tmpNo>"
val taSource = tildeAsgn.source.asInstanceOf[Identifier]
taSource.name shouldBe s"<tmp-$tmpNo>"
Expand All @@ -46,6 +47,15 @@ class RegexTests extends RubyCode2CpgFixture(withPostProcessing = true) {
val aaTarget = amperAsgn.target.asInstanceOf[Call]
aaTarget.methodFullName shouldBe Operators.fieldAccess
aaTarget.code shouldBe "self.$&"

match1Asgn.code shouldBe s"$$1 = <tmp-$tmpNo>[1]"
val match1AsgnSource = match1Asgn.source.asInstanceOf[Call]
match1AsgnSource.methodFullName shouldBe Operators.indexAccess
match1AsgnSource.code shouldBe s"<tmp-$tmpNo>[1]"

val match1AsgnTarget = match1Asgn.target.asInstanceOf[Call]
match1AsgnTarget.methodFullName shouldBe Operators.indexAccess
match1AsgnTarget.code shouldBe "$[1]"
}
inside(ifStmt.whenFalse.assignment.l) { case tildeAsgn :: amperAsgn :: Nil =>
tildeAsgn.code shouldBe "$~ = nil"
Expand Down

0 comments on commit c72d37d

Please sign in to comment.