Skip to content

Commit

Permalink
dataflowengineoss: Track flow from parent identifiers & parameters (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBakerEffendi authored Mar 6, 2023
1 parent 2af71a3 commit 5797650
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 57 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,15 @@ package io.joern.dataflowengineoss.queryengine

import io.joern.x2cpg.Defines
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.Operators
import io.shiftleft.codepropertygraph.generated.nodes.{
AstNode,
Call,
CfgNode,
Expression,
FieldIdentifier,
Identifier,
Literal,
Member,
Method,
MethodReturn,
StoredNode,
TypeDecl
}
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.codepropertygraph.generated.{Operators, PropertyNames}
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.language.operatorextension.allAssignmentTypes
import org.slf4j.LoggerFactory
import overflowdb.traversal.Traversal

import java.util.concurrent.{ForkJoinPool, ForkJoinTask, RecursiveTask, RejectedExecutionException}
import scala.util.{Failure, Success, Try}
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.language.operatorextension.allAssignmentTypes

/** Couples a `CfgNode` starting point with a `StoredNode` source.
  *
  * NOTE(review): presumably `startingPoint` is one of the nodes derived from `source` by `SourceToStartingPoints` —
  * confirm against the call sites, which are not visible here.
  */
case class StartingPointWithSource(startingPoint: CfgNode, source: StoredNode)

Expand Down Expand Up @@ -81,14 +68,30 @@ class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode
case methodReturn: MethodReturn =>
methodReturn.method.callIn.l
case lit: Literal =>
List(lit) ++ usages(targetsToClassIdentifierPair(literalToInitializedMembers(lit)))
// `firstUsagesOfLHSIdentifiers` is required to handle child methods that reference the identifier this literal
// is assigned to. This is not fully sound, since re-assignment is not handled well, but it is difficult to
// determine from the control flow when the method ref might use the value
val firstUsagesOfLHSIdentifiers =
lit.inAssignment.argument(1).isIdentifier.flatMap(identifiersFromChildScopes).l.distinctBy(_.method)
List(lit) ++ usages(
targetsToClassIdentifierPair(literalToInitializedMembers(lit))
) ++ firstUsagesOfLHSIdentifiers
case member: Member =>
val initializedMember = memberToInitializedMembers(member)
usages(targetsToClassIdentifierPair(List(member)))
case x @ (_: Identifier | _: MethodParameterIn) =>
List(x).collectAll[CfgNode].toList ++ identifiersFromChildScopes(x.asInstanceOf[CfgNode])
case x => List(x).collect { case y: CfgNode => y }
}
}

/** Collects identifiers in child scopes that share the name of `i`.
  *
  * The lookup name is the node's `NAME` property, with the node's code as the fallback value. Candidates are the
  * identifiers inside the methods referenced by method refs in the AST of the method enclosing `i`; matches are
  * returned ordered by (line number, column number).
  */
private def identifiersFromChildScopes(i: CfgNode): List[Identifier] = {
  val targetName = i.property(PropertyNames.NAME, i.code)
  // Identifiers of every method that is referenced via a method ref inside the enclosing method
  val childScopeIdentifiers = i.method.ast.isMethodRef.referencedMethod.ast.isIdentifier
  childScopeIdentifiers
    .nameExact(targetName)
    .sortBy(identifier => (identifier.lineNumber, identifier.columnNumber))
    .l
}

private def usages(pairs: List[(TypeDecl, AstNode)]): List[CfgNode] = {
pairs.flatMap { case (typeDecl, astNode) =>
val nonConstructorMethods = methodsRecursively(typeDecl)
Expand Down Expand Up @@ -174,35 +177,6 @@ class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode
.l
}

/** Finds the assignment targets that initialize `member` inside the initialization methods of its class.
  *
  * Classes have a static initialization method (cinit) and a non-static initialization method (init), and each member
  * should be initialized in at least one of them. Every AST node returned by `astNodesInConstructors` is inspected:
  * identifiers named like the member are handed to `isTargetInAssignment`, field identifiers whose canonical name
  * matches are kept when they occur in an assignment, and all other nodes contribute nothing.
  */
private def memberToInitializedMembers(member: Member): List[Expression] = {
  val constructorNodes = astNodesInConstructors(member)

  constructorNodes.flatMap {
    case ident: Identifier if ident.name == member.name =>
      isTargetInAssignment(ident)
    case fieldIdent: FieldIdentifier if fieldIdent.canonicalName == member.head.name =>
      Traversal(fieldIdent).where(_.inAssignment).l
    case _ =>
      List[Expression]()
  }.l
}

/** Returns all AST nodes of the initialization methods found for the type declaration of `member`.
  *
  * A method found by `methodsRecursively` qualifies when it is named exactly `Defines.StaticInitMethodName` or
  * `Defines.ConstructorMethodName`, or when its name matches the pattern `.*<body>$`.
  */
private def astNodesInConstructors(member: Member) = {
  val declaredMethods = methodsRecursively(member.typeDecl)
  val initializationMethods = declaredMethods.or(
    _.nameExact(Defines.StaticInitMethodName, Defines.ConstructorMethodName),
    // this is for python
    _.name(".*<body>$")
  )
  initializationMethods.ast.l
}

private def methodsRecursively(typeDecl: TypeDecl): List[Method] = {
def methods(x: AstNode): List[Method] = {
x match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -618,4 +618,37 @@ class DataflowTest extends DataFlowCodeToCpgSuite {
sink.reachableBy(src2).size shouldBe 1
}

"Flow correctly from parent scope to child function scope" in {
  // `bar` is nested inside `foo` and reads both the local constant `x` and the parameter `u` of `foo`
  val cpg: Cpg = code("""
|function foo(u) {
|
|  const x = 1;
|
|  function bar() {
|     y = x;
|     console.log(y);
|     v = u;
|     console.debug(v);
|  }
|
|}""".stripMargin)

  val logSink   = cpg.call("log").l
  val debugSink = cpg.call("debug").l
  logSink.size shouldBe 1
  debugSink.size shouldBe 1

  // Source: the identifier `x` declared in `foo`
  val identifierSource = cpg.method("foo").ast.isIdentifier.name("x").lineNumber(4).l
  identifierSource.size shouldBe 1
  logSink.reachableBy(identifierSource).size shouldBe 1

  // Source: the literal `1` assigned to `x`
  val literalSource = cpg.method("foo").ast.isLiteral.code("1").lineNumber(4).l
  literalSource.size shouldBe 1
  logSink.reachableBy(literalSource).size shouldBe 1

  // Source: the parameter `u` of `foo`
  val parameterSource = cpg.method("foo").parameter.nameExact("u").l
  parameterSource.size shouldBe 1
  debugSink.reachableBy(parameterSource).size shouldBe 1
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -178,18 +178,47 @@ class DataFlowTests extends PySrc2CpgFixture(withOssDataflow = true) {
|accountId="sometext"
|response = client.post_data(data, accountId)
|""".stripMargin)
val source = cpg.identifier(".*url.*").l
val sink = cpg.call("post").l
source.size shouldBe 2
val sourceUrlIdentifier = cpg.identifier(".*url.*").l
val sink = cpg.call("post").l
sourceUrlIdentifier.size shouldBe 2
sink.size shouldBe 1
val flows = sink.reachableByFlows(source).l
flows.size shouldBe 1
sink.reachableByFlows(sourceUrlIdentifier).size shouldBe 1

val sourceUrlLiteral = cpg.literal(".*app.commissionly.io.*").l
sourceUrlLiteral.size shouldBe 1
sink.reachableByFlows(sourceUrlLiteral).size shouldBe 1
}

"Flow correctly from parent scope to child function scope" in {
  // `bar` is nested inside `foo` and reads both the local variable `x` and the parameter `u` of `foo`
  val cpg: Cpg = code("""
|def foo(u):
|
|  x = 1
|
|  def bar():
|     y = x
|     print(y)
|     v = u
|     debug(v)
|
|""".stripMargin)

  val sink1 = cpg.call("print").l
  val sink2 = cpg.call("debug").l
  sink1.size shouldBe 1
  sink2.size shouldBe 1

  // Source: the identifier `x` assigned in `foo`
  val iSrc = cpg.method("foo").ast.isIdentifier.name("x").lineNumber(4).l
  iSrc.size shouldBe 1
  sink1.reachableBy(iSrc).size shouldBe 1

  // Source: the literal `1` assigned to `x`.
  // The stale checks that referenced `sink`/`source` (names not defined in this test) and the unrelated
  // `.*app.commissionly.io.*` literal were dropped; they belong to a different test case.
  val lSrc = cpg.method("foo").ast.isLiteral.code("1").lineNumber(4).l
  lSrc.size shouldBe 1
  sink1.reachableBy(lSrc).size shouldBe 1

  // Source: the parameter `u` of `foo`
  val pSrc = cpg.method("foo").parameter.nameExact("u").l
  pSrc.size shouldBe 1
  sink2.reachableBy(pSrc).size shouldBe 1
}

}

0 comments on commit 5797650

Please sign in to comment.