Skip to content

Commit

Permalink
JoernSlice: Usage Slicing (#2290)
Browse files Browse the repository at this point in the history
- Refactored `joern-slice` to support multiple "modes"
- The old version is DataFlow mode. This now produces slices for all calls in a given file.
- The new mode is Usage mode. For each declaration, this extracts the calls in which the declaration's identifiers act as callers or as arguments.
- Added circe for JSON serialization of usage slices.
  • Loading branch information
DavidBakerEffendi authored Feb 21, 2023
1 parent e97d59e commit 659bc3d
Show file tree
Hide file tree
Showing 5 changed files with 481 additions and 48 deletions.
1 change: 1 addition & 0 deletions joern-cli/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ libraryDependencies ++= Seq(
"io.shiftleft" %% "codepropertygraph" % Versions.cpg,
"com.lihaoyi" %% "requests" % "0.7.1",
"com.github.scopt" %% "scopt" % "4.1.0",
"io.circe" %% "circe-core" % "0.14.4",
"io.circe" %% "circe-generic" % "0.14.4",
"org.reflections" % "reflections" % "0.10.2",
"org.scalatest" %% "scalatest" % Versions.scalatest % Test
Expand Down
130 changes: 82 additions & 48 deletions joern-cli/src/main/scala/io/joern/joerncli/JoernSlice.scala
Original file line number Diff line number Diff line change
@@ -1,80 +1,114 @@
package io.joern.joerncli

import scala.util.Using
import better.files.File
import io.circe.generic.auto._
import io.circe.syntax.EncoderOps
import io.joern.joerncli.slicing._
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.semanticcpg.language._

import scala.jdk.CollectionConverters._
import io.joern.dataflowengineoss.language._
import io.joern.joerncli.console.Joern.semantics
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.CfgNode
import overflowdb.Edge
import scala.jdk.CollectionConverters.MapHasAsScala
import scala.language.postfixOps
import scala.util.Using

/** The slicing strategies that `joern-slice` can run.
  *
  *   - `DataFlow`: handled by `DataFlowSlicing`; this is the default mode of the tool.
  *   - `Usages`: handled by `UsageSlicing`.
  */
object SliceMode extends Enumeration {
  type SliceModes = Value
  val DataFlow: Value = Value
  val Usages: Value   = Value
}

object JoernSlice {

import io.joern.joerncli.SliceMode._

/** Lets scopt read a `SliceModes` option value by looking up the enumeration value with the given name. */
implicit val sliceModeRead: scopt.Read[SliceModes] =
  scopt.Read.reads(modeName => SliceMode.withName(modeName))

/** Command-line configuration of `joern-slice`.
  *
  * NOTE(review): this listing interleaves the previous (`String`-based) and the current (`File`-based) field
  * declarations of the class; the descriptions below follow the option help texts of the argument parser.
  *
  * @param cpgFileName
  *   the input CPG, defaults to `cpg.bin`.
  * @param outFile
  *   where the slice is written to.
  * @param sliceMode
  *   the kind of slicing to perform, defaults to `DataFlow`.
  * @param sourceFile
  *   the name of the source file to generate slices from.
  * @param sourceLine
  *   line number of the sink statement (previous version only).
  * @param sliceDepth
  *   the max depth to traverse the DDG for a data-flow slice, defaults to 20.
  * @param minNumCalls
  *   the minimum number of calls required for a usage slice, defaults to 1.
  */
case class Config(
cpgFileName: String = "cpg.bin",
outFile: String = "slice.bin",
sourceFile: String = "",
sourceLine: Int = -1
cpgFileName: File = File("cpg.bin"),
outFile: File = File("slices"),
sliceMode: SliceModes = DataFlow,
sourceFile: Option[String] = None,
sliceDepth: Int = 20,
minNumCalls: Int = 1
)

/** A slice of the graph: the CFG nodes it contains and, per node, the list of edges grouped under it. */
case class Slice(nodes: List[CfgNode], edges: Map[CfgNode, List[Edge]])

/** Entry point of `joern-slice`.
  *
  * Parses the command-line arguments; if parsing succeeds, the CPG is loaded (and closed again by `Using.resource`),
  * a slice is calculated and the result is handed to `storeSliceInNewCpg`. Nothing happens when the arguments cannot
  * be parsed, because `parseConfig` then yields `None`.
  */
def main(args: Array[String]): Unit = {
parseConfig(args).foreach { config =>
Using.resource(CpgBasedTool.loadFromOdb(config.cpgFileName)) { cpg =>
val slice = calculateSlice(cpg, config.sourceFile, config.sourceLine)
Using.resource(CpgBasedTool.loadFromOdb(config.cpgFileName.pathAsString)) { cpg =>
// Dispatch on the requested slicing mode.
val slice: ProgramSlice = config.sliceMode match {
case DataFlow => DataFlowSlicing.calculateDataFlowSlice(cpg, config)
case Usages => UsageSlicing.calculateUsageSlice(cpg, config)
}
storeSliceInNewCpg(config.outFile, slice)
}
}
}

private def parseConfig(args: Array[String]): Option[Config] =
new scopt.OptionParser[Config]("joern-slice") {
head("Extract intra-procedural backward slice for a line of code")
head("Extract intra-procedural slices from the CPG.")
help("help")
arg[String]("sourcefile")
.text("The file holding the sink statement")
.action((x, c) => c.copy(sourceFile = x))
arg[String]("line")
.text("Line number of sink statement")
.action((x, c) => c.copy(sourceLine = x.toInt))
arg[String]("cpg")
.text("input CPG file name - defaults to `cpg.bin`")
.optional()
.action((x, c) => c.copy(cpgFileName = x))
.action { (x, c) =>
val path = File(x)
if (!path.isRegularFile) failure(s"File at '$x' not found or not regular, e.g. a directory.")
c.copy(cpgFileName = path)
}
opt[String]('o', "out")
.text("output CPG file")
.action((x, c) => c.copy(outFile = x))
.text("the output file to write slices to - defaults to `slices`. The file is suffixed based on the mode.")
.action((x, c) => c.copy(outFile = File(x)))
opt[SliceModes]('m', "mode")
.text(s"the kind of slicing to perform - defaults to `DataFlow`. Options: [${SliceMode.values.mkString(", ")}]")
.action((x, c) => c.copy(sliceMode = x))
opt[String]("source-file")
.text("the name of the source file to generate slices from.")
.optional()
.action((x, c) => c.copy(sourceFile = Some(x)))
// `--slice-depth` must populate `sliceDepth`; it previously overwrote `minNumCalls`, so the depth could never be
// changed from its default and the usage-slice threshold was clobbered instead.
opt[Int]("slice-depth")
  .text(s"the max depth to traverse the DDG for the data-flow slice (for `DataFlow` mode) - defaults to 20.")
  .action((x, c) => c.copy(sliceDepth = x))
opt[Int]("min-num-calls")
.text(s"the minimum number of calls required for a usage slice (for `Usage` mode) - defaults to 1.")
.action((x, c) => c.copy(minNumCalls = x))

}.parse(args, Config())

/** Calculates a slice for the call arguments located on one line of one source file.
  *
  * The arguments of all calls on `sourceLine` of `sourceFile` serve as sinks. Starting from them, incoming DDG
  * edges are followed up to a depth of 20 and every visited node is collected. Only edges whose target node is
  * also part of the slice are kept; they are grouped by their source node.
  *
  * @param cpg
  *   the code property graph to slice.
  * @param sourceFile
  *   exact name of the file holding the sink statement.
  * @param sourceLine
  *   line number of the sink statement.
  * @return
  *   the collected nodes together with the edges between them.
  */
private def calculateSlice(cpg: Cpg, sourceFile: String, sourceLine: Int): Slice = {
  val sinkArguments = cpg.file.nameExact(sourceFile).ast.lineNumber(sourceLine).isCall.argument.l
  val reachable     = sinkArguments.repeat(_.ddgIn)(_.maxDepth(20).emit).dedup.l

  val outgoing = reachable.flatMap(_.outE)
  // Drop edges that leave the slice.
  val internal = outgoing.filter(edge => reachable.contains(edge.inNode()))
  val bySource = internal.groupBy(_.outNode().asInstanceOf[CfgNode])

  Slice(reachable, bySource)
}
private def storeSliceInNewCpg(outFile: File, programSlice: ProgramSlice): Unit = {

private def storeSliceInNewCpg(outFile: String, slice: Slice): Unit = {
val newCpg = Cpg.withStorage(outFile)
val graph = newCpg.graph
slice.nodes.foreach { node =>
val keyValueSequence = node.propertiesMap().asScala.toList.flatMap { case (k, v) => List[Any](k, v) }
graph.addNode(node.id(), node.label, keyValueSequence: _*)
}
slice.nodes.foreach { node =>
val outNode = graph.node(node.id())
slice.edges.get(node).toList.foreach { edges =>
edges.foreach { edge =>
val inNode = graph.node(edge.inNode().id())
outNode.addEdge(edge.label, inNode)
/** Copies the nodes and edges of all given slices into the graph of `cpg`.
  *
  * Slices may overlap, so a node is only added when no node with its id exists yet, and an edge is only added
  * when the source node has no outgoing edge with the same label to the same target node.
  */
def storeDataFlowSlices(cpg: Cpg, slices: Set[DataFlowSlice]): Unit = {
  val targetGraph = cpg.graph
  for (slice <- slices) {
    // First pass: create all nodes, so that both end points exist before any edge is added.
    for (node <- slice.nodes) {
      val properties = node.propertiesMap().asScala.toList.flatMap { case (key, value) => List[Any](key, value) }
      if (Option(targetGraph.node(node.id())).isEmpty) targetGraph.addNode(node.id(), node.label, properties: _*)
    }
    // Second pass: connect the copied nodes.
    for (node <- slice.nodes) {
      val outNode = targetGraph.node(node.id())
      for {
        edges <- slice.edges.get(node).toList
        edge  <- edges
      } {
        val inNode = targetGraph.node(edge.inNode().id())
        if (!outNode.out(edge.label()).exists(_.id().equals(inNode.id())))
          outNode.addEdge(edge.label, inNode)
      }
    }
  }
}
newCpg.close()

// The output format depends on the kind of slice; the output path is suffixed accordingly.
programSlice match {
  case ProgramDataFlowSlice(dataFlowSlices) =>
    // Data-flow slices are stored in a CPG at `<outFile>.cpg`, which is closed again by `Using.resource`.
    val cpgTarget = File(s"${outFile.pathAsString}.cpg").createFileIfNotExists()
    Using.resource(Cpg.withStorage(cpgTarget.pathAsString)) { sliceCpg =>
      storeDataFlowSlices(sliceCpg, dataFlowSlices.flatMap(_._2).toSet)
    }
  case usageSlice: ProgramUsageSlice =>
    // Usage slices are serialized as pretty-printed JSON at `<outFile>.json`.
    val jsonTarget = File(s"${outFile.pathAsString}.json").createFileIfNotExists()
    jsonTarget.write(usageSlice.asJson.spaces2)
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package io.joern.joerncli.slicing

import io.joern.dataflowengineoss.language._
import io.joern.joerncli.JoernSlice.Config
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.semanticcpg.language._
import overflowdb.traversal.Traversal

/** Calculates data-flow slices for the calls of a CPG. */
object DataFlowSlicing {

  /** Calculates one data-flow slice per call and groups the slices by the full name of the enclosing method.
    *
    * @param cpg
    *   the code property graph to slice.
    * @param config
    *   if `sourceFile` is set, only calls in the file with exactly that name are sliced, otherwise all calls of
    *   the CPG; `sliceDepth` bounds how far the DDG is traversed from each call.
    * @return
    *   the slices of all considered calls, keyed by method full name.
    */
  def calculateDataFlowSlice(cpg: Cpg, config: Config): ProgramDataFlowSlice = {
    val sliceMapping = (config.sourceFile match {
      case Some(fileName) => cpg.file.nameExact(fileName).ast.isCall
      case None           => cpg.call
    }).groupBy(_.method).map { case (m: Method, calls: Traversal[Call]) =>
      m.fullName -> calls.map(sliceForCall(_, config.sliceDepth)).toSet
    }
    ProgramDataFlowSlice(sliceMapping)
  }

  /** Builds the slice of a single call: its arguments are the sinks, from which incoming DDG edges are followed
    * up to `maxDepth`, collecting every visited node.
    */
  private def sliceForCall(call: Call, maxDepth: Int): DataFlowSlice = {
    val sinks      = call.argument.l
    val sliceNodes = sinks.repeat(_.ddgIn)(_.maxDepth(maxDepth).emit).dedup.l
    // Membership is tested once per candidate edge; a set of node ids avoids a linear scan of `sliceNodes` each
    // time. Node ids identify nodes within one graph, so this selects the same edges as comparing the nodes.
    val sliceNodeIds = sliceNodes.map(_.id()).toSet
    val sliceEdges = sliceNodes
      .flatMap(_.outE)
      .filter(edge => sliceNodeIds.contains(edge.inNode().id()))
      .groupBy(_.outNode().asInstanceOf[CfgNode])
    DataFlowSlice(sliceNodes, sliceEdges)
  }

}
Loading

0 comments on commit 659bc3d

Please sign in to comment.