-
Notifications
You must be signed in to change notification settings - Fork 302
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Refactored `joern-slice` to support multiple "modes". - The old behaviour is now `DataFlow` mode, which produces slices for all calls in a given file. - The new mode is `Usages` mode, which extracts the calls in which the identifiers of a declaration appear as the caller or as an argument. - Added circe for JSON serialization of usage slices.
- Loading branch information
1 parent
e97d59e
commit 659bc3d
Showing
5 changed files
with
481 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
130 changes: 82 additions & 48 deletions
130
joern-cli/src/main/scala/io/joern/joerncli/JoernSlice.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,80 +1,114 @@ | ||
package io.joern.joerncli | ||
|
||
import scala.util.Using | ||
import better.files.File | ||
import io.circe.generic.auto._ | ||
import io.circe.syntax.EncoderOps | ||
import io.joern.joerncli.slicing._ | ||
import io.shiftleft.codepropertygraph.Cpg | ||
import io.shiftleft.semanticcpg.language._ | ||
|
||
import scala.jdk.CollectionConverters._ | ||
import io.joern.dataflowengineoss.language._ | ||
import io.joern.joerncli.console.Joern.semantics | ||
import io.shiftleft.codepropertygraph.Cpg | ||
import io.shiftleft.codepropertygraph.generated.nodes.CfgNode | ||
import overflowdb.Edge | ||
import scala.jdk.CollectionConverters.MapHasAsScala | ||
import scala.language.postfixOps | ||
import scala.util.Using | ||
|
||
/** Enumerates the slicing strategies that `joern-slice` can run. | ||
  * | ||
  * The value names are the strings `SliceMode.withName` resolves, i.e. `DataFlow` and `Usages`. | ||
  */ | ||
object SliceMode extends Enumeration { | ||
  type SliceModes = Value | ||
  val DataFlow: Value = Value | ||
  val Usages: Value   = Value | ||
} | ||
|
||
object JoernSlice { | ||
|
||
import io.joern.joerncli.SliceMode._ | ||
|
||
implicit val sliceModeRead: scopt.Read[SliceModes] = | ||
scopt.Read.reads(SliceMode withName) | ||
|
||
case class Config( | ||
cpgFileName: String = "cpg.bin", | ||
outFile: String = "slice.bin", | ||
sourceFile: String = "", | ||
sourceLine: Int = -1 | ||
cpgFileName: File = File("cpg.bin"), | ||
outFile: File = File("slices"), | ||
sliceMode: SliceModes = DataFlow, | ||
sourceFile: Option[String] = None, | ||
sliceDepth: Int = 20, | ||
minNumCalls: Int = 1 | ||
) | ||
|
||
case class Slice(nodes: List[CfgNode], edges: Map[CfgNode, List[Edge]]) | ||
|
||
def main(args: Array[String]): Unit = { | ||
parseConfig(args).foreach { config => | ||
Using.resource(CpgBasedTool.loadFromOdb(config.cpgFileName)) { cpg => | ||
val slice = calculateSlice(cpg, config.sourceFile, config.sourceLine) | ||
Using.resource(CpgBasedTool.loadFromOdb(config.cpgFileName.pathAsString)) { cpg => | ||
val slice: ProgramSlice = config.sliceMode match { | ||
case DataFlow => DataFlowSlicing.calculateDataFlowSlice(cpg, config) | ||
case Usages => UsageSlicing.calculateUsageSlice(cpg, config) | ||
} | ||
storeSliceInNewCpg(config.outFile, slice) | ||
} | ||
} | ||
} | ||
|
||
private def parseConfig(args: Array[String]): Option[Config] = | ||
new scopt.OptionParser[Config]("joern-slice") { | ||
head("Extract intra-procedural backward slice for a line of code") | ||
head("Extract intra-procedural slices from the CPG.") | ||
help("help") | ||
arg[String]("sourcefile") | ||
.text("The file holding the sink statement") | ||
.action((x, c) => c.copy(sourceFile = x)) | ||
arg[String]("line") | ||
.text("Line number of sink statement") | ||
.action((x, c) => c.copy(sourceLine = x.toInt)) | ||
arg[String]("cpg") | ||
.text("input CPG file name - defaults to `cpg.bin`") | ||
.optional() | ||
.action((x, c) => c.copy(cpgFileName = x)) | ||
// `failure` only builds a result value; called inside `action` it was discarded, so a bad path was | ||
// never rejected. `validate` is where scopt consumes that result and reports the message. | ||
.validate { x => | ||
  if (File(x).isRegularFile) success | ||
  else failure(s"File at '$x' not found or not regular, e.g. a directory.") | ||
} | ||
.action((x, c) => c.copy(cpgFileName = File(x))) | ||
opt[String]('o', "out") | ||
.text("output CPG file") | ||
.action((x, c) => c.copy(outFile = x)) | ||
.text("the output file to write slices to - defaults to `slices`. The file is suffixed based on the mode.") | ||
.action((x, c) => c.copy(outFile = File(x))) | ||
opt[SliceModes]('m', "mode") | ||
.text(s"the kind of slicing to perform - defaults to `DataFlow`. Options: [${SliceMode.values.mkString(", ")}]") | ||
.action((x, c) => c.copy(sliceMode = x)) | ||
opt[String]("source-file") | ||
.text("the name of the source file to generate slices from.") | ||
.optional() | ||
.action((x, c) => c.copy(sourceFile = Some(x))) | ||
opt[Int]("slice-depth") | ||
  .text(s"the max depth to traverse the DDG for the data-flow slice (for `DataFlow` mode) - defaults to 20.") | ||
  // Write to `sliceDepth`; this previously assigned `minNumCalls`, so the flag had no effect on the depth. | ||
  .action((x, c) => c.copy(sliceDepth = x)) | ||
opt[Int]("min-num-calls") | ||
.text(s"the minimum number of calls required for a usage slice (for `Usage` mode) - defaults to 1.") | ||
.action((x, c) => c.copy(minNumCalls = x)) | ||
|
||
}.parse(args, Config()) | ||
|
||
private def calculateSlice(cpg: Cpg, sourceFile: String, sourceLine: Int): Slice = { | ||
val sinks = cpg.file.nameExact(sourceFile).ast.lineNumber(sourceLine).isCall.argument.l | ||
val sliceNodes = sinks.repeat(_.ddgIn)(_.maxDepth(20).emit).dedup.l | ||
val sliceEdges = sliceNodes | ||
.flatMap(_.outE) | ||
.filter(x => sliceNodes.contains(x.inNode())) | ||
.groupBy(_.outNode().asInstanceOf[CfgNode]) | ||
Slice(sliceNodes, sliceEdges) | ||
} | ||
private def storeSliceInNewCpg(outFile: File, programSlice: ProgramSlice): Unit = { | ||
|
||
private def storeSliceInNewCpg(outFile: String, slice: Slice): Unit = { | ||
val newCpg = Cpg.withStorage(outFile) | ||
val graph = newCpg.graph | ||
slice.nodes.foreach { node => | ||
val keyValueSequence = node.propertiesMap().asScala.toList.flatMap { case (k, v) => List[Any](k, v) } | ||
graph.addNode(node.id(), node.label, keyValueSequence: _*) | ||
} | ||
slice.nodes.foreach { node => | ||
val outNode = graph.node(node.id()) | ||
slice.edges.get(node).toList.foreach { edges => | ||
edges.foreach { edge => | ||
val inNode = graph.node(edge.inNode().id()) | ||
outNode.addEdge(edge.label, inNode) | ||
def storeDataFlowSlices(cpg: Cpg, slices: Set[DataFlowSlice]): Unit = { | ||
val graph = cpg.graph | ||
slices.foreach { slice => | ||
slice.nodes.foreach { node => | ||
val keyValueSequence = node.propertiesMap().asScala.toList.flatMap { case (k, v) => List[Any](k, v) } | ||
if (Option(graph.node(node.id())).isEmpty) graph.addNode(node.id(), node.label, keyValueSequence: _*) | ||
} | ||
slice.nodes.foreach { node => | ||
val outNode = graph.node(node.id()) | ||
slice.edges.get(node).toList.foreach { edges => | ||
edges.foreach { edge => | ||
val inNode = graph.node(edge.inNode().id()) | ||
if (!outNode.out(edge.label()).exists(_.id().equals(inNode.id()))) | ||
outNode.addEdge(edge.label, inNode) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
newCpg.close() | ||
|
||
programSlice match { | ||
case ProgramDataFlowSlice(dataFlowSlices) => | ||
val sliceCpg = File(outFile.pathAsString + ".cpg").createFileIfNotExists() | ||
Using.resource(Cpg.withStorage(sliceCpg.pathAsString)) { newCpg => | ||
storeDataFlowSlices(newCpg, dataFlowSlices.flatMap(_._2).toSet) | ||
} | ||
case programUsageSlice: ProgramUsageSlice => | ||
val sliceCpg = File(outFile.pathAsString + ".json").createFileIfNotExists() | ||
sliceCpg.write(programUsageSlice.asJson.spaces2) | ||
} | ||
} | ||
|
||
} |
31 changes: 31 additions & 0 deletions
31
joern-cli/src/main/scala/io/joern/joerncli/slicing/DataFlowSlicing.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package io.joern.joerncli.slicing | ||
|
||
import io.joern.dataflowengineoss.language._ | ||
import io.joern.joerncli.JoernSlice.Config | ||
import io.shiftleft.codepropertygraph.Cpg | ||
import io.shiftleft.codepropertygraph.generated.nodes._ | ||
import io.shiftleft.semanticcpg.language._ | ||
import overflowdb.traversal.Traversal | ||
|
||
object DataFlowSlicing { | ||
|
||
/** Computes one data-flow slice per call and groups the slices by the full name of the call's method. | ||
  * | ||
  * Calls are taken from `config.sourceFile` when it is set, otherwise from the whole CPG. For each call the | ||
  * slice nodes are collected by repeatedly following `ddgIn` from the call's arguments, bounded by | ||
  * `maxDepth(config.sliceDepth)`. The slice edges are the outgoing edges of those nodes whose target is | ||
  * also a slice node, grouped by their source node. | ||
  * | ||
  * @param cpg | ||
  *   the code property graph to slice. | ||
  * @param config | ||
  *   supplies the optional source file filter and the maximum traversal depth. | ||
  * @return | ||
  *   the computed slices keyed by method full name. | ||
  */ | ||
def calculateDataFlowSlice(cpg: Cpg, config: Config): ProgramDataFlowSlice = { | ||
  val sliceMapping = (config.sourceFile match { | ||
    case Some(fileName) => cpg.file.nameExact(fileName).ast.isCall | ||
    case None           => cpg.call | ||
  }).groupBy(_.method).map { case (m: Method, calls: Traversal[Call]) => | ||
    m.fullName -> calls.map { c => | ||
      val sinks      = c.argument.l | ||
      val sliceNodes = sinks.repeat(_.ddgIn)(_.maxDepth(config.sliceDepth).emit).dedup.l | ||
      // Build the id set once so each edge is checked with a set lookup instead of a scan of the node list. | ||
      val sliceNodeIds = sliceNodes.map(_.id()).toSet | ||
      val sliceEdges = sliceNodes | ||
        .flatMap(_.outE) | ||
        .filter(e => sliceNodeIds.contains(e.inNode().id())) | ||
        .groupBy(_.outNode().asInstanceOf[CfgNode]) | ||
      DataFlowSlice(sliceNodes, sliceEdges) | ||
    }.toSet | ||
  } | ||
  ProgramDataFlowSlice(sliceMapping) | ||
} | ||
|
||
} |
Oops, something went wrong.