diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/SarifSchema.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/SarifSchema.scala index c9d1c2e21700..7ad7b1a54f2b 100644 --- a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/SarifSchema.scala +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/SarifSchema.scala @@ -148,6 +148,11 @@ object SarifSchema { * The portion of the artifact contents within the specified region. */ def snippet: Option[ArtifactContent] + + /** @return + * true if startLine is empty and larger than 0, as this is the main required property. + */ + def isEmpty: Boolean = startLine.forall(_ <= 0) } /** Metadata that describes a specific report produced by the tool, as part of the analysis it provides or its runtime @@ -219,6 +224,12 @@ object SarifSchema { * An array of 'codeFlow' objects relevant to the result. */ def codeFlows: List[CodeFlow] + + /** GitHub makes use of this property to track effectively the same finding across files between versions. + * @return + * A set of strings that contribute to the stable, unique identity of the result. + */ + def partialFingerprints: Map[String, String] } /** Describes a single run of an analysis tool, and contains the reported output of that run. @@ -368,6 +379,19 @@ object SarifSchema { } ) ), + new CustomSerializer[SarifSchema.PhysicalLocation](implicit format => + ( + { case _ => + ??? + }, + { case location: SarifSchema.PhysicalLocation => + val elementMap = Map.newBuilder[String, Any] + elementMap.addOne("artifactLocation" -> location.artifactLocation) + if !location.region.isEmpty then elementMap.addOne("region" -> Extraction.decompose(location.region)) + Extraction.decompose(elementMap.result()) + } + ) + ), new CustomSerializer[SarifSchema.Region](implicit format => ( { case _ => @@ -375,10 +399,10 @@ object SarifSchema { }, { case region: SarifSchema.Region => val elementMap = Map.newBuilder[String, Any] - region.startLine.foreach(x => elementMap.addOne("startLine" -> x)) - region.startColumn.foreach(x => elementMap.addOne("startColumn" -> x)) - region.endLine.foreach(x => elementMap.addOne("endLine" -> x)) - region.endColumn.foreach(x => elementMap.addOne("endColumn" -> x)) + region.startLine.filterNot(x => x <= 0).foreach(x => elementMap.addOne("startLine" -> x)) + region.startColumn.filterNot(x => x <= 0).foreach(x => elementMap.addOne("startColumn" -> x)) + region.endLine.filterNot(x => x <= 0).foreach(x => elementMap.addOne("endLine" -> x)) + region.endColumn.filterNot(x => x <= 0).foreach(x => elementMap.addOne("endColumn" -> x)) region.snippet.foreach(x => elementMap.addOne("snippet" -> x)) Extraction.decompose(elementMap.result()) } @@ -400,6 +424,29 @@ object SarifSchema { } ) ), + new CustomSerializer[SarifSchema.Result](implicit format => + ( + { case _ => + ??? + }, + { case result: SarifSchema.Result => + val elementMap = Map.newBuilder[String, Any] + elementMap.addOne("ruleId" -> result.ruleId) + elementMap.addOne("message" -> result.message) + elementMap.addOne("level" -> result.level) + // Locations & related locations have no minimum, but do not allow duplicates + elementMap.addOne("locations" -> result.locations.distinct) + elementMap.addOne("relatedLocations" -> result.relatedLocations.distinct) + // codeFlows may be empty, but thread flows may not have empty arrays + elementMap.addOne("codeFlows" -> result.codeFlows.filterNot(_.threadFlows.isEmpty)) + + if result.partialFingerprints.nonEmpty then + elementMap.addOne("partialFingerprints" -> result.partialFingerprints) + + Extraction.decompose(elementMap.result()) + } + ) + ), new CustomSerializer[ToolComponent](implicit format => ( { case _ => diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/JoernScanResultToSarifConverter.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/JoernScanResultToSarifConverter.scala index 46cebace8478..c82c3bc1a5bb 100644 --- a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/JoernScanResultToSarifConverter.scala +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/JoernScanResultToSarifConverter.scala @@ -35,11 +35,12 @@ class JoernScanResultToSarifConverter extends ScanResultToSarifConverter { } protected def evidenceToCodeFlow(finding: Finding): Schema.CodeFlow = { - Schema.CodeFlow(threadFlows = - Schema.ThreadFlow( - finding.evidence.map(node => Schema.ThreadFlowLocation(location = nodeToLocation(node))).l - ) :: Nil - ) + val locations = finding.evidence.map(node => Schema.ThreadFlowLocation(location = nodeToLocation(node))).l + if (locations.isEmpty) { + Schema.CodeFlow(threadFlows = Nil) + } else { + Schema.CodeFlow(threadFlows = Schema.ThreadFlow(locations) :: Nil) + } } protected def createMessage(text: String): Schema.Message = { @@ -88,7 +89,7 @@ class JoernScanResultToSarifConverter extends ScanResultToSarifConverter { startColumn = n.columnNumber, snippet = Option(Schema.ArtifactContent(n.code)) ) - case _ => null + case _ => Schema.Region(None, None, None) } } diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/Schema.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/Schema.scala index d5ee52087366..7f974dce37f9 100644 --- a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/Schema.scala +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/Schema.scala @@ -52,7 +52,8 @@ object Schema { level: String, locations: List[Location], relatedLocations: List[Location], - codeFlows: List[CodeFlow] + codeFlows: List[CodeFlow], + partialFingerprints: Map[String, String] = Map.empty ) extends SarifSchema.Result final case class Run(tool: Tool, results: List[SarifSchema.Result], originalUriBaseIds: Map[String, ArtifactLocation]) diff --git a/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala b/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala index 475d40365710..6930f2e51da5 100644 --- a/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala +++ b/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala @@ -93,11 +93,6 @@ class SarifTests extends AnyWordSpec with Matchers { | }, | "results":[ | { - | "ruleId":"f1", - | "message":{ - | "text":"Rule 1" - | }, - | "level":"error", | "locations":[ | { | "physicalLocation":{ @@ -130,6 +125,9 @@ class SarifTests extends AnyWordSpec with Matchers { | } | } | ], + | "message":{ + | "text":"Rule 1" + | }, | "codeFlows":[ | { | "threadFlows":[ @@ -155,7 +153,9 @@ class SarifTests extends AnyWordSpec with Matchers { | } | ] | } - | ] + | ], + | "ruleId":"f1", + | "level":"error" | } | ], | "originalUriBaseIds":{ @@ -166,6 +166,7 @@ class SarifTests extends AnyWordSpec with Matchers { | } | ] |} + | |""".stripMargin.trim } @@ -237,11 +238,6 @@ class SarifTests extends AnyWordSpec with Matchers { | }, | "results":[ | { - | "ruleId":"f1", - | "message":{ - | "text":"" - | }, - | "level":"warning", | "locations":[ | { | "physicalLocation":{ @@ -272,6 +268,9 @@ class SarifTests extends AnyWordSpec with Matchers { | } | } | ], + | "message":{ + | "text":"" + | }, | "codeFlows":[ | { | "threadFlows":[ @@ -296,7 +295,9 @@ class SarifTests extends AnyWordSpec with Matchers { | } | ] | } - | ] + | ], + | "ruleId":"f1", + | "level":"warning" | } | ], | "originalUriBaseIds":{