From 449db5ba9690822b1dd94a1944188f7ee9ab932c Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Thu, 30 Jan 2025 16:23:49 +0200 Subject: [PATCH] [sarif] GH Scanning: Fingerprints & Array/Position Validation (#5273) GitHub code scanning has strict requirements on SARIF files that need to be enforced, such as minimum elements in an array, line number > 0, etc. Some validators emit warnings on this, but GH fails the pipeline. Additionally, GitHub makes use of fingerprinting to avoid duplication between versions. The SARIF conversion has been adapted to accommodate the above. --- .../semanticcpg/sarif/SarifSchema.scala | 55 +++++++++++++++++-- .../JoernScanResultToSarifConverter.scala | 13 +++-- .../semanticcpg/sarif/v2_1_0/Schema.scala | 3 +- .../semanticcpg/language/SarifTests.scala | 25 +++++---- 4 files changed, 73 insertions(+), 23 deletions(-) diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/SarifSchema.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/SarifSchema.scala index c9d1c2e21700..7ad7b1a54f2b 100644 --- a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/SarifSchema.scala +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/SarifSchema.scala @@ -148,6 +148,11 @@ object SarifSchema { * The portion of the artifact contents within the specified region. */ def snippet: Option[ArtifactContent] + + /** @return + * true if startLine is empty and larger than 0, as this is the main required property. + */ + def isEmpty: Boolean = startLine.forall(_ <= 0) } /** Metadata that describes a specific report produced by the tool, as part of the analysis it provides or its runtime @@ -219,6 +224,12 @@ object SarifSchema { * An array of 'codeFlow' objects relevant to the result. */ def codeFlows: List[CodeFlow] + + /** GitHub makes use of this property to track effectively the same finding across files between versions. + * @return + * A set of strings that contribute to the stable, unique identity of the result. + */ + def partialFingerprints: Map[String, String] } /** Describes a single run of an analysis tool, and contains the reported output of that run. @@ -368,6 +379,19 @@ object SarifSchema { } ) ), + new CustomSerializer[SarifSchema.PhysicalLocation](implicit format => + ( + { case _ => + ??? + }, + { case location: SarifSchema.PhysicalLocation => + val elementMap = Map.newBuilder[String, Any] + elementMap.addOne("artifactLocation" -> location.artifactLocation) + if !location.region.isEmpty then elementMap.addOne("region" -> Extraction.decompose(location.region)) + Extraction.decompose(elementMap.result()) + } + ) + ), new CustomSerializer[SarifSchema.Region](implicit format => ( { case _ => @@ -375,10 +399,10 @@ object SarifSchema { }, { case region: SarifSchema.Region => val elementMap = Map.newBuilder[String, Any] - region.startLine.foreach(x => elementMap.addOne("startLine" -> x)) - region.startColumn.foreach(x => elementMap.addOne("startColumn" -> x)) - region.endLine.foreach(x => elementMap.addOne("endLine" -> x)) - region.endColumn.foreach(x => elementMap.addOne("endColumn" -> x)) + region.startLine.filterNot(x => x <= 0).foreach(x => elementMap.addOne("startLine" -> x)) + region.startColumn.filterNot(x => x <= 0).foreach(x => elementMap.addOne("startColumn" -> x)) + region.endLine.filterNot(x => x <= 0).foreach(x => elementMap.addOne("endLine" -> x)) + region.endColumn.filterNot(x => x <= 0).foreach(x => elementMap.addOne("endColumn" -> x)) region.snippet.foreach(x => elementMap.addOne("snippet" -> x)) Extraction.decompose(elementMap.result()) } @@ -400,6 +424,29 @@ object SarifSchema { } ) ), + new CustomSerializer[SarifSchema.Result](implicit format => + ( + { case _ => + ??? + }, + { case result: SarifSchema.Result => + val elementMap = Map.newBuilder[String, Any] + elementMap.addOne("ruleId" -> result.ruleId) + elementMap.addOne("message" -> result.message) + elementMap.addOne("level" -> result.level) + // Locations & related locations have no minimum, but do not allow duplicates + elementMap.addOne("locations" -> result.locations.distinct) + elementMap.addOne("relatedLocations" -> result.relatedLocations.distinct) + // codeFlows may be empty, but thread flows may not have empty arrays + elementMap.addOne("codeFlows" -> result.codeFlows.filterNot(_.threadFlows.isEmpty)) + + if result.partialFingerprints.nonEmpty then + elementMap.addOne("partialFingerprints" -> result.partialFingerprints) + + Extraction.decompose(elementMap.result()) + } + ) + ), new CustomSerializer[ToolComponent](implicit format => ( { case _ => diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/JoernScanResultToSarifConverter.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/JoernScanResultToSarifConverter.scala index 46cebace8478..c82c3bc1a5bb 100644 --- a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/JoernScanResultToSarifConverter.scala +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/JoernScanResultToSarifConverter.scala @@ -35,11 +35,12 @@ class JoernScanResultToSarifConverter extends ScanResultToSarifConverter { } protected def evidenceToCodeFlow(finding: Finding): Schema.CodeFlow = { - Schema.CodeFlow(threadFlows = - Schema.ThreadFlow( - finding.evidence.map(node => Schema.ThreadFlowLocation(location = nodeToLocation(node))).l - ) :: Nil - ) + val locations = finding.evidence.map(node => Schema.ThreadFlowLocation(location = nodeToLocation(node))).l + if (locations.isEmpty) { + Schema.CodeFlow(threadFlows = Nil) + } else { + Schema.CodeFlow(threadFlows = Schema.ThreadFlow(locations) :: Nil) + } } protected def createMessage(text: String): Schema.Message = { @@ -88,7 +89,7 @@ class JoernScanResultToSarifConverter extends ScanResultToSarifConverter { startColumn = n.columnNumber, snippet = Option(Schema.ArtifactContent(n.code)) ) - case _ => null + case _ => Schema.Region(None, None, None) } } diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/Schema.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/Schema.scala index d5ee52087366..7f974dce37f9 100644 --- a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/Schema.scala +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/sarif/v2_1_0/Schema.scala @@ -52,7 +52,8 @@ object Schema { level: String, locations: List[Location], relatedLocations: List[Location], - codeFlows: List[CodeFlow] + codeFlows: List[CodeFlow], + partialFingerprints: Map[String, String] = Map.empty ) extends SarifSchema.Result final case class Run(tool: Tool, results: List[SarifSchema.Result], originalUriBaseIds: Map[String, ArtifactLocation]) diff --git a/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala b/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala index 475d40365710..6930f2e51da5 100644 --- a/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala +++ b/semanticcpg/src/test/scala/io/shiftleft/semanticcpg/language/SarifTests.scala @@ -93,11 +93,6 @@ class SarifTests extends AnyWordSpec with Matchers { | }, | "results":[ | { - | "ruleId":"f1", - | "message":{ - | "text":"Rule 1" - | }, - | "level":"error", | "locations":[ | { | "physicalLocation":{ @@ -130,6 +125,9 @@ class SarifTests extends AnyWordSpec with Matchers { | } | } | ], + | "message":{ + | "text":"Rule 1" + | }, | "codeFlows":[ | { | "threadFlows":[ @@ -155,7 +153,9 @@ class SarifTests extends AnyWordSpec with Matchers { | } | ] | } - | ] + | ], + | "ruleId":"f1", + | "level":"error" | } | ], | "originalUriBaseIds":{ @@ -166,6 +166,7 @@ class SarifTests extends AnyWordSpec with Matchers { | } | ] |} + | |""".stripMargin.trim } @@ -237,11 +238,6 @@ class SarifTests extends AnyWordSpec with Matchers { | }, | "results":[ | { - | "ruleId":"f1", - | "message":{ - | "text":"" - | }, - | "level":"warning", | "locations":[ | { | "physicalLocation":{ @@ -272,6 +268,9 @@ class SarifTests extends AnyWordSpec with Matchers { | } | } | ], + | "message":{ + | "text":"" + | }, | "codeFlows":[ | { | "threadFlows":[ @@ -296,7 +295,9 @@ class SarifTests extends AnyWordSpec with Matchers { | } | ] | } - | ] + | ], + | "ruleId":"f1", + | "level":"warning" | } | ], | "originalUriBaseIds":{