Skip to content

Commit e791f15

Browse files
authored
Merge pull request #246 from wri/gtc-2824a
GTC-2824 Use raster GADM layers for pro dashboard for small # of features
2 parents fc99e94 + d4d46f3 commit e791f15

File tree

9 files changed

+185
-90
lines changed

9 files changed

+185
-90
lines changed

src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardAnalysis.scala

Lines changed: 74 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,32 +23,72 @@ object GfwProDashboardAnalysis extends SummaryAnalysis {
2323

2424
val name = "gfwpro_dashboard"
2525

26+
/** Run the GFWPro dashboard analysis.
27+
*
28+
* If doGadmIntersect is true, read in the entire gadm feature dataset and
29+
* intersect with the user feature list to determine the relevant gadm areas. If
30+
* doGadmIntersect is false (usually for a small number of user features), then
31+
* determine the relevant gadm areas by using the raster gadm datasets GadmAdm0,
32+
* GadmAdm1, and GadmAdm2.
33+
*/
2634
def apply(
2735
featureRDD: RDD[ValidatedLocation[Geometry]],
2836
featureType: String,
29-
contextualFeatureType: String,
30-
contextualFeatureUrl: NonEmptyList[String],
37+
doGadmIntersect: Boolean,
38+
gadmFeatureUrl: NonEmptyList[String],
3139
fireAlertRDD: SpatialRDD[Geometry],
3240
spark: SparkSession,
3341
kwargs: Map[String, Any]
3442
): ValidatedWorkflow[Location[JobError],(FeatureId, GfwProDashboardData)] = {
3543
featureRDD.persist(StorageLevel.MEMORY_AND_DISK)
3644

3745
val summaryRDD = ValidatedWorkflow(featureRDD).flatMap { rdd =>
38-
val spatialContextualDF = SpatialFeatureDF(contextualFeatureUrl, contextualFeatureType, FeatureFilter.empty, "geom", spark)
39-
val spatialContextualRDD = Adapter.toSpatialRdd(spatialContextualDF, "polyshape")
40-
val spatialFeatureRDD = RDDAdapter.toSpatialRDDfromLocationRdd(rdd, spark)
46+
val enrichedRDD = if (doGadmIntersect) {
47+
println("Doing intersect with vector gadm")
48+
val spatialContextualDF = SpatialFeatureDF(gadmFeatureUrl, "gadm", FeatureFilter.empty, "geom", spark)
49+
val spatialContextualRDD = Adapter.toSpatialRdd(spatialContextualDF, "polyshape")
50+
val spatialFeatureRDD = RDDAdapter.toSpatialRDDfromLocationRdd(rdd, spark)
4151

42-
/* Enrich the feature RDD by intersecting it with contextual features
43-
* The resulting FeatureId carries combined identity of source feature and contextual geometry
44-
*/
45-
val enrichedRDD =
52+
/* Enrich the feature RDD by intersecting it with contextual features
53+
* The resulting FeatureId carries combined identity of source feature and contextual geometry
54+
*/
4655
SpatialJoinRDD
4756
.flatSpatialJoin(spatialContextualRDD, spatialFeatureRDD, considerBoundaryIntersection = true, usingIndex = true)
4857
.rdd
4958
.flatMap { case (feature, context) =>
50-
refineContextualIntersection(feature, context, contextualFeatureType)
59+
refineContextualIntersection(feature, context, "gadm")
60+
}
61+
} else {
62+
println("Using raster gadm")
63+
rdd.map {
64+
case Location(CombinedFeatureId(id@GfwProFeatureId(listId, locationId), featureCentroid: PointFeatureId), geom) => {
65+
if (locationId != -1) {
66+
// For a non-dissolved location, determine the GadmFeatureId for the
67+
// centroid of the location's geometry, and add that to the feature id.
68+
// This can be expensive, since the tile reads are not cached. So, we
69+
// only use this raster GADM approach for user inputs with a small
70+
// number of locations (e.g. <50). In that case, we get significant
71+
// performance improvement by not having to read in the entire vector
72+
// GADM file, but instead only reading the GADM raster tiles for the
73+
// relevant areas.
74+
val pt = featureCentroid.pt
75+
val windowLayout = GfwProDashboardGrid.blockTileGrid
76+
val key = windowLayout.mapTransform.keysForGeometry(pt).toList.head
77+
val rasterSource = GfwProDashboardRDD.getSources(key, windowLayout, kwargs).getOrElse(null)
78+
val raster = rasterSource.readWindow(key, windowLayout).getOrElse(null)
79+
val re = raster.rasterExtent
80+
val col = re.mapXToGrid(pt.getX())
81+
val row = re.mapYToGrid(pt.getY())
82+
Validated.valid[Location[JobError], Location[Geometry]](Location(CombinedFeatureId(id, GadmFeatureId(raster.tile.gadm0.getData(col, row),
83+
raster.tile.gadm1.getData(col, row),
84+
raster.tile.gadm2.getData(col, row))), geom))
85+
} else {
86+
// For a dissolved location, add a dummy GadmFeatureId to the feature id.
87+
Validated.valid[Location[JobError], Location[Geometry]](Location(CombinedFeatureId(id, GadmFeatureId("X", 0, 0)), geom))
88+
}
5189
}
90+
}
91+
}
5292

5393
ValidatedWorkflow(enrichedRDD)
5494
.mapValidToValidated { rdd =>
@@ -64,11 +104,32 @@ object GfwProDashboardAnalysis extends SummaryAnalysis {
64104
.flatMap { enrichedRDD =>
65105
val fireStatsRDD = fireStats(enrichedRDD, fireAlertRDD, spark)
66106
val tmp = enrichedRDD.map { case Location(id, geom) => Feature(geom, id) }
67-
val validatedSummaryStatsRdd = GfwProDashboardRDD(tmp, GfwProDashboardGrid.blockTileGrid, kwargs)
107+
// This is where the main analysis happens, including calling
108+
// GfwProDashboardSummary.getGridVisitor.visit on each pixel.
109+
val validatedSummaryStatsRdd = GfwProDashboardRDD(tmp,
110+
GfwProDashboardGrid.blockTileGrid,
111+
kwargs + ("getRasterGadm" -> !doGadmIntersect))
68112
ValidatedWorkflow(validatedSummaryStatsRdd).mapValid { summaryStatsRDD =>
69-
// fold in fireStatsRDD after polygonal summary and accumulate the errors
70113
summaryStatsRDD
71-
.mapValues(_.toGfwProDashboardData())
114+
.flatMap { case (CombinedFeatureId(fid@GfwProFeatureId(listId, locationId), gadmId), summary) =>
115+
// For non-dissolved locations or vector gadm intersection, merge all
116+
// summaries (ignoring any differing group_gadm_id), and move the
117+
// gadmId from the featureId into the group_gadm_id. For dissolved
118+
// locations for raster gadm, merge summaries into multiple rows
119+
// based on the per-pixel group_gadm_id.
120+
val ignoreRasterGadm = locationId != -1 || doGadmIntersect
121+
summary.toGfwProDashboardData(ignoreRasterGadm).map( x => {
122+
val newx = if (ignoreRasterGadm) {
123+
x.copy(group_gadm_id = gadmId.toString)
124+
} else {
125+
x
126+
}
127+
Location(fid, newx)
128+
}
129+
)
130+
case _ => throw new NotImplementedError("Missing case")
131+
}
132+
// fold in fireStatsRDD after polygonal summary and accumulate the errors
72133
.leftOuterJoin(fireStatsRDD)
73134
.mapValues { case (data, fire) =>
74135
data.copy(viirs_alerts_daily = fire.getOrElse(GfwProDashboardDataDateCount.empty))

src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardCommand.scala

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,17 @@ import org.locationtech.jts.geom.Geometry
1212

1313
object GfwProDashboardCommand extends SummaryCommand {
1414

15-
val contextualFeatureUrlOpt: Opts[NonEmptyList[String]] = Opts
15+
val gadmFeatureUrl: Opts[NonEmptyList[String]] = Opts
1616
.options[String](
17-
"contextual_feature_url",
18-
help = "URI of contextual features in TSV format"
17+
"gadm_feature_url",
18+
help = "URI of GADM features in TSV format"
1919
)
2020

21-
val contextualFeatureTypeOpt: Opts[String] = Opts
22-
.option[String](
23-
"contextual_feature_type",
24-
help = "Type of contextual features"
25-
)
21+
val gadmIntersectThreshold: Opts[Int] = Opts
22+
.option[Int](
23+
"gadm_intersect_threshold",
24+
help = "Number of input features at which to intersect GADM"
25+
).withDefault(50)
2626

2727
val gfwProDashboardCommand: Opts[Unit] = Opts.subcommand(
2828
name = GfwProDashboardAnalysis.name,
@@ -32,10 +32,10 @@ object GfwProDashboardCommand extends SummaryCommand {
3232
defaultOptions,
3333
optionalFireAlertOptions,
3434
featureFilterOptions,
35-
contextualFeatureUrlOpt,
36-
contextualFeatureTypeOpt,
35+
gadmFeatureUrl,
36+
gadmIntersectThreshold,
3737
pinnedVersionsOpts
38-
).mapN { (default, fireAlert, filterOptions, contextualFeatureUrl, contextualFeatureType, pinned) =>
38+
).mapN { (default, fireAlert, filterOptions, gadmFeatureUrl, gadmIntersectThreshold, pinned) =>
3939
val kwargs = Map(
4040
"outputUrl" -> default.outputUrl,
4141
"noOutputPathSuffix" -> default.noOutputPathSuffix,
@@ -58,11 +58,19 @@ object GfwProDashboardCommand extends SummaryCommand {
5858
spatialRDD
5959
}
6060

61+
val featureCount = featureRDD.count()
62+
val doGadmIntersect = featureCount > gadmIntersectThreshold
63+
if (doGadmIntersect) {
64+
println(s"Intersecting vector gadm for feature count $featureCount")
65+
} else {
66+
println(s"Using raster gadm for feature count $featureCount")
67+
}
68+
6169
val dashRDD = GfwProDashboardAnalysis(
6270
featureRDD,
6371
default.featureType,
64-
contextualFeatureType = contextualFeatureType,
65-
contextualFeatureUrl = contextualFeatureUrl,
72+
doGadmIntersect,
73+
gadmFeatureUrl,
6674
fireAlertRDD,
6775
spark,
6876
kwargs

src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardDF.scala

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,23 @@ package org.globalforestwatch.summarystats.gfwpro_dashboard
22

33
import org.apache.spark.rdd.RDD
44
import org.apache.spark.sql.{DataFrame, SparkSession}
5-
import org.globalforestwatch.features.{CombinedFeatureId, FeatureId, GadmFeatureId, GfwProFeatureId}
5+
import org.globalforestwatch.features.{FeatureId, GfwProFeatureId}
66
import org.globalforestwatch.summarystats._
77
import cats.data.Validated.{Valid, Invalid}
8+
import org.apache.spark.sql.functions.expr
9+
import org.globalforestwatch.summarystats.SummaryDF.RowId
810

911
object GfwProDashboardDF extends SummaryDF {
10-
case class RowGadmId(list_id: String, location_id: String, gadm_id: String)
1112

1213
def getFeatureDataFrameFromVerifiedRdd(
1314
dataRDD: RDD[ValidatedLocation[GfwProDashboardData]],
1415
spark: SparkSession
1516
): DataFrame = {
1617
import spark.implicits._
1718

18-
val rowId: FeatureId => RowGadmId = {
19-
case CombinedFeatureId(proId: GfwProFeatureId, gadmId: GadmFeatureId) =>
20-
RowGadmId(proId.listId, proId.locationId.toString, gadmId.toString())
19+
val rowId: FeatureId => RowId = {
2120
case proId: GfwProFeatureId =>
22-
RowGadmId(proId.listId, proId.locationId.toString, "none")
21+
RowId(proId.listId, proId.locationId.toString)
2322
case _ =>
2423
throw new IllegalArgumentException("Not a CombinedFeatureId[GfwProFeatureId, GadmFeatureId]")
2524
}
@@ -30,28 +29,8 @@ object GfwProDashboardDF extends SummaryDF {
3029
(rowId(id), SummaryDF.RowError.fromJobError(err), GfwProDashboardData.empty)
3130
}
3231
.toDF("id", "error", "data")
33-
.select($"id.*", $"error.*", $"data.*")
34-
}
35-
36-
def getFeatureDataFrame(
37-
dataRDD: RDD[(FeatureId, ValidatedRow[GfwProDashboardData])],
38-
spark: SparkSession
39-
): DataFrame = {
40-
import spark.implicits._
41-
42-
dataRDD.mapValues {
43-
case Valid(data) =>
44-
(SummaryDF.RowError.empty, data)
45-
case Invalid(err) =>
46-
(SummaryDF.RowError.fromJobError(err), GfwProDashboardData.empty)
47-
}.map {
48-
case (CombinedFeatureId(proId: GfwProFeatureId, gadmId: GadmFeatureId), (error, data)) =>
49-
val rowId = RowGadmId(proId.listId, proId.locationId.toString, gadmId.toString())
50-
(rowId, error, data)
51-
case _ =>
52-
throw new IllegalArgumentException("Not a CombinedFeatureId[GfwProFeatureId, GadmFeatureId]")
53-
}
54-
.toDF("id", "error", "data")
55-
.select($"id.*", $"error.*", $"data.*")
32+
// Put data.group_gadm_id right after list/location and rename to gadm_id
33+
.select($"id.*", expr("data.group_gadm_id as gadm_id"), $"error.*", $"data.*")
34+
.drop($"group_gadm_id")
5635
}
5736
}

src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardData.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
99
* Note: This case class contains mutable values
1010
*/
1111
case class GfwProDashboardData(
12+
// Relevant for dissolved locations (locationId == -1)
13+
group_gadm_id: String,
14+
1215
/* NOTE: We are temporarily leaving the existing integrated alerts fields named as
1316
* glad_alerts_*, in order to reduce the number of moving pieces as we move from
1417
* Glad alerts to integrated alerts in GFWPro. */
@@ -48,6 +51,7 @@ case class GfwProDashboardData(
4851

4952
def merge(other: GfwProDashboardData): GfwProDashboardData = {
5053
GfwProDashboardData(
54+
if (group_gadm_id != "") group_gadm_id else other.group_gadm_id,
5155
glad_alerts_coverage || other.glad_alerts_coverage,
5256
integrated_alerts_coverage || other.integrated_alerts_coverage,
5357
total_ha.merge(other.total_ha),
@@ -73,6 +77,7 @@ object GfwProDashboardData {
7377

7478
def empty: GfwProDashboardData =
7579
GfwProDashboardData(
80+
group_gadm_id = "",
7681
glad_alerts_coverage = false,
7782
integrated_alerts_coverage = false,
7883
total_ha = ForestChangeDiagnosticDataDouble.empty,

src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardGridSources.scala

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.globalforestwatch.summarystats.gfwpro_dashboard
22

3-
import cats.implicits._
43
import geotrellis.layer.{LayoutDefinition, SpatialKey}
54
import geotrellis.raster.Raster
65
import org.globalforestwatch.grids.{GridSources, GridTile}
@@ -14,30 +13,19 @@ case class GfwProDashboardGridSources(gridTile: GridTile, kwargs: Map[String, An
1413
val treeCoverDensity2000 = TreeCoverDensityPercent2000(gridTile, kwargs)
1514
val sbtnNaturalForest: SBTNNaturalForests = SBTNNaturalForests(gridTile, kwargs)
1615
val jrcForestCover: JRCForestCover = JRCForestCover(gridTile, kwargs)
16+
val gadmAdm0: GadmAdm0 = GadmAdm0(gridTile, kwargs)
17+
val gadmAdm1: GadmAdm1 = GadmAdm1(gridTile, kwargs)
18+
val gadmAdm2: GadmAdm2 = GadmAdm2(gridTile, kwargs)
1719

1820
def readWindow(
1921
windowKey: SpatialKey,
2022
windowLayout: LayoutDefinition
2123
): Either[Throwable, Raster[GfwProDashboardTile]] = {
2224

23-
for {
24-
// Integrated alerts are Optional Tiles, but we keep it this way to avoid signature changes
25-
integratedAlertsTile <- Either
26-
.catchNonFatal(integratedAlerts.fetchWindow(windowKey, windowLayout))
27-
.right
28-
tcd2000Tile <- Either
29-
.catchNonFatal(treeCoverDensity2000.fetchWindow(windowKey, windowLayout))
30-
.right
31-
sbtnNaturalForestTile <- Either
32-
.catchNonFatal(sbtnNaturalForest.fetchWindow(windowKey, windowLayout))
33-
.right
34-
jrcForestCoverTile <- Either
35-
.catchNonFatal(jrcForestCover.fetchWindow(windowKey, windowLayout))
36-
.right
37-
} yield {
38-
val tile = GfwProDashboardTile(integratedAlertsTile, tcd2000Tile, sbtnNaturalForestTile, jrcForestCoverTile)
39-
Raster(tile, windowKey.extent(windowLayout))
40-
}
25+
val tile = GfwProDashboardTile(
26+
windowKey, windowLayout, this
27+
)
28+
Right(Raster(tile, windowKey.extent(windowLayout)))
4129
}
4230
}
4331

src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardRawDataGroup.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import java.time.LocalDate
66

77

88
case class GfwProDashboardRawDataGroup(
9+
groupGadmId: String,
910
alertDateAndConf: Option[(LocalDate, Int)],
1011
integratedAlertsCoverage: Boolean,
1112
isNaturalForest: Boolean,
@@ -20,6 +21,7 @@ case class GfwProDashboardRawDataGroup(
2021
}
2122

2223
GfwProDashboardData(
24+
group_gadm_id = groupGadmId,
2325
glad_alerts_coverage = integratedAlertsCoverage,
2426
integrated_alerts_coverage = integratedAlertsCoverage,
2527
glad_alerts_daily = GfwProDashboardDataDateCount.fillDaily(alertDate, true, alertCount),

src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardSummary.scala

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,23 @@ case class GfwProDashboardSummary(
2626
def isEmpty = stats.isEmpty
2727

2828
/** Pivot raw data to GfwProDashboardData and aggregate across alert dates. */
29-
def toGfwProDashboardData(): GfwProDashboardData = {
30-
stats
31-
.map { case (group, data) => group.
32-
toGfwProDashboardData(data.alertCount, data.treeCoverExtentArea) }
33-
.foldLeft(GfwProDashboardData.empty)( _ merge _)
29+
def toGfwProDashboardData(ignoreGadm: Boolean): List[GfwProDashboardData] = {
30+
if (ignoreGadm) {
31+
// Combine all GfwProDashboardData results ignoring different groupGadmIds.
32+
List(stats
33+
.map { case (group, data) => group.
34+
toGfwProDashboardData(data.alertCount, data.treeCoverExtentArea) }
35+
.foldLeft(GfwProDashboardData.empty)( _ merge _))
36+
} else {
37+
// Combine all GfwProDashboardData results into separate rows based on groupGadmId.
38+
stats
39+
.groupBy { case(group, data) => group.groupGadmId }
40+
.map { case(key, list) =>
41+
list.map { case (group, data) => group.
42+
toGfwProDashboardData(data.alertCount, data.treeCoverExtentArea) }
43+
.foldLeft(GfwProDashboardData.empty)(_ merge _)
44+
}.toList
45+
}
3446
}
3547
}
3648

@@ -51,7 +63,21 @@ object GfwProDashboardSummary {
5163
val naturalForestCategory: String = raster.tile.sbtnNaturalForest.getData(col, row)
5264
val jrcForestCover: Boolean = raster.tile.jrcForestCover.getData(col, row)
5365

54-
val groupKey = GfwProDashboardRawDataGroup(integratedAlertDateAndConf,
66+
val gadmId: String = if (kwargs("getRasterGadm") == true) {
67+
val gadmAdm0: String = raster.tile.gadm0.getData(col, row)
68+
// Skip processing this pixel if gadmAdm0 is empty
69+
if (gadmAdm0 == "") {
70+
return
71+
}
72+
val gadmAdm1: Integer = raster.tile.gadm1.getData(col, row)
73+
val gadmAdm2: Integer = raster.tile.gadm2.getData(col, row)
74+
s"$gadmAdm0.$gadmAdm1.$gadmAdm2"
75+
} else {
76+
""
77+
}
78+
79+
80+
val groupKey = GfwProDashboardRawDataGroup(gadmId, integratedAlertDateAndConf,
5581
integratedAlertCoverage,
5682
naturalForestCategory == "Natural Forest",
5783
jrcForestCover,

0 commit comments

Comments
 (0)