Skip to content

Commit 71cead3

Browse files
committed
Merge branch 'release/0.8.4'
2 parents a8a8bb7 + 0e42a35 commit 71cead3

File tree

74 files changed

+1466
-2220
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+1466
-2220
lines changed

.travis.yml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
sudo: false
21
dist: xenial
32
language: python
43

@@ -28,11 +27,10 @@ install:
2827
- pip install rasterio shapely pandas numpy pweave
2928
- wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf -
3029

31-
script:
32-
- sbt/bin/sbt -java-home $JAVA_HOME -batch test
33-
- sbt/bin/sbt -java-home $JAVA_HOME -batch it:test
34-
# - sbt -Dfile.encoding=UTF8 clean coverage test coverageReport
35-
# Tricks to avoid unnecessary cache updates
36-
- find $HOME/.sbt -name "*.lock" | xargs rm
37-
- find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm
3830

31+
jobs:
32+
include:
33+
- stage: "Unit Tests"
34+
script: sbt/bin/sbt -java-home $JAVA_HOME -batch test
35+
- stage: "Integration Tests"
36+
script: sbt/bin/sbt -java-home $JAVA_HOME -batch it:test

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
<img src="docs/src/main/paradox/_template/images/RasterFramesLogo.png" width="300px"/><sup style="vertical-align: top;">&reg;</sup>
1+
<img src="docs/src/main/paradox/_template/assets/images/RasterFramesLogo.png" width="300px"/><sup style="vertical-align: top;">&reg;</sup>
22

33
[![Join the chat at https://gitter.im/locationtech/rasterframes](https://badges.gitter.im/locationtech/rasterframes.svg)](https://gitter.im/locationtech/rasterframes?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
44

55
RasterFrames® brings together Earth-observation (EO) data access, cloud computing, and DataFrame-based data science. The recent explosion of EO data from public and private satellite operators presents both a huge opportunity as well as a challenge to the data analysis community. It is _Big Data_ in the truest sense, and its footprint is rapidly getting bigger.
66

77
RasterFrames provides a DataFrame-centric view over arbitrary raster data, enabling spatiotemporal queries, map algebra raster operations, and compatibility with the ecosystem of Spark ML algorithms. By using DataFrames as the core cognitive and compute data model, it is able to deliver these features in a form that is both accessible to general analysts and scalable along with the rapidly growing data footprint.
88

9-
<img src="docs/src/main/paradox/RasterFramePipeline.png" width="600px"/>
9+
<img src="pyrasterframes/src/main/python/docs/static/rasterframes-pipeline-nologo.png" width="600px"/>
1010

1111
Please see the [Getting Started](http://rasterframes.io/getting-started.html) section of the Users' Manual to start using RasterFrames.
1212

bench/src/main/scala/org/locationtech/rasterframes/bench/TileExplodeBench.scala

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,11 @@ package org.locationtech.rasterframes.bench
2222

2323
import java.util.concurrent.TimeUnit
2424

25-
import org.apache.spark.sql.catalyst.InternalRow
26-
import org.apache.spark.sql.catalyst.expressions.BoundReference
27-
import org.apache.spark.sql.rf.TileUDT
2825
import org.locationtech.rasterframes._
29-
import org.locationtech.rasterframes.expressions.generators.ExplodeTiles
26+
import org.apache.spark.sql._
27+
import org.apache.spark.sql.functions._
3028
import org.openjdk.jmh.annotations._
29+
3130
/**
3231
*
3332
* @author sfitch
@@ -37,32 +36,33 @@ import org.openjdk.jmh.annotations._
3736
@State(Scope.Benchmark)
3837
@OutputTimeUnit(TimeUnit.MILLISECONDS)
3938
class TileExplodeBench extends SparkEnv {
39+
import spark.implicits._
4040

41-
//@Param(Array("uint8", "uint16ud255", "float32", "float64"))
42-
@Param(Array("uint16ud255"))
41+
@Param(Array("uint8", "uint16ud255", "float32", "float64"))
4342
var cellTypeName: String = _
4443

4544
@Param(Array("256"))
4645
var tileSize: Int = _
4746

48-
@Param(Array("2000"))
47+
@Param(Array("100"))
4948
var numTiles: Int = _
5049

5150
@transient
52-
var tiles: Array[InternalRow] = _
53-
54-
var exploder: ExplodeTiles = _
51+
var tiles: DataFrame = _
5552

5653
@Setup(Level.Trial)
5754
def setupData(): Unit = {
58-
tiles = Array.fill(numTiles)(randomTile(tileSize, tileSize, cellTypeName))
59-
.map(t => InternalRow(TileUDT.tileSerializer.toInternalRow(t)))
60-
val expr = BoundReference(0, TileType, true)
61-
exploder = new ExplodeTiles(1.0, None, Seq(expr))
55+
tiles = Seq.fill(numTiles)(randomTile(tileSize, tileSize, cellTypeName))
56+
.toDF("tile").repartition(10)
57+
}
58+
59+
@Benchmark
60+
def arrayExplode() = {
61+
tiles.select(posexplode(rf_tile_to_array_double($"tile"))).count()
6262
}
63+
6364
@Benchmark
6465
def tileExplode() = {
65-
for(t <- tiles)
66-
exploder.eval(t)
66+
tiles.select(rf_explode_tiles($"tile")).count()
6767
}
6868
}

build.sbt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ lazy val root = project
3232
.withId("RasterFrames")
3333
.aggregate(core, datasource, pyrasterframes, experimental)
3434
.enablePlugins(RFReleasePlugin)
35-
.settings(publish / skip := true)
35+
.settings(
36+
publish / skip := true,
37+
clean := clean.dependsOn(`rf-notebook`/clean).value
38+
)
3639

3740
lazy val `rf-notebook` = project
3841
.dependsOn(pyrasterframes)

build/circleci/Dockerfile

Lines changed: 29 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,45 +6,40 @@ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
66

77
# most of these libraries required for
88
# python-pip pandoc && pip install setuptools => required for pyrasterframes testing
9-
RUN sudo apt-get update && \
9+
RUN \
10+
sudo apt-get update && \
1011
sudo apt remove \
1112
python python-minimal python2.7 python2.7-minimal \
1213
libpython-stdlib libpython2.7 libpython2.7-minimal libpython2.7-stdlib \
13-
&& sudo apt-get install -y \
14-
pandoc \
15-
wget \
16-
gcc g++ build-essential \
14+
&& \
15+
sudo apt-get install -y \
16+
pandoc wget \
17+
gcc g++ build-essential bash-completion cmake imagemagick \
1718
libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev \
18-
libcurl4-gnutls-dev \
19-
libproj-dev \
20-
libgeos-dev \
21-
libhdf4-alt-dev \
22-
bash-completion \
23-
cmake \
24-
imagemagick \
25-
libpng-dev \
26-
libffi-dev \
27-
&& sudo apt autoremove \
28-
&& sudo apt-get clean all
29-
# && sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 1
30-
# todo s
19+
liblzma-dev libcurl4-gnutls-dev libproj-dev libgeos-dev libhdf4-alt-dev libpng-dev libffi-dev \
20+
&& \
21+
sudo apt autoremove && \
22+
sudo apt-get clean all
3123

32-
RUN cd /tmp && \
33-
wget https://www.python.org/ftp/python/3.7.4/Python-3.7.4.tgz && \
34-
tar xzf Python-3.7.4.tgz && \
35-
cd Python-3.7.4 && \
36-
./configure --with-ensurepip=install --prefix=/usr/local --enable-optimization && \
37-
make && \
38-
sudo make altinstall && \
39-
rm -rf Python-3.7.4*
24+
RUN \
25+
cd /tmp && \
26+
wget https://www.python.org/ftp/python/3.7.4/Python-3.7.4.tgz && \
27+
tar xzf Python-3.7.4.tgz && \
28+
cd Python-3.7.4 && \
29+
./configure --with-ensurepip=install --prefix=/usr/local --enable-optimization && \
30+
make && \
31+
sudo make altinstall && \
32+
rm -rf Python-3.7.4*
4033

41-
RUN sudo ln -s /usr/local/bin/python3.7 /usr/local/bin/python && \
42-
sudo curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
43-
sudo python get-pip.py && \
44-
sudo pip3 install setuptools ipython==6.2.1
34+
RUN \
35+
sudo ln -s /usr/local/bin/python3.7 /usr/local/bin/python && \
36+
sudo curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
37+
sudo python get-pip.py && \
38+
sudo pip3 install setuptools ipython==6.2.1
4539

4640
# install OpenJPEG
47-
RUN cd /tmp && \
41+
RUN \
42+
cd /tmp && \
4843
wget https://github.com/uclouvain/openjpeg/archive/v${OPENJPEG_VERSION}.tar.gz && \
4944
tar -xf v${OPENJPEG_VERSION}.tar.gz && \
5045
cd openjpeg-${OPENJPEG_VERSION}/ && \
@@ -56,7 +51,8 @@ RUN cd /tmp && \
5651
cd /tmp && rm -Rf v${OPENJPEG_VERSION}.tar.gz openjpeg*
5752

5853
# Compile and install GDAL with Java bindings
59-
RUN cd /tmp && \
54+
RUN \
55+
cd /tmp && \
6056
wget http://download.osgeo.org/gdal/${GDAL_VERSION}/gdal-${GDAL_VERSION}.tar.gz && \
6157
tar -xf gdal-${GDAL_VERSION}.tar.gz && \
6258
cd gdal-${GDAL_VERSION} && \
@@ -73,8 +69,7 @@ RUN cd /tmp && \
7369
--with-threads \
7470
--without-jp2mrsid \
7571
--without-netcdf \
76-
--without-ecw \
77-
&& \
72+
--without-ecw && \
7873
make -j 8 && \
7974
sudo make install && \
8075
sudo ldconfig && \

core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,22 @@ trait RasterFunctions {
5959
/** Extracts the bounding box from a RasterSource or ProjectedRasterTile */
6060
def rf_extent(col: Column): TypedColumn[Any, Extent] = GetExtent(col)
6161

62+
/** Constructs an XZ2 index at the given `indexResolution` in WGS84 from either a Geometry, Extent, ProjectedRasterTile, or RasterSource and its CRS
63+
* For details: https://www.geomesa.org/documentation/user/datastores/index_overview.html */
64+
def rf_spatial_index(targetExtent: Column, targetCRS: Column, indexResolution: Short) = XZ2Indexer(targetExtent, targetCRS, indexResolution)
65+
66+
/** Constructs an XZ2 index with level 18 resolution in WGS84 from either a Geometry, Extent, ProjectedRasterTile, or RasterSource and its CRS
67+
* For details: https://www.geomesa.org/documentation/user/datastores/index_overview.html */
68+
def rf_spatial_index(targetExtent: Column, targetCRS: Column) = XZ2Indexer(targetExtent, targetCRS, 18: Short)
69+
70+
/** Constructs an XZ2 index at the given `indexResolution` in WGS84 from either a ProjectedRasterTile or RasterSource
71+
* For details: https://www.geomesa.org/documentation/user/datastores/index_overview.html */
72+
def rf_spatial_index(targetExtent: Column, indexResolution: Short) = XZ2Indexer(targetExtent, indexResolution)
73+
74+
/** Constructs an XZ2 index with level 18 resolution in WGS84 from either a ProjectedRasterTile or RasterSource
75+
* For details: https://www.geomesa.org/documentation/user/datastores/index_overview.html */
76+
def rf_spatial_index(targetExtent: Column) = XZ2Indexer(targetExtent, 18: Short)
77+
6278
/** Extracts the CRS from a RasterSource or ProjectedRasterTile */
6379
def rf_crs(col: Column): TypedColumn[Any, CRS] = GetCRS(col)
6480

@@ -276,12 +292,38 @@ trait RasterFunctions {
276292
}
277293

278294
/** Where the `maskTile` contains NODATA, replace values in the source tile with NODATA */
279-
def rf_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
280-
Mask.MaskByDefined(sourceTile, maskTile)
295+
def rf_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] = rf_mask(sourceTile, maskTile, false)
296+
297+
/** Where the `maskTile` contains NODATA, replace values in the source tile with NODATA; if `inverse` is true, replace where the `maskTile` does **not** contain NODATA instead */
298+
def rf_mask(sourceTile: Column, maskTile: Column, inverse: Boolean=false): TypedColumn[Any, Tile] =
299+
if(!inverse) Mask.MaskByDefined(sourceTile, maskTile)
300+
else Mask.InverseMaskByDefined(sourceTile, maskTile)
301+
302+
/** Where the `maskTile` equals `maskValue`, replace values in the source tile with `NoData`; if `inverse` is true, replace where the `maskTile` does **not** equal `maskValue` instead */
303+
def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column, inverse: Boolean=false): TypedColumn[Any, Tile] =
304+
if (!inverse) Mask.MaskByValue(sourceTile, maskTile, maskValue)
305+
else Mask.InverseMaskByValue(sourceTile, maskTile, maskValue)
281306

282307
/** Where the `maskTile` equals `maskValue`, replace values in the source tile with `NoData`; if `inverse` is true, replace where the `maskTile` does **not** equal `maskValue` instead */
283-
def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
284-
Mask.MaskByValue(sourceTile, maskTile, maskValue)
308+
def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Int, inverse: Boolean): TypedColumn[Any, Tile] =
309+
rf_mask_by_value(sourceTile, maskTile, lit(maskValue), inverse)
310+
311+
/** Where the `maskTile` equals `maskValue`, replace values in the source tile with `NoData` */
312+
def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Int): TypedColumn[Any, Tile] =
313+
rf_mask_by_value(sourceTile, maskTile, maskValue, false)
314+
315+
/** Generate a tile with the values from `sourceTile`, but where cells in the `maskTile` are in the `maskValues`
316+
list, replace the value with NODATA. */
317+
def rf_mask_by_values(sourceTile: Column, maskTile: Column, maskValues: Column): TypedColumn[Any, Tile] =
318+
Mask.MaskByValues(sourceTile, maskTile, maskValues)
319+
320+
/** Generate a tile with the values from `sourceTile`, but where cells in the `maskTile` are in the `maskValues`
321+
list, replace the value with NODATA. */
322+
def rf_mask_by_values(sourceTile: Column, maskTile: Column, maskValues: Seq[Int]): TypedColumn[Any, Tile] = {
323+
import org.apache.spark.sql.functions.array
324+
val valuesCol: Column = array(maskValues.map(lit).toSeq: _*)
325+
rf_mask_by_values(sourceTile, maskTile, valuesCol)
326+
}
285327

286328
/** Where the `maskTile` does **not** contain `NoData`, replace values in the source tile with `NoData` */
287329
def rf_inverse_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
@@ -291,6 +333,10 @@ trait RasterFunctions {
291333
def rf_inverse_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
292334
Mask.InverseMaskByValue(sourceTile, maskTile, maskValue)
293335

336+
/** Where the `maskTile` does **not** equal `maskValue`, replace values in the source tile with `NoData` */
337+
def rf_inverse_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Int): TypedColumn[Any, Tile] =
338+
Mask.InverseMaskByValue(sourceTile, maskTile, lit(maskValue))
339+
294340
/** Create a tile where cells in the grid defined by cols, rows, and bounds are filled with the given value. */
295341
def rf_rasterize(geometry: Column, bounds: Column, value: Column, cols: Int, rows: Int): TypedColumn[Any, Tile] =
296342
withTypedAlias("rf_rasterize", geometry)(
@@ -389,6 +435,12 @@ trait RasterFunctions {
389435
/** Cellwise inequality comparison between a tile and a scalar. */
390436
def rf_local_unequal[T: Numeric](tileCol: Column, value: T): Column = Unequal(tileCol, value)
391437

438+
/** Test if each cell value is in provided array */
439+
def rf_local_is_in(tileCol: Column, arrayCol: Column) = IsIn(tileCol, arrayCol)
440+
441+
/** Test if each cell value is in provided array */
442+
def rf_local_is_in(tileCol: Column, array: Array[Int]) = IsIn(tileCol, array)
443+
392444
/** Return a tile with ones where the input is NoData, otherwise zero */
393445
def rf_local_no_data(tileCol: Column): Column = Undefined(tileCol)
394446

core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@ package org.locationtech.rasterframes.expressions
2323

2424
import geotrellis.proj4.CRS
2525
import geotrellis.raster.{CellGrid, Tile}
26+
import geotrellis.vector.Extent
2627
import org.apache.spark.sql.Row
2728
import org.apache.spark.sql.catalyst.InternalRow
29+
import org.apache.spark.sql.jts.JTSTypes
2830
import org.apache.spark.sql.rf.{RasterSourceUDT, TileUDT}
2931
import org.apache.spark.sql.types._
3032
import org.apache.spark.unsafe.types.UTF8String
33+
import org.locationtech.jts.geom.Envelope
3134
import org.locationtech.rasterframes.encoders.CatalystSerializer._
3235
import org.locationtech.rasterframes.model.{LazyCRS, TileContext}
3336
import org.locationtech.rasterframes.ref.{ProjectedRasterLike, RasterRef, RasterSource}
@@ -94,6 +97,15 @@ object DynamicExtractors {
9497
(v: Any) => v.asInstanceOf[InternalRow].to[CRS]
9598
}
9699

100+
lazy val extentLikeExtractor: PartialFunction[DataType, Any => Extent] = {
101+
case t if org.apache.spark.sql.rf.WithTypeConformity(t).conformsTo(JTSTypes.GeometryTypeInstance) =>
102+
(input: Any) => JTSTypes.GeometryTypeInstance.deserialize(input).getEnvelopeInternal
103+
case t if t.conformsTo[Extent] =>
104+
(input: Any) => input.asInstanceOf[InternalRow].to[Extent]
105+
case t if t.conformsTo[Envelope] =>
106+
(input: Any) => Extent(input.asInstanceOf[InternalRow].to[Envelope])
107+
}
108+
97109
sealed trait TileOrNumberArg
98110
sealed trait NumberArg extends TileOrNumberArg
99111
case class TileArg(tile: Tile, ctx: Option[TileContext]) extends TileOrNumberArg

core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellStatsAggregate.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ object CellStatsAggregate {
123123
import org.locationtech.rasterframes.encoders.StandardEncoders.cellStatsEncoder
124124

125125
def apply(col: Column): TypedColumn[Any, CellStatistics] =
126-
new Column(new CellStatsAggregateUDAF(col.expr))
127-
.as(s"rf_agg_stats($col)") // node renaming in class doesn't seem to propogate
126+
new CellStatsAggregate()(ExtractTile(col))
127+
.as(s"rf_agg_stats($col)")
128128
.as[CellStatistics]
129129

130130
/** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */

core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/HistogramAggregate.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ object HistogramAggregate {
9898
import org.locationtech.rasterframes.encoders.StandardEncoders.cellHistEncoder
9999

100100
def apply(col: Column): TypedColumn[Any, CellHistogram] =
101-
new Column(new HistogramAggregateUDAF(col.expr))
102-
.as(s"rf_agg_approx_histogram($col)") // node renaming in class doesn't seem to propogate
101+
new HistogramAggregate()(ExtractTile(col))
102+
.as(s"rf_agg_approx_histogram($col)")
103103
.as[CellHistogram]
104104

105105
/** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */

core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalCountAggregate.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ object LocalCountAggregate {
9292
object LocalDataCellsUDAF {
9393
def apply(child: Expression): LocalDataCellsUDAF = new LocalDataCellsUDAF(child)
9494
def apply(tile: Column): TypedColumn[Any, Tile] =
95-
new Column(new LocalDataCellsUDAF(tile.expr))
95+
new LocalCountAggregate(true)(ExtractTile(tile))
9696
.as(s"rf_agg_local_data_cells($tile)")
9797
.as[Tile]
9898
}
@@ -107,7 +107,7 @@ object LocalCountAggregate {
107107
object LocalNoDataCellsUDAF {
108108
def apply(child: Expression): LocalNoDataCellsUDAF = new LocalNoDataCellsUDAF(child)
109109
def apply(tile: Column): TypedColumn[Any, Tile] =
110-
new Column(new LocalNoDataCellsUDAF(tile.expr))
110+
new LocalCountAggregate(false)(ExtractTile(tile))
111111
.as(s"rf_agg_local_no_data_cells($tile)")
112112
.as[Tile]
113113
}

0 commit comments

Comments
 (0)