This repository was archived by the owner on Dec 6, 2019. It is now read-only.

Commit 20e638e

Merge pull request #17 from USGS-EROS/develop
0.5 Release
2 parents 1af3146 + 8d4c61a commit 20e638e

14 files changed: +371 −132 lines changed

.dockerignore

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,7 @@
 .git/
 .hypothesis/
 lib/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class

.test_env

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-export SPARK_HOME=$(pwd)/lib/spark
+export SPARK_HOME="${SPARK_HOME:=$(pwd)/lib/spark}"
 export PATH=$PATH:$SPARK_HOME/bin
 export PYTHONPATH=$PYTHONPATH:$SPARK_HOME/python/lib/pyspark.zip
 export PYTHONPATH=$PYTHONPATH:$SPARK_HOME/python/lib/py4j-0.10.4-src.zip
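The new form uses Bash's default-assignment expansion, so a SPARK_HOME already exported in the calling shell is respected and the repo-local lib/spark is only used as a fallback. A minimal standalone sketch of that behavior (not part of the repo):

```bash
#!/usr/bin/env bash
# ${VAR:=default} assigns the default only when VAR is unset or empty.
unset SPARK_HOME
echo "${SPARK_HOME:=$(pwd)/lib/spark}"   # prints <cwd>/lib/spark and sets SPARK_HOME

SPARK_HOME=/opt/spark
echo "${SPARK_HOME:=$(pwd)/lib/spark}"   # prints /opt/spark; the existing value wins
```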

Dockerfile

Lines changed: 52 additions & 39 deletions
@@ -1,23 +1,36 @@
 FROM centos:7.3.1611
 
-LABEL maintainer="USGS EROS LCMAP http://eros.usgs.gov http://github.com/usgs-eros/lcmap-firebird"
-LABEL description="CentOS based Spark-Mesos image for LCMAP"
-LABEL org.apache.mesos.version=1.4.0
-LABEL org.apache.spark.version=2.2.0
-LABEL net.java.openjdk.version=1.8.0
-LABEL org.python.version=3.6
-LABEL org.centos=7.3.1611
+LABEL maintainer="USGS EROS LCMAP http://eros.usgs.gov http://github.com/usgs-eros/lcmap-firebird" \
+description="CentOS based Spark-Mesos image for LCMAP" \
+org.apache.mesos.version=1.4.0 \
+org.apache.spark.version=2.2.0 \
+net.java.openjdk.version=1.8.0 \
+org.python.version=3.6 \
+org.centos=7.3.1611
 
 EXPOSE 8081 4040 8888
 
 RUN yum update -y && \
-yum install -y sudo gcc bzip2 java-1.8.0-openjdk-devel.x86_64 && \
+yum install -y sudo java-1.8.0-openjdk-devel.x86_64 && \
 yum install -y http://repos.mesosphere.io/el/7/noarch/RPMS/mesosphere-el-repo-7-3.noarch.rpm && \
 yum install -y mesos && \
-yum -y downgrade mesos-1.4.0
+yum -y downgrade mesos-1.4.0 && \
+sudo yum clean all && \
+sudo rm -rf /var/cache/yum && \
+localedef -i en_US -f UTF-8 en_US.UTF-8
 
-RUN localedef -i en_US -f UTF-8 en_US.UTF-8
+ENV HOME=/home/lcmap \
+USER=lcmap \
+SPARK_HOME=/opt/spark \
+SPARK_NO_DAEMONIZE=true \
+PYSPARK_PYTHON=python3 \
+MESOS_NATIVE_JAVA_LIBRARY=/usr/lib/libmesos.so \
+LC_ALL=en_US.UTF-8 \
+LANG=en_US.UTF-8
 
+ENV PATH=$SPARK_HOME/bin:${PATH}:$HOME/miniconda3/bin \
+PYTHONPATH=$PYTHONPATH:$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.4-src.zip:$SPARK_HOME/python/lib/pyspark.zip
+
 ##########################################################################
 # Add a user to run as inside the container. This will prevent accidental
 # foo while mounting volumes. To enable access between external and
@@ -27,51 +40,51 @@ RUN adduser -ms /bin/bash lcmap && \
 echo "lcmap ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/lcmap && \
 chmod 0440 /etc/sudoers.d/lcmap
 
-ENV HOME=/home/lcmap
-ENV USER=lcmap
 USER $USER
 WORKDIR $HOME
 ##########################################################################
 
-ENV SPARK_HOME=/opt/spark
-ENV SPARK_NO_DAEMONIZE=true
-ENV PYSPARK_PYTHON=$HOME/miniconda3/bin/python3
-ENV MESOS_NATIVE_JAVA_LIBRARY=/usr/lib/libmesos.so
-ENV PATH=$HOME/miniconda3/bin:$SPARK_HOME/bin:${PATH}
-ENV PYTHONPATH=$PYTHONPATH:$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.4-src.zip:$SPARK_HOME/python/lib/pyspark.zip
-ENV LC_ALL=en_US.UTF-8
-ENV LANG=en_US.UTF-8
-
+# Install Spark
 RUN cd /opt && \
 sudo curl https://d3kbcqa49mib13.cloudfront.net/spark-2.2.0-bin-hadoop2.7.tgz -o spark.tgz && \
 sudo tar -zxf spark.tgz && \
 sudo rm -f spark.tgz && \
 sudo ln -s spark-* spark
 
-RUN curl https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o mc.sh && \
+# Install miniconda.
+# Install numpy from default repo for mkl based implementation.
+RUN sudo yum install -y bzip2 && \
+curl https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o mc.sh && \
 chmod 755 mc.sh && \
 ./mc.sh -b && \
 rm -rf mc.sh && \
-conda config --add channels conda-forge && \
-conda install cython gdal --yes
+sudo yum erase -y bzip2 && \
+sudo yum clean all && \
+conda install python=3.6 pip numpy cython scipy scikit-learn=0.18 --yes
 
-COPY firebird firebird
-COPY notebook notebook
-COPY resources resources
-COPY test test
-COPY Makefile .
-COPY pom.xml .
-COPY README.md .
-COPY setup.py .
-COPY version.txt .
+# Copy firebird project artifacts into image
+RUN mkdir -p firebird
+COPY pom.xml .test_env test.sh setup.py version.txt Makefile README.rst ./firebird/
+COPY firebird firebird/firebird
+COPY notebook firebird/notebook
+COPY test firebird/test
 COPY log4j.properties $SPARK_HOME/conf/log4j.properties
-RUN sudo chown -R lcmap:lcmap .
 
-RUN pip install -e .[test,dev]
-
-RUN cd $HOME && \
-sudo yum install -y maven && \
+# Install spark-cassandra-connector
+RUN cd $HOME/firebird && \
+sudo yum install -y maven && \
 sudo mvn dependency:copy-dependencies -DoutputDirectory=$SPARK_HOME/jars && \
 sudo yum erase -y maven && \
 sudo yum clean all && \
-conda clean -all
+sudo rm -rf /var/cache/yum && \
+sudo rm -rf /root/.cache /root/.m2
+
+# Do not install the test or dev profiles in this image, control image size
+#RUN sudo /usr/local/bin/pip install -e .[test,dev]
+RUN sudo chown -R lcmap:lcmap . && \
+sudo yum -y install gcc Cython && \
+sudo $HOME/miniconda3/bin/pip install -e firebird/.[test,dev] && \
+sudo sh -c 'find . | grep -E "(__pycache__|\.pyc|\.pyo$)" | xargs rm -rf' && \
+sudo yum erase -y gcc && \
+sudo yum clean all && \
+sudo rm -rf /var/cache/yum
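After this change the image carries Spark under /opt/spark, a Miniconda Python 3.6 with the pinned scientific stack, and the firebird package installed in editable mode under /home/lcmap/firebird. A hedged smoke-test sketch of the result, assuming the image has been built and tagged usgseros/lcmap-firebird:latest (the tag name follows the Makefile below and is an assumption, not part of this commit):

```bash
# Hypothetical checks against the built image; the tag name is assumed.
docker run --rm usgseros/lcmap-firebird:latest \
    python3 -c "import pyspark; print(pyspark.__version__)"   # pyspark resolved via PYTHONPATH

# Confirm the Spark jars and the Mesos native library sit where the ENV block points.
docker run --rm usgseros/lcmap-firebird:latest \
    bash -c 'ls "$SPARK_HOME"/jars | head && ls -l "$MESOS_NATIVE_JAVA_LIBRARY"'
```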

Makefile

Lines changed: 7 additions & 6 deletions
@@ -1,20 +1,21 @@
 # pull the tag from version.txt
 TAG:=`cat version.txt`
-WORKERIMAGE:=usgseros/lcmap-firebird:$(TAG)
+IMAGE:=usgseros/lcmap-firebird
 
 vertest:
 @echo TAG:$(TAG)
-@echo WORKERIMAGE:$(WORKERIMAGE)
+@echo IMAGE:$(IMAGE)
 
 docker-build:
-docker build -t $(WORKERIMAGE) $(PWD)
+docker build -t $(IMAGE):$(TAG) -t $(IMAGE):latest $(PWD)
 
 docker-push:
 docker login
-docker push $(WORKERIMAGE)
+docker push $(IMAGE):$(TAG)
+docker push $(IMAGE):latest
 
 docker-shell:
-docker run -it --entrypoint=/bin/bash usgseros/$(WORKERIMAGE)
+docker run -it --entrypoint=/bin/bash $(IMAGE):latest
 
 deps-up:
 docker-compose -f test/resources/docker-compose.yml up
@@ -34,7 +35,7 @@ spark-lib:
 tar -C lib -xvf lib/spark-2.2.0-bin-hadoop2.7.tar
 rm lib/*tar
 ln -s spark-2.2.0-bin-hadoop2.7 lib/spark
-mvn dependency:copy-dependencies -DoutputDirectory=lib/spark/jars
+mvn dependency:copy-dependencies -f pom.xml -DoutputDirectory=lib/spark/jars
 
 tests:
 ./test.sh
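Splitting IMAGE from TAG lets docker-build and docker-push produce both a versioned tag and a latest tag from the same build. Roughly what those targets expand to, assuming version.txt holds 0.5.0 (the exact version string here is a guess for illustration):

```bash
# Approximate expansion of `make docker-build` and `make docker-push`
# when version.txt contains 0.5.0 (hypothetical value).
docker build -t usgseros/lcmap-firebird:0.5.0 -t usgseros/lcmap-firebird:latest "$(pwd)"
docker login
docker push usgseros/lcmap-firebird:0.5.0
docker push usgseros/lcmap-firebird:latest
```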

README.md renamed to README.rst

Lines changed: 43 additions & 31 deletions
@@ -1,78 +1,90 @@
-# lcmap-firebird
+lcmap-firebird
+==============
 LCMAP Science Execution Environment
 
-## What is lcmap-firebird?
+What is lcmap-firebird?
+-----------------------
 * LCMAP product generation, development and analysis
 * Built on Apache Spark, Apache Mesos, Docker and Python3
 * Runs on 2000 cores as easily as it runs on 1
 
-## As an operations environment
+As an operations environment
+----------------------------
 * Command line interface
 * System requirements: Bash & Docker
 
-## As a development and analysis environment
+As a development and analysis environment
+-----------------------------------------
 * Jupyter Notebooks & Apache Cassandra (included)
 * Notebooks uploaded or mounted via a volume
 * Results are savable to Cassandra anywhere: Cloud, cluster, dev server, local, etc
 * Comes with the Firebird Spark Python library for working with chips, chip-specs and creating time-series data
 
-## Streamlined Research to Operations
+Streamlined Research to Operations
+----------------------------------
 * Development and analysis Notebooks are controlled outside Firebird.
-* Code may be moved to operations [by merging it directly into lcmap-firebird or included it via library.](#developing-firebird)
+* Code may be moved to operations `by merging it directly into lcmap-firebird or included it via library. <#developing-firebird>`_
 
 
-## Get Started
-```bash
+Get Started
+-----------
+.. code-block:: bash
+
 $ wget https://raw.githubusercontent.com/USGS-EROS/lcmap-firebird/master/firebird.install.example -O firebird.install
 $ emacs firebird.install
 $ source firebird.install
 $ firebird-save -a 1980-01-01/2017-01-01 -b -1821585,2891595 -p seglength -p ccd -d 2014-01-01
-```
 
-## Read [Frequently Asked Questions](faq.md)
+`Frequently Asked Questions <docs/faq.rst>`_
+----------------------------------------------
+
+`Roadmap <docs/roadmap.rst>`_
+-----------------------------
 
-## Developing Firebird
+Developing Firebird
+-------------------
 
 * Install Docker, Maven and Conda
 
 * Create and activate a conda environment
-```bash
+.. code-block:: bash
+
 $ conda config --add channels conda-forge
 $ conda create --name firebird python=3.6 numpy pandas scipy gdal -y
 $ source activate firebird
-```
 
 * Clone this repo, install deps
-```bash
+.. code-block:: bash
+
 $ git clone git@github.com:usgs-eros/lcmap-firebird
 $ cd lcmap-firebird
 $ pip install -e .[test,dev]
-```
 
 * Run tests
-```bash
-$ make spark-lib
-$ make deps-up
-$ make db-schema
-$ make tests
-$ make deps-down
-```
+.. code-block:: bash
+
+$ make spark-lib
+$ make deps-up
+$ make db-schema
+$ make tests
+$ make deps-down
 
 * Cut a branch, do some work, write some tests, update the docs, push to github
 
 * Build a Docker image to test locally
-```bash
-$ emacs version.txt
-$ make docker-build
-$ emacs firebird.install # point to new version that was just built
-```
+.. code-block:: bash
+
+$ emacs version.txt
+$ make docker-build
+$ emacs firebird.install # point to new version that was just built
 
 * Publish the Docker image so it will be available to a cluster
-```bash
-$ make docker-push
-```
+.. code-block:: bash
+
+$ make docker-push
 
-## Development Philosophy
+Development Philosophy
+----------------------
 Apache Spark is functional programming for cluster computing therefore
 Firebird strives to ensure all of it's code follows functional principles:
 data is immutable, functions are the primary unit of abstraction, and functional
