preprocess_dataset.py
import sqlite3
import pandas as pd
import numpy as np
import pickle
import ee
# Given a bounding box (min/max latitude and longitude), a fire database dataframe, and a
# desired resolution, linearly partition the geodata into smaller grid cells.
def extract_localized_wildfires(minLat, maxLat, minLong, maxLong, df, lat_resolution=None, long_resolution=None, resolution=None):
    assert (lat_resolution is not None and long_resolution is not None) or resolution is not None
    if resolution is not None:
        long_resolution = resolution
        lat_resolution = resolution
    # latitude bin edges ascend; longitude bin edges descend, matching the comparisons below
    longitude_range = np.linspace(maxLong, minLong, long_resolution + 1)
    latitude_range = np.linspace(minLat, maxLat, lat_resolution + 1)
    datapoints = {}
    for row in range(lat_resolution):
        for col in range(long_resolution):
            # select the fires whose coordinates fall inside this grid cell
            rel = df[df["LATITUDE"] >= latitude_range[row]]
            rel = rel[rel["LATITUDE"] < latitude_range[row + 1]]
            rel = rel[rel["LONGITUDE"] <= longitude_range[col]]
            rel = rel[rel["LONGITUDE"] > longitude_range[col + 1]]
            datapoints[(row, col)] = rel
    return datapoints
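# A minimal usage sketch (hypothetical bounding box and grid size, not part of the original
# pipeline): partition a region into a 15x15 grid and count the fires in each cell.
# grid = extract_localized_wildfires(34, 36, -118, -116, df, resolution=15)
# fires_per_cell = {cell: len(fires) for cell, fires in grid.items()}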
def download_timeseries_image(minLat, maxLat, minLong, maxLong, lat_resolution=None, long_resolution=None, resolution=None):
    assert (lat_resolution is not None and long_resolution is not None) or resolution is not None
    if resolution is not None:
        long_resolution = resolution
        lat_resolution = resolution
    # NOTE: the bounding-box and resolution arguments are not used yet; this currently fetches
    # a thumbnail URL for a fixed Landsat 8 scene over a hard-coded region.
    landsat = ee.Image('LANDSAT/LC08/C01/T1_TOA/LC08_123032_20140515').select(['B1', 'B2', 'B3'])
    geometry = ee.Geometry.Rectangle([116.2621, 39.8412, 116.4849, 40.01236])
    path = landsat.getThumbURL({
        'scale': 30,
        'region': geometry
    })
    print(path)
    return None
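# A possible extension (a sketch, assuming ee.Geometry.Rectangle's [xMin, yMin, xMax, yMax]
# coordinate order): build the thumbnail region from the function's own bounding-box
# arguments instead of the hard-coded rectangle, e.g.
#   geometry = ee.Geometry.Rectangle([minLong, minLat, maxLong, maxLat])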
# initialize the Earth Engine API
ee.Initialize()
# load the initial fire dataset
cnx = sqlite3.connect('./us_wildfire_dataset/FPA_FOD_20170508.sqlite')
df = pd.read_sql_query("SELECT DISCOVERY_DATE, LATITUDE, LONGITUDE, FIRE_SIZE, STATE FROM fires", cnx)
# constrain the searches to California, and fix the date/time format
df = df[df["STATE"] == "CA"]
df['Date'] = pd.to_datetime(df['DISCOVERY_DATE'], unit='D', origin='julian')
df = df.drop(columns="DISCOVERY_DATE")
#print(extract_localized_wildfires(34, 36, -118, -116, df, resolution=15)[(0, 2)])
download_timeseries_image(0, 0, 0, 0, resolution=1)
# # save the dataframe to disk
# with open("./us_wildfire_dataset/ca_fires_raw.pkl", "wb") as f:
# pickle.dump(df, f, protocol=pickle.HIGHEST_PROTOCOL)