-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinvert_s2_valdata.py
171 lines (146 loc) · 5.8 KB
/
invert_s2_valdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
'''
Inversion of Sentinel-2 data for crop trait retrieval using
PROSAIL lookup tables
@author Lukas Valentin Graf, Selene Ledain
'''
import numpy as np
import pandas as pd
import geopandas as gpd
import warnings
import sys
import os
from pathlib import Path
from typing import Dict, List, Optional
from rtm_inv.core.inversion import inv_df, retrieve_traits
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import time
from scipy import stats
#logger = get_settings().logger
warnings.filterwarnings('ignore')
band_selection = [
'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B8A', 'B11', 'B12']
def invert_scenes(
data_path: str,
lut_paths: List[str],
n_solutions: Dict[str, int],
cost_functions: Dict[str, str],
aggregation_methods: Dict[str, str],
traits: Optional[List[str]] = ['lai', 'ccc']
):
"""
Lookup table based inversion of S2 imagery. The inversion setup can
be adopted for each phenological macro-stage.
:param data_path:
path to where data to invert is plocated. Should be a .pkl file with a gpd
:param lut_paths:
list of LUTs to use for inversion
:param n_solutions:
number of solutions of the inversion to use per phenological
macro-stage
:param cost_functions:
cost function per phenological macro-stage
:param aggregation_methods:
aggregation methods of the solutions found per phenological macro-stage
:param traits:
list of traits to extract (this is used to find the correct LUT file).
Defaults to 'lai', 'cab', and 'ccc'.
"""
# Load the LUTs
lut = pd.concat([pd.read_pickle(path) for path in lut_paths], axis=0, ignore_index=True)
s2_lut = lut[['B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B8A', 'B11', 'B12']]
# Load the data to invert
df_data = pd.read_pickle(data_path).reset_index(drop=True)
# Perform inversion
lut_idxs, cost_function_values = inv_df(
lut=s2_lut,
df=df_data,
cost_function=cost_functions,
n_solutions=n_solutions,
)
trait_img, q05_img, q95_img = retrieve_traits(
lut=lut,
lut_idxs=lut_idxs,
traits=traits,
cost_function_values=cost_function_values,
measure=aggregation_methods
)
# Save traits to the dataframe
for i, trait in enumerate(traits):
df_data[f'{trait}_retrieved'] = trait_img[:, i]
df_data[f'{trait}_q05'] = q05_img[:, i]
df_data[f'{trait}_q95'] = q95_img[:, i]
# Save lowest, median and highest cost function value
highest_cost_function_vals = [np.max(c) for c in cost_function_values]
lowest_cost_function_vals = [np.min(c) for c in cost_function_values]
median_cost_function_vals = [np.median(c) for c in cost_function_values]
df_data['cost_func_max'] = highest_cost_function_vals
df_data['cost_func_min'] = lowest_cost_function_vals
df_data['cost_func_median'] = median_cost_function_vals
# Save to pickle
fname = data_path.split('.pkl')[0] + '_retrievednoise.pkl'
with open(fname, 'wb') as f:
df_data.to_pickle(f)
return
def compute_scores(data_path):
fname = data_path.split('.pkl')[0] + '_retrievednoise.pkl'
df = pd.read_pickle(fname)
df = df[~df['lai'].isna()]
rmse = mean_squared_error(df.lai, df.lai_retrieved, squared=False)
r2 = r2_score(df.lai, df.lai_retrieved)
nrmse = mean_squared_error(df.lai, df.lai_retrieved, squared=False)/(np.max(df.lai) - np.min(df.lai))
pearson = stats.pearsonr(df.lai, df.lai_retrieved).statistic
print(f'LUT retrieval: RMSE {rmse}, R2: {r2}, nRMSE: {nrmse}, r2: {pearson**2}')
plt.style.use('seaborn-v0_8-darkgrid')
ax = df.plot(kind='scatter', x='lai', y='lai_retrieved', figsize=(8,8), s=50)
ax.set_xlabel('Validation LAI [$m^{2}\ m^{-2}$]', fontsize=18)
ax.set_ylabel('Retrieved LAI [$m^{2}\ m^{-2}$]', fontsize=18)
# Set axis limits
ax.set_xlim((0, 8))
ax.set_ylim((0, 8))
# Customize ticks
#ax.tick_params(axis='both', which='major', labelsize=16)
# Plot the y=x line
ax.plot([0, 8], [0, 8], color='gray', linestyle='--', label='1:1 fit')
# Plot the regression line
slope, intercept = np.polyfit(df.lai, df.lai_retrieved, 1)
xseq = np.linspace(0, 8, num=100)
ax.plot(xseq, intercept + slope * xseq, color="r", linestyle='--', label='Linear fit')
# Add legend
ax.legend(fontsize=16)
# Text for displaying on plot
#textstr = f'RMSE: {rmse:.3f}\nnRMSE: {nrmse:.3f}\n$R^2$: {pearson**2:.3f}'
textstr = (
r'RMSE: {:.3f} $m^{{2}} \, m^{{-2}}$'
'\n' # Correct LaTeX for superscript
r'nRMSE: {:.3f}'
'\n'
r'$R^2$: {:.3f}'
).format(
rmse,
nrmse,
pearson**2
)
# Add text box
props = dict(boxstyle='round', facecolor='white', alpha=0.5)
ax.text(0.03, 0.75, textstr, transform=ax.transAxes, fontsize=16, bbox=props)
# Save plot
plt.savefig('results/lut_retreival_scatter_soil_r2.png')
return
if __name__ == '__main__':
data_path = os.path.expanduser('~/mnt/eo-nas1/eoa-share/projects/010_CropCovEO/results/validation_data_extended_angles_shift.pkl')
lut_dir = os.path.expanduser('~/mnt/eo-nas1/eoa-share/projects/010_CropCovEO/results/lut_based_inversion/soil/')
lut_paths = [os.path.join(lut_dir, 'prosail_danner-etal_switzerland_soil_S2A_lai-cab-ccc-car_lut_no-constraints.pkl'), os.path.join(lut_dir, 'prosail_danner-etal_switzerland_soil_S2B_lai-cab-ccc-car_lut_no-constraints.pkl')]
cost_functions = 'mae'
aggregation_methods ='median'
n_solutions = 5000
traits = ['lai']
invert_scenes(
data_path,
lut_paths,
n_solutions=n_solutions,
cost_functions=cost_functions,
aggregation_methods=aggregation_methods,
traits=traits
)
compute_scores(data_path)