1- from dateutil import parser
1+ import logging
22
33import numpy as np
44import pandas as pd
5-
5+ import rasters as rt
6+ from dateutil import parser
7+ from pandas import DataFrame
8+ from rasters import MultiPoint , WGS84
9+ from shapely .geometry import Point
610from GEOS5FP import GEOS5FP
7- from sentinel_tiles import sentinel_tiles
8- from sun_angles import calculate_SZA_from_datetime
9- from koppengeiger import load_koppen_geiger
10-
11- import logging
11+ from NASADEM import NASADEMConnection
12+ from .retrieve_FLiESANN_inputs import retrieve_FLiESANN_inputs
1213
1314logger = logging .getLogger (__name__ )
1415
15- def generate_FLiES_inputs_table (
16- FLiES_inputs_from_calval_df : pd .DataFrame ,
17- GEOS5FP_connection : GEOS5FP = None ) -> pd .DataFrame :
18- """
19- FLiES_inputs_from_claval_df:
20- Pandas DataFrame containing the columns: tower, lat, lon, time_UTC, albedo, elevation_km
21- return:
22- Pandas DataFrame containing the columns: tower, lat, lon, time_UTC, doy, albedo, elevation_km, AOT, COT, vapor_gccm, ozone_cm, SZA, KG
16+ def generate_FLiESANN_inputs_table (
17+ input_df : DataFrame ,
18+ GEOS5FP_connection : GEOS5FP = None ,
19+ NASADEM_connection : NASADEMConnection = None ) -> DataFrame :
2320 """
24- if GEOS5FP_connection is None :
25- GEOS5FP_connection = GEOS5FP ()
26-
27- # output_rows = []
28- FLiES_inputs_df = FLiES_inputs_from_calval_df .copy ()
29-
30- doy = []
31- AOT = []
32- COT = []
33- vapor_gccm = []
34- ozone_cm = []
35- SWin = []
36- Tmin_K = []
37- SZA = []
38- KG = []
39-
40- for i , input_row in FLiES_inputs_from_calval_df .iterrows ():
41- tower = input_row .tower
42- lat = input_row .lat
43- lon = input_row .lon
44- time_UTC = input_row .time_UTC
45- albedo = input_row .albedo
46- elevation_km = input_row .elevation_km
47- logger .info (f"collecting FLiES inputs for tower { tower } lat { lat } lon { lon } time { time_UTC } UTC" )
48- time_UTC = parser .parse (str (time_UTC ))
49- doy .append (time_UTC .timetuple ().tm_yday )
50- date_UTC = time_UTC .date ()
51- tile = sentinel_tiles .toMGRS (lat , lon )[:5 ]
52-
53- try :
54- tile_grid = sentinel_tiles .grid (tile = tile , cell_size = 70 )
55- except Exception as e :
56- logger .error (e )
57- logger .warning (f"unable to process tile { tile } " )
58- AOT .append (np .nan )
59- COT .append (np .nan )
60- vapor_gccm .append (np .nan )
61- ozone_cm .append (np .nan )
62- SZA .append (np .nan )
63- KG .append (np .nan )
64- continue
65-
66- rows , cols = tile_grid .shape
67- row , col = tile_grid .index_point (rt .Point (lon , lat ))
68- geometry = tile_grid [max (0 , row - 1 ):min (row + 2 , rows - 1 ),
69- max (0 , col - 1 ):min (col + 2 , cols - 1 )]
70-
71- if not "AOT" in FLiES_inputs_from_calval_df .columns :
72- try :
73- logger .info ("retrieving GEOS-5 FP aerosol optical thickness raster" )
74- AOT .append (np .nanmedian (GEOS5FP_connection .AOT (time_UTC = time_UTC , geometry = geometry )))
75- except Exception as e :
76- AOT .append (np .nan )
77- logger .exception (e )
78-
79- if not "COT" in FLiES_inputs_from_calval_df .columns :
80- try :
81- logger .info ("generating GEOS-5 FP cloud optical thickness raster" )
82- COT .append (np .nanmedian (GEOS5FP_connection .COT (time_UTC = time_UTC , geometry = geometry )))
83- except Exception as e :
84- COT .append (np .nan )
85- logger .exception (e )
86-
87- if not "vapor_gccm" in FLiES_inputs_from_calval_df .columns :
88- try :
89- logger .info ("generating GEOS5-FP water vapor raster in grams per square centimeter" )
90- vapor_gccm .append (np .nanmedian (GEOS5FP_connection .vapor_gccm (time_UTC = time_UTC , geometry = geometry )))
91- except Exception as e :
92- vapor_gccm .append (np .nan )
93- logger .exception (e )
94-
95- if not "ozone_cm" in FLiES_inputs_from_calval_df .columns :
96- try :
97- logger .info ("generating GEOS5-FP ozone raster in grams per square centimeter" )
98- ozone_cm .append (np .nanmedian (GEOS5FP_connection .ozone_cm (time_UTC = time_UTC , geometry = geometry )))
99- except Exception as e :
100- ozone_cm .append (np .nan )
101- logger .exception (e )
102-
103- if not "SWin" in FLiES_inputs_from_calval_df .columns :
104- try :
105- logger .info ("generating GEOS5-FP incoming solar radiation raster in watts per square meter" )
106- SWin .append (np .nanmedian (GEOS5FP_connection .SWin (time_UTC = time_UTC , geometry = geometry )))
107- except Exception as e :
108- SWin .append (np .nan )
109- logger .exception (e )
110-
111- if not "Tmin_K" in FLiES_inputs_from_calval_df .columns :
112- try :
113- logger .info ("generating GEOS5-FP minimum temperature in Kelvin" )
114- Tmin_K .append (np .nanmedian (GEOS5FP_connection .Tmin_K (time_UTC = time_UTC , geometry = geometry )))
115- except Exception as e :
116- Tmin_K .append (np .nan )
117- logger .exception (e )
118-
119- if not "SZA" in FLiES_inputs_from_calval_df .columns :
120- try :
121- logger .info ("calculating solar zenith angle" )
122- SZA .append (calculate_SZA_from_datetime (time_UTC , lat , lon ))
123- except Exception as e :
124- SZA .append (np .nan )
125- logger .exception (e )
126-
127- if not "KG" in FLiES_inputs_from_calval_df .columns :
128- try :
129- logger .info ("selecting Koppen Geiger climate classification" )
130- KG .append (load_koppen_geiger (geometry = geometry )[1 , 1 ][0 ][0 ])
131- except Exception as e :
132- KG .append (np .nan )
133- logger .exception (e )
134-
135- if not "doy" in FLiES_inputs_df .columns :
136- FLiES_inputs_df ["doy" ] = doy
137-
138- if not "AOT" in FLiES_inputs_df .columns :
139- FLiES_inputs_df ["AOT" ] = AOT
140-
141- if not "COT" in FLiES_inputs_df .columns :
142- FLiES_inputs_df ["COT" ] = COT
143-
144- if not "vapor_gccm" in FLiES_inputs_df .columns :
145- FLiES_inputs_df ["vapor_gccm" ] = vapor_gccm
146-
147- if not "ozone_cm" in FLiES_inputs_df .columns :
148- FLiES_inputs_df ["ozone_cm" ] = ozone_cm
149-
150- FLiES_inputs_df ["SWin" ] = SWin
151- FLiES_inputs_df ["Tmin_K" ] = Tmin_K
21+ Generates a DataFrame of FLiES inputs by retrieving atmospheric and static data.
15222
153- if not "SZA" in FLiES_inputs_df .columns :
154- FLiES_inputs_df ["SZA" ] = SZA
23+ This is a simple wrapper around retrieve_FLiESANN_inputs that handles DataFrame
24+ input/output and geometry parsing.
25+
26+ Parameters:
27+ input_df (pd.DataFrame): A DataFrame containing the following columns:
28+ - time_UTC (str or datetime): Time in UTC.
29+ - geometry (str or shapely.geometry.Point) or (lat, lon): Spatial coordinates.
30+ If "geometry" is a string, it should be in WKT format (e.g., "POINT (lon lat)").
31+ - albedo (float, optional): Surface albedo.
32+ - COT (float, optional): Cloud optical thickness.
33+ - AOT (float, optional): Aerosol optical thickness.
34+ - vapor_gccm (float, optional): Water vapor in grams per cubic centimeter.
35+ - ozone_cm (float, optional): Ozone concentration in centimeters.
36+ - elevation_m (float, optional): Elevation in meters.
37+ - SZA_deg (float, optional): Solar zenith angle in degrees.
38+ - KG or KG_climate (str, optional): Köppen-Geiger climate classification.
39+ - SWin_Wm2 (float, optional): Shortwave incoming solar radiation.
40+ - day_of_year (float, optional): Day of year.
41+ - hour_of_day (float, optional): Hour of day.
42+ GEOS5FP_connection (GEOS5FP, optional): Connection object for GEOS-5 FP data.
43+ NASADEM_connection (NASADEMConnection, optional): Connection object for NASADEM data.
44+
45+ Returns:
46+ pd.DataFrame: A DataFrame with the same structure as the input, but with additional columns:
47+ - albedo: Surface albedo array
48+ - COT: Cloud optical thickness array
49+ - AOT: Aerosol optical thickness array
50+ - vapor_gccm: Water vapor array
51+ - ozone_cm: Ozone concentration array
52+ - elevation_m: Elevation in meters array
53+ - elevation_km: Elevation in kilometers array
54+ - KG_climate: Climate classification array
55+ - SZA_deg: Solar zenith angle array
56+ - SWin_Wm2: Shortwave incoming radiation array
57+ - day_of_year: Day of year array
58+ - atype: Aerosol type array
59+ - ctype: Cloud type array
60+
61+ Raises:
62+ KeyError: If required columns ("geometry" or "lat" and "lon") are missing.
63+ """
64+ def ensure_geometry (row ):
65+ if "geometry" in row :
66+ if isinstance (row .geometry , str ):
67+ s = row .geometry .strip ()
68+ if s .startswith ("POINT" ):
69+ coords = s .replace ("POINT" , "" ).replace ("(" , "" ).replace (")" , "" ).strip ().split ()
70+ return Point (float (coords [0 ]), float (coords [1 ]))
71+ elif "," in s :
72+ coords = [float (c ) for c in s .split ("," )]
73+ return Point (coords [0 ], coords [1 ])
74+ else :
75+ coords = [float (c ) for c in s .split ()]
76+ return Point (coords [0 ], coords [1 ])
77+ return row .geometry
78+
79+ logger .info ("started generating FLiES input table" )
80+
81+ # Ensure geometry column is properly formatted
82+ input_df = input_df .copy ()
83+ input_df ["geometry" ] = input_df .apply (ensure_geometry , axis = 1 )
84+
85+ # Prepare output DataFrame
86+ output_df = input_df .copy ()
15587
156- if not "KG" in FLiES_inputs_df .columns :
157- FLiES_inputs_df ["KG" ] = KG
88+ # Prepare geometries
89+ if "geometry" in input_df .columns :
90+ geometries = MultiPoint ([(geom .x , geom .y ) for geom in input_df .geometry ], crs = WGS84 )
91+ elif "lat" in input_df .columns and "lon" in input_df .columns :
92+ geometries = MultiPoint ([(lon , lat ) for lon , lat in zip (input_df .lon , input_df .lat )], crs = WGS84 )
93+ else :
94+ raise KeyError ("Input DataFrame must contain either 'geometry' or both 'lat' and 'lon' columns." )
15895
159- if "Ta" in FLiES_inputs_df and "Ta_C" not in FLiES_inputs_df :
160- FLiES_inputs_df . rename ({ "Ta" : "Ta_C" }, inplace = True )
96+ # Convert time column to datetime
97+ times_UTC = pd . to_datetime ( input_df . time_UTC )
16198
162- return FLiES_inputs_df
99+ logger .info (f"generating inputs for { len (input_df )} rows" )
100+
101+ # Helper function to get column values or None if column doesn't exist
102+ def get_column_or_none (df , col_name , default_col_name = None ):
103+ if col_name in df .columns :
104+ return df [col_name ].values
105+ elif default_col_name and default_col_name in df .columns :
106+ return df [default_col_name ].values
107+ else :
108+ return None
109+
110+ # Retrieve all inputs at once using vectorized retrieve_FLiESANN_inputs call
111+ FLiES_inputs = retrieve_FLiESANN_inputs (
112+ geometry = geometries ,
113+ time_UTC = times_UTC ,
114+ albedo = get_column_or_none (input_df , "albedo" ),
115+ COT = get_column_or_none (input_df , "COT" ),
116+ AOT = get_column_or_none (input_df , "AOT" ),
117+ vapor_gccm = get_column_or_none (input_df , "vapor_gccm" ),
118+ ozone_cm = get_column_or_none (input_df , "ozone_cm" ),
119+ elevation_m = get_column_or_none (input_df , "elevation_m" ),
120+ SZA_deg = get_column_or_none (input_df , "SZA_deg" , "SZA" ),
121+ KG_climate = get_column_or_none (input_df , "KG_climate" , "KG" ),
122+ SWin_Wm2 = get_column_or_none (input_df , "SWin_Wm2" ),
123+ day_of_year = get_column_or_none (input_df , "day_of_year" ),
124+ hour_of_day = get_column_or_none (input_df , "hour_of_day" ),
125+ GEOS5FP_connection = GEOS5FP_connection ,
126+ NASADEM_connection = NASADEM_connection
127+ )
128+
129+ # Add retrieved inputs to the output DataFrame
130+ for key , values in FLiES_inputs .items ():
131+ # Skip values with mismatched lengths
132+ if hasattr (values , '__len__' ) and not isinstance (values , str ):
133+ if len (values ) != len (output_df ):
134+ continue
135+ output_df [key ] = values
136+
137+ logger .info ("completed generating FLiES input table" )
138+
139+ return output_df
0 commit comments