Skip to content

Commit

Permalink
coastal_VP_stats
Browse files Browse the repository at this point in the history
  • Loading branch information
thornoe committed Feb 7, 2024
1 parent 626aae2 commit d5558a9
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 39 deletions.
68 changes: 30 additions & 38 deletions gis/coastal_CV.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,8 @@ def AccuracyScore(y_true, y_pred):
# Share of water bodies by number of non-missing values
for n in range(0, len(dfEcoObs.columns) + 1):
n, round(100 * sum(dfEcoObs.notna().sum(axis=1) == n) / len(dfEcoObs), 2) # percent
dfEcoObs.count().count()

# Subset of rows where 1-15 values are non-missing
sparse = dfEcoObs[dfEcoObs.notna().sum(axis=1).isin(list(range(1, 15 + 1)))]
sparse.count() # lowest number of non-missing values with support in all years
sparse.count().sum() # 302 non-missing values in total to loop over with LOO-CV

# Include ecological status as assessed in basis analysis for VP3
basis = dfVP[["til_oko_fy"]].copy() # phytoplankton measured as chlorophyll
Expand Down Expand Up @@ -139,21 +136,21 @@ def AccuracyScore(y_true, y_pred):

# Define the dictionaries
dict1 = {
"Nordsø": "No",
"Kattegat": "K",
"Bælthav": "B",
"Østersøen": "Ø",
"Fjord": "Fj",
"Vesterhavsfjord": "Vf",
"North Sea": "No", # Nordsø
"Kattegat": "K", # Kattegat
"Belt Sea": "B", # Bælthav
"Baltic Sea": "Ø", # Østersøen
"Fjord": "Fj", # Fjord
"North Sea fjord": "Vf", # Vesterhavsfjord
}
dict2 = {
"water exchange": "Vu",
"freshwater inflow": "F",
"water depth": "D",
"stratification": "L",
"sediment": "Se",
"salinity": "Sa",
"tide": "T",
"water exchange": "Vu", # vandudveksling
"freshwater inflow": "F", # ferskvandspåvirkning
"depth": "D", # vanddybde
"stratification": "L", # lagdeling
"sediment": "Se", # sediment
"salinity": "Sa", # salinitet
"tide": "T", # tidevand
}

# Reverse the dictionaries so the abbreviations are the keys
Expand All @@ -171,31 +168,26 @@ def AccuracyScore(y_true, y_pred):

# Merge DataFrames for typology and observed ecological status
dfTypology = dfObs.merge(dummies[cols], on="wb")
dfSparse = dfTypology.merge(sparse[[]], on="wb") # sparse subset of dfTypology

# Empty DataFrame for storing the distribution of typology in each subset (all VP3 and sparse)
VPstats = pd.DataFrame(columns=["All VP3", "Sparse"])
# Subset dfTypology to water bodies where ecological status is observed at least once
obs = dfTypology.loc[dfEcoObs.notna().any(axis=1)]

# Yearly distribution of observed coastal waters by typology and district
for a, b in zip([dfEcoObs, sparse], [dfTypology, dfSparse]):
# df for storing number of observed coastal waters and yearly distribution by dummies
d = pd.DataFrame(a.count(), index=a.columns, columns=["n"]).astype(int)

# Yearly distribution of observed coastal waters by typology and district
for c in cols:
d[c] = 100 * b[b[c] == 1].count() / b.count()
d.loc["All VP3", c] = 100 * len(b[b[c] == 1]) / len(b)
d.loc["All VP3", "n"] = len(b)
# DataFrame for storing the number of observed coastal waters per year and their yearly distribution by dummies
d = pd.DataFrame(dfEcoObs.count(), index=dfEcoObs.columns, columns=["n"]).astype(int)

# Rename columns to the full names of dummies
d = d.rename(columns=dicts)
# Yearly distribution of observed coastal waters by typology and district
for c in cols:
d[c] = 100 * obs[obs[c] == 1].count() / obs.count()
d.loc["Obs of n", c] = 100 * len(obs[obs[c] == 1]) / len(obs)
d.loc["Obs of all", c] = 100 * len(obs[obs[c] == 1]) / len(dummies)
d.loc["All VP3", c] = 100 * len(dummies[dummies[c] == 1]) / len(dummies)
d.loc["Obs of n", "n"] = len(obs)
d.loc["Obs of all", "n"] = len(dummies)
d.loc["All VP3", "n"] = len(dummies)
d = d.rename(columns=dicts) # rename columns to full names
d.to_csv("output/coastal_VP_stats.csv") # save distributions to csv
d.loc[("Obs of n", "Obs of all", "All VP3"), :].T # report in percent

if b is dfSparse:
VPstats["Sparse"] = d.loc["All VP3", :] # save distribution
else:
VPstats["All VP3"] = d.loc["All VP3", :] # save distribution
d.to_excel("output/coastal_VP_stats.xlsx")
VPstats # report distribution - sparse only includes Fjords and Bælthav (inner straits)

########################################################################################
# 2. Multivariate feature imputation
Expand Down
4 changes: 3 additions & 1 deletion gis/output/coastal_VP_stats.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
,n,No,K,B,Ø,Fj,Vf,Vu,F,D,L,Se,Sa,T
,n,North Sea,Kattegat,Belt Sea,Baltic Sea,Fjord,North Sea fjord,water exchange,freshwater inflow,depth,stratification,sediment,salinity,tide
1988,37.0,0.0,0.0,40.54054054054054,2.7027027027027026,45.945945945945944,10.81081081081081,18.91891891891892,16.216216216216218,40.54054054054054,37.83783783783784,35.13513513513514,37.83783783783784,0.0
1989,59.0,1.694915254237288,0.0,40.67796610169491,1.694915254237288,49.152542372881356,6.779661016949152,22.033898305084747,15.254237288135593,38.983050847457626,42.3728813559322,37.28813559322034,35.59322033898305,1.694915254237288
1990,58.0,6.896551724137931,0.0,36.206896551724135,1.7241379310344827,48.275862068965516,6.896551724137931,22.413793103448278,15.517241379310345,34.48275862068966,41.37931034482759,31.03448275862069,41.37931034482759,6.896551724137931
Expand Down Expand Up @@ -35,4 +35,6 @@
2021,66.0,9.090909090909092,3.0303030303030303,27.272727272727273,3.0303030303030303,53.03030303030303,4.545454545454546,22.727272727272727,18.181818181818183,33.333333333333336,34.84848484848485,27.272727272727273,36.36363636363637,9.090909090909092
2022,47.0,10.638297872340425,0.0,31.914893617021278,6.382978723404255,48.93617021276596,2.127659574468085,17.02127659574468,12.76595744680851,36.170212765957444,36.170212765957444,25.53191489361702,46.808510638297875,10.638297872340425
2023,62.0,6.451612903225806,1.6129032258064515,32.25806451612903,3.225806451612903,50.0,6.451612903225806,19.35483870967742,17.741935483870968,35.483870967741936,40.32258064516129,35.483870967741936,38.70967741935484,6.451612903225806
Obs of n,96.0,7.291666666666667,2.0833333333333335,34.375,3.125,48.958333333333336,4.166666666666667,23.958333333333332,14.583333333333334,35.416666666666664,39.583333333333336,29.166666666666668,36.458333333333336,7.291666666666667
Obs of all,108.0,6.481481481481482,1.8518518518518519,30.555555555555557,2.7777777777777777,43.51851851851852,3.7037037037037037,21.296296296296298,12.962962962962964,31.48148148148148,35.18518518518518,25.925925925925927,32.407407407407405,6.481481481481482
All VP3,108.0,6.481481481481482,4.62962962962963,36.111111111111114,4.62962962962963,44.44444444444444,3.7037037037037037,27.77777777777778,12.962962962962964,38.888888888888886,41.666666666666664,28.703703703703702,36.111111111111114,6.481481481481482
Binary file modified gis/output/coastal_VP_stats.xlsx
Binary file not shown.

0 comments on commit d5558a9

Please sign in to comment.