Skip to content

Commit

Permalink
.
Browse files: browse the repository at this point in the history
  • Loading branch information
thornoe committed Mar 16, 2024
1 parent 6faa66e commit edc76f5
Show file tree
Hide file tree
Showing 20 changed files with 23,433 additions and 23,456 deletions.
218 changes: 109 additions & 109 deletions gis/output/coastal_eco_imp.csv

Large diffs are not rendered by default.

Binary file modified gis/output/coastal_eco_imp.pdf
Binary file not shown.
218 changes: 109 additions & 109 deletions gis/output/coastal_eco_imp_MA.csv

Large diffs are not rendered by default.

218 changes: 109 additions & 109 deletions gis/output/coastal_eco_obs.csv

Large diffs are not rendered by default.

Binary file modified gis/output/coastal_eco_obs.pdf
Binary file not shown.
1,974 changes: 987 additions & 987 deletions gis/output/lakes_eco_imp.csv

Large diffs are not rendered by default.

Binary file modified gis/output/lakes_eco_imp.pdf
Binary file not shown.
1,974 changes: 987 additions & 987 deletions gis/output/lakes_eco_imp_MA.csv

Large diffs are not rendered by default.

1,974 changes: 987 additions & 987 deletions gis/output/lakes_eco_obs.csv

Large diffs are not rendered by default.

Binary file modified gis/output/lakes_eco_obs.pdf
Binary file not shown.
8 changes: 0 additions & 8 deletions gis/output/streams_VP_stats.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1 @@
,Sparse subset,Observed subset,All in VP3
Small,0.7645461598138091,0.6499756137213462,0.6556765627331046
Medium,0.18076027928626842,0.292635343846529,0.2827092346710428
Large,0.01047323506594259,0.021947650788489675,0.022676413546173356
Soft bottom,0.04422032583397983,0.03544139164363518,0.03893778904967925
Natural,0.895655546935609,0.9270037392293936,0.9115321497836789
DK2,0.07796741660201707,0.16387579255405624,0.1654483067283306
basis,2.6574350234341715,2.6447368421052633,2.647725321888412
n,2578.0,6151.0,6703.0
14 changes: 0 additions & 14 deletions gis/output/streams_VP_stats.tex
Original file line number Diff line number Diff line change
@@ -1,14 +0,0 @@
\begin{tabular}{llll}
\toprule
& Sparse subset & Observed subset & All in VP3 \\
\midrule
Small & 0.7645 & 0.6500 & 0.6557 \\
Medium & 0.1808 & 0.2926 & 0.2827 \\
Large & 0.0105 & 0.0219 & 0.0227 \\
Soft bottom & 0.0442 & 0.0354 & 0.0389 \\
Natural & 0.8957 & 0.9270 & 0.9115 \\
DK2 & 0.0780 & 0.1639 & 0.1654 \\
basis & 2.6574 & 2.6447 & 2.6477 \\
n & 2578 & 6151 & 6703 \\
\bottomrule
\end{tabular}
13,408 changes: 6,704 additions & 6,704 deletions gis/output/streams_eco_imp.csv

Large diffs are not rendered by default.

Binary file modified gis/output/streams_eco_imp.pdf
Binary file not shown.
13,408 changes: 6,704 additions & 6,704 deletions gis/output/streams_eco_imp_MA.csv

Large diffs are not rendered by default.

13,408 changes: 6,704 additions & 6,704 deletions gis/output/streams_eco_obs.csv

Large diffs are not rendered by default.

Binary file modified gis/output/streams_eco_obs.pdf
Binary file not shown.
6 changes: 3 additions & 3 deletions gis/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,12 +502,12 @@ def process_string(s):
# Imputed ecological status using a continuous scale
dfEcoObs = dfIndicator.copy()

# Save CSV of data on mean ecological status by water body and year
dfEcoObs.to_csv("output\\" + j + "_eco_" + suffix + ".csv")

# Merge observed ecological status each year with basis analysis for VP3
dfEco = dfEcoObs.merge(dfVP[["basis"]], on="wb")

# Save CSV of data on mean ecological status by water body and year
dfEco.to_csv("output\\" + j + "_eco_" + suffix + ".csv")

if suffix != "obs":
# Prepare for statistics and missing values graph
for t in dfEco.columns:
Expand Down
6 changes: 3 additions & 3 deletions gis/script_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,12 +845,12 @@ def ecological_status(self, j, dfIndicator, dfVP, suffix="obs", index=None):
# Imputed ecological status using a continuous scale
dfEcoObs = dfIndicator.copy()

# Save CSV of data on mean ecological status by water body and year
dfEcoObs.to_csv("output\\" + j + "_eco_" + suffix + ".csv")

# Merge observed ecological status each year with basis analysis for VP3
dfEco = dfEcoObs.merge(dfVP[["basis"]], on="wb")

# Save CSV of data on mean ecological status by water body and year
dfEco.to_csv("output\\" + j + "_eco_" + suffix + ".csv")

if suffix != "obs":
# Prepare for statistics and missing values graph
for t in dfEco.columns:
Expand Down
55 changes: 27 additions & 28 deletions gis/streams_CV.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,14 @@ def stepwise_selection(subset, dummies, data, dfDummies, years, select_all=False
s.loc["Total", "n"] = s["n"].sum()

# Overwrite CSV of accuracy scores and share with less than good ecological status
if subset is sparse:
scores.to_csv("output/streams_eco_imp_accuracy_sparse.csv")
status.to_csv("output/streams_eco_imp_LessThanGood_sparse.csv")
scores_all.to_csv("output/streams_eco_imp_accuracy_sparse_all.csv")
status_all.to_csv("output/streams_eco_imp_LessThanGood_sparse_all.csv")
else:
scores.to_csv("output/streams_eco_imp_accuracy.csv")
status.to_csv("output/streams_eco_imp_LessThanGood.csv")
# if subset is sparse:
# scores.to_csv("output/streams_eco_imp_accuracy_sparse.csv")
# status.to_csv("output/streams_eco_imp_LessThanGood_sparse.csv")
# scores_all.to_csv("output/streams_eco_imp_accuracy_sparse_all.csv")
# status_all.to_csv("output/streams_eco_imp_LessThanGood_sparse_all.csv")
# else:
# scores.to_csv("output/streams_eco_imp_accuracy.csv")
# status.to_csv("output/streams_eco_imp_LessThanGood.csv")

return selected, scores, status # selected predictors; scores and stats by year

Expand Down Expand Up @@ -310,26 +310,25 @@ def stepwise_selection(subset, dummies, data, dfDummies, years, select_all=False
# 2. Multivariate feature imputation (note: Forward Stepwise Selection takes ~5 days)
########################################################################################
# # Example data for testing Forward Stepwise Selection with LOO-CV (takes ~5 seconds)
# dfEcoObs = pd.DataFrame(
# {
# 1988: [2.5, 3.0, 3.5, 4.0, np.nan, 5.0],
# 1989: [2.6, 3.1, 3.6, np.nan, 4.6, 5.1],
# 1990: [2.7, 3.2, np.nan, 4.2, 4.7, 5.2],
# 1991: [2.8, np.nan, 3.8, 4.3, 4.8, 5.3],
# 1992: [np.nan, 3.4, 3.9, 4.4, 4.9, 5.4],
# 1993: [3.0, 3.5, 3.8, 4.4, 5.1, 5.5],
# }
# )
# dfEcoObs.index.name = "wb"
# sparse = dfEcoObs[dfEcoObs.notna().sum(axis=1) == 5]
# dfObs = dfEcoObs.copy()
# dfTypology = dfObs.copy()
# dfTypology["Small"] = [0, 0, 1, 1, 0, 0] # effect: 0.2 worse in 1993
# dfNatural = dfTypology.copy()
# dfNatural["Natural"] = [0, 0, 0, 1, 1, 0] # effect: 0.1 better in 1993
# cols = ["Small", "Natural"]
# years = list(range(1989, 1993 + 1))

dfEcoObs = pd.DataFrame(
{
1988: [2.5, 3.0, 3.5, 4.0, np.nan, 5.0],
1989: [2.6, 3.1, 3.6, np.nan, 4.6, 5.1],
1990: [2.7, 3.2, np.nan, 4.2, 4.7, 5.2],
1991: [2.8, np.nan, 3.8, 4.3, 4.8, 5.3],
1992: [np.nan, 3.4, 3.9, 4.4, 4.9, 5.4],
1993: [3.0, 3.5, 3.8, 4.4, 5.1, 5.5],
}
)
dfEcoObs.index.name = "wb"
sparse = dfEcoObs[dfEcoObs.notna().sum(axis=1) == 5]
dfObs = dfEcoObs.copy()
dfTypology = dfObs.copy()
dfTypology["Small"] = [0, 0, 1, 1, 0, 0] # effect: 0.2 worse in 1993
dfNatural = dfTypology.copy()
dfNatural["Natural"] = [0, 0, 0, 1, 1, 0] # effect: 0.1 better in 1993
cols = ["Small", "Natural"]
years = list(range(1989, 1993 + 1))

# Forward stepwise selection of dummies - CV over subset of sparsely observed streams
kwargs = {"data": dfObs, "dfDummies": dfDistrict, "years": years} # shared arguments
Expand Down

0 comments on commit edc76f5

Please sign in to comment.