Modify/improve MadsTEMDriver.observed_vec function, fix bug.

Adds the capability for the observed_vec function to return either a labeled array or flat list of target (observation) values. Turns out that the labeled array was helpful for testing/cross checking the order of the flat list. Adds tests that go with the new feature. Fixes bug with indentation of return statement in observed_vec function. Change AC-MADS-TEM.jl so that it explicitly requests a flat list as the remainder of MADS seems to expect.
uaf-arctic-eco-modeling · Sep 10, 2024 · c69f9cc · c69f9cc
1 parent eb41914
commit c69f9cc
Show file tree

Hide file tree

Showing 3 changed files with 129 additions and 24 deletions.
diff --git a/mads_calibration/AC-MADS-TEM.jl b/mads_calibration/AC-MADS-TEM.jl
@@ -108,7 +108,7 @@ dvmdostem = PyCall.py"load_dvmdostem_from_configfile"(config_file)
 # ridden from the mads config (parameter distributions, intial guesses, etc)
 
 # Save the targets...
-targets = dvmdostem.observed_vec()
+targets = dvmdostem.observed_vec(format="flat")  # flat list, as the rest of MADS expects; Julia strings need double quotes
 
 # Do the seed run and keep the results
 println("Performing seed run...")

diff --git a/scripts/drivers/MadsTEMDriver.py b/scripts/drivers/MadsTEMDriver.py
@@ -294,23 +294,66 @@ def modeled_vec(self):
     fd = pd.DataFrame(self.gather_model_outputs())
     return list(fd['value'])
 
-  def observed_vec(self):
-    '''Return a flat list of the observation values (aka targets).'''
-    # The target values are included in the model output data structures, and
-    # could be accessed exactly like the modeled_vec function but
-    # if you have not yet run the model you don't have outputs ready and
-    # therefore can't read them. There are cases where you would like to
-    # see the target values without running the model, so we will assemble
-    # them another way here.
+  def observed_vec(self, format='labeled'):
+    '''Return a list of the observation values (aka targets).
+
+    The target values are included in the model output data structures, and
+    could be accessed exactly like the `modeled_vec()` function but
+    if you have not yet run the model you don't have outputs ready and
+    therefore can't read them. There are cases where you would like to
+    see the target values without running the model, so we will assemble
+    them here from the `self.targets` data structure.
+
+    Parameters
+    ==========
+    format : {'labeled', 'flat'}
+      Choose the format that returned data will be in. 'labeled' data will be a
+      list of dicts that can be converted to Pandas DataFrame. 'flat' data will 
+      return a flat list in the order:
+      `[ pft0_leaf,pft0_stem,pft0_root, ... pftN_leaf, pftN_stem, pftN_root ]`
+
+    Returns
+    =======
+    target_data : iterable
+      The target data, organized as specified with `format` parameter.
+    '''
+
     pftnums = [i['pftnum'] for i in self.params]
-    for o in self.outputs:
-      ct = o['ctname']
-      # Not 100% sure this will work all the time?? PFTs? compartments???
-      # Likely going to need something like
-      # if type(self.targets[ct]) is dict:
-      #   sefl.targets[ct]['Leaf'][PFT]
-      targets = [self.targets[ct][PFT] for PFT in range(10) if PFT in pftnums]
-      return targets
+
+    if format == 'labeled':
+      # Builds a list of dicts that can be easily turned into Pandas DataFrame.
+      targets = []
+      for o in self.outputs:
+        ct = o['ctname']
+        if type(self.targets[ct]) is dict:
+          for PFT in range(10):
+            if PFT in pftnums:
+              targets.append(dict(cmtnum=self.cmtnum, ctname=ct, pft=PFT, cmprt='Leaf', observed=self.targets[ct]['Leaf'][PFT]))
+              targets.append(dict(cmtnum=self.cmtnum, ctname=ct, pft=PFT, cmprt='Stem', observed=self.targets[ct]['Stem'][PFT]))
+              targets.append(dict(cmtnum=self.cmtnum, ctname=ct, pft=PFT, cmprt='Root', observed=self.targets[ct]['Root'][PFT]))
+        else:
+          for PFT in range(10):
+            if PFT in pftnums:
+              targets.append(dict(cmtnum=self.cmtnum, ctname=ct, pft=PFT, observed=self.targets[ct][PFT]))
+
+    elif format == 'flat':
+      # Builds a flat list in this order:
+      # [ pft0_leaf, pft0_stem, pft0_root ... pftN_leaf, pftN_stem, pftN_root ]
+      targets = []
+      for o in self.outputs:
+        ct = o['ctname']
+        if type(self.targets[ct]) is dict:
+          for PFT in range(10):
+            if PFT in pftnums:
+              targets.append(self.targets[ct]['Leaf'][PFT])
+              targets.append(self.targets[ct]['Stem'][PFT])
+              targets.append(self.targets[ct]['Root'][PFT])
+        else:
+          for PFT in range(10):
+            if PFT in pftnums:
+              targets.append(self.targets[ct][PFT])
+
+    return targets
 
 
 

diff --git a/scripts/tests/doctests/doctests_MadsTEMDriver.rst b/scripts/tests/doctests/doctests_MadsTEMDriver.rst
@@ -52,9 +52,53 @@
 
 >>> d.setup_outputs(d.target_names)
 
->>> d.observed_vec()
+Grab the targets in two different formats (flat and labeled) and make sure the 
+values line up as expected.
+
+>>> import pandas as pd
+
+>>> flat_targets = d.observed_vec(format='flat')
+>>> df_targets = pd.DataFrame(d.observed_vec(format='labeled'))
+
+Check on the first block of data, which is a PFT target, but not by compartment.
+First print out the tables so we can see the expected shapes.
+>>> df_targets.loc[df_targets['ctname'] == 'GPPAllIgnoringNitrogen']
+   cmtnum                  ctname  pft  observed cmprt
+0       6  GPPAllIgnoringNitrogen    0    11.833   NaN
+1       6  GPPAllIgnoringNitrogen    1   197.867   NaN
+2       6  GPPAllIgnoringNitrogen    2    42.987   NaN
+3       6  GPPAllIgnoringNitrogen    3    10.667   NaN
+4       6  GPPAllIgnoringNitrogen    4     3.375   NaN
+5       6  GPPAllIgnoringNitrogen    5    16.000   NaN
+6       6  GPPAllIgnoringNitrogen    6     6.000   NaN
+>>> flat_targets[0:len(d.pftnums)]
 [11.833, 197.867, 42.987, 10.667, 3.375, 16.0, 6.0]
 
+Then actually check the data so that we are confident in the output order for
+the flat list.
+>>> a = flat_targets[0:len(d.pftnums)]
+>>> b = df_targets.loc[df_targets['ctname'] == 'GPPAllIgnoringNitrogen']['observed']
+>>> all(a == b)
+True
+
+Then check on some compartment data. First print out the tables of data so we 
+can see the expected shapes.
+
+>>> flat_targets[len(d.pftnums):len(d.pftnums)+3]
+[2.0, 4.0, 0.297]
+>>> df_targets.loc[ (df_targets['ctname']=='VegCarbon') & (df_targets['pft']==0) ]
+   cmtnum     ctname  pft  observed cmprt
+7       6  VegCarbon    0     2.000  Leaf
+8       6  VegCarbon    0     4.000  Stem
+9       6  VegCarbon    0     0.297  Root
+
+Then check the data so that we are confident that we understand the ordering.
+
+>>> a = flat_targets[len(d.pftnums):len(d.pftnums)+3]
+>>> b = df_targets.loc[ (df_targets['ctname']=='VegCarbon') & (df_targets['pft']==0) ]['observed']
+>>> all(a == b)
+True
+
 >>> d.params_vec()
 [22.8, 250.6, 65.0, 38.5, 7.8, 21.0, 36.3]
 
@@ -77,9 +121,27 @@ This makes sense because we haven't run the model yet so there are no outputs.
 
 >>> final_data = d.gather_model_outputs()
 >>> import pandas as pd
-
-.. 
-  >>> #pd.DataFrame(final_data)
-  >>> #print(final_data)
-  >>> #print(d.params)
-
+>>> df_finaldata = pd.DataFrame(final_data)
+>>> df_finaldata.loc[(df_finaldata['ctname']=='VegCarbon') & (df_finaldata['cmprt']=='Leaf')]
+      cmt     ctname      value  truth  pft cmprt
+7   CMT06  VegCarbon   2.138998   2.00    0  Leaf
+10  CMT06  VegCarbon  42.925257  37.10    1  Leaf
+12  CMT06  VegCarbon   0.156739   8.06    2  Leaf
+14  CMT06  VegCarbon   2.602119   2.00    3  Leaf
+16  CMT06  VegCarbon   2.250932   2.00    4  Leaf
+17  CMT06  VegCarbon  22.572059  22.00    5  Leaf
+18  CMT06  VegCarbon  22.400614  23.00    6  Leaf
+
+Now check that the observed values that are put in the final output data are
+indeed the same as the observed values that are read and setup in the
+`self.targets` data structure before running the model. This is harder because in
+the outputs, there are only rows for valid PFT/compartment combos, whereas in 
+the targets dataframe, there are rows with zero values for the observations for
+PFT/compartment combos that are not defined...for example the Stem and Root 
+compartments are not set for Lichen, but in the targets, there are (empty) rows
+for them. So here we drop the empty rows, and then compare with the final data.
+
+>>> a = df_targets.loc[ (df_targets['ctname']=='VegCarbon') & (df_targets['pft']==2) ]['observed']
+>>> b = df_finaldata.loc[ (df_finaldata['ctname']=='VegCarbon') & (df_finaldata['pft']==2) ]['truth']
+>>> all( a[a>0].values == b.values ) 
+True