Skip to content

Commit

Permalink
Merge pull request #16 from harmsm/main
Browse files Browse the repository at this point in the history
better plotting experience with unstable models
  • Loading branch information
harmsm authored Sep 18, 2024
2 parents 8aac336 + 0e12ccc commit 39af49c
Show file tree
Hide file tree
Showing 23 changed files with 2,246 additions and 1,741 deletions.
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
project = 'dataprob'
copyright = '2024, Michael J. Harms'
author = 'Michael J. Harms'
release = '0.9.3'
release = '0.9.4'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ One can even more finely control the assignment of fittable parameters: see the
-------------------------------

Once we have defined our model and assigned which parameters are fittable, we
can control via seven attributes of each fittable parameter. These are stored in
can control seven attributes of each fittable parameter. These are stored in
``f.param_df`` dataframe. Each row is a parameter; each column is an attribute.

.. code-block:: python
Expand Down
3,353 changes: 1,766 additions & 1,587 deletions reports/flake.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion reports/junit/junit.xml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/dataprob/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
Version string.
"""

VERSION = (0, 9, 3)
VERSION = (0, 9, 4)

__version__ = '.'.join(map(str, VERSION))
32 changes: 29 additions & 3 deletions src/dataprob/fitters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,13 +374,36 @@ def data_df(self):
out["y_std"] = y_std

if self.success:

estimate = np.array(self.fit_df.loc[self._model.unfixed_mask,
"estimate"],dtype=float).copy()

keep_mask = self._model.unfixed_mask

estimate = np.array(self.fit_df.loc[keep_mask,"estimate"],
dtype=float).copy()
out["y_calc"] = self.model(estimate)
out["unweighted_residuals"] = self._unweighted_residuals(estimate)
out["weighted_residuals"] = self._weighted_residuals(estimate)

else:

# Try to generate a y_calc vector from the guesses. If this crashes,
# it's okay; silently ignore and do not populate y_calc and
# residuals columns.
try:
keep_mask = np.logical_not(self._model.param_df["fixed"])
guesses = np.array(self._model.param_df.loc[keep_mask,"guess"],
dtype=float).copy()

out["y_calc"] = self.model(guesses)

# Put these after y_calc. Even if y_calc succeeds, these could
# fail if we don't have y_obs and y_std loaded. Build as much
# as we can before crashing.
out["unweighted_residuals"] = self._unweighted_residuals(guesses)
out["weighted_residuals"] = self._weighted_residuals(guesses)

except Exception as e:
pass

return pd.DataFrame(out)

@data_df.setter
Expand Down Expand Up @@ -627,6 +650,9 @@ def get_sample_df(self,num_samples=100):
if self.success:
estimate = np.array(self.fit_df["estimate"],dtype=float).copy()
out["y_calc"] = self.model(estimate)
else:
estimate = np.array(self.fit_df["guess"],dtype=float).copy()
out["y_calc"] = self.model(estimate)

samples = self.samples
if samples is not None:
Expand Down
59 changes: 40 additions & 19 deletions src/dataprob/fitters/bayesian/bayesian_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,6 @@ def _sample_to_convergence(self):

# Loop until we reach max_convergence_cycles
while counter < self._max_convergence_cycles:

# Current number of steps
num_steps = self._fit_result.get_chain().shape[0]

# Get the parameter correlation time.
try:
Expand All @@ -235,29 +232,34 @@ def _sample_to_convergence(self):
# success.
max_corr = np.max(self._fit_result.get_autocorr_time())
print(f" Converged correlation time: {max_corr:.2f} iterations\n",
flush=True,
file=sys.stderr)
flush=True,
file=sys.stderr)
success = True
break

except emcee.autocorr.AutocorrError as e:

# emcee throws an AutocorrError if it is not converged yet. Get the
# maximum of its current (unreliable) guess.
max_corr = np.max(e.__dict__["tau"])
print(f" Estimated correlation time: {max_corr:.2f} iterations\n",
flush=True,
file=sys.stderr)

# Figure out how many steps to add to get to the target correlation time
# (max_corr * 50). This number is hard-coded into the acorr estimator.
next_iteration = int(np.ceil(max_corr)*50)
next_iteration = next_iteration - num_steps

# Figure out how many steps to add to get to the target correlation time
# (max_corr * 50). This number is hard-coded into the acorr estimator.
need_at_least = int(np.ceil(max_corr)*50)

msg = f" Rough estimate of correlation time: {max_corr:.2f} iterations. "
msg +=f"We need at least {need_at_least} samples.\n"
print(msg,flush=True,file=sys.stderr)

# Run next sampling, starting from previous state
print(f"Running {counter + 1} of up to {self._max_convergence_cycles} sampler iterations",
flush=True,
file=sys.stderr)
flush=True,
file=sys.stderr)

# Current number of steps
current_num_steps = self._fit_result.get_chain().shape[0]
next_iteration = need_at_least - current_num_steps

self._fit_result.run_mcmc(initial_state=None,
nsteps=next_iteration,
progress=True)
Expand Down Expand Up @@ -368,7 +370,6 @@ def fit(self,
y_std=y_std,
**emcee_kwargs)


def _fit(self,**kwargs):
"""
Fit the parameters.
Expand All @@ -389,9 +390,29 @@ def _fit(self,**kwargs):
if self._use_ml_guess:

ml_fit = MLFitter(some_function=self._model)
ml_fit.data_df = self.data_df
ml_fit.fit()
self._initial_state = ml_fit.samples[:self._num_walkers,:]
ml_fit.param_df = self.param_df.copy()
ml_fit.data_df = self.data_df.copy()

try:
ml_fit.fit(num_samples=self._num_walkers*100)
except Exception as e:
err = "\n\nInitial ml fit is failing. See error trace for details.\n\n"
raise RuntimeError(err) from e

# Get samples
success = False
if ml_fit.samples is not None:
self._initial_state = ml_fit.samples[:self._num_walkers,:]
success = True

# If we are not getting samples out of the ml fitter.
if not success:
err = "\n\nml fitter is not returning parameter samples. This can\n"
err += "occur if the initial guesses are extremely far from\n"
err += "true values or if the model is not numerically stable.\n"
err += "Try changing your parameter guesses and/or parameter\n"
err += "bounds. Alternatively, set use_ml_guess = False.\n\n"
raise RuntimeError(err)

# Generate walkers by sampling from the prior distribution. This will
# only generate values for unfixed parameters.
Expand Down
6 changes: 2 additions & 4 deletions src/dataprob/fitters/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ def _update_fit_df(self):
high_95.append(c2[i])

except np.linalg.LinAlgError:
w = "\n\nJacobian matrix was singular. Could not fit parameter\n"
w += "uncertainty.\n\n"
w = "\n\nJacobian matrix was singular. Could not find parameter uncertainty.\n\n"
warnings.warn(w)

std = np.nan*np.ones(len(estimate),dtype=float)
Expand Down Expand Up @@ -165,8 +164,7 @@ def samples(self):
cov = np.linalg.inv(2*np.dot(J.T,J))
chol_cov = np.linalg.cholesky(cov).T
except np.linalg.LinAlgError:
w = "\n\nJacobian matrix was singular. Could not generate\n"
w += "parameter samples.\n\n"
w = "\n\nJacobian matrix was singular. Could not generate parameter samples.\n\n"
warnings.warn(w)

# Return empty array
Expand Down
36 changes: 21 additions & 15 deletions src/dataprob/plot/_plot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,9 @@ def get_plot_features(f,
validated number of samples
"""

# Make sure fit was successful before trying to plot anything
if not f.success:
err = "fit has not been successfully run. cannot generate plot.\n"
raise RuntimeError(err)

# Check for plot success
has_fit_results = f.success is True

if x_label is not None:
x_label = str(x_label)

Expand All @@ -54,15 +52,12 @@ def get_plot_features(f,
minimum_allowed=0)

if f.samples is None:
err = "could not get samples from this fit. cannot plot\n"
raise ValueError(err)
num_samples = 0

if len(f.samples) < num_samples:
err = f"num_samples ({num_samples}) is more than the number of\n"
err += f"samples in the fitter ({len(f.samples)})\n"
raise ValueError(err)
if f.samples is not None and len(f.samples) < num_samples:
num_samples = len(f.samples)

return x_label, y_label, num_samples
return has_fit_results, x_label, y_label, num_samples

def get_style(user_style,style_name):
"""
Expand Down Expand Up @@ -125,10 +120,21 @@ def get_vectors(f,x_axis=None):
"""

# Grab y_obs, y_std, y_calc
y_obs = np.array(f.data_df["y_obs"],dtype=float).copy()
y_std = np.array(f.data_df["y_std"],dtype=float).copy()
y_calc = np.array(f.data_df["y_calc"],dtype=float).copy()
if "y_obs" in f.data_df.columns:
y_obs = np.array(f.data_df["y_obs"],dtype=float).copy()
else:
y_obs = np.ones(len(f.data_df))*np.nan

if "y_std" in f.data_df.columns:
y_std = np.array(f.data_df["y_std"],dtype=float).copy()
else:
y_std = np.ones(len(y_obs))*np.nan

if "y_calc" in f.data_df.columns:
y_calc = np.array(f.data_df["y_calc"],dtype=float).copy()
else:
y_calc = np.ones(len(y_obs))*np.nan

# If no x-axis sent in, build one
if x_axis is None:
x_axis = np.arange(len(y_obs),dtype=float)
Expand Down
6 changes: 4 additions & 2 deletions src/dataprob/plot/plot_corner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np

import re
import warnings

def plot_corner(f,filter_params=None,**kwargs):
"""
Expand Down Expand Up @@ -57,8 +58,9 @@ def plot_corner(f,filter_params=None,**kwargs):

# Check for samples
if f.samples is None:
err = "Fit does not have samples. Could not generate a corner plot.\n"
raise RuntimeError(err)
w = "Fit does not have samples. Could not generate a corner plot.\n"
warnings.warn(w)
return None

# Go through samples
keep_indexes = []
Expand Down
26 changes: 20 additions & 6 deletions src/dataprob/plot/plot_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import matplotlib
from matplotlib import pyplot as plt

import numpy as np

def plot_fit(f,
x_axis=None,
x_label=None,
Expand Down Expand Up @@ -60,19 +62,17 @@ def plot_fit(f,
"""

# Validate inputs and prep for plotting
x_label, y_label, num_samples = get_plot_features(f,
x_label,
y_label,
num_samples)
has_results, x_label, y_label, num_samples = get_plot_features(f,
x_label,
y_label,
num_samples)

# Get styles for series
y_obs_style = get_style(y_obs_style,"y_obs")
y_std_style = get_style(y_std_style,"y_std")
y_calc_style = get_style(y_calc_style,"y_calc")
sample_line_style = get_style(sample_line_style,"sample_line")

# Get series information (as numpy arrays)
x_axis, y_obs, y_std, y_calc = get_vectors(f,x_axis)

# Define plot axis
if ax is None:
Expand All @@ -82,6 +82,20 @@ def plot_fit(f,
err = "ax should be a matplotlib Axes instance\n"
raise ValueError(err)

# Get values to plot
x_axis, y_obs, y_std, y_calc = get_vectors(f,x_axis)

good_mask = np.logical_not(np.isnan(y_obs))
x_axis = x_axis[good_mask]
y_obs = y_obs[good_mask]
y_std = y_std[good_mask]
y_calc = y_calc[good_mask]

# Gracefully return if there are no observations to plot
if len(y_obs) == 0:
fig = ax.get_figure()
return fig, ax

# Create core plot
ax.plot(x_axis,y_obs,**y_obs_style,label="y_obs")
ax.errorbar(x=x_axis,y=y_obs,yerr=y_std,**y_std_style)
Expand Down
Loading

0 comments on commit 39af49c

Please sign in to comment.