v0.12.0

Samreay · Samreay · commit 58c4ff7d8c57 · 2016-10-12T14:41:18.000+10:00
diff --git a/README.md b/README.md
@@ -98,11 +98,14 @@ features requests thought up.
 
 ### Update History
 
+##### 0.12.0
+* Adding support for grid data.
+
 ##### 0.11.3
 * Fixing bug in Gelman-Rubin statistic
 
 ##### 0.11.2
-* Improving text labels again
+* Improving text labels again.
 
 ##### 0.11.1
 * Improving text labels for high value data.
diff --git a/chainconsumer/chain.py b/chainconsumer/chain.py
@@ -16,7 +16,7 @@ class ChainConsumer(object):
     """ A class for consuming chains produced by an MCMC walk
 
     """
-    __version__ = "0.11.3"
+    __version__ = "0.12.0"
 
     def __init__(self):
         logging.basicConfig()
@@ -30,6 +30,7 @@ def __init__(self):
         self.names = []
         self.parameters = []
         self.all_parameters = []
+        self.grids = []
         self.default_parameters = None
         self._configured_bar = False
         self._configured_contour = False
@@ -45,7 +46,7 @@ def __init__(self):
             "cumulative": self._get_parameter_summary_cumulative
         }
 
-    def add_chain(self, chain, parameters=None, name=None, weights=None, posterior=None, walkers=None):
+    def add_chain(self, chain, parameters=None, name=None, weights=None, posterior=None, walkers=None, grid=False):
         """ Add a chain to the consumer.
 
         Parameters
@@ -70,6 +71,10 @@ def add_chain(self, chain, parameters=None, name=None, weights=None, posterior=N
             How many walkers went into creating the chain. Each walker should
             contribute the same number of steps, and should appear in contiguous
             blocks in the final chain.
+        grid : boolean, optional
+            Whether the input is a flattened chain from a grid search instead of a Monte-Carlo
+            chains. Note that when this is set, `walkers` should not be set, and `weights` should
+            be set to the posterior evaluation for the grid point.
 
         Returns
         -------
@@ -104,6 +109,11 @@ def add_chain(self, chain, parameters=None, name=None, weights=None, posterior=N
         if self.default_parameters is None and parameters is not None:
             self.default_parameters = parameters
 
+        self.grids.append(grid)
+        if grid:
+            assert walkers is None, "If grid is set, walkers should not be"
+            assert weights is not None, "If grid is set, you need to supply weights"
+
         if parameters is None:
             if self.default_parameters is not None:
                 assert chain.shape[1] == len(self.default_parameters), \
@@ -399,11 +409,11 @@ def get_summary(self, squeeze=True):
             One entry per chain, parameter bounds stored in dictionary with parameter as key
         """
         results = []
-        for ind, (chain, parameters, weights) in enumerate(zip(self.chains,
-                                                               self.parameters, self.weights)):
+        for ind, (chain, parameters, weights, g) in enumerate(zip(self.chains,
+                                                               self.parameters, self.weights, self.grids)):
             res = {}
             for i, p in enumerate(parameters):
-                summary = self._get_parameter_summary(chain[:, i], weights, p, ind)
+                summary = self._get_parameter_summary(chain[:, i], weights, p, ind, grid=g)
                 res[p] = summary
             results.append(res)
         if squeeze and len(results) == 1:
@@ -742,13 +752,13 @@ def plot(self, figsize="GROW", parameters=None, extents=None, filename=None,
                 do_flip = (flip and i == len(params1) - 1)
                 if plot_hists and i == j:
                     max_val = None
-                    for chain, weights, parameters, colour, bins, fit, ls, bs, lw in \
+                    for chain, weights, parameters, colour, bins, fit, ls, bs, lw, g in \
                             zip(self.chains, self.weights, self.parameters, colours,
-                                num_bins, fit_values, linestyles, bar_shades, linewidths):
+                                num_bins, fit_values, linestyles, bar_shades, linewidths, self.grids):
                         if p1 not in parameters:
                             continue
                         index = parameters.index(p1)
-                        m = self._plot_bars(ax, p1, chain[:, index], weights, colour, ls, bs, lw, bins=bins,
+                        m = self._plot_bars(ax, p1, chain[:, index], weights, colour, ls, bs, lw, g, bins=bins,
                                             fit_values=fit[p1], flip=do_flip, summary=summary,
                                             truth=truth, extents=extents[p1])
                         if max_val is None or m > max_val:
@@ -759,15 +769,15 @@ def plot(self, figsize="GROW", parameters=None, extents=None, filename=None,
                         ax.set_ylim(0, 1.1 * max_val)
 
                 else:
-                    for chain, parameters, bins, colour, ls, s, sa, lw, fit, weights in \
+                    for chain, parameters, bins, colour, ls, s, sa, lw, fit, weights, g in \
                             zip(self.chains, self.parameters, num_bins, colours, linestyles, shades,
-                                shade_alphas, linewidths, fit_values, self.weights):
+                                shade_alphas, linewidths, fit_values, self.weights, self.grids):
                         if p1 not in parameters or p2 not in parameters:
                             continue
                         i1 = parameters.index(p1)
                         i2 = parameters.index(p2)
                         self._plot_contour(ax, chain[:, i2], chain[:, i1], weights, p1, p2, colour, ls,
-                                           s, sa, lw, bins=bins, truth=truth)
+                                           s, sa, lw, g, bins=bins, truth=truth)
 
         if self.names is not None and legend:
             ax = axes[0, -1]
@@ -1084,14 +1094,16 @@ def _plot_walk(self, ax, parameter, data, truth=None, extents=None,
             ax.axhline(truth, **self.parameters_truth)
 
     def _plot_bars(self, ax, parameter, chain_row, weights, colour, linestyle, bar_shade,
-                   linewidth, bins=25, flip=False, summary=False, fit_values=None,
+                   linewidth, grid, bins=25, flip=False, summary=False, fit_values=None,
                    truth=None, extents=None):  # pragma: no cover
 
         kde = self.parameters_general["kde"]
         smooth = self.parameters_general["smooth"]
         bins, smooth = self._get_smoothed_bins(smooth, bins)
-
-        bins = np.linspace(extents[0], extents[1], bins)
+        if grid:
+            bins = self._get_grid_bins(chain_row)
+        else:
+            bins = np.linspace(extents[0], extents[1], bins)
         hist, edges = np.histogram(chain_row, bins=bins, normed=True, weights=weights)
         edge_center = 0.5 * (edges[:-1] + edges[1:])
         if smooth:
@@ -1149,17 +1161,22 @@ def _plot_bars(self, ax, parameter, chain_row, weights, colour, linestyle, bar_s
         return hist.max()
 
     def _plot_contour(self, ax, x, y, w, px, py, colour, linestyle, shade,
-                      shade_alpha, linewidth, bins=25, truth=None):  # pragma: no cover
+                      shade_alpha, linewidth, grid, bins=25, truth=None):  # pragma: no cover
 
         levels = 1.0 - np.exp(-0.5 * self.parameters_contour["sigmas"] ** 2)
         smooth = self.parameters_general["smooth"]
-        bins, smooth = self._get_smoothed_bins(smooth, bins, marginalsied=False)
+        if grid:
+            binsx = self._get_grid_bins(x)
+            binsy = self._get_grid_bins(y)
+            hist, x_bins, y_bins = np.histogram2d(x, y, bins=[binsx, binsy], weights=w)
+        else:
+            bins, smooth = self._get_smoothed_bins(smooth, bins, marginalsied=False)
+            hist, x_bins, y_bins = np.histogram2d(x, y, bins=bins, weights=w)
 
         colours = self._scale_colours(colour, len(levels))
         colours2 = [self._scale_colour(colours[0], 0.7)] + \
                    [self._scale_colour(c, 0.8) for c in colours[:-1]]
 
-        hist, x_bins, y_bins = np.histogram2d(x, y, bins=bins, weights=w)
         x_centers = 0.5 * (x_bins[:-1] + x_bins[1:])
         y_centers = 0.5 * (y_bins[:-1] + y_bins[1:])
         if smooth:
@@ -1224,16 +1241,18 @@ def _get_figure(self, all_parameters, flip, figsize=(5, 5),
             if external_extents is not None and p in external_extents:
                 min_val, max_val = external_extents[p]
             else:
-                for chain, parameters in zip(self.chains, self.parameters):
+                for i, (chain, parameters) in enumerate(zip(self.chains, self.parameters)):
                     if p not in parameters:
                         continue
                     index = parameters.index(p)
-                    # min_val = chain[:, index].min()
-                    # max_val = chain[:, index].max()
-                    mean = np.mean(chain[:, index])
-                    std = np.std(chain[:, index])
-                    min_prop = mean - sigma_extent * std
-                    max_prop = mean + sigma_extent* std
+                    if self.grids[i]:
+                        min_prop = chain[:, index].min()
+                        max_prop = chain[:, index].max()
+                    else:
+                        mean = np.mean(chain[:, index])
+                        std = np.std(chain[:, index])
+                        min_prop = mean - sigma_extent * std
+                        max_prop = mean + sigma_extent * std
                     if min_val is None or min_prop < min_val:
                         min_val = min_prop
                     if max_val is None or max_prop > max_val:
@@ -1333,10 +1352,19 @@ def _get_smoothed_bins(self, smooth, bins, marginalsied=True):
         else:
             return ((3 if marginalsied else 2) * smooth * bins), smooth
 
-    def _get_smoothed_histogram(self, data, weights, chain_index):
+    def _get_grid_bins(self, data):
+        bin_c = sorted(np.unique(data))
+        delta = 0.5 * (bin_c[1] - bin_c[0])
+        bins = np.concatenate((bin_c - delta, [bin_c[-1] + delta]))
+        return bins
+
+    def _get_smoothed_histogram(self, data, weights, chain_index, grid):
         smooth = self.parameters_general["smooth"]
-        bins = self.parameters_general['bins'][chain_index]
-        bins, smooth = self._get_smoothed_bins(smooth, bins)
+        if grid:
+            bins = self._get_grid_bins(data)
+        else:
+            bins = self.parameters_general['bins'][chain_index]
+            bins, smooth = self._get_smoothed_bins(smooth, bins)
         hist, edges = np.histogram(data, bins=bins, normed=True, weights=weights)
         edge_centers = 0.5 * (edges[1:] + edges[:-1])
         xs = np.linspace(edge_centers[0], edge_centers[-1], 10000)
@@ -1363,21 +1391,21 @@ def _get_parameter_summary(self, data, weights, parameter, chain_index, **kwargs
         method = self.summaries[self.parameters_general["statistics"][chain_index]]
         return method(data, weights, parameter, chain_index, **kwargs)
 
-    def _get_parameter_summary_mean(self, data, weights, parameter, chain_index, desired_area=0.6827):
-        xs, ys, cs = self._get_smoothed_histogram(data, weights, chain_index)
+    def _get_parameter_summary_mean(self, data, weights, parameter, chain_index, desired_area=0.6827, grid=False):
+        xs, ys, cs = self._get_smoothed_histogram(data, weights, chain_index, grid)
         vals = [0.5 - desired_area / 2, 0.5, 0.5 + desired_area / 2]
         bounds = interp1d(cs, xs)(vals)
         bounds[1] = 0.5 * (bounds[0] + bounds[2])
         return bounds
 
-    def _get_parameter_summary_cumulative(self, data, weights, parameter, chain_index, desired_area=0.6827):
-        xs, ys, cs = self._get_smoothed_histogram(data, weights, chain_index)
+    def _get_parameter_summary_cumulative(self, data, weights, parameter, chain_index, desired_area=0.6827, grid=False):
+        xs, ys, cs = self._get_smoothed_histogram(data, weights, chain_index, grid)
         vals = [0.5 - desired_area / 2, 0.5, 0.5 + desired_area / 2]
         bounds = interp1d(cs, xs)(vals)
         return bounds
 
-    def _get_parameter_summary_max(self, data, weights, parameter, chain_index, desired_area=0.6827):
-        xs, ys, cs = self._get_smoothed_histogram(data, weights, chain_index)
+    def _get_parameter_summary_max(self, data, weights, parameter, chain_index, desired_area=0.6827, grid=False):
+        xs, ys, cs = self._get_smoothed_histogram(data, weights, chain_index, grid)
         startIndex = ys.argmax()
         maxVal = ys[startIndex]
         minVal = 0
diff --git a/doc/usage.rst b/doc/usage.rst
@@ -13,7 +13,7 @@ The process of using ChainConsumer should be straightforward:
 1. Create an instance of ChainConsumer.
 2. Add your chains to this instance.
 3. Run convergence diagnostics, if desired.
-4. Update the configurations if needed.
+4. Update the configurations if needed (make sure you do this *after* loading in the data).
 5. Plot.
 
 The main page and the examples page has code demonstrating these,
diff --git a/examples/Basics/plot_convergence.py b/examples/Basics/plot_convergence.py
@@ -22,8 +22,8 @@
 # of the chain isn't agreeing with anything else!
 data_good = normal(size=100000)
 data_bad = data_good.copy()
-data_bad[98000:] += 2.0
-data_bad[:1000] *= 2.0
+data_bad += np.linspace(-0.5, 0.5, 100000)
+data_bad[98000:] += 2
 
 # Lets load it into ChainConsumer, and pretend 10 walks went into making the chain
 c = ChainConsumer()
diff --git a/examples/Basics/plot_grid.py b/examples/Basics/plot_grid.py
@@ -0,0 +1,29 @@
+"""
+==========
+Grid Data!
+==========
+
+If you don't have Monte Carlo chains, and have grid evaluations instead, that's fine too!
+
+Just flatten your grid, set the weights to the grid evaluation, and set the grid flag.
+
+Here is a nice diamond that you get from modifying a simple multivariate normal distribution.
+
+"""
+import numpy as np
+from numpy.random import normal, multivariate_normal
+from chainconsumer import ChainConsumer
+
+
+if __name__ == "__main__":
+    xx, yy = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-7, 7, 100))
+    xs, ys = xx.flatten(), yy.flatten()
+    data = np.vstack((xs, ys)).T
+    pdf = (1 / (2 * np.pi)) * np.exp(-0.5 * (xs * xs + ys * ys / 4 + np.abs(xs * ys)))
+
+    c = ChainConsumer()
+    c = ChainConsumer()
+    c.add_chain(data, parameters=["$x$", "$y$"], weights=pdf, grid=True)
+    fig = c.plot()
+
+    fig.set_size_inches(3.5 + fig.get_size_inches())  # Resize fig for doco. You don't need this.
diff --git a/test_chain.py b/test_chain.py
@@ -2,7 +2,7 @@
 import tempfile
 
 import numpy as np
-from scipy.stats import skewnorm, norm
+from scipy.stats import skewnorm, norm, multivariate_normal
 import pytest
 
 from chainconsumer import ChainConsumer
@@ -547,4 +547,21 @@ def test_geweke_default_failed(self):
         data2 = data.copy()
         data2[98000:, :] += 0.3
         consumer.add_chain(data2, walkers=20, name="c2")
-        assert not consumer.diagnostic_geweke()
+        assert not consumer.diagnostic_geweke()
+
+    def test_grid_data(self):
+        xx, yy = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-5, 5, 100))
+        xs, ys = xx.flatten(), yy.flatten()
+        chain = np.vstack((xs, ys)).T
+        pdf = (1 / (2 * np.pi)) * np.exp(-0.5 * (xs * xs + ys * ys / 4))
+        c = ChainConsumer()
+        c.add_chain(chain, parameters=['x','y'], weights=pdf, grid=True)
+        c.configure_general(smooth=1)
+        summary = c.get_summary()
+        x_sum = summary['x']
+        y_sum = summary['y']
+        expected_x = np.array([-1.0, 0.0, 1.0])
+        expected_y = np.array([-2.0, 0.0, 2.0])
+        threshold = 0.05
+        assert np.all(np.abs(expected_x - x_sum) < threshold)
+        assert np.all(np.abs(expected_y - y_sum) < threshold)