Initial work on FlowDistribution.

botev · botev · commit 666f4217c11c · 2018-01-08T20:20:53.000Z
diff --git a/examples/normalizing_flows/dlgm_nf.py b/examples/normalizing_flows/dlgm_nf.py
@@ -6,6 +6,7 @@
 from __future__ import division
 import os
 import time
+from functools import partial
 
 import tensorflow as tf
 from six.moves import range
@@ -29,14 +30,17 @@ def vae(observed, n, x_dim, z_dim, n_particles):
     return model
 
 
-def q_net(x, z_dim, n_particles):
+def q_net(x, z_dim, n_particles, n_flows):
+    def forward(samples):
+        return zs.repeated_flow(zs.planar_normalizing_flow, samples, n_iters=n_flows)
+
     with zs.BayesianNet() as variational:
         lz_x = tf.layers.dense(tf.to_float(x), 500, activation=tf.nn.relu)
         lz_x = tf.layers.dense(lz_x, 500, activation=tf.nn.relu)
         z_mean = tf.layers.dense(lz_x, z_dim)
         z_logstd = tf.layers.dense(lz_x, z_dim)
-        z = zs.Normal('z', z_mean, logstd=z_logstd, group_ndims=1,
-                      n_samples=n_particles)
+        z = zs.NormalFlow('z', forward, mean=z_mean, logstd=z_logstd, group_ndims=1,
+                          n_samples=n_particles)
     return variational
 
 
@@ -67,14 +71,9 @@ def log_joint(observed):
         log_pz, log_px_z = model.local_log_prob(['z', 'x'])
         return log_pz + log_px_z
 
-    variational = q_net(x, z_dim, n_particles)
+    variational = q_net(x, z_dim, n_particles, n_planar_flows)
     qz_samples, log_qz = variational.query('z', outputs=True,
                                            local_log_prob=True)
-    # TODO: add tests for repeated calls of flows
-    qz_samples, log_qz = zs.planar_normalizing_flow(qz_samples, log_qz,
-                                                    n_iters=n_planar_flows)
-    qz_samples, log_qz = zs.planar_normalizing_flow(qz_samples, log_qz,
-                                                    n_iters=n_planar_flows)
 
     lower_bound = zs.variational.elbo(log_joint,
                                       observed={'x': x},
diff --git a/tests/model/test_base.py b/tests/model/test_base.py
@@ -21,9 +21,11 @@ def test_init(self):
         probs = Mock()
         sample_func = Mock(return_value=samples)
         log_prob_func = Mock(return_value=log_probs)
+        sample_and_log_prob_func = Mock(return_value=(samples, log_probs))
         prob_func = Mock(return_value=probs)
         distribution = Mock(sample=sample_func,
                             log_prob=log_prob_func,
+                            sample_and_log_prob=sample_and_log_prob_func,
                             prob=prob_func,
                             dtype=tf.int32)
         with BayesianNet() as model:
@@ -86,9 +88,11 @@ def test_session_run(self):
             probs = Mock()
             sample_func = Mock(return_value=samples)
             log_prob_func = Mock(return_value=log_probs)
+            sample_and_log_prob_func = Mock(return_value=(samples, log_probs))
             prob_func = Mock(return_value=probs)
             distribution = Mock(sample=sample_func,
                                 log_prob=log_prob_func,
+                                sample_and_log_prob=sample_and_log_prob_func,
                                 prob=prob_func,
                                 dtype=tf.int32)
 
diff --git a/tests/test_transform.py b/tests/test_transform.py
@@ -20,27 +20,25 @@ def test_planar_normalizing_flow(self):
                 z.append(np.array([[vz[i]]]))
                 z[i] = tf.constant(z[i], dtype=tf.float32)
             z_0 = tf.concat(z, axis=1)
-            z_1, n_log_det_ja = planar_normalizing_flow(
-                z_0, [0.0], n_iters=10)
-
-            n_log_det_ja = tf.reshape(n_log_det_ja, [])
+            z_1, n_log_det_ja = repeated_flow(planar_normalizing_flow, z_0, n_iters=10)
 
             grad = []
             for i in range(len(vz)):
                 z_1i = z_1[0, i]
                 grad.append(tf.gradients(z_1i, z_0)[0])
-            jocabian = tf.concat(grad, axis=0)
-            log_det_jacobian = tf.log(tf.matrix_determinant(jocabian))
+            jacobian = tf.concat(grad, axis=0)
+            log_det_jacobian = tf.log(tf.matrix_determinant(jacobian))
 
             sess.run(tf.global_variables_initializer())
-            test_value, true_value = sess.run([-log_det_jacobian,
-                                               n_log_det_ja])
+            test_value, true_value = sess.run([log_det_jacobian,
+                                               tf.squeeze(n_log_det_ja)])
             self.assertAllClose(test_value, true_value)
 
     def test_flow_shape(self):
         z = tf.random_normal(shape=(2, 10, 6), mean=0, stddev=0.05)
         log_pz = tf.random_normal(shape=(2, 10), mean=0, stddev=0.05)
-        t_z, t_log_pz = planar_normalizing_flow(z, log_pz, n_iters=10)
+        t_z, log_det = repeated_flow(planar_normalizing_flow, z, n_iters=10)
+        t_log_pz = log_pz - log_det
         with self.test_session(use_gpu=True) as sess:
             sess.run(tf.global_variables_initializer())
             o_z, o_log_pz = sess.run([t_z, t_log_pz])
diff --git a/zhusuan/distributions/base.py b/zhusuan/distributions/base.py
@@ -333,3 +333,8 @@ def _prob(self, given):
         Private method for subclasses to rewrite the :meth:`prob` method.
         """
         raise NotImplementedError()
+
+    def sample_and_log_prob(self, n_samples=None):
+        samples = self.sample(n_samples=n_samples)
+        log_p = self.log_prob(samples)
+        return samples, log_p
diff --git a/zhusuan/distributions/special.py b/zhusuan/distributions/special.py
@@ -13,6 +13,7 @@
 __all__ = [
     'Empirical',
     'Implicit',
+    'FlowDistribution'
 ]
 
 
@@ -132,7 +133,7 @@ def _batch_shape(self):
 
     def _get_batch_shape(self):
         if self.samples.get_shape() == tf.TensorShape(None) or \
-                        self.explicit_value_shape == tf.TensorShape(None):
+                self.explicit_value_shape == tf.TensorShape(None):
             return tf.TensorShape(None)
         else:
             d = self.explicit_value_shape.ndims
@@ -157,3 +158,77 @@ def _prob(self, given):
             return (2 * prob - 1) * inf_dtype
         else:
             return tf.cast(prob, tf.float32)
+
+
+class FlowDistribution(Distribution):
+    """
+    The class of flow-transformed distributions.
+    The distribution describes a variable which is sampled from a base
+    distribution and is then passed through an invertible function.
+    See :class:`~zhusuan.distributions.base.Distribution` for details.
+
+    :param name: A string. The name of the `StochasticTensor`. Must be unique
+        in the `BayesianNet` context.
+    :param base: An instance of `Distribution` parametrizing the base distribution.
+    :param forward: A forward function which describes how we transform the samples
+        from the base distribution. The signature of the function should be:
+            transformed, log_det = forward(base_samples)
+    :param inverse: An inverse function which maps from the transformed samples to
+        base samples. The signature of the function should be:
+            base_samples, log_det = inverse(transformed_samples)
+    :param group_ndims: A 0-D `int32` Tensor representing the number of
+        dimensions in `batch_shape` (counted from the end) that are grouped
+        into a single event, so that their probabilities are calculated
+        together. Default is 0, which means a single value is an event.
+        See :class:`~zhusuan.distributions.base.Distribution` for more detailed
+        explanation.
+    """
+
+    def __init__(self,
+                 base,
+                 forward,
+                 inverse=None,
+                 group_ndims=0,
+                 **kwargs):
+        self.base = base
+        self.forward = forward
+        self.inverse = inverse
+        super(FlowDistribution, self).__init__(
+            dtype=base.dtype,
+            param_dtype=base.dtype,
+            is_continuous=base.dtype.is_floating,
+            group_ndims=group_ndims,
+            is_reparameterized=False,
+            **kwargs)
+
+    def _value_shape(self):
+        return self.base.value_shape()
+
+    def _get_value_shape(self):
+        return self.base.get_value_shape()
+
+    def _batch_shape(self):
+        return self.base.batch_shape()
+
+    def _get_batch_shape(self):
+        return self.base.get_batch_shape()
+
+    def _sample(self, n_samples):
+        return self.sample_and_log_prob(n_samples)[0]
+
+    def _log_prob(self, given):
+        if self.inverse is None:
+            raise ValueError("Flow distribution can only calculate log_prob through `sample_and_log_prob` "
+                             "if `inverse=None`.")
+        else:
+            base_given, log_det = self.inverse(given)
+            log_prob = self.base.log_prob(base_given)
+            return log_prob + log_det
+
+    def _prob(self, given):
+        return tf.exp(self.log_prob(given))
+
+    def sample_and_log_prob(self, n_samples=None):
+        base_sample, log_prob = self.base.sample_and_log_prob(n_samples)
+        transformed, log_det = self.forward(base_sample)
+        return transformed, log_prob - log_det
diff --git a/zhusuan/model/base.py b/zhusuan/model/base.py
@@ -115,9 +115,16 @@ def tensor(self):
                         "with its observed value. Error message: {}".format(
                             self._name, e))
             else:
-                self._tensor = self.sample(self._n_samples)
+                self._tensor, self._local_log_prob = self.sample_and_log_prob(self._n_samples)
         return self._tensor
 
+    @property
+    def local_log_prob(self):
+        tensor = self.tensor
+        if not hasattr(self, '_local_log_prob'):
+            self._local_log_prob = self.log_prob(tensor)
+        return self._local_log_prob
+
     def get_shape(self):
         return self.tensor.get_shape()
 
@@ -149,6 +156,9 @@ def prob(self, given):
         """
         return self._distribution.prob(given)
 
+    def sample_and_log_prob(self, n_samples):
+        return self._distribution.sample_and_log_prob(n_samples)
+
     @staticmethod
     def _to_tensor(value, dtype=None, name=None, as_ref=False):
         if dtype and not dtype.is_compatible_with(value.dtype):
@@ -340,14 +350,10 @@ def local_log_prob(self, name_or_names):
         """
         name_or_names = self._check_names_exist(name_or_names)
         if isinstance(name_or_names, tuple):
-            ret = []
-            for name in name_or_names:
-                s_tensor = self._stochastic_tensors[name]
-                ret.append(s_tensor.log_prob(s_tensor.tensor))
+            return [self._stochastic_tensors[name].local_log_prob
+                    for name in name_or_names]
         else:
-            s_tensor = self._stochastic_tensors[name_or_names]
-            ret = s_tensor.log_prob(s_tensor.tensor)
-        return ret
+            return self._stochastic_tensors[name_or_names].local_log_prob
 
     def query(self, name_or_names, outputs=False, local_log_prob=False):
         """
diff --git a/zhusuan/model/stochastic.py b/zhusuan/model/stochastic.py
@@ -37,6 +37,7 @@
     'GumbelSoftmax',
     'Empirical',
     'Implicit',
+    'NormalFlow'
 ]
 
 
@@ -983,14 +984,14 @@ def __init__(self,
                  is_continuous=None,
                  n_samples=None,
                  **kwargs):
-        norm = distributions.Empirical(
+        empirical = distributions.Empirical(
             dtype, batch_shape,
             value_shape=value_shape,
             group_ndims=group_ndims,
             is_continous=is_continuous,
             **kwargs
         )
-        super(Empirical, self).__init__(name, norm, n_samples)
+        super(Empirical, self).__init__(name, empirical, n_samples)
 
 
 class Implicit(StochasticTensor):
@@ -1021,10 +1022,70 @@ def __init__(self,
                  group_ndims=0,
                  n_samples=None,
                  **kwargs):
-        norm = distributions.Implicit(
+        implicit = distributions.Implicit(
             samples,
             value_shape=value_shape,
             group_ndims=group_ndims,
             **kwargs
         )
-        super(Implicit, self).__init__(name, norm, n_samples)
+        super(Implicit, self).__init__(name, implicit, n_samples)
+
+
+class NormalFlow(StochasticTensor):
+    """
+    The class of univariate Normal `StochasticTensor` with an invertible flow
+    transformation.
+    See :class:`~zhusuan.model.stochastic.Normal` and
+    :class:`~zhusuan.distributions.special.FlowDistribution` for details.
+
+    :param name: A string. The name of the `StochasticTensor`. Must be unique
+        in the `BayesianNet` context.
+    :param forward: A forward function which describes how we transform the samples
+        from the base distribution. The signature of the function should be:
+            transformed, log_det = forward(base_samples)
+    :param inverse: An inverse function which maps from the transformed samples to
+        base samples. The signature of the function should be:
+            base_samples, log_det = inverse(transformed_samples)
+    :param mean: A `float` Tensor. The mean of the Normal distribution.
+        Should be broadcastable to match `logstd`.
+    :param logstd: A `float` Tensor. The log standard deviation of the Normal
+        distribution. Should be broadcastable to match `mean`.
+    :param std: A `float` Tensor. The standard deviation of the Normal
+        distribution. Should be positive and broadcastable to match `mean`.
+    :param n_samples: A 0-D `int32` Tensor or None. Number of samples
+        generated by this `StochasticTensor`.
+    :param group_ndims: A 0-D `int32` Tensor representing the number of
+        dimensions in `batch_shape` (counted from the end) that are grouped
+        into a single event, so that their probabilities are calculated
+        together. Default is 0, which means a single value is an event.
+        See :class:`~zhusuan.distributions.base.Distribution` for more detailed
+        explanation.
+    :param is_reparameterized: A Bool. If True, gradients on samples from this
+        `StochasticTensor` are allowed to propagate into inputs, using the
+        reparametrization trick from (Kingma, 2013).
+    :param check_numerics: Bool. Whether to check numeric issues.
+    """
+
+    def __init__(self,
+                 name,
+                 forward,
+                 inverse=None,
+                 mean=0.,
+                 logstd=None,
+                 std=None,
+                 n_samples=None,
+                 group_ndims=0,
+                 is_reparameterized=True,
+                 check_numerics=False,
+                 **kwargs):
+        normal = distributions.Normal(
+            mean,
+            logstd=logstd,
+            std=std,
+            group_ndims=group_ndims,
+            is_reparameterized=is_reparameterized,
+            check_numerics=check_numerics,
+            **kwargs
+        )
+        flow = distributions.FlowDistribution(normal, forward, inverse, group_ndims=group_ndims)
+        super(NormalFlow, self).__init__(name, flow, n_samples)
diff --git a/zhusuan/transform.py b/zhusuan/transform.py