
Commit e462292

use biased autocorrelation estimate by default (resolves pyro-ppl#1785) (pyro-ppl#1877)
* use biased autocorrelation estimate by default (resolves pyro-ppl#1785)
* increase tolerance in unbiased autocorrelation test
* review comments, fixed autocovariance test
1 parent 0791b1f commit e462292

2 files changed: 31 additions, 10 deletions

numpyro/diagnostics.py

Lines changed: 15 additions & 6 deletions
@@ -96,12 +96,13 @@ def _fft_next_fast_len(target):
         target += 1
 
 
-def autocorrelation(x, axis=0):
+def autocorrelation(x, axis=0, bias=True):
     """
     Computes the autocorrelation of samples at dimension ``axis``.
 
     :param numpy.ndarray x: the input array.
     :param int axis: the dimension to calculate autocorrelation.
+    :param bias: whether to use a biased estimator.
     :return: autocorrelation of ``x``.
     :rtype: numpy.ndarray
     """
@@ -127,25 +128,32 @@ def autocorrelation(x, axis=0):
 
     # truncate and normalize the result, then transpose back to original shape
     autocorr = autocorr[..., :N]
-    autocorr = autocorr / np.arange(N, 0.0, -1)
+
+    # the unbiased estimator is known to have "wild" tails, due to few samples at longer lags.
+    # see Geyer (1992) and Priestley (1981) for a discussion. also note that it is only strictly
+    # unbiased when the mean is known, whereas we estimate it from samples here.
+    if not bias:
+        autocorr = autocorr / np.arange(N, 0.0, -1)
+
     with np.errstate(invalid="ignore", divide="ignore"):
         autocorr = autocorr / autocorr[..., :1]
     return np.swapaxes(autocorr, axis, -1)
 
 
-def autocovariance(x, axis=0):
+def autocovariance(x, axis=0, bias=True):
     """
     Computes the autocovariance of samples at dimension ``axis``.
 
     :param numpy.ndarray x: the input array.
     :param int axis: the dimension to calculate autocovariance.
+    :param bias: whether to use a biased estimator.
     :return: autocovariance of ``x``.
     :rtype: numpy.ndarray
     """
-    return autocorrelation(x, axis) * x.var(axis=axis, keepdims=True)
+    return autocorrelation(x, axis, bias) * x.var(axis=axis, keepdims=True)
 
 
-def effective_sample_size(x):
+def effective_sample_size(x, bias=True):
     """
     Computes effective sample size of input ``x``, where the first dimension of
     ``x`` is chain dimension and the second dimension of ``x`` is draw dimension.
@@ -158,6 +166,7 @@ def effective_sample_size(x):
     Stan Development Team
 
     :param numpy.ndarray x: the input array.
+    :param bias: whether to use a biased estimator of the autocovariance.
     :return: effective sample size of ``x``.
     :rtype: numpy.ndarray
     """
@@ -166,7 +175,7 @@ def effective_sample_size(x):
     assert x.shape[1] >= 2
 
     # find autocovariance for each chain at lag k
-    gamma_k_c = autocovariance(x, axis=1)
+    gamma_k_c = autocovariance(x, axis=1, bias=bias)
 
     # find autocorrelation at lag k (from Stan reference)
     var_within, var_estimator = _compute_chain_variance_stats(x)
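
For readers comparing the two estimators, here is a minimal standalone sketch (not part of the commit; the helper name autocov_sketch is made up) of what the bias flag toggles: the biased estimator divides the lag-k sum of products by N, while the unbiased one divides by N - k, so only a handful of terms contribute at the longest lags and the tail becomes noisy.

import numpy as np

def autocov_sketch(x, bias=True):
    # sample autocovariance of a 1-D series, mirroring the normalization choice above
    x = np.asarray(x, dtype=float)
    N = len(x)
    xc = x - x.mean()
    # raw[k] = sum_t xc[t] * xc[t + k], for lags k = 0 .. N-1
    raw = np.correlate(xc, xc, mode="full")[N - 1:]
    if bias:
        return raw / N  # biased: divide every lag by N
    return raw / np.arange(N, 0, -1)  # unbiased: divide lag k by N - k

x = np.random.normal(size=5000)
print(autocov_sketch(x, bias=True)[-5:])   # tail shrinks toward 0
print(autocov_sketch(x, bias=False)[-5:])  # tail stays O(1) noisy

The FFT-based implementation above computes the same lagged sums and then normalizes by the lag-0 value to obtain a correlation; the sketch only illustrates the denominators.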

test/test_diagnostics.py

Lines changed: 16 additions & 4 deletions
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import numpy as np
-from numpy.testing import assert_allclose
+from numpy.testing import assert_, assert_allclose
 import pytest
 from scipy.fftpack import next_fast_len
 
@@ -60,14 +60,26 @@ def test_hpdi():
 
 def test_autocorrelation():
     x = np.arange(10.0)
-    actual = autocorrelation(x)
+    actual = autocorrelation(x, bias=False)
     expected = np.array([1, 0.78, 0.52, 0.21, -0.13, -0.52, -0.94, -1.4, -1.91, -2.45])
     assert_allclose(actual, expected, atol=0.01)
 
+    actual = autocorrelation(x, bias=True)
+    expected = expected * np.arange(len(x), 0.0, -1) / len(x)
+    assert_allclose(actual, expected, atol=0.01)
+
+    # the unbiased estimator has variance O(1) at large lags
+    x = np.random.normal(size=20000)
+    ac = autocorrelation(x, bias=False)
+    assert_(np.any(np.abs(ac[-100:]) > 0.1))
+
+    ac = autocorrelation(x, bias=True)
+    assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01)
+
 
 def test_autocovariance():
     x = np.arange(10.0)
-    actual = autocovariance(x)
+    actual = autocovariance(x, bias=False)
     expected = np.array(
         [8.25, 6.42, 4.25, 1.75, -1.08, -4.25, -7.75, -11.58, -15.75, -20.25]
     )
@@ -93,4 +105,4 @@ def test_split_gelman_rubin_agree_with_gelman_rubin():
 
 def test_effective_sample_size():
     x = np.arange(1000.0).reshape(100, 10)
-    assert_allclose(effective_sample_size(x), 52.64, atol=0.01)
+    assert_allclose(effective_sample_size(x, bias=False), 52.64, atol=0.01)
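
As a usage-level illustration (not part of the commit, and assuming a numpyro build that includes this change), the flag surfaces to callers like this; bias=True is now the default in all three functions:

import numpy as np
from numpyro.diagnostics import autocorrelation, effective_sample_size

samples = np.random.normal(size=(4, 1000))  # (chains, draws), the layout effective_sample_size expects

ess_default = effective_sample_size(samples)               # biased autocovariance, the new default
ess_unbiased = effective_sample_size(samples, bias=False)   # pre-commit behaviour
ac_tail = autocorrelation(samples[0], bias=True)[-100:]     # damped tail at long lags
print(ess_default, ess_unbiased, np.abs(ac_tail).max())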
