Skip to content

Commit a4818f4

Browse files
authored
Merge pull request #22 from olivercliff/directed_info_fix
Modify DirectedInfo to use the causal entropy rate
2 parents c06e1a0 + dbc8fdb commit a4818f4

File tree

8 files changed

+81
-68
lines changed

8 files changed

+81
-68
lines changed

demos/simple_demo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" This simple demo will show you how to quickly get started with evaluating all SPIs.
1+
""" This simple demo will show you how to quickly get started with PySPI.
22
"""
33
import numpy as np
44
from pyspi.calculator import Calculator

pyspi/config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,12 +103,12 @@
103103
MultiscaleGraphCorrelation:
104104

105105
# Hilbert-Schmidt independence criterion
106-
HSIC:
106+
HilbertSchmidtIndependenceCriterion:
107107
- biased: False
108108
- biased: True
109109

110110
# Heller-Heller-Gorfine (HHG) test
111-
HHG:
111+
HellerHellerGorfine:
112112

113113
# Multi-scale graph correlation for time series
114114
CrossMultiscaleGraphCorrelation:
@@ -367,7 +367,7 @@
367367
k_history: 10
368368
l_history: 1
369369

370-
IntegratedInfo:
370+
IntegratedInformation:
371371
- phitype: "star"
372372

373373
- phitype: "star"

pyspi/data.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
from scipy.stats import zscore
99
from scipy.signal import detrend
1010
import os
11-
from sktime.utils.data_io import load_from_tsfile_to_dataframe
12-
from sktime.datatypes._panel._convert import from_nested_to_3d_numpy
1311

1412
VERBOSE = False
1513

@@ -131,7 +129,10 @@ def convert_to_numpy(data):
131129
elif ext == ".csv":
132130
npdat = np.genfromtxt(data, ",")
133131
elif ext == ".ts":
134-
tsdat, tsclasses = load_from_tsfile_to_dataframe(data)
132+
from sktime.utils.data_io import load_from_tsfile_to_dataframe
133+
from sktime.datatypes._panel._convert import from_nested_to_3d_numpy
134+
135+
tsdat, _ = load_from_tsfile_to_dataframe(data)
135136
npdat = from_nested_to_3d_numpy(tsdat)
136137
else:
137138
raise TypeError(f"Unknown filename extension: {ext}")

pyspi/fast_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
- biased: True
8484

8585
# Hilbert-Schmidt independence criterion
86-
HSIC:
86+
HilbertSchmidtIndependenceCriterion:
8787
- biased: False
8888
- biased: True
8989

pyspi/statistics/distance.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def multivariate(self, data):
4444
"""
4545

4646

47-
class HSIC(Undirected, Unsigned):
47+
class HilbertSchmidtIndependenceCriterion(Undirected, Unsigned):
4848
"""Hilbert-Schmidt Independence Criterion (HSIC)"""
4949

5050
name = "Hilbert-Schmidt Independence Criterion"
@@ -63,7 +63,7 @@ def bivariate(self, data, i=None, j=None):
6363
return stat
6464

6565

66-
class HHG(Directed, Unsigned):
66+
class HellerHellerGorfine(Directed, Unsigned):
6767
"""Heller-Heller-Gorfine independence criterion"""
6868

6969
name = "Heller-Heller-Gorfine Independence Criterion"

pyspi/statistics/infotheory.py

Lines changed: 67 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,13 @@ def __init__(
7272

7373
def __getstate__(self):
7474
state = dict(self.__dict__)
75-
try:
76-
del state["_entropy_calc"]
77-
except KeyError:
78-
pass
79-
try:
80-
del state["_calc"]
81-
except KeyError:
82-
pass
8375

84-
if "_entropy_calc" in state.keys() or "_calc" in state.keys():
85-
logging.info(f"{self.identifier} contains a calculator still")
76+
unserializable_objects = ["_entropy_calc", "_calc"]
77+
78+
for k in unserializable_objects:
79+
if k in state.keys():
80+
del state[k]
81+
8682
return state
8783

8884
def __setstate__(self, state):
@@ -161,54 +157,49 @@ def _compute_entropy(self, data, i=None):
161157

162158
if data.entropy[key][i] == -np.inf:
163159
x = np.squeeze(data.to_numpy()[i])
160+
164161
self._entropy_calc.initialise(1)
165162
self._entropy_calc.setObservations(jp.JArray(jp.JDouble, 1)(x))
163+
166164
data.entropy[key][
167165
i
168166
] = self._entropy_calc.computeAverageLocalOfObservations()
169167

170168
return data.entropy[key][i]
171169

172-
# No Theiler window yet (can it be done?)
170+
# No Theiler window is available in the JIDT estimator
173171
@parse_bivariate
174-
def _compute_JointEntropy(self, data, i, j):
175-
if not hasattr(data, "JointEntropy"):
176-
data.JointEntropy = {}
172+
def _compute_joint_entropy(self, data, i, j):
173+
if not hasattr(data, "joint_entropy"):
174+
data.joint_entropy = {}
177175

178176
key = self._getkey()
179-
if key not in data.JointEntropy:
180-
data.JointEntropy[key] = np.full(
181-
(data.n_processes, data.n_processes), -np.inf
182-
)
177+
if key not in data.joint_entropy:
178+
data.joint_entropy[key] = np.full((data.n_processes, data.n_processes), -np.infty)
183179

184-
if data.JointEntropy[key][i, j] == -np.inf:
180+
if data.joint_entropy[key][i, j] == -np.inf:
185181
x, y = data.to_numpy()[[i, j]]
186182

187183
self._entropy_calc.initialise(2)
188-
self._entropy_calc.setObservations(
189-
jp.JArray(jp.JDouble, 2)(np.concatenate([x, y], axis=1))
190-
)
191-
data.JointEntropy[key][
192-
i, j
193-
] = self._entropy_calc.computeAverageLocalOfObservations()
194-
data.JointEntropy[key][j, i] = data.JointEntropy[key][i, j]
184+
self._entropy_calc.setObservations(jp.JArray(jp.JDouble, 2)(np.concatenate([x, y], axis=1)))
195185

196-
return data.JointEntropy[key][i, j]
186+
data.joint_entropy[key][i, j] = self._entropy_calc.computeAverageLocalOfObservations()
187+
data.joint_entropy[key][j, i] = data.joint_entropy[key][i, j]
197188

198-
# No Theiler window yet (can it be done?)
199-
"""
200-
TODO: match this function with previous ones (perhaps always allow multiple i's and j's)
201-
"""
189+
return data.joint_entropy[key][i, j]
202190

191+
# No Theiler window is available in the JIDT estimator
203192
def _compute_conditional_entropy(self, X, Y):
204193
XY = np.concatenate([X, Y], axis=1)
205194

206195
self._entropy_calc.initialise(XY.shape[1])
207196
self._entropy_calc.setObservations(jp.JArray(jp.JDouble, XY.ndim)(XY))
197+
208198
H_XY = self._entropy_calc.computeAverageLocalOfObservations()
209199

210200
self._entropy_calc.initialise(Y.shape[1])
211201
self._entropy_calc.setObservations(jp.JArray(jp.JDouble, Y.ndim)(Y))
202+
212203
H_Y = self._entropy_calc.computeAverageLocalOfObservations()
213204

214205
return H_XY - H_Y
@@ -218,12 +209,14 @@ def _set_theiler_window(self, data, i, j):
218209
if not hasattr(data, "theiler"):
219210
z = data.to_numpy()
220211
theiler_window = -np.ones((data.n_processes, data.n_processes))
212+
221213
# Compute effective sample size for each pair
222214
for _i in range(data.n_processes):
223215
targ = z[_i]
224216
for _j in range(_i + 1, data.n_processes):
225217
src = z[_j]
226-
# Init the Theiler window using Bartlett's formula
218+
219+
# Initialize the Theiler window using Bartlett's formula
227220
theiler_window[_i, _j] = 2 * np.dot(
228221
utils.acf(src), utils.acf(targ)
229222
)
@@ -250,7 +243,7 @@ def __init__(self, **kwargs):
250243

251244
@parse_bivariate
252245
def bivariate(self, data, i=None, j=None):
253-
return self._compute_JointEntropy(data, i=i, j=j)
246+
return self._compute_joint_entropy(data, i=i, j=j)
254247

255248

256249
class ConditionalEntropy(JIDTBase, Directed):
@@ -264,7 +257,7 @@ def __init__(self, **kwargs):
264257

265258
@parse_bivariate
266259
def bivariate(self, data, i=None, j=None):
267-
return self._compute_JointEntropy(data, i=i, j=j) - self._compute_entropy(
260+
return self._compute_joint_entropy(data, i=i, j=j) - self._compute_entropy(
268261
data, i=i
269262
)
270263

@@ -456,40 +449,41 @@ def __init__(self, n=5, **kwargs):
456449
self._n = n
457450

458451
def _compute_causal_entropy(self, src, targ):
459-
mUtils = jp.JPackage("infodynamics.utils").MatrixUtils
460-
H = 0
452+
461453
src = np.squeeze(src)
462454
targ = np.squeeze(targ)
463-
for i in range(2, self._n):
464-
Yp = mUtils.makeDelayEmbeddingVector(jp.JArray(jp.JDouble, 1)(targ), i - 1)[
465-
1:
466-
]
467-
Xp = mUtils.makeDelayEmbeddingVector(jp.JArray(jp.JDouble, 1)(src), i)
455+
456+
m_utils = jp.JPackage("infodynamics.utils").MatrixUtils
457+
458+
causal_entropy = 0
459+
for i in range(1, self._n + 1):
460+
Yp = m_utils.makeDelayEmbeddingVector(jp.JArray(jp.JDouble, 1)(targ), i - 1)[:-1]
461+
Xp = m_utils.makeDelayEmbeddingVector(jp.JArray(jp.JDouble, 1)(src), i)
468462
XYp = np.concatenate([Yp, Xp], axis=1)
469463

470464
Yf = np.expand_dims(targ[i - 1 :], 1)
471-
H = H + self._compute_conditional_entropy(Yf, XYp)
472-
return H
465+
causal_entropy = causal_entropy + self._compute_conditional_entropy(Yf, XYp)
466+
return causal_entropy
473467

474468
def _getkey(self):
475469
return super(CausalEntropy, self)._getkey() + (self._n,)
476470

477471
@parse_bivariate
478472
def bivariate(self, data, i=None, j=None):
479-
if not hasattr(data, "CausalEntropy"):
480-
data.CausalEntropy = {}
473+
if not hasattr(data, "causal_entropy"):
474+
data.causal_entropy = {}
481475

482476
key = self._getkey()
483-
if key not in data.CausalEntropy:
484-
data.CausalEntropy[key] = np.full(
477+
if key not in data.causal_entropy:
478+
data.causal_entropy[key] = np.full(
485479
(data.n_processes, data.n_processes), -np.inf
486480
)
487481

488-
if data.CausalEntropy[key][i, j] == -np.inf:
482+
if data.causal_entropy[key][i, j] == -np.inf:
489483
z = data.to_numpy(squeeze=True)
490-
data.CausalEntropy[key][i, j] = self._compute_causal_entropy(z[i], z[j])
484+
data.causal_entropy[key][i, j] = self._compute_causal_entropy(z[i], z[j])
491485

492-
return data.CausalEntropy[key][i, j]
486+
return data.causal_entropy[key][i, j]
493487

494488

495489
class DirectedInfo(CausalEntropy, Directed):
@@ -498,18 +492,34 @@ class DirectedInfo(CausalEntropy, Directed):
498492
identifier = "di"
499493
labels = ["unsigned", "infotheory", "temporal", "directed"]
500494

501-
def __init__(self, n=10, **kwargs):
495+
def __init__(self, n=5, **kwargs):
502496
super().__init__(**kwargs)
503497
self._n = n
504498

499+
def _compute_entropy_rates(self, targ):
500+
501+
targ = np.squeeze(targ)
502+
m_utils = jp.JPackage("infodynamics.utils").MatrixUtils
503+
504+
entropy_rate_sum = 0
505+
for i in range(1, self._n + 1):
506+
# Compute entropy for an i-dimensional embedding
507+
self._entropy_calc.initialise(i)
508+
509+
Yi = m_utils.makeDelayEmbeddingVector(jp.JArray(jp.JDouble, 1)(targ), i)
510+
self._entropy_calc.setObservations(Yi)
511+
entropy_rate_sum = entropy_rate_sum + self._entropy_calc.computeAverageLocalOfObservations() / i
512+
513+
return entropy_rate_sum
514+
505515
@parse_bivariate
506516
def bivariate(self, data, i=None, j=None):
507517
"""Compute directed information from i to j"""
508-
# Would prefer to match these two calls
509-
entropy = self._compute_entropy(data, j)
510-
CausalEntropy = super(DirectedInfo, self).bivariate(data, i=i, j=j)
511518

512-
return entropy - CausalEntropy
519+
entropy_rates = self._compute_entropy_rates(data.to_numpy(squeeze=True)[j])
520+
causal_entropy = super().bivariate(data, i=i, j=j)
521+
522+
return entropy_rates - causal_entropy
513523

514524

515525
class StochasticInteraction(JIDTBase, Undirected):
@@ -536,7 +546,7 @@ def bivariate(self, data, i=None, j=None, verbose=False):
536546
return H_src + H_targ - H_joint
537547

538548

539-
class IntegratedInfo(Undirected, Unsigned):
549+
class IntegratedInformation(Undirected, Unsigned):
540550

541551
name = "Integrated information"
542552
identifier = "phi"
@@ -552,7 +562,7 @@ def __init__(self, phitype="star", delay=1, normalization=0):
552562
self.identifier += f"_{phitype}_t-{delay}_norm-{normalization}"
553563

554564
@parse_bivariate
555-
def bivariate(self, data, i=None, j=None, verbose=False):
565+
def bivariate(self, data, i=None, j=None):
556566

557567
if not octave.exist("phi_comp"):
558568
path = os.path.dirname(os.path.abspath(__file__)) + "/../lib/PhiToolbox/"

pyspi/statistics/misc.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,13 @@ def _from_cache(self, data, i, j):
7272
maxlag=self._maxlag,
7373
trend=self._trend,
7474
)
75+
7576
ci = {"tstat": stats[0]}
7677
else:
7778
stats = coint_johansen(
7879
z[[i, j]].T, det_order=self._det_order, k_ar_diff=self._k_ar_diff
7980
)
81+
8082
ci = {
8183
"max_eig_stat": stats.max_eig_stat[0],
8284
"trace_stat": stats.trace_stat[0],

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
'data/cml.npy',
6767
'data/forex.npy']},
6868
include_package_data=True,
69-
version='0.2.2',
69+
version='0.4.0',
7070
description='Network analysis for time series',
7171
author='Oliver M. Cliff',
7272
author_email='[email protected]',

0 commit comments

Comments (0)