Skip to content

Commit e2fe31d

Browse files
authored
Merge pull request #66 from DynamicsAndNeuralSystems/jmoo2880-pypi-publish
Add new SPI (GWTau) and benchmark generation script + workflow.
2 parents 6c8d9e0 + c337bdf commit e2fe31d

File tree

10 files changed

+171
-19
lines changed

10 files changed

+171
-19
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
name: Generate benchmarking dataset tables
2+
3+
on:
4+
workflow_dispatch:
5+
6+
jobs:
7+
test-ubuntu:
8+
runs-on: ubuntu-latest
9+
strategy:
10+
matrix:
11+
python-version: ["3.9"]
12+
steps:
13+
- uses: actions/checkout@v4
14+
- name: Setup python ${{ matrix.python-version }}
15+
uses: actions/setup-python@v5
16+
with:
17+
python-version: ${{ matrix.python-version }}
18+
cache: 'pip'
19+
- name: Install octave
20+
run: |
21+
sudo apt-get update
22+
sudo apt-get install -y build-essential octave
23+
- name: Install pyspi dependencies
24+
run: |
25+
python -m pip install --upgrade pip
26+
pip install -r requirements.txt
27+
pip install .
28+
- name: Run data generation
29+
run: |
30+
python tests/generate_benchmark_tables.py
31+
- name: Upload artifact
32+
uses: actions/upload-artifact@v3
33+
with:
34+
name: benchmark-tables
35+
path: tests/CML7_benchmark_tables_new.pkl

.github/workflows/run_unit_tests.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,3 @@ jobs:
3232
- name: Run pyspi SPI unit tests
3333
run: |
3434
pytest -v ./tests/test_SPIs.py
35-
36-
37-
38-

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "pyspi"
7-
version = "1.0.1"
7+
version = "1.0.2"
88
authors = [
99
{ name ="Oliver M. Cliff", email="[email protected]"},
1010
]

pyspi/config.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,17 @@
310310
statistic: max
311311
squared: True
312312

313+
# Gromov-Wasserstain Distance
314+
GromovWasserstainTau:
315+
labels:
316+
- unsigned
317+
- distance
318+
- unordered
319+
- nonlinear
320+
- undirected
321+
dependencies:
322+
configs:
323+
313324
.statistics.causal:
314325
# Additive noise model
315326
AdditiveNoiseModel:

pyspi/fast_config.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,17 @@
169169
- mode: euclidean
170170
statistic: max
171171
squared: True
172+
173+
# Gromov-Wasserstain Distance
174+
GromovWasserstainTau:
175+
labels:
176+
- unsigned
177+
- distance
178+
- unordered
179+
- nonlinear
180+
- undirected
181+
dependencies:
182+
configs:
172183

173184
.statistics.causal:
174185
# Additive noise model

pyspi/statistics/distance.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,3 +259,62 @@ def bivariate(self, data, i=None, j=None):
259259
data.barycenter[self._mode][(j, i)] = data.barycenter[self._mode][(i, j)]
260260

261261
return self._statfn(bc)
262+
263+
264+
class GromovWasserstainTau(Undirected, Unsigned):
265+
"""Gromov-Wasserstain distance (GWTau)"""
266+
267+
name = "Gromov-Wasserstain Distance"
268+
identifier = "gwtau"
269+
labels = ["unsigned", "distance", "unordered", "nonlinear", "undirected"]
270+
271+
@staticmethod
272+
def vec_geo_dist(x):
273+
diffs = np.diff(x, axis=0)
274+
distances = np.linalg.norm(diffs, axis=1)
275+
return np.cumsum(distances)
276+
277+
@staticmethod
278+
def wass_sorted(x1, x2):
279+
x1 = np.sort(x1)[::-1] # sort in descending order
280+
x2 = np.sort(x2)[::-1]
281+
282+
if len(x1) == len(x2):
283+
res = np.sqrt(np.mean((x1 - x2) ** 2))
284+
else:
285+
N, M = len(x1), len(x2)
286+
i_ratios = np.arange(1, N + 1) / N
287+
j_ratios = np.arange(1, M + 1) / M
288+
289+
290+
min_values = np.minimum.outer(i_ratios, j_ratios)
291+
max_values = np.maximum.outer(i_ratios - 1/N, j_ratios - 1/M)
292+
293+
lam = np.where(min_values > max_values, min_values - max_values, 0)
294+
295+
diffs_squared = (x1[:, None] - x2) ** 2
296+
my_sum = np.sum(lam * diffs_squared)
297+
298+
res = np.sqrt(my_sum)
299+
300+
return res
301+
302+
@staticmethod
303+
def gwtau(xi, xj):
304+
timei = np.arange(len(xi))
305+
timej = np.arange(len(xj))
306+
traji = np.column_stack([timei, xi])
307+
trajj = np.column_stack([timej, xj])
308+
309+
vi = GromovWasserstainTau.vec_geo_dist(traji)
310+
vj = GromovWasserstainTau.vec_geo_dist(trajj)
311+
gw = GromovWasserstainTau.wass_sorted(vi, vj)
312+
313+
return gw
314+
315+
@parse_bivariate
316+
def bivariate(self, data, i=None, j=None):
317+
x, y = data.to_numpy()[[i, j]]
318+
# insert compute SPI code here (computes on x and y)
319+
stat = self.gwtau(x, y)
320+
return stat

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
'data/standard_normal.npy',
6262
'data/cml7.npy']},
6363
include_package_data=True,
64-
version='1.0.1',
64+
version='1.0.2',
6565
description='Library for pairwise analysis of time series data.',
6666
author='Oliver M. Cliff',
6767
author_email='[email protected]',

tests/CML7_benchmark_tables.pkl

-236 KB
Binary file not shown.

tests/generate_benchmark_tables.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import numpy as np
2+
import dill
3+
from pyspi.calculator import Calculator
4+
5+
""""Script to generate benchmarking dataset"""
6+
def get_benchmark_tables(calc_list):
7+
# get the spis from the first calculator
8+
spis = list(calc_list[1].spis.keys())
9+
num_procs = calc_list[1].dataset.n_processes
10+
# create a dict to store the mean and std for each spi
11+
benchmarks = {key : {'mean': None, 'std': None} for key in spis}
12+
num_trials = len(calc_list)
13+
for spi in spis:
14+
mpi_tensor = np.zeros(shape=(num_trials, num_procs, num_procs))
15+
for (index, calc) in enumerate(calc_list):
16+
mpi_tensor[index, :, :] = calc.table[spi].to_numpy()
17+
mean_matrix = np.mean(mpi_tensor, axis=0) # compute element-wise mean across the first dimension
18+
std_matrix = np.std(mpi_tensor, axis=0) # compute element-wise std across the first dimension
19+
benchmarks[spi]['mean'] = mean_matrix
20+
benchmarks[spi]['std'] = std_matrix
21+
22+
return benchmarks
23+
24+
# load and transpose dataset
25+
dataset = np.load("pyspi/data/cml7.npy").T
26+
27+
# create list to store the calculator objects
28+
store_calcs = list()
29+
30+
for i in range(75):
31+
np.random.seed(42)
32+
calc = Calculator(dataset=dataset)
33+
calc.compute()
34+
store_calcs.append(calc)
35+
36+
mpi_benchmarks = get_benchmark_tables(store_calcs)
37+
38+
# save data
39+
with open("tests/CML7_benchmark_tables_new.pkl", "wb") as f:
40+
dill.dump(mpi_benchmarks, f)

tests/test_SPIs.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,9 @@ def test_mpi(est, est_ob, mpi_benchmark, mpi_new, spi_warning_logger):
6161
mean_table = mpi_benchmark['mean']
6262
std_table = mpi_benchmark['std']
6363

64-
# check std stable for zeros and impute with smallest non-zero value
65-
min_nonzero_std = np.nanmin(std_table[std_table > 0])
66-
std_table[std_table == 0] = min_nonzero_std
67-
68-
64+
# check std table for zeros and impute with smallest non-zero value
65+
std_table = np.where(std_table == 0, 1e-10, std_table)
66+
6967
# check that the shapes are equal
7068
assert mean_table.shape == mpi_new.shape, f"SPI: {est}| Different table shapes. "
7169

@@ -79,24 +77,26 @@ def test_mpi(est, est_ob, mpi_benchmark, mpi_new, spi_warning_logger):
7977
# get the module name for easy reference
8078
module_name = est_ob.__module__.split(".")[-1]
8179

82-
if (mpi_new == mpi_mean).all() == False:
80+
if not np.allclose(mpi_new, mpi_mean):
8381
# tables are not equivalent, quantify the difference by z-scoring.
8482
diff = abs(mpi_new - mpi_mean)
8583
zscores = diff/std_table
84+
8685
idxs_greater_than_thresh = np.argwhere(zscores > zscore_threshold)
86+
8787
if len(idxs_greater_than_thresh) > 0:
88-
sigs = list()
89-
for idx in idxs_greater_than_thresh:
90-
sigs.append(zscores[idx[0], idx[1]])
88+
sigs = zscores[idxs_greater_than_thresh[:, 0], idxs_greater_than_thresh[:, 1]]
9189
# get the max
9290
max_z = max(sigs)
91+
9392
# number of interactions
94-
num_iteractions = (mpi_new.shape[0] * mpi_new.shape[1]) - mpi_new.shape[0]
93+
num_interactions = mpi_new.size - mpi_new.shape[0]
9594
# count exceedances
9695
num_exceed = len(sigs)
96+
9797
if isSymmetric:
9898
# number of unique exceedences is half
99-
num_exceed /= 2
100-
num_iteractions /= 2
99+
num_exceed //= 2
100+
num_interactions //= 2
101101

102-
spi_warning_logger(est, module_name, max_z, int(num_exceed), int(num_iteractions))
102+
spi_warning_logger(est, module_name, max_z, int(num_exceed), int(num_interactions))

0 commit comments

Comments
 (0)