Skip to content

Commit 7b8d5ea

Browse files
authored
fixing some alignment issues in time-frequency sonification (#410)
* fixing some alignment issues in time-frequency sonification * expanded tests and error handling for time_frequency sonification * blacked sonify * blacked tests * switched interpolation mode in tfgram sonify * switched to scipy signal convolve for windowing * fixing interpolation timing again * optimizations on tf sonification again * blacking * reverted singleton interpolation * final optimizations on tf sonification
1 parent a0a8672 commit 7b8d5ea

File tree

2 files changed

+164
-79
lines changed

2 files changed

+164
-79
lines changed

mir_eval/sonify.py

+122-79
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55

66
import numpy as np
7+
import scipy.signal
78
from numpy.lib.stride_tricks import as_strided
89
from scipy.interpolate import interp1d
910

@@ -105,105 +106,104 @@ def time_frequency(
105106
synthesized version of the piano roll
106107
107108
"""
108-
# Default value for length
109+
# Convert times to intervals if necessary
110+
time_converted = False
109111
if times.ndim == 1:
110112
# Convert to intervals
111-
times = util.boundaries_to_intervals(times)
113+
times = np.hstack((times[:-1, np.newaxis], times[1:, np.newaxis]))
114+
# We'll need this to keep track of whether we should pad an interval on
115+
time_converted = True
112116

117+
# Default value for length
113118
if length is None:
114119
length = int(times[-1, 1] * fs)
115120

116121
last_time_in_secs = float(length) / fs
117-
times, _ = util.adjust_intervals(times, t_max=last_time_in_secs)
118-
119-
# Truncate times so that the shape matches gram. However if the time boundaries were converted
120-
# to intervals, then the number of times will be reduced by one, so we only truncate
121-
# if the gram is smaller.
122-
n_times = min(gram.shape[1], times.shape[0])
123-
times = times[:n_times]
124-
# Round up to ensure that the adjusted interval last time does not diverge from length
125-
# due to a loss of precision and truncation to ints.
126-
sample_intervals = np.round(times * fs).astype(int)
127-
128-
def _fast_synthesize(frequency):
129-
"""Efficiently synthesize a signal.
130-
Generate one cycle, and simulate arbitrary repetitions
131-
using array indexing tricks.
132-
"""
133-
# hack so that we can ensure an integer number of periods and samples
134-
# rounds frequency to 1st decimal, s.t. 10 * frequency will be an int
135-
frequency = np.round(frequency, n_dec)
136-
137-
# Generate 10*frequency periods at this frequency
138-
# Equivalent to n_samples = int(n_periods * fs / frequency)
139-
# n_periods = 10*frequency is the smallest integer that guarantees
140-
# that n_samples will be an integer, since assuming 10*frequency
141-
# is an integer
142-
n_samples = int(10.0**n_dec * fs)
143-
144-
short_signal = function(2.0 * np.pi * np.arange(n_samples) * frequency / fs)
145-
146-
# Calculate the number of loops we need to fill the duration
147-
n_repeats = int(np.ceil(length / float(short_signal.shape[0])))
148-
149-
# Simulate tiling the short buffer by using stride tricks
150-
long_signal = as_strided(
151-
short_signal,
152-
shape=(n_repeats, len(short_signal)),
153-
strides=(0, short_signal.itemsize),
122+
123+
if time_converted and times.shape[0] != gram.shape[1]:
124+
times = np.vstack((times, [times[-1, 1], last_time_in_secs]))
125+
126+
if times.shape[0] != gram.shape[1]:
127+
raise ValueError(
128+
f"times.shape={times.shape} is incompatible with gram.shape={gram.shape}"
154129
)
155130

156-
# Use a flatiter to simulate a long 1D buffer
157-
return long_signal.flat
131+
if frequencies.shape[0] != gram.shape[0]:
132+
raise ValueError(
133+
f"frequencies.shape={frequencies.shape} is incompatible with gram.shape={gram.shape}"
134+
)
135+
136+
padding = [0, 0]
137+
stacking = []
138+
139+
if times.min() > 0:
140+
# We need to pad a silence column on to gram at the beginning
141+
padding[0] = 1
142+
stacking.append([0, times.min()])
158143

159-
def _const_interpolator(value):
160-
"""Return a function that returns `value`
161-
no matter the input.
162-
"""
144+
stacking.append(times)
163145

164-
def __interpolator(x):
165-
return value
146+
if times.max() < last_time_in_secs:
147+
# We need to pad a silence column onto gram at the end
148+
padding[1] = 1
149+
stacking.append([times.max(), last_time_in_secs])
166150

167-
return __interpolator
151+
gram = np.pad(gram, ((0, 0), padding), mode="constant")
152+
times = np.vstack(stacking)
168153

169-
# Threshold the tfgram to remove non-positive values
154+
# Identify the time intervals that have some overlap with the duration
155+
idx = np.logical_and(times[:, 1] >= 0, times[:, 0] <= last_time_in_secs)
156+
gram = gram[:, idx]
157+
times = np.clip(times[idx], 0, last_time_in_secs)
158+
159+
n_times = times.shape[0]
160+
161+
# Threshold the tfgram to remove negative values
170162
gram = np.maximum(gram, 0)
171163

172164
# Pre-allocate output signal
173165
output = np.zeros(length)
174-
time_centers = np.mean(times, axis=1) * float(fs)
166+
if gram.shape[1] == 0:
167+
# There are no time intervals to process, so return
168+
# the empty signal.
169+
return output
170+
171+
# Discard frequencies below threshold
172+
freq_keep = np.max(gram, axis=1) >= threshold
173+
174+
gram = gram[freq_keep, :]
175+
frequencies = frequencies[freq_keep]
176+
177+
# Interpolate the values in gram over the time grid.
178+
if n_times > 1:
179+
interpolator = interp1d(
180+
times[:, 0] * fs,
181+
gram[:, :n_times],
182+
kind="previous",
183+
bounds_error=False,
184+
fill_value=(gram[:, 0], gram[:, -1]),
185+
)
186+
else:
187+
# NOTE: This is a special case where there is only one time interval.
188+
# scipy 1.10 and above handle this case directly with the interp1d above,
189+
# but older scipy's do not. This is a workaround for that.
190+
#
191+
# In the 0.9 release, we can bump the minimum scipy to 1.10 and remove this
192+
interpolator = _const_interpolator(gram[:, 0])
193+
194+
signal = interpolator(np.arange(length))
175195

176-
# Check if there is at least one element on each frequency that has a value above the threshold
177-
# to justify processing, for optimisation.
178-
spectral_max_magnitudes = np.max(gram, axis=1)
179196
for n, frequency in enumerate(frequencies):
180-
if spectral_max_magnitudes[n] < threshold:
181-
continue
182197
# Get a waveform of length samples at this frequency
183-
wave = _fast_synthesize(frequency)
184-
185-
# Interpolate the values in gram over the time grid.
186-
if len(time_centers) > 1:
187-
# If times was converted from boundaries to intervals, it will change shape from
188-
# (len, 1) to (len-1, 2), and hence differ from the length of gram (i.e one less),
189-
# so we ensure gram is reduced appropriately.
190-
gram_interpolator = interp1d(
191-
time_centers,
192-
gram[n, :n_times],
193-
kind="linear",
194-
bounds_error=False,
195-
fill_value=(gram[n, 0], gram[n, -1]),
196-
)
197-
# If only one time point, create constant interpolator
198-
else:
199-
gram_interpolator = _const_interpolator(gram[n, 0])
200-
201-
# Create the time-varying scaling for the entire time interval by the piano roll
202-
# magnitude and add to the accumulating waveform.
203-
t_in = max(sample_intervals[0][0], 0)
204-
t_out = min(sample_intervals[-1][-1], length)
205-
signal = gram_interpolator(np.arange(t_in, t_out))
206-
output[t_in:t_out] += wave[: len(signal)] * signal
198+
wave = _fast_synthesize(frequency, n_dec, fs, function, length)
199+
200+
# Use a two-cycle ramp to smooth over transients
201+
period = 2 * int(fs / frequency)
202+
filter = np.ones(period) / period
203+
signal_n = scipy.signal.convolve(signal[n], filter, mode="same")
204+
205+
# Mix the signal into the output
206+
output[:] += wave[: len(signal_n)] * signal_n
207207

208208
# Normalize, but only if there's non-zero values
209209
norm = np.abs(output).max()
@@ -213,6 +213,49 @@ def __interpolator(x):
213213
return output
214214

215215

216+
def _const_interpolator(value):
217+
"""Return a function that returns `value`
218+
no matter the input.
219+
"""
220+
221+
def __interpolator(x):
222+
return value
223+
224+
return __interpolator
225+
226+
227+
def _fast_synthesize(frequency, n_dec, fs, function, length):
228+
"""Efficiently synthesize a signal.
229+
Generate one cycle, and simulate arbitrary repetitions
230+
using array indexing tricks.
231+
"""
232+
# hack so that we can ensure an integer number of periods and samples
233+
# rounds frequency to 1st decimal, s.t. 10 * frequency will be an int
234+
frequency = np.round(frequency, n_dec)
235+
236+
# Generate 10*frequency periods at this frequency
237+
# Equivalent to n_samples = int(n_periods * fs / frequency)
238+
# n_periods = 10*frequency is the smallest integer that guarantees
239+
# that n_samples will be an integer, since assuming 10*frequency
240+
# is an integer
241+
n_samples = int(10.0**n_dec * fs)
242+
243+
short_signal = function(2.0 * np.pi * np.arange(n_samples) * frequency / fs)
244+
245+
# Calculate the number of loops we need to fill the duration
246+
n_repeats = int(np.ceil(length / float(short_signal.shape[0])))
247+
248+
# Simulate tiling the short buffer by using stride tricks
249+
long_signal = as_strided(
250+
short_signal,
251+
shape=(n_repeats, len(short_signal)),
252+
strides=(0, short_signal.itemsize),
253+
)
254+
255+
# Use a flatiter to simulate a long 1D buffer
256+
return long_signal.flat
257+
258+
216259
def pitch_contour(
217260
times, frequencies, fs, amplitudes=None, function=np.sin, length=None, kind="linear"
218261
):

tests/test_sonify.py

+42
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,48 @@ def test_time_frequency(fs):
5050
assert len(signal) == 5 * fs
5151

5252

53+
def test_time_frequency_offset():
54+
fs = 8000
55+
# Length is 3 seconds, first interval starts at 5.
56+
# Should produce an empty signal.
57+
signal = mir_eval.sonify.time_frequency(
58+
np.random.standard_normal((100, 100)),
59+
np.arange(1, 101),
60+
np.linspace(5, 10, 100),
61+
fs,
62+
length=fs * 3,
63+
)
64+
assert len(signal) == 3 * fs
65+
assert np.allclose(signal, 0)
66+
67+
68+
@pytest.mark.xfail(raises=ValueError)
69+
def test_time_frequency_badtime():
70+
fs = 8000
71+
gram = np.ones((10, 20))
72+
times = np.arange(10)
73+
74+
mir_eval.sonify.time_frequency(gram, np.arange(1, 11), times, fs)
75+
76+
77+
@pytest.mark.xfail(raises=ValueError)
78+
def test_time_frequency_badintervals():
79+
fs = 8000
80+
gram = np.ones((10, 20))
81+
times = np.ones((11, 2))
82+
83+
mir_eval.sonify.time_frequency(gram, np.arange(1, 11), times, fs)
84+
85+
86+
@pytest.mark.xfail(raises=ValueError)
87+
def test_time_frequency_frequency():
88+
fs = 8000
89+
gram = np.ones((10, 20))
90+
times = np.arange(20)
91+
92+
mir_eval.sonify.time_frequency(gram, np.arange(1, 8), times, fs)
93+
94+
5395
@pytest.mark.parametrize("fs", [8000, 44100])
5496
def test_chroma(fs):
5597
signal = mir_eval.sonify.chroma(

0 commit comments

Comments
 (0)