Skip to content

Commit

Permalink
Rename best_offset and its arguments. Set up a test system.
Browse files Browse the repository at this point in the history
  • Loading branch information
iver56 committed Jul 4, 2023
1 parent 7dcf9d5 commit 38dc1c5
Show file tree
Hide file tree
Showing 15 changed files with 94 additions and 36 deletions.
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ import numpy as np
arr = np.random.uniform(size=10_000).astype("float32")

# Find the best offset for aligning two arrays
print(fast_align_audio.best_offset(arr, np.pad(arr, (121, 0)), 1_000, 5_000))
print(fast_align_audio.find_best_alignment_offset(arr, np.pad(arr, (121, 0)), 1_000, 5_000))
# Output: 121

print(fast_align_audio.best_offset(arr, arr[121:], 1_000, 5_000))
print(fast_align_audio.find_best_alignment_offset(arr, arr[121:], 1_000, 5_000))
# Output: -121

# Align two arrays and confirm they're equal post alignment
Expand All @@ -42,7 +42,13 @@ np.array_equal(arr, arr1) and np.array_equal(arr, arr2)
# Output: True
```

In this example, we first create a random numpy array. We then call the best_offset
In this example, we first create a random NumPy array. We then call the
`find_best_alignment_offset` function to find the best offset for aligning two arrays,
and we use the `align` function to align the arrays. Finally, `np.array_equal` checks that
the two arrays are equal, demonstrating the successful alignment of the two original arrays.

# Development

* Install dev/build/test dependencies as denoted in setup.py
* `python setup.py develop`
* `pytest`
2 changes: 1 addition & 1 deletion fast_align_audio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .alignment import best_offset, align
from .alignment import find_best_alignment_offset, align

__version__ = "0.1.2"
53 changes: 23 additions & 30 deletions fast_align_audio/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,64 +2,57 @@
import _fast_align_audio


def find_best_alignment_offset(
    reference_audio, delayed_audio, max_offset_samples, lookahead_samples=None
):
    """
    Find best offset of `delayed_audio` w.r.t. `reference_audio`.

    Best = smallest mean squared error (mse).

    Args:
        reference_audio, delayed_audio (float32 C-contiguous NumPy arrays):
            The arrays to compare
        max_offset_samples (int > 0):
            Maximum expected offset. It will not find any larger offsets.
        lookahead_samples (int > 0, optional):
            Maximum number of array elements to use for each mse computation.
            If `None` (the default), there is no maximum: the length of the
            longer of the two arrays is used.

    Returns:
        The offset (in samples) reported by the native `fast_find_alignment`
        routine.
        NOTE(review): the accompanying test expects 121 when `delayed_audio`
        is `reference_audio` with 121 padding samples prepended, i.e. a
        positive value when `delayed_audio` lags behind `reference_audio`.
        Confirm the sign convention against the native implementation.

    Raises:
        AssertionError: If either array is not float32 or not C-contiguous.
    """
    # The native routine reads the raw buffers as `float *`, so anything other
    # than contiguous float32 data would be misinterpreted.
    assert {reference_audio.dtype, delayed_audio.dtype} == {
        np.dtype("float32")
    }, "Arrays must be float32"
    assert (
        reference_audio.flags["C_CONTIGUOUS"] and delayed_audio.flags["C_CONTIGUOUS"]
    ), "Arrays must be C-contiguous"
    if lookahead_samples is None:
        lookahead_samples = max(len(reference_audio), len(delayed_audio))
    # Note the argument order: the delayed array is passed first, then the
    # reference array.
    return _fast_align_audio.lib.fast_find_alignment(
        len(delayed_audio),
        _fast_align_audio.ffi.cast("float *", delayed_audio.ctypes.data),
        len(reference_audio),
        _fast_align_audio.ffi.cast("float *", reference_audio.ctypes.data),
        max_offset_samples,
        lookahead_samples,
    )


def align(a, b, max_offset, max_lookahead=None, *, align_mode, fix_length=None):
"""
Align `a` and `b`. See the documentation of `best_offset` for most of the args.
Align `a` and `b`. See the documentation of `find_best_alignment_offset` for most of the args.
Args:
align_mode (Either `"crop"` or `"pad"`): How to align `a` and `b`.
If `crop`, `best_offset` number of elements are removed from the
front of the "too-long" array. If `pad`, `best_offset` number of
If `crop`, "best_offset" number of elements are removed from the
front of the "too-long" array. If `pad`, "best_offset" number of
elements are padded to the front of the "too-short" array.
fix_length (Either `"shortest"`, `"longest"` or `None`): How to fix the
lengths of `a` and `b` after alignment. If `shortest`, the longer
array is cropped (at the end/right) to the length of the shorter one.
If `longest`, the shorter array is padded (to the end/right) to the
length of the longest one. If `None`, lengths are not changed.
"""
offset = best_offset(a, b, max_offset, max_lookahead)
offset = find_best_alignment_offset(a, b, max_offset, max_lookahead)
if offset > 0:
# mse(a[offset:], b) = min
a, b = _align(a, b, offset, align_mode)
Expand Down
3 changes: 3 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
python_files=test*.py
norecursedirs = .git .github .idea fast_align_audio.egg-info build dist
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ def find_version(*file_paths):
name="fast-align-audio",
version=find_version("fast_align_audio", "__init__.py"),
description=(
"A fast python library for aligning similar audio snippets passed in as NumPy arrays."
"A fast python library for aligning similar audio snippets passed in as NumPy"
" arrays."
),
license="ISC",
long_description=long_description,
long_description_content_type="text/markdown",
packages=find_packages(),
packages=find_packages(exclude=["test_fixtures", "tests"]),
setup_requires=["cffi>=1.0.0"],
tests_require=["librosa==0.10.0.post2", "pytest"],
cffi_modules=["fast_align_audio/_alignment_cffi.py:ffibuilder"],
install_requires=[],
classifiers=[
Expand Down
Binary file added test_fixtures/multi_mic1/main.flac
Binary file not shown.
Binary file added test_fixtures/multi_mic1/other1.flac
Binary file not shown.
Binary file added test_fixtures/multi_mic1/other2.flac
Binary file not shown.
Binary file added test_fixtures/multi_mic1/other3.flac
Binary file not shown.
Binary file added test_fixtures/multi_mic2/main.flac
Binary file not shown.
Binary file added test_fixtures/multi_mic2/other1.flac
Binary file not shown.
Binary file added test_fixtures/multi_mic2/other2.flac
Binary file not shown.
Binary file added test_fixtures/multi_mic2/other3.flac
Binary file not shown.
Empty file added tests/__init__.py
Empty file.
54 changes: 54 additions & 0 deletions tests/test_find_best_alignment_offset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os
from pathlib import Path

import librosa
import numpy as np

import fast_align_audio

# Absolute path of the directory one level above the directory containing this file.
DEMO_DIR = Path(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
# Directory holding the audio fixtures (e.g. multi_mic1/*.flac) loaded by the tests.
TEST_FIXTURES_DIR = DEMO_DIR / "test_fixtures"


class TestFindBestAlignmentOffset:
    """Tests for `fast_align_audio.find_best_alignment_offset`."""

    def test_simple_padded_array(self):
        """A copy delayed by 121 padding samples must yield an offset of 121."""
        # Seeded generator so the test input is identical on every run.
        rng = np.random.default_rng(42)
        reference = rng.uniform(size=10_000).astype("float32")
        delayed = np.pad(reference, (121, 0))
        offset = fast_align_audio.find_best_alignment_offset(
            reference, delayed, max_offset_samples=1000, lookahead_samples=5000
        )
        assert offset == 121

    def test_multi_mic1(self):
        """Offsets for the multi-mic recordings must stay within the search range."""
        fixture_dir = TEST_FIXTURES_DIR / "multi_mic1"
        main, sr = librosa.load(fixture_dir / "main.flac", sr=None)

        # Search at most 50 ms in either direction.
        max_offset_samples = int(0.05 * sr)

        for index in (1, 2, 3):
            other, _ = librosa.load(fixture_dir / f"other{index}.flac", sr=None)
            offset = fast_align_audio.find_best_alignment_offset(
                main, other, max_offset_samples=max_offset_samples
            )
            print(offset, f"offset{index}")
            # The function documents that it never reports an offset larger
            # than `max_offset_samples`.
            assert abs(offset) <= max_offset_samples, (
                f"offset{index}={offset} exceeds max_offset_samples="
                f"{max_offset_samples}"
            )

        # TODO: Assert the exact expected offsets once they are known

    # TODO
    # def test_multi_mic2(self):

0 comments on commit 38dc1c5

Please sign in to comment.