Added geomspace_int function

david-zwicker · david-zwicker · commit 7455bc277d2c · 2025-10-30T18:54:02.000+01:00
diff --git a/README.md b/README.md
@@ -15,3 +15,4 @@ axes.
 ## Mathematical functions:
 - `xlogx` calculates $x \log(x)$ with the correct limit for $x=0$
 - `random_uniform_fixed_sum` samples uniformly distributed, positive numbers adding to 1
+- `geomspace_int` provides an (approximately) geometric sequence of integers
diff --git a/tests/test_mathematics.py b/tests/test_mathematics.py
@@ -8,7 +8,7 @@
 import pytest
 from scipy import stats
 
-from utilitiez import random_uniform_fixed_sum, xlogx
+from utilitiez import geomspace_int, random_uniform_fixed_sum, xlogx
 
 
 @pytest.mark.parametrize("jit", [True, False])
@@ -117,3 +117,38 @@ def f(dim, size):
         assert stats.ks_1samp(xs[:, 2], cdf).statistic < 0.1
     else:
         raise NotImplementedError("Check not implemented for dim>3")
+
+
+def test_geomspace_int():
+    """Check end points, dtype, corner cases, and errors of `geomspace_int`."""
+    # generic properties that must hold for every valid combination of arguments
+    for count in (3, 20):
+        for lower, upper in ((0, 5), (1, 100)):
+            seq = geomspace_int(lower, upper, count)
+            assert np.issubdtype(seq.dtype, np.integer)
+            assert seq[0] == lower
+            assert seq[-1] == upper
+            assert len(seq) <= count
+
+    # the integer sequence deviates from the exact geometric one by at most one
+    approx = geomspace_int(10, 1000, 32)
+    exact = np.geomspace(10, 1000, 32)
+    np.testing.assert_allclose(approx - exact, 0, atol=1)
+
+    # degenerate results still need to carry an integer dtype
+    for args in ((0, 1, 0), (0, 0, 10)):
+        assert np.issubdtype(geomspace_int(*args).dtype, np.integer)
+
+    # corner cases with known results
+    known_results = (
+        ((0, 1, 0), []),
+        ((0, 0, 10), [0]),
+        ((0, 10, 1), [0]),
+        ((0, 2, 10), [0, 1, 2]),
+        ((0, 20, 2), [0, 20]),
+        ((0, 20, 3), [0, 1, 20]),
+    )
+    for args, expected in known_results:
+        np.testing.assert_equal(geomspace_int(*args), np.array(expected))
+
+    # swapping the end points reverses the sequence
+    ascending = geomspace_int(10, 100, 20)
+    descending = geomspace_int(100, 10, 20)
+    np.testing.assert_equal(ascending, descending[::-1])
+
+    # negative arguments are rejected
+    with pytest.raises(ValueError):
+        geomspace_int(0, 1, -1)
+    with pytest.raises(ValueError):
+        geomspace_int(-1, 2)
+    with pytest.raises(ValueError):
+        geomspace_int(1, -2)
diff --git a/utilitiez/__init__.py b/utilitiez/__init__.py
@@ -4,4 +4,4 @@
 """
 
 from .densityplot import densityplot
-from .mathematics import random_uniform_fixed_sum, xlogx
+from .mathematics import *
diff --git a/utilitiez/mathematics.py b/utilitiez/mathematics.py
@@ -17,7 +17,7 @@
 import numba as nb
 import numpy as np
 from numba.extending import overload, register_jitable
-from numpy.typing import ArrayLike
+from numpy.typing import ArrayLike, NDArray
 
 
 def xlogx_scalar(x):
@@ -182,3 +182,103 @@ def impl(dim, size=None):
         raise nb.TypingError("`size` must be positive integer or None")
 
     return impl
+
+
+def geomspace_int(
+    start: int, end: int, num: int = 50, *, max_steps: int = 100
+) -> NDArray[np.integer]:
+    """Return integers spaced (approximately) evenly on a log scale.
+
+    The result is obtained by truncating a floating-point geometric sequence to
+    integers and dropping duplicates. The length of the underlying geometric
+    sequence is adjusted iteratively so that the number of distinct integers gets
+    as close to `num` as the search allows.
+
+    Parameters:
+        start (int):
+            The starting value of the sequence. Must be non-negative.
+        end (int):
+            The final value of the sequence. Must be non-negative. If `end` is
+            smaller than `start`, the sequence is returned in descending order.
+        num (int, optional):
+            Maximal number of samples to generate. Default is 50.
+        max_steps (int, optional):
+            The maximal number of steps of the iterative algorithm.
+
+    Returns:
+        An ordered sequence of at most `num` integers from `start` to `end` with
+        approximately logarithmic spacing. For `num == 1`, only `start` is
+        returned, and for `num == 0` the result is empty.
+
+    Raises:
+        ValueError: If `num`, `start`, or `end` is negative.
+        RuntimeError: If the iterative algorithm did not terminate within
+            `max_steps` steps.
+    """
+    # check whether the supplied number is valid
+    num = int(num)
+    if num < 0:
+        raise ValueError(f"Number of samples, {num}, must be non-negative.")
+    if num == 0:
+        return np.array([], dtype=int)
+
+    # check corner cases
+    start = int(start)
+    end = int(end)
+    if start < 0 or end < 0:
+        # zero is explicitly supported, so only negative values are rejected
+        raise ValueError("`start` and `end` must be non-negative numbers")
+    if num == 1 or start == end:
+        return np.array([start])
+
+    if start > end:
+        # inverted sequence; forward `max_steps` so the caller's limit is honored
+        return geomspace_int(end, start, num, max_steps=max_steps)[::-1]
+
+    if num == 2:
+        # only the two end points are requested
+        return np.array([start, end])
+
+    if num > end - start:
+        # all integers need to be returned
+        return np.arange(start, end + 1)
+
+    # zero cannot be part of a geometric sequence, so it is split off here and
+    # prepended to the result at the very end
+    add_zero = start == 0
+    if add_zero:
+        start = 1
+        num -= 1
+
+    # upper bound of the search interval: roughly the length at which the last
+    # (largest) step of the geometric sequence drops to one
+    num_max = int(
+        np.ceil((math.log(end) - math.log(start)) / (math.log(end) - math.log(end - 1)))
+    )
+    a, b = num, num_max  # interval of log-range
+    n = a
+
+    # bisect (on a logarithmic scale) the length `n` of the geometric sequence
+    for _ in range(max_steps):
+        # determine discretized logarithmic range
+        ys = np.geomspace(start, end, num=n)
+        ys = np.unique(ys.astype(int))
+        ys_len = len(ys)
+
+        if ys_len == num:
+            break  # reached correct number
+
+        if ys_len < num:
+            # n is too small => move lower bound up
+            a = n
+            n = int(math.sqrt(n * b))
+            if a == n:
+                # geometric mean did not advance; step by one instead
+                n += 1
+                if n == b:
+                    break  # interval exhausted; keep the current (shorter) result
+
+        else:
+            # n is too large => move upper bound down
+            b = n
+            n = int(math.sqrt(a * n))
+            if b == n:
+                n -= 1
+                if n == a:
+                    break
+    else:
+        # loop finished without `break`, i.e., no acceptable length was found
+        raise RuntimeError("Exceeded attempts")
+
+    if add_zero:
+        return np.r_[0, ys]
+    return ys
+
+
+# public names of this module; this list determines what the wildcard import
+# `from .mathematics import *` in the package's `__init__.py` exposes
+__all__ = ["geomspace_int", "random_uniform_fixed_sum", "xlogx"]