Reduce allocations in various functions (#39)

* Add some function barriers and union-splitting to improve type inference * Reduce allocations in `filterpeaks!` and related functions * Reduce allocations by changing `haskey` => `hasproperty` * Reduce allocations by more explicitly/manually adding the `proms` field to `pks` * oops * fix a dumb bug; reduce allocations * Fix error in unstrict peakprom * Change CI to avoid double CI runs
halleysfifthinc · Mar 13, 2024 · 068d01b · 068d01b
1 parent 7fe55a4
commit 068d01b
Show file tree

Hide file tree

Showing 5 changed files with 119 additions and 85 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -1,7 +1,11 @@
 name: CI
 on:
-  - push
-  - pull_request
+  push:
+    branches:
+      - master
+      - main
+      - 'release*'
+  pull_request:
 
 # needed to allow julia-actions/cache to delete old caches that it has created
 permissions:

diff --git a/src/peakprom.jl b/src/peakprom.jl
@@ -70,6 +70,35 @@ peakproms(; kwargs...) = function _curried_peakproms(pks)
     return peakproms(pks; kwargs...)
 end
 
+function _strict_inner_promscalcloop!(cmp::C, extremum::M, extrema::A, _ref::MT, x::AbstractVector{T}, peaks::AbstractVector{Int}, proms::AbstractVector{MT}) where {C,M,A,T,MT}
+        lbegin, lend = firstindex(x), lastindex(x)
+
+        @inbounds for i in eachindex(peaks, proms)
+            # Find left and right bound (self-intersections)
+            lb = something(findprev(y -> cmp(y, x[peaks[i]]) === true, x, peaks[i] - 2),
+                lbegin)
+            rb = something(findnext(y -> cmp(y, x[peaks[i]]) === true, x, peaks[i] + 2),
+                lend)
+
+            # Find extremum of left and right bounds
+            if isempty(lb:(peaks[i]-1))
+                lref = _ref
+            else
+                lref = extremum(view(x, lb:(peaks[i]-1)))
+            end
+
+            if isempty((peaks[i]+1):rb)
+                rref = _ref
+            else
+                rref = extremum(view(x, (peaks[i]+1):rb))
+            end
+
+            proms[i] = abs(x[peaks[i]] - extrema(lref, rref))
+        end
+
+        return nothing
+end
+
 """
     peakproms!(indices, x; [strict=true, min, max]) -> (indices, proms)
     peakproms!(pks::NamedTuple; [strict=true, min, max]) -> NamedTuple
@@ -134,29 +163,11 @@ function peakproms!(peaks::AbstractVector{Int}, x::AbstractVector{T};
     proms = similar(peaks, promote_type(T, typeof(_ref)))
 
     if strict
-        lbegin, lend = firstindex(x), lastindex(x)
-
-        @inbounds for i in eachindex(peaks, proms)
-            # Find left and right bound (self-intersections)
-            lb = something(findprev(y -> cmp(y, x[peaks[i]]) === true, x, peaks[i] - 2),
-                lbegin)
-            rb = something(findnext(y -> cmp(y, x[peaks[i]]) === true, x, peaks[i] + 2),
-                lend)
-
-            # Find extremum of left and right bounds
-            if isempty(lb:(peaks[i]-1))
-                lref = _ref
-            else
-                lref = exm(view(x, lb:(peaks[i]-1)))
-            end
-
-            if isempty((peaks[i]+1):rb)
-                rref = _ref
-            else
-                rref = exm(view(x, (peaks[i]+1):rb))
-            end
-
-            proms[i] = abs(x[peaks[i]] - exa(lref, rref))
+        # Add a function barrier and manually union-split the cmp/exm/exa functions
+        if maxima
+            _strict_inner_promscalcloop!(≥, minimum, Base.max, _ref, x, peaks, proms)
+        else
+            _strict_inner_promscalcloop!(≤, maximum, Base.min, _ref, x, peaks, proms)
         end
     else
         # The extremum search space in the bounding intervals can be reduced by
@@ -175,12 +186,16 @@ function peakproms!(peaks::AbstractVector{Int}, x::AbstractVector{T};
         notmval = x[notm]
 
         for i in eachindex(peaks, proms)
-            j = searchsorted(peaks′, peaks[i])
+            j = only(searchsorted(peaks′, peaks[i]))
 
             # Find left and right bounding peaks
-            _lb = findprev(y -> cmp(x[y], x[peaks[i]]) === true, peaks′, first(j) - 1)
-            peaks′[j] === peaks[i] && (j += 1)
-            _rb = findnext(y -> cmp(x[y], x[peaks[i]]) === true, peaks′, last(j) + 1)
+            if maxima # cmp = ≥, manual union-splitting
+                _lb = findprev(y -> ≥(x[y], x[peaks[i]]) === true, peaks′, j - 1)
+                _rb = findnext(y -> ≥(x[y], x[peaks[i]]) === true, peaks′, j + 1)
+            else # cmp = ≤
+                _lb = findprev(y -> ≤(x[y], x[peaks[i]]) === true, peaks′, j - 1)
+                _rb = findnext(y -> ≤(x[y], x[peaks[i]]) === true, peaks′, j + 1)
+            end
 
             # Find left and right reverse peaks just inside the bounding peaks
             lb = isnothing(_lb) ? firstindex(notm) :
@@ -205,15 +220,15 @@ function peakproms!(peaks::AbstractVector{Int}, x::AbstractVector{T};
 
             # exa(coalesce(lref, rref), coalesce(rref, lref)))
             # we manually union-split this for better type-inference
-            exa_lref_rref = if ismissing(lref)
+            exa_ref = if ismissing(lref)
                 rref
             elseif ismissing(rref)
                 lref
             else
                 exa(lref, rref)
             end
 
-            proms[i] =  abs(x[peaks[i]] - exa_lref_rref)
+            proms[i] =  abs(x[peaks[i]] - exa_ref)
         end
     end
 
@@ -228,12 +243,11 @@ function peakproms!(peaks::AbstractVector{Int}, x::AbstractVector{T};
     return peaks, proms
 end
 
-function peakproms!(pks::NamedTuple; strict=true, min=nothing, max=nothing)
+function peakproms!(pks::NamedTuple{names,Ts}; strict=true, min=nothing, max=nothing) where {names,Ts}
     if !hasproperty(pks, :proms)
-        # Avoid filtering by min/max/strict here, so that it always happens outside if-statement.
-        # Pro: one less edge case. Con: More internal allocations
+        # Wait to filter until after merging `pks`
         _, proms = peakproms(pks.indices, pks.data; strict)
-        pks = merge(pks, (; proms))
+        pks = NamedTuple{(names..., :proms)}((values(pks)..., proms))
     end
     filterpeaks!(pks, :proms; min, max)
     return pks

diff --git a/src/peakwidth.jl b/src/peakwidth.jl
@@ -79,6 +79,40 @@ peakwidths(; kwargs...) = function _curried_peakwidths(pks)
     return peakwidths(pks; kwargs...)
 end
 
+function _inner_widthscalcloop!(op::O, cmp::C, x::AbstractVector{T}, peaks::AbstractVector{Int}, proms::AbstractVector{P}, ledge::AbstractVector{V}, redge::AbstractVector{V}, relheight::U, _bad, fst, lst, strict::Bool) where {O,C,T,P,V,U}
+    for i in eachindex(peaks, ledge, redge)
+        prom = proms[i]
+        if ismissing(prom) || isnan(prom)
+            redge[i] = _bad
+            ledge[i] = _bad
+        else
+            ht = op(x[peaks[i]], relheight * prom)
+            lo = findprev(v -> !ismissing(v) && cmp(v, ht), x, peaks[i])
+            hi = findnext(v -> !ismissing(v) && cmp(v, ht), x, peaks[i])
+
+            if !strict
+                if !isnothing(lo)
+                    lo1 = findnext(v -> !ismissing(v) && cmp(ht, v), x, lo + 1)
+                    @assert !isnothing(lo1)
+                    lo += (ht - x[lo]) / (x[lo1] - x[lo]) * (lo1 - lo)
+                end
+                if !isnothing(hi)
+                    hi1 = findprev(v -> !ismissing(v) && cmp(ht, v), x, hi - 1)
+                    @assert !isnothing(hi1)
+                    hi -= (ht - x[hi]) / (x[hi1] - x[hi]) * (hi - hi1)
+                end
+            else
+                !isnothing(lo) && (lo += (ht - x[lo]) / (x[lo+1] - x[lo]))
+                !isnothing(hi) && (hi -= (ht - x[hi]) / (x[hi-1] - x[hi]))
+            end
+            redge[i] = something(hi, lst)
+            ledge[i] = something(lo, fst)
+        end
+    end
+
+    return nothing
+end
+
 """
     peakwidths!(indices, x; [strict=true, relheight=0.5, min, max]) -> (indices, widths, ledge, redge)
     peakwidths!(pks::NamedTuple; [strict=true, relheight=0.5, min, max]) -> NamedTuple
@@ -132,15 +166,14 @@ function peakwidths!(
     else
         throw(ArgumentError("The first peak in `indices` is not a local extrema"))
     end
-    cmp = maxima ? (≤) : (≥)
-    op = maxima ? (-) : (+)
 
     V1 = promote_type(T, U)
     _bad = Missing <: V1 ? missing : float(Int)(NaN)
 
     V = promote_type(V1, float(Int))
     ledge = similar(proms, V)
     redge = similar(proms, V)
+    widths = similar(proms, V)
 
     if strict
         lst, fst = _bad, _bad
@@ -149,35 +182,13 @@ function peakwidths!(
         fst = firstindex(x)
     end
 
-    for i in eachindex(peaks, ledge, redge)
-        prom = proms[i]
-        if ismissing(prom) || isnan(prom)
-            redge[i] = _bad
-            ledge[i] = _bad
-        else
-            ht = op(x[peaks[i]], relheight * proms[i])
-            lo = findprev(v -> !ismissing(v) && cmp(v, ht), x, peaks[i])
-            hi = findnext(v -> !ismissing(v) && cmp(v, ht), x, peaks[i])
-
-            if !strict
-                if !isnothing(lo)
-                    lo1 = findnext(v -> !ismissing(v) && cmp(ht, v), x, lo + 1)
-                    lo += (ht - x[lo]) / (x[lo1] - x[lo]) * (lo1 - lo)
-                end
-                if !isnothing(hi)
-                    hi1 = findprev(v -> !ismissing(v) && cmp(ht, v), x, hi - 1)
-                    hi -= (ht - x[hi]) / (x[hi1] - x[hi]) * (hi - hi1)
-                end
-            else
-                !isnothing(lo) && (lo += (ht - x[lo]) / (x[lo+1] - x[lo]))
-                !isnothing(hi) && (hi -= (ht - x[hi]) / (x[hi-1] - x[hi]))
-            end
-            redge[i] = something(hi, lst)
-            ledge[i] = something(lo, fst)
-        end
+    if maxima
+        _inner_widthscalcloop!(-, ≤, x, peaks, proms, ledge, redge, relheight, _bad, fst, lst, strict)
+    else
+        _inner_widthscalcloop!(+, ≥, x, peaks, proms, ledge, redge, relheight, _bad, fst, lst, strict)
     end
 
-    widths::Vector{V} = redge - ledge
+    widths .= redge .- ledge
 
     if !isnothing(min) || !isnothing(max)
         lo = something(min, zero(eltype(widths)))
@@ -193,14 +204,13 @@ function peakwidths!(
 end
 
 function peakwidths!(pks::NamedTuple; strict=true, relheight=0.5, min=nothing, max=nothing)
-    !haskey(pks, :proms) && throw(ArgumentError(
+    !hasproperty(pks, :proms) && throw(ArgumentError(
         "Argument `pks` is expected to have prominences (`:proms`) already calculated"))
     if xor(hasproperty(pks, :widths), hasproperty(pks, :edges))
         throw(ArgumentError("Argument `pks` is expected have neither or both of the fields `:widths` and `:edges`."))
     end
     if !hasproperty(pks, :widths)
-        # Avoid filtering by min/max/strict here, so that it always happens outside if-statement.
-        # Pro: one less edge case. Con: More internal allocations
+        # Wait to filter until after merging `pks`
         _, widths, leftedges, rightedges = peakwidths(pks.indices, pks.data, pks.proms; relheight, strict)
         pks = merge(pks, (; widths, edges=collect(zip(leftedges, rightedges))))
     end

diff --git a/src/utils.jl b/src/utils.jl
@@ -1,24 +1,20 @@
 const known_fields = (:indices, :proms, :heights, :widths, :edges)
 
 function check_known_fields_equal_length(pks::NamedTuple)
-    features_to_filter = known_fields
-
-    feature_lengths = [length(pks[feature])
-        for feature in features_to_filter if hasproperty(pks, feature)]
-
-    # We refrain from using `allequal` to support Julia < 1.8
-    if !all(first(feature_lengths) == feature_lengths[i]
-            for i in eachindex(feature_lengths))
+    if !all(==(length(pks.indices))∘length, pks[k]
+            for k in keys(pks) if k in known_fields)
         length_pairs = [feature=>length(pks[feature])
-            for feature in features_to_filter if hasproperty(pks, feature)]
-        throw(DimensionMismatch("Expected all known fields of `pks` to be of equal length. Instead found the following pairs of known field and length: $length_pairs"))
+            for feature in known_fields if hasproperty(pks, feature)]
+        throw(DimensionMismatch(
+        "Expected all standard fields of `pks` (exc. `:data`) to be of equal length. Instead,"*
+        "found the following lengths: "*join(length_pairs, ", ", " and ")))
     end
     return nothing
 end
 
 function check_has_required_fields(pks::NamedTuple)
-    !haskey(pks, :indices) && throw(ArgumentError(
-        "`pks` is missing required field `:indices`"))
+    !hasproperty(pks, :indices) &&
+        throw(ArgumentError("`pks` is missing required field `:indices`"))
     return nothing
 end
 
@@ -51,7 +47,7 @@ julia> filterpeaks!(pks, mask)
 (indices = [7], heights = [4], data = [0, 5, 2, 3, 3, 1, 4, 0], proms = Union{Missing, Int64}[3])
 ```
 """
-function filterpeaks!(pks::NamedTuple, mask::Union{BitVector, Vector{Bool}})
+function filterpeaks!(pks::NamedTuple, mask::Union{BitVector, Vector{Bool}}; mutatemask=false)
     # Check lengths first to avoid a dimension mismatch
     # after having filtered some features.
     # feature_mask = hasproperty.(pks, features_to_filter)
@@ -65,20 +61,25 @@ function filterpeaks!(pks::NamedTuple, mask::Union{BitVector, Vector{Bool}})
         ))
     end
 
+    if mutatemask
+        mask .= .!mask
+    else
+        mask = .!mask
+    end
     for field in known_fields  # Only risk mutating fields added by this package
         hasproperty(pks, field) || continue  # Do nothing if field is not present
-        deleteat!(pks[field], .!mask)
+        deleteat!(pks[field], mask)
     end
     return pks
 end
 
 function filterpeaks!(pks::NamedTuple, feature::Symbol; min=nothing, max=nothing)
-    haskey(pks, feature) || throw(ArgumentError("`pks` does not have key `$feature`"))
+    hasproperty(pks, feature) || throw(ArgumentError("`pks` does not have key `$feature`"))
     if !isnothing(min) || !isnothing(max)
         lo = something(min, zero(eltype(pks.data)))
         hi = something(max, typemax(Base.nonmissingtype(eltype(pks.data))))
         mask = map(x -> !ismissing(x) && (lo ≤ x ≤ hi), pks[feature])
-        filterpeaks!(pks, mask)
+        filterpeaks!(pks, mask; mutatemask=true)
     end
     return pks
 end
@@ -87,8 +88,8 @@ end
     filterpeaks!(pred, pks) -> NamedTuple
 
 Apply a predicate function `pred` to NamedTuple slices (the scalar values related to each
-peak, e.g. `(indices=5, heights=3, proms=2)`)to get a filter-mask . A peak is removed if
-`pred` returns `false`.
+peak, e.g. `(indices=5, heights=3, proms=2)`) to and remove a peak if `pred` returns
+`false`.
 
 # Examples
 ```jldoctest

diff --git a/test/peakprom.jl b/test/peakprom.jl
@@ -47,6 +47,11 @@ x1 = a*sin.(2*pi*f1*T*t)+b*sin.(2*pi*f2*T*t)+c*sin.(2*pi*f3*T*t);
         @test last(peakproms(argmaxima(p4[2:end-1]), p4[2:end-1])) == [1]
         @test last(peakproms(argmaxima(p4[2:end-1]; strict=false), p4[2:end-1]; strict=false)) == [2,1,1]
 
+        # Plateaus have the same prominence
+        p4_plat = [0,4,2,4,4,3,4,0]
+        @test last(peakproms(argmaxima(p4_plat), p4_plat)) == [2,1,1]
+        @test last(peakproms(argmaxima(p4_plat), p4_plat; strict=false)) == [2,1,1]
+
         # The presence of a missing/NaN in either bounding interval poisons the prominence
         m4 = [missing; p4; missing]
         n4 = [NaN; p4; NaN]