|
# Size in bytes associated with type `T`: `sizeof(T)` when `Base.allocatedinline(T)`
# holds, otherwise `sizeof(Int)`. The value is computed while the method body is
# generated, so the generated code is just the resulting integer constant.
@generated function offsetsize(::Type{T}) where {T}
    return Base.allocatedinline(T) ? sizeof(T) : sizeof(Int)
end
|
29 | 29 |
|
# Fallback for a value that is not already of the pointer's element type: convert `v`
# to `T`, then call `store!` again with the converted value.
@inline function store!(p::Ptr{T}, v) where {T}
    converted = convert(T, v)
    return store!(p, converted)
end
|
| 31 | + |
"""
    LazyPreserve(x, ptrcall=nothing)

Marker wrapper recognized by [`preserve`](@ref). The wrapped argument is unwrapped with
[`preserve_buffer`](@ref), and the resulting buffer is then converted to a pointer. When
`ptrcall` is given, the conversion is a call equivalent to
`ptrcall(preserve_buffer(x), x)`; otherwise it is a call equivalent to
`pointer(preserve_buffer(x))`.
"""
struct LazyPreserve{A,P}
    arg::A
    ptrcall::P
end

# Single-argument form: no custom conversion, so `ptrcall` is `nothing`.
function LazyPreserve(arg)
    return LazyPreserve(arg, nothing)
end

# Calling a `LazyPreserve` on a buffer `x` performs the conversion: the stored `ptrcall`
# receives the buffer together with the wrapped argument.
function (p::LazyPreserve)(x)
    return p.ptrcall(x, p.arg)
end

# Without a `ptrcall`, the conversion is a plain `pointer` call on the buffer.
function (p::LazyPreserve{A,Nothing})(x) where {A}
    return pointer(x)
end
| 48 | + |
| 49 | + |
| 50 | +""" |
| 51 | + preserve_buffer(x) |
| 52 | +
|
| 53 | +For structs wrapping arrays, using `GC.@preserve` can trigger heap allocations. |
| 54 | +`preserve_buffer` attempts to extract the heap-allocated part. Isolating it by itself |
| 55 | +will often allow the heap allocations to be elided. For example: |
| 56 | +
|
| 57 | +```julia |
| 58 | +julia> using StaticArrays, BenchmarkTools |
| 59 | +julia> # Needed until a release is made featuring https://github.com/JuliaArrays/StaticArrays.jl/commit/a0179213b741c0feebd2fc6a1101a7358a90caed |
| 60 | + Base.elsize(::Type{<:MArray{S,T}}) where {S,T} = sizeof(T) |
| 61 | +julia> @noinline foo(A) = unsafe_load(A,1) |
| 62 | +foo (generic function with 1 method) |
| 63 | +julia> function alloc_test_1() |
| 64 | + A = view(MMatrix{8,8,Float64}(undef), 2:5, 3:7) |
| 65 | + A[begin] = 4 |
| 66 | + GC.@preserve A foo(pointer(A)) |
| 67 | + end |
| 68 | +alloc_test_1 (generic function with 1 method) |
| 69 | +julia> function alloc_test_2() |
| 70 | + A = view(MMatrix{8,8,Float64}(undef), 2:5, 3:7) |
| 71 | + A[begin] = 4 |
| 72 | + pb = parent(A) # or `LoopVectorization.preserve_buffer(A)`; `preserve_buffer(::SubArray)` calls `parent` |
| 73 | + GC.@preserve pb foo(pointer(A)) |
| 74 | + end |
| 75 | +alloc_test_2 (generic function with 1 method) |
| 76 | +julia> @benchmark alloc_test_1() |
| 77 | +BenchmarkTools.Trial: |
| 78 | + memory estimate: 544 bytes |
| 79 | + allocs estimate: 1 |
| 80 | + -------------- |
| 81 | + minimum time: 17.227 ns (0.00% GC) |
| 82 | + median time: 21.352 ns (0.00% GC) |
| 83 | + mean time: 26.151 ns (13.33% GC) |
| 84 | + maximum time: 571.130 ns (78.53% GC) |
| 85 | + -------------- |
| 86 | + samples: 10000 |
| 87 | + evals/sample: 998 |
| 88 | +julia> @benchmark alloc_test_2() |
| 89 | +BenchmarkTools.Trial: |
| 90 | + memory estimate: 0 bytes |
| 91 | + allocs estimate: 0 |
| 92 | + -------------- |
| 93 | + minimum time: 3.275 ns (0.00% GC) |
| 94 | + median time: 3.493 ns (0.00% GC) |
| 95 | + mean time: 3.491 ns (0.00% GC) |
| 96 | + maximum time: 4.998 ns (0.00% GC) |
| 97 | + -------------- |
| 98 | + samples: 10000 |
| 99 | + evals/sample: 1000 |
| 100 | +``` |
| 101 | +""" |
@inline function preserve_buffer(x::LazyPreserve)
    # Look through the wrapper and extract the buffer of the wrapped argument.
    return preserve_buffer(x.arg)
end
# Generic fallback: the object is returned unchanged as its own buffer.
@inline function preserve_buffer(x)
    return x
end
|
# Arrays: hand the array and its `parent` to `_preserve_buffer`, which decides what to
# return from that pair.
@inline function preserve_buffer(A::AbstractArray)
    return _preserve_buffer(A, parent(A))
end
|
# Recursive step: `p` is itself an array, so descend one level by pairing `p` with its
# own parent. The first argument `a` is not used by this method.
@inline function _preserve_buffer(a::A, p::P) where {A,P<:AbstractArray}
    return _preserve_buffer(p, parent(p))
end
|
|
101 | 173 | return body
|
102 | 174 | end
|
103 | 175 |
|
"""
    preserve(op, args...; kwargs...)

Call `op` with `args` and `kwargs`, giving special treatment to every argument that is a
[`LazyPreserve`](@ref): it is unwrapped with [`preserve_buffer`](@ref), the resulting
buffer is protected from garbage collection (`GC.@preserve`) around the call, and it is
converted to a pointer. The converted values are passed to `op` in place of the original
arguments; all other arguments are passed through as they are.

# Examples

```julia
julia> using ManualMemory: store!, preserve, LazyPreserve

julia> x = [0 0; 0 0];

julia> preserve(store!, LazyPreserve(x), 1)

julia> x[1]
1

```
"""
function preserve(op, args...; kwargs...)
    # `_preserve` takes the keyword arguments as a `NamedTuple`, which is what the `data`
    # field of the keyword-argument iterator holds.
    return _preserve(op, args, kwargs.data)
end
# Generated implementation behind `preserve`: the call expression is built from the
# argument types alone by `_preserve_expr`.
#
# The argument names `op`, `args` and `kwargs` are referenced by symbol inside the
# expression that `_preserve_expr` builds, so they must keep exactly these names.
@generated function _preserve(
    op, args::ArgsT, kwargs::NamedTuple{names,KwT}
) where {ArgsT,names,KwT}
    return _preserve_expr(ArgsT, names, KwT)
end
# Build the body of the generated `_preserve` method.
#
# Arguments:
# - `A`: tuple type of the positional arguments (`args` in `_preserve`).
# - `syms`: names of the keyword arguments.
# - `K`: tuple type of the keyword argument values (`kwargs` in `_preserve`).
#
# Returns a block expression that
# 1. is marked for inlining,
# 2. assigns `preserve_buffer(arg)` to a fresh variable for every `LazyPreserve` argument,
# 3. calls `op` inside `GC.@preserve` over all of those variables.
#
# The symbols `op`, `args` and `kwargs` in the produced expression refer to the arguments
# of `_preserve`.
function _preserve_expr(::Type{A}, syms::Tuple{Vararg{Symbol}}, ::Type{K}) where {A,K}
    body = Expr(:block, Expr(:meta, :inline))
    call = Expr(:call, :op)
    # Unexpanded `GC.@preserve` macro call; the preserved symbols and finally the call
    # itself are appended as its arguments below.
    pres = :(GC.@preserve)
    for (i, argtype) in enumerate(A.parameters)
        arg_i = _unwrap_preserve(body, pres, :(getfield(args, $i)), argtype)
        push!(call.args, arg_i)
    end
    if !isempty(syms)
        # Keyword arguments are collected in a `parameters` node, i.e. `op(args...; kw...)`.
        params = Expr(:parameters)
        for (i, name) in enumerate(syms)
            kwarg_i = _unwrap_preserve(body, pres, :(getfield(kwargs, $i)), K.parameters[i])
            push!(params.args, Expr(:kw, name, kwarg_i))
        end
        # In a call expression the `parameters` node goes directly after the callee,
        # ahead of the positional arguments.
        insert!(call.args, 2, params)
    end
    push!(pres.args, call)
    push!(body.args, pres)
    return body
end
# Produce the expression to pass to `op` for a single argument.
#
# - `body`: block that receives the `buffer = preserve_buffer(arg)` assignment.
# - `pres`: `GC.@preserve` macro call that receives the buffer symbol.
# - `argexpr`: expression that evaluates to the argument.
# - `argtype`: type of the argument.
#
# Arguments that are not `LazyPreserve` are passed through untouched. For a `LazyPreserve`
# the buffer is extracted into a fresh variable, registered for preservation, and the
# wrapper is called on it to obtain the value handed to `op`.
function _unwrap_preserve(body::Expr, pres::Expr, argexpr::Expr, argtype::Type)
    argtype <: LazyPreserve || return argexpr
    buffer = gensym()
    extract = Expr(:call, :preserve_buffer, argexpr)
    push!(body.args, Expr(:(=), buffer, extract))
    push!(pres.args, buffer)
    return Expr(:call, argexpr, buffer)
end
| 231 | + |
104 | 232 | end
|
0 commit comments