Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve read/write performance #21

Merged
merged 22 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f2d7e37
Be less eager with slow get_multipled_parameter_names calls
halleysfifthinc Aug 13, 2024
fb8bff9
Replace one transcode with a widen
halleysfifthinc Aug 13, 2024
4b26a35
strip before transcoding
halleysfifthinc Aug 13, 2024
55df1e4
Fix copy-pasta
halleysfifthinc Aug 13, 2024
1d90a7a
Only create pt_labels if there are points to be labeled
halleysfifthinc Aug 13, 2024
44566a8
Only create an_labels if there are analog channels
halleysfifthinc Aug 13, 2024
258bf4b
Add some big files to benchmarks and update tuning file
halleysfifthinc Aug 13, 2024
7bbc351
Avoid repeated typed_index for POINT:SCALE
halleysfifthinc Aug 13, 2024
01d3a25
Further refine duplicate warnings (maxlog once by hash of the filenam…
halleysfifthinc Aug 13, 2024
be698f8
Avoid repeated dict indexing and improve broadcasting of point sample
halleysfifthinc Aug 13, 2024
12ec8ab
Use adjoint(view) instead of permutedims for point and analog
halleysfifthinc Aug 13, 2024
d008a45
Avoid repeated acquire/release of file for position calls
halleysfifthinc Aug 13, 2024
3d7eeea
Test another big data file
halleysfifthinc Aug 13, 2024
737a701
Fix missed rename of rstrip_vectorstring
halleysfifthinc Aug 13, 2024
09705ba
Tweak predicate order when stripping non-text
halleysfifthinc Aug 13, 2024
d83b58e
Separate pointdata block creation into a function
halleysfifthinc Aug 13, 2024
63079e0
Fix bad dispatch in roundapprox
halleysfifthinc Aug 13, 2024
6f41df0
Move dynamic dispatch further up the call chain and create pointdata …
halleysfifthinc Aug 13, 2024
021bde0
Create analogdata manually instead of mapreduce and fix manually type…
halleysfifthinc Aug 13, 2024
72f3988
Fix type instability and add explicit rtol to approx
halleysfifthinc Aug 13, 2024
59c972b
Add init to sum write (groups|parameters)
halleysfifthinc Aug 13, 2024
a7e9ead
Address missed API change in writedata
halleysfifthinc Aug 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ SUITE["read"]["big"]["sample17/128analogchannels.c2d"]["full"] =
@benchmarkable readc3d(fn) seconds=10 setup=(fn=artifact"sample17/128analogchannels.c3d")
SUITE["read"]["big"]["sample19/sample19.c3d"]["full"] =
@benchmarkable readc3d(fn) seconds=10 setup=(fn=artifact"sample19/sample19.c3d")
SUITE["read"]["big"]["sample33/bigparlove.c3d"]["full"] =
@benchmarkable readc3d(fn) seconds=10 setup=(fn=artifact"sample33/bigparlove.c3d")
SUITE["read"]["big"]["sample12/c24089 13.c3d"]["full"] =
@benchmarkable readc3d(fn) seconds=10 setup=(fn=artifact"sample12/c24089 13.c3d")
SUITE["read"]["big"]["sample31/large01.c3d"]["full"] =
@benchmarkable readc3d(fn) seconds=10 setup=(fn=artifact"sample31/large01.c3d")

SUITE["read"]["pc-real"]["sample01"]["params"] =
@benchmarkable readc3d(fn; paramsonly=true) seconds=10 setup=(fn=artifact"sample01/Eb015pr.c3d")
Expand Down Expand Up @@ -75,6 +81,12 @@ SUITE["read"]["big"]["sample17/128analogchannels.c3d"]["params"] =
@benchmarkable readc3d(fn; paramsonly=true) seconds=10 setup=(fn=artifact"sample17/128analogchannels.c3d")
SUITE["read"]["big"]["sample19/sample19.c3d"]["params"] =
@benchmarkable readc3d(fn; paramsonly=true) seconds=10 setup=(fn=artifact"sample19/sample19.c3d")
SUITE["read"]["big"]["sample33/bigparlove.c3d"]["params"] =
@benchmarkable readc3d(fn; paramsonly=true) seconds=10 setup=(fn=artifact"sample33/bigparlove.c3d")
SUITE["read"]["big"]["sample12/c24089 13.c3d"]["params"] =
@benchmarkable readc3d(fn; paramsonly=true) seconds=10 setup=(fn=artifact"sample12/c24089 13.c3d")
SUITE["read"]["big"]["sample31/large01.c3d"]["params"] =
@benchmarkable readc3d(fn; paramsonly=true) seconds=10 setup=(fn=artifact"sample31/large01.c3d")

SUITE["show"] = BenchmarkGroup()

Expand Down
2 changes: 1 addition & 1 deletion benchmark/tune.json

Large diffs are not rendered by default.

55 changes: 31 additions & 24 deletions src/C3D.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,6 @@ function C3DFile(name::String, header::Header{END}, groups::LittleDict{Symbol,Gr
cameras = OrderedDict{String,Vector{UInt8}}()
fanalog = OrderedDict{String,Vector{Float32}}()

numpts = groups[:POINT][Int, :USED]

ptlabel_keys = get_multipled_parameter_names(groups, :POINT, :LABELS)
pt_labels = Iterators.flatten(
groups[:POINT][Vector{String}, label] for label in ptlabel_keys
)

if strip_prefixes
if haskey(groups, :SUBJECTS) && groups[:SUBJECTS][Int, :USES_PREFIXES] == 1
rgx = Regex(
Expand All @@ -60,16 +53,27 @@ function C3DFile(name::String, header::Header{END}, groups::LittleDict{Symbol,Gr
end
end

nolabel_count = 1
numpts = groups[:POINT][Int, :USED]
if !iszero(numpts)
if haskey(groups[:POINT], :LABELS2)
ptlabel_keys = get_multipled_parameter_names(groups, :POINT, :LABELS)
pt_labels = Iterators.flatten(
groups[:POINT][Vector{String}, label] for label in ptlabel_keys
)
else
pt_labels = groups[:POINT][Vector{String}, :LABELS]
end

sizehint!(fpoint, numpts)
sizehint!(fresiduals, numpts)
sizehint!(cameras, numpts)

invalidpoints = Vector{Bool}(undef, size(point, 1))
calculatedpoints = Vector{Bool}(undef, size(point, 1))
goodpoints = Vector{Bool}(undef, size(point, 1))
abs_scale = abs(groups[:POINT][Float32, :SCALE])

nolabel_count = 1
for (idx, ptname) in enumerate(pt_labels)
idx > numpts && break # can't slice Iterators.flatten
og_ptname = ptname
Expand All @@ -95,7 +99,7 @@ function C3DFile(name::String, header::Header{END}, groups::LittleDict{Symbol,Gr
while ptname*"_$cnt" ∈ keys(fpoint); cnt+=1 end
ptname *= "_$cnt"
dedupped_ptname = ptname
@warn "Duplicate marker label detected (\"$(og_ptname)\"). Duplicate renamed to \"$dedupped_ptname\"." _id=dedupped_ptname
@warn "Duplicate marker label detected (\"$(og_ptname)\"). Duplicate renamed to \"$dedupped_ptname\"." _id=hash(name*dedupped_ptname) maxlog=1
end
ptname ∉ keys(fpoint) || throw(DuplicateMarkerError(
"Markers labels must be unique but found duplicate marker label \"$ptname\""*
Expand All @@ -112,36 +116,39 @@ function C3DFile(name::String, header::Header{END}, groups::LittleDict{Symbol,Gr
invalidpoints .= ((@view(residuals[:,idx]) .% UInt16) .& 0x8000) .!== 0x0000
calculatedpoints .= iszero.(@view(residuals[:,idx]) .& 0xff)
goodpoints .= .~(invalidpoints .| calculatedpoints)
calcresiduals!(fresiduals[ptname], goodpoints, abs(groups[:POINT][Float32, :SCALE]))
calcresiduals!(fresiduals[ptname], goodpoints, abs_scale)

if missingpoints
pt_residual = fresiduals[ptname]
pt_point = fpoint[ptname]
for i in eachindex(fresiduals[ptname])
if calculatedpoints[i] & ~invalidpoints[i]
fresiduals[ptname][i] = 0.0f0
pt_residual[i] = 0.0f0
end

if invalidpoints[i]
fpoint[ptname][i, :] .= missing
fresiduals[ptname][i] = missing
pt_point[i, :] .= (missing, missing, missing)
pt_residual[i] = missing
end
end
end
end
end

anlabel_keys = collect(filter(keys(groups[:ANALOG])) do k
contains(string(k), r"^LABELS\d*")
end)
sort!(anlabel_keys; by=_naturalsortby)
an_labels = Iterators.flatten(
groups[:ANALOG][Vector{String}, label] for label in anlabel_keys
)

numanalogs = groups[:ANALOG][Int, :USED]

nolabel_count = 1
if !iszero(numanalogs)
if haskey(groups[:ANALOG], :LABELS2)
anlabel_keys = get_multipled_parameter_names(groups, :ANALOG, :LABELS)
an_labels = Iterators.flatten(
groups[:ANALOG][Vector{String}, label] for label in anlabel_keys
)
else
an_labels = groups[:ANALOG][Vector{String}, :LABELS]
end

sizehint!(fanalog, numanalogs)

nolabel_count = 1
for (idx, name) in enumerate(an_labels)
idx > numanalogs && break # can't slice Iterators.flatten
og_name = name
Expand All @@ -156,7 +163,7 @@ function C3DFile(name::String, header::Header{END}, groups::LittleDict{Symbol,Gr
while name*"_$cnt" ∈ keys(fanalog); cnt+=1 end
name *= "_$cnt"
dedupped_name = name
@warn "Duplicate analog signal label detected (\"$(og_name)\"). Duplicate renamed to \"$dedupped_name\"." _id=dedupped_name
@warn "Duplicate analog signal label detected (\"$(og_name)\"). Duplicate renamed to \"$dedupped_name\"." _id=hash(name*dedupped_name) maxlog=1
end

fanalog[name] = analog[:, idx]
Expand Down
23 changes: 10 additions & 13 deletions src/parameters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ function readparam(io::IO, ::Type{END}) where {END<:AbstractEndian}
@assert gid != 0
_name = read(io, abs(nl))
@assert any(!iscntrl∘Char, _name)
name = Symbol(replace(strip(transcode(String, view(_name, :))), r"[^a-zA-Z0-9_]" => '_'))
name = Symbol(replace(transcode(String, rstrip_vectorstring(!isspace∘Char, view(_name, :))), r"[^a-zA-Z0-9_]" => '_'))

# @debug "Parameter $name at $pos has unofficially supported characters.
# Unexpected results may occur" maxlog=occursin(r"[^a-zA-Z0-9_ ]", transcode(String, copy(_name)))
Expand Down Expand Up @@ -256,7 +256,7 @@ function readparam(io::IO, ::Type{END}) where {END<:AbstractEndian}
end

dl = read(io, UInt8)::UInt8
desc = copy(rstrip_cntrl_null_space(read(io, dl)))
desc = copy(rstrip_vectorstring(read(io, dl)))

pointer = pos + np + abs(nl) + 2
# @debug "wrong pointer in $name" position(io) pointer maxlog=(position(io) != pointer)
Expand Down Expand Up @@ -295,14 +295,9 @@ function _readarrayparameter(io::IO, END::Type{<:AbstractEndian{T}}, dims) where
return read!(io, a, END)
end

# Taken from Base.iscntrl and Base.isspace, but for bytes instead of chars
_iscntrl(c::Char) = iscntrl(c)
_isspace(c::Char) = isspace(c)
_iscntrl(c::Union{Int8,UInt8}) = c <= 0x1f || 0x7f <= c <= 0x9f
_isspace(c::Union{Int8,UInt8}) = c == 0x20 || 0x09 <= c <= 0x0d || c == 0x85 || 0xa0 <= c

function rstrip_cntrl_null_space(s)
l = findlast(c -> !_iscntrl(c) && !_isspace(c), s)
rstrip_vectorstring(s) = rstrip_vectorstring((c -> !isspace(c) && !iscntrl(c))∘Char, s)
function rstrip_vectorstring(f, s)
l = findlast(f, s)
if isnothing(l)
return @view s[end:end-1]
else
Expand All @@ -321,13 +316,15 @@ function _readarrayparameter(io::IO, ::Type{<:AbstractEndian{String}}, dims)::Ar
if length(dims) > 1
_, rdims... = dims
data = Array{String}(undef, rdims)::Array{String}
temp = Vector{UInt16}(undef, dims[1])
for ijk::CartesianIndex in CartesianIndices(data)
data[ijk] = string(rstrip(x -> iscntrl(x) || isspace(x),
transcode(String, transcode(UInt16, @view tdata[:, ijk]))))::String
temp .= tdata[:, ijk]
data[ijk] = transcode(String,
rstrip_vectorstring(convert(Vector{UInt16}, temp)))
# @debug "" @view(tdata[:, ijk]), data[ijk]
end
else
data = [ rstrip(x -> iscntrl(x) || isspace(x), transcode(String, transcode(UInt16, _tdata))) ]
data = [ transcode(String, rstrip_vectorstring(convert(Vector{UInt16}, _tdata))) ]
end
return data
end
Expand Down
18 changes: 11 additions & 7 deletions src/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,20 @@ function readdata(
# Analog Samples Per Frame => ASPF
analog = zeros(Float32, numchannels, aspf*numframes)

nbanalog = numchannels*aspf
analogtmp = Matrix{F}(undef, (numchannels,aspf))
else
analog = Array{Float32,2}(undef, 0,0)
end

pos = position(io) # track the position manually to avoid repeatedly acquiring/releasing the file lock
@inbounds for i in 1:numframes
if hasmarkers
if _iosize - position(io) ≥ sizeof(pointtmp)
if _iosize - pos ≥ sizeof(pointtmp)
read!(io, pointtmp, END)
point[:,i] .= convert.(Float32, pointview)
residuals[:,i] .= convert.(Int32, resview) .% Int16
pos += nb*sizeof(F)
else
# Make marker data for missing frames be treated as missing
residuals[:,i] .= -ones(Int16, nummarkers)
Expand All @@ -106,9 +109,10 @@ function readdata(
end
end
if haschannels
if _iosize - position(io) ≥ sizeof(analogtmp)
if _iosize - pos ≥ sizeof(analogtmp)
read!(io, analogtmp, END)
analog[:,((i-1)*aspf+1):(i*aspf)] .= convert.(Float32, analogtmp)
pos += nbanalog*sizeof(F)
else
@debug "End-of-file reached before expected; frame$(length(i:numframes) > 1 ? "s" : "") $(i:numframes) \
are missing"
Expand All @@ -129,8 +133,8 @@ function readdata(
SCALE = groups[:ANALOG][Float32, :GEN_SCALE] * groups[:ANALOG][Float32, :SCALE]
analog .= (analog .- ANALOG_OFFSET) .* SCALE
else
off_labels = get_multipled_parameter_names(groups, :ANALOG, :OFFSET)
if length(off_labels) > 1
if haskey(groups[:ANALOG], :OFFSET2)
off_labels = get_multipled_parameter_names(groups, :ANALOG, :OFFSET)
VECANALOG_OFFSET = convert(Vector{Float32}, reduce(vcat,
groups[:ANALOG][Vector{Int}, offset]
for offset in off_labels))[1:numchannels]
Expand All @@ -139,8 +143,8 @@ function readdata(
groups[:ANALOG][Vector{Int}, :OFFSET][1:numchannels])
end

scale_labels = get_multipled_parameter_names(groups, :ANALOG, :SCALE)
if length(scale_labels) > 1
if haskey(groups[:ANALOG], :SCALE2)
scale_labels = get_multipled_parameter_names(groups, :ANALOG, :SCALE)
VECSCALE = convert(Vector{Float32}, reduce(vcat,
groups[:ANALOG][Vector{Int}, scale]
for scale in scale_labels))[1:numchannels]
Expand All @@ -153,7 +157,7 @@ function readdata(
end
end

return (permutedims(point), permutedims(residuals), permutedims(analog))
return (point', permutedims(residuals), analog')
end

"""
Expand Down
Loading
Loading