Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate C3DFile before writing #20

Merged
merged 49 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
8c9c41e
Reformat line lengths
halleysfifthinc Jul 31, 2024
7e9e294
Fix string parameter stripping
halleysfifthinc Jul 31, 2024
5244b78
Leave parameter data as read (for writing)
halleysfifthinc Jul 31, 2024
a7beeae
Add internal consistency test macro and use for blank labels files
halleysfifthinc Aug 5, 2024
07cdc43
test basic c3d variant samples for internal consistency
halleysfifthinc Aug 5, 2024
210ab46
Refactor tests to reduce redundancy and improve readability
halleysfifthinc Aug 5, 2024
5eacd94
Wrap all tests in a testset to improve test summary
halleysfifthinc Aug 5, 2024
d71208c
Move multiple labels group identification to a function
halleysfifthinc Aug 6, 2024
7ae2c27
Swap Dict for LittleDict or OrderedDict
halleysfifthinc Aug 6, 2024
e1ad437
Add sizehints to C3DFile constructor dicts
halleysfifthinc Aug 6, 2024
87b3e8e
Refactor group and parameter dict assembling
halleysfifthinc Aug 6, 2024
c08e9cf
Grab bag of (mostly) type-stability improvements
halleysfifthinc Aug 7, 2024
43076ea
Mostly more type-stability improvements, some algorithmic too
halleysfifthinc Aug 7, 2024
de6634f
Fix un-expected/intended error sinkholing in readparam and readgroup
halleysfifthinc Aug 9, 2024
fe46788
Create and reuse name length variable in ParameterComparison constructor
halleysfifthinc Aug 9, 2024
ea0b6ce
Fully handle all allowed and disallowed differences between original …
halleysfifthinc Aug 9, 2024
b2800c1
Treat a missing group/parameter in the original file as equal
halleysfifthinc Aug 9, 2024
205c7f8
Refactor write tests (add a bunch more)
halleysfifthinc Aug 9, 2024
0dbdc5f
Limit convenience constructor to allowed scalar types to force errors…
halleysfifthinc Aug 9, 2024
a639667
Fix incorrect size/ndims results
halleysfifthinc Aug 9, 2024
255dcea
Correctly handle/transcode UTF8
halleysfifthinc Aug 9, 2024
f804b36
unsafe convert integer data when writing
halleysfifthinc Aug 9, 2024
3e10bf5
Convert to correct/allowed types at construction when adding parameters
halleysfifthinc Aug 9, 2024
abb4717
Rearrange validation (SCALE is necessary for point format type even i…
halleysfifthinc Aug 9, 2024
94db6a1
Move MANUFACTURER:EDITED creation/addition to separate function
halleysfifthinc Aug 9, 2024
3a66990
Use (now) ordered nature of point/analog dicts when iterating to write
halleysfifthinc Aug 9, 2024
82f4ee5
Rewrite group/parameter GID uniqueness and assignment and comment
halleysfifthinc Aug 9, 2024
b923ebe
Strip trailing space|cntrl chars from _desc, and transcode correctly …
halleysfifthinc Aug 9, 2024
666a159
Don't read data when testing groups/parameters
halleysfifthinc Aug 9, 2024
3035b70
Refine duplicate warning ids
halleysfifthinc Aug 10, 2024
bec84af
Move datastart checks out of `readdata` to be able to directly test r…
halleysfifthinc Aug 10, 2024
22f2a70
Fix datastart calcs for writing
halleysfifthinc Aug 10, 2024
ecfe329
Refactor and add data write roundtripping tests
halleysfifthinc Aug 10, 2024
2396635
Add missing dimension to vector number params; print numbers compactly
halleysfifthinc Aug 12, 2024
a08662b
Fix paramptr and datastart padding
halleysfifthinc Aug 12, 2024
a5ae8f3
Merge calculated/invalid point iteration and only mark residual as ze…
halleysfifthinc Aug 12, 2024
97e3308
Ensure group gid is negative in constructor and on write
halleysfifthinc Aug 12, 2024
fad8674
Fix (again) datastart and padding calculation
halleysfifthinc Aug 12, 2024
693229d
Refactor identical file comparison helper function and tests
halleysfifthinc Aug 12, 2024
4b09f9c
Update binary data comparisons for write tests
halleysfifthinc Aug 12, 2024
dd8c41a
Tweak debugging printing
halleysfifthinc Aug 12, 2024
ea61021
Tweak residuals type and reading/calculation
halleysfifthinc Aug 12, 2024
17a0e0d
Simplify(?) (be a little stricter/correct at least) when processing r…
halleysfifthinc Aug 12, 2024
22225b1
Sinkhole warnings about duplicate markers/analog channels
halleysfifthinc Aug 12, 2024
2b8a520
Test that read => write => read results in the same data
halleysfifthinc Aug 12, 2024
5a7af00
Fix residual creation and add init for no markers
halleysfifthinc Aug 12, 2024
62bc7da
Fix analog data generation
halleysfifthinc Aug 12, 2024
8949232
Fix uncareful assumption that one ANALOG:SCALE contains all channels
halleysfifthinc Aug 12, 2024
958f959
Update CI config
halleysfifthinc Aug 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
name: CI
on:
- push
- pull_request
push:
branches:
- master
- main
- 'release*'
pull_request:

# needed to allow julia-actions/cache to delete old caches that it has created
permissions:
actions: write
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}

jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
Expand All @@ -23,19 +36,12 @@ jobs:
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v3
- uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: lcov.info

1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "0.8.0-DEV"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
VaxData = "a6f58a78-e649-57e2-81bc-1d865c7b74f7"

Expand Down
146 changes: 112 additions & 34 deletions src/C3D.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module C3D

using VaxData, PrecompileTools, LazyArtifacts, Dates, DelimitedFiles
using VaxData, OrderedCollections, PrecompileTools, LazyArtifacts, Dates, DelimitedFiles

export readc3d, writec3d, numpointframes, numanalogframes, writetrc

Expand All @@ -14,73 +14,151 @@ include("header.jl")
struct C3DFile{END<:AbstractEndian}
name::String
header::Header{END}
groups::Dict{Symbol, Group}
point::Dict{String, Array{Union{Missing, Float32},2}}
residual::Dict{String, Array{Union{Missing, Float32},1}}
cameras::Dict{String, Vector{UInt8}}
analog::Dict{String, Array{Float32,1}}
groups::LittleDict{Symbol, Group{END}, Vector{Symbol}, Vector{Group{END}}}
point::OrderedDict{String, Array{Union{Missing, Float32},2}}
residual::OrderedDict{String, Array{Union{Missing, Float32},1}}
cameras::OrderedDict{String, Vector{UInt8}}
analog::OrderedDict{String, Array{Float32,1}}
end

include("read.jl")
include("validate.jl")
include("write.jl")
include("util.jl")

function C3DFile(name::String, header::Header, groups::Dict{Symbol,Group},
struct DuplicateMarkerError
msg::String
end

function Base.showerror(io::IO, err::DuplicateMarkerError)
println(io, "DuplicateMarkerError: ", err.msg)
end

function C3DFile(name::String, header::Header{END}, groups::LittleDict{Symbol,Group{END}, Vector{Symbol}, Vector{Group{END}}},
point::AbstractArray, residuals::AbstractArray, analog::AbstractArray;
missingpoints::Bool=true, strip_prefixes::Bool=false)
fpoint = Dict{String,Matrix{Union{Missing, Float32}}}()
fresiduals = Dict{String,Vector{Union{Missing, Float32}}}()
cameras = Dict{String,Vector{UInt8}}()
fanalog = Dict{String,Vector{Float32}}()

l = size(point, 1)
allpoints = 1:l
missingpoints::Bool=true, strip_prefixes::Bool=false) where {END}
fpoint = OrderedDict{String,Matrix{Union{Missing, Float32}}}()
fresiduals = OrderedDict{String,Vector{Union{Missing, Float32}}}()
cameras = OrderedDict{String,Vector{UInt8}}()
fanalog = OrderedDict{String,Vector{Float32}}()

numpts = groups[:POINT][Int, :USED]

ptlabel_keys = get_multipled_parameter_names(groups, :POINT, :LABELS)
pt_labels = Iterators.flatten(
groups[:POINT][Vector{String}, label] for label in ptlabel_keys
)

if strip_prefixes
if haskey(groups, :SUBJECTS) && groups[:SUBJECTS][Int, :USES_PREFIXES] == 1
rgx = Regex("("*join(groups[:SUBJECTS][Vector{String}, :LABEL_PREFIXES], '|')*
")(?<label>\\w*)")
rgx = Regex(
"("*
join(groups[:SUBJECTS][Vector{String}, :LABEL_PREFIXES], '|')*
")(?<label>\\w*)")
else
rgx = r":(?<label>\w*)"
end
allunique(map(x -> something(something(match(rgx, x), (;label=nothing))[:label], x),
groups[:POINT][Vector{String}, :LABELS][1:numpts])) ||
throw(ArgumentError("marker names would not be unique after removing subject prefixes"))
end

nolabel_count = 1
if !iszero(numpts)
sizehint!(fpoint, numpts)
sizehint!(fresiduals, numpts)
sizehint!(cameras, numpts)

invalidpoints = Vector{Bool}(undef, size(point, 1))
calculatedpoints = Vector{Bool}(undef, size(point, 1))
goodpoints = Vector{Bool}(undef, size(point, 1))

for (idx, ptname) in enumerate(groups[:POINT][Vector{String}, :LABELS][1:numpts])
for (idx, ptname) in enumerate(pt_labels)
idx > numpts && break # can't slice Iterators.flatten
og_ptname = ptname
stripped_ptname = ""
if strip_prefixes
m = match(rgx, ptname)
if !isnothing(m) && !isnothing(m[:label])
ptname = m[:label]
stripped_ptname = ptname
end
end
if isempty(ptname)
ptname = "M"*string(nolabel_count, pad=3)
nolabel_count += 1
end

# don't dedup stripped names; we don't assume uniqueness of suffixes (aka marker
# names) because there can be multiple subjects using the same marker set
if isempty(stripped_ptname) && ptname ∈ keys(fpoint)
# append "_$(duplicate count)" while making sure not to duplicate an
# existing marker name (unlikely)
cnt = 2
while ptname*"_$cnt" ∈ keys(fpoint); cnt+=1 end
ptname *= "_$cnt"
dedupped_ptname = ptname
@warn "Duplicate marker label detected (\"$(og_ptname)\"). Duplicate renamed to \"$dedupped_ptname\"." _id=dedupped_ptname
end
ptname ∉ keys(fpoint) || throw(DuplicateMarkerError(
"Markers labels must be unique but found duplicate marker label \"$ptname\""*
if !isempty(stripped_ptname)
" (originally \"$(og_ptname)\" before stripping subject prefixes)"
else
"" # shouldn't be reachable?
end
))

fpoint[ptname] = point[:,((idx-1)*3+1):((idx-1)*3+3)]
fresiduals[ptname] = residuals[:,idx]
cameras[ptname] = ((convert.(Int32, @view(residuals[:,idx])) .>> 8) .& 0xff) .% UInt8
invalidpoints .= convert.(Int32, fresiduals[ptname]) .% Int16 .< 0
calculatedpoints .= iszero.(fresiduals[ptname])
cameras[ptname] = ((@view(residuals[:,idx]) .>> 8) .% UInt8) .& 0x7f
invalidpoints .= ((@view(residuals[:,idx]) .% UInt16) .& 0x8000) .!== 0x0000
calculatedpoints .= iszero.(@view(residuals[:,idx]) .& 0xff)
goodpoints .= .~(invalidpoints .| calculatedpoints)
fresiduals[ptname][goodpoints] = calcresiduals(fresiduals[ptname][goodpoints], abs(groups[:POINT][Float32, :SCALE]))
calcresiduals!(fresiduals[ptname], goodpoints, abs(groups[:POINT][Float32, :SCALE]))

if missingpoints
fpoint[ptname][invalidpoints, :] .= missing
fresiduals[ptname][invalidpoints] .= missing
fresiduals[ptname][calculatedpoints] .= 0.0f0
for i in eachindex(fresiduals[ptname])
if calculatedpoints[i] & ~invalidpoints[i]
fresiduals[ptname][i] = 0.0f0
end

if invalidpoints[i]
fpoint[ptname][i, :] .= missing
fresiduals[ptname][i] = missing
end
end
end
end
end

if !iszero(groups[:ANALOG][Int, :USED])
for (idx, name) in enumerate(groups[:ANALOG][Vector{String}, :LABELS][1:groups[:ANALOG][Int, :USED]])
anlabel_keys = collect(filter(keys(groups[:ANALOG])) do k
contains(string(k), r"^LABELS\d*")
end)
sort!(anlabel_keys; by=_naturalsortby)
an_labels = Iterators.flatten(
groups[:ANALOG][Vector{String}, label] for label in anlabel_keys
)

numanalogs = groups[:ANALOG][Int, :USED]

nolabel_count = 1
if !iszero(numanalogs)
sizehint!(fanalog, numanalogs)
for (idx, name) in enumerate(an_labels)
idx > numanalogs && break # can't slice Iterators.flatten
og_name = name
if isempty(name)
name = "A"*string(nolabel_count, pad=3)
nolabel_count += 1
end
if name ∈ keys(fanalog)
# append "_$(duplicate count)" while making sure not to duplicate an
# existing marker name (unlikely)
cnt = 2
while name*"_$cnt" ∈ keys(fanalog); cnt+=1 end
name *= "_$cnt"
dedupped_name = name
@warn "Duplicate analog signal label detected (\"$(og_name)\"). Duplicate renamed to \"$dedupped_name\"." _id=dedupped_name
end

fanalog[name] = analog[:, idx]
end
end
Expand All @@ -92,7 +170,7 @@ C3DFile(fn::AbstractString) = readc3d(string(fn))

numpointframes(f::C3DFile) = numpointframes(f.groups)

function numpointframes(groups::Dict{Symbol,Group})::Int
function numpointframes(groups::LittleDict{Symbol,Group{E}})::Int where E <: AbstractEndian
numframes::Int = groups[:POINT][Int, :FRAMES]
if haskey(groups[:POINT], :LONG_FRAMES)
if typeof(groups[:POINT][:LONG_FRAMES]) <: Vector{Int16}
Expand Down Expand Up @@ -131,8 +209,8 @@ end

endianness(f::C3DFile{END}) where END = END

groups(f::C3DFile) = collect(values(f.groups))
parameters(f::C3DFile) = collect(Iterators.flatten((values(group) for group in groups(f))))
groups(f::C3DFile) = values(f.groups)
parameters(f::C3DFile) = Iterators.flatten((values(group) for group in groups(f)))

function Header{END}(f::C3DFile{OEND}) where {END<:AbstractEndian,OEND<:AbstractEndian}
h = f.header
Expand All @@ -147,7 +225,7 @@ function Header{END}(f::C3DFile{OEND}) where {END<:AbstractEndian,OEND<:Abstract
end
ampf::UInt16 = aspf*f.groups[:ANALOG][Int, :USED]

datastart::UInt8 = 2+round(sum(writesize, Iterators.flatten((groups(f), parameters(f))))/512, RoundUp)
datastart::UInt8 = 2+cld(sum(writesize, Iterators.flatten((groups(f), parameters(f))))+4, 512)

return Header{END}(paramptr, datafmt, npoints, ampf, h.fframe, h.lframe, h.maxinterp,
h.scale, datastart, aspf, pointrate, h.res1, h.labeltype, h.numevents, h.res2,
Expand Down
67 changes: 46 additions & 21 deletions src/groups.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,58 @@ mutable struct Group{END<:AbstractEndian}
const pos::Int
gid::Int8 # Group ID
const locked::Bool
const _name::Vector{UInt8} # Character set should be A-Z, 0-9, and _ (lowercase is ok)
const name::Symbol
const np::Int16 # Pointer in bytes to the start of next group/parameter (officially supposed to be signed)
const _name::Vector{UInt8} # Character set should be A-Z, 0-9, and _ (lowercase is ok)
name::Symbol
const _desc::Vector{UInt8} # Character set should be A-Z, 0-9, and _ (lowercase is ok)
const params::Dict{Symbol,Parameter}
const params::LittleDict{Symbol,Parameter,Vector{Symbol},Vector{Parameter}}

function Group{END}(
pos::Int, gid::Int8, locked::Bool, np::Int16, _name::Vector{UInt8}, name::Symbol,
_desc::Vector{UInt8},
params::LittleDict{Symbol,Parameter,Vector{Symbol},Vector{Parameter}}) where {END}
return new{END}(pos, copysign(gid, -1), locked, np, _name, name, _desc, params)
end
end

function Group(
pos::Int, gid::Int8, locked::Bool, _name::Vector{UInt8}, name::Symbol, np::Int16,
_desc::Vector{UInt8}, params::Dict{Symbol,Parameter}
pos::Int, gid::Int8, locked::Bool, np::Int16, _name::Vector{UInt8}, name::Symbol,
_desc::Vector{UInt8}, params::LittleDict{Symbol,Parameter,Vector{Symbol},Vector{Parameter}}
)
return Group{LE{Float32}}(pos, gid, locked, _name, name, np, _desc, params)
return Group{LE{Float32}}(pos, gid, locked, np, _name, name, _desc, params)
end

function Group{END}(
pos, gid, locked, name, np, desc, params=Dict{Symbol,Parameter}()
pos, gid, locked, np, name, desc, params=LittleDict{Symbol,Parameter,Vector{Symbol},Vector{Parameter}}()
) where {END<:AbstractEndian}
return Group{END}(pos, convert(Int8, gid), locked, Vector{UInt8}(name), Symbol(name),
convert(Int16, np), Vector{UInt8}(desc), params)
return Group{END}(pos, convert(Int8, gid), locked, convert(Int16, np),
Vector{UInt8}(name), Symbol(name), Vector{UInt8}(desc), params)
end

function Group(pos, gid, locked, name, np, desc, params=Dict{Symbol,Parameter}())
return Group{LE{Float32}}(pos, convert(Int8, gid), locked, Vector{UInt8}(name),
Symbol(name), convert(Int16, np), Vector{UInt8}(desc), params)
function Group(pos, gid, locked, np, name, desc, params=LittleDict{Symbol,Parameter,Vector{Symbol},Vector{Parameter}}())
return Group{LE{Float32}}(pos, convert(Int8, gid), locked, convert(Int16, np),
Vector{UInt8}(name), Symbol(name), Vector{UInt8}(desc), params)
end

function Group{END}(
name::String, desc::String, params=Dict{Symbol,Parameter}(); gid=0, locked=signbit(gid)
name::String, desc::String, params=LittleDict{Symbol,Parameter,Vector{Symbol},Vector{Parameter}}(); gid=0, locked=signbit(gid)
) where {END<:AbstractEndian}
return Group{END}(0, gid, locked, name, 0, desc, params)
return Group{END}(0, gid, locked, 0, name, desc, params)
end

function Group(name::String, desc::String, params=LittleDict{Symbol,Parameter,Vector{Symbol},Vector{Parameter}}(); gid=0, locked=signbit(gid))
return Group{LE{Float32}}(0, gid, locked, 0, name, desc, params)
end

function Group(name::String, desc::String, params=Dict{Symbol,Parameter}(); gid=0, locked=signbit(gid))
return Group{LE{Float32}}(0, gid, locked, name, 0, desc, params)
function Base.:(==)(g1::Group, g2::Group)
return g1.gid === g2.gid && g1.name === g2.name && g1.params == g2.params
end

gid(g::Group{E}) where E = abs(g.gid)
_position(g::Group{E}) where E = g.pos

function Base.getindex(g::Group, k::Symbol)
return getindex(g.params, k).payload.data
return data(getindex(g.params, k))
end

function Base.getindex(g::Group, ::Type{T}, k::Symbol) where T
Expand Down Expand Up @@ -69,11 +83,22 @@ Base.haskey(g::Group, key) = haskey(g.params, key)

# TODO: Add get! method?
function Base.get(g::Group, key, default)
_key = key isa Tuple ? last(key) : key
return haskey(g, key) ? g[key] : default
end
function Base.get(g::Group, key::Tuple{Type,Symbol}, default::T) where T
_key = last(key)
return haskey(g, _key) ? g[key...] : default
end

Base.show(io::IO, g::Group) = show(io, keys(g.params))

function Base.show(io::IO, g::Group)
print(io, "Group(:$(g.name)) ")
if isempty(keys(g.params))
print(io, "[]")
else
print(io, keys(g.params))
end
end

function Base.show(io::IO, ::MIME"text/plain", g::Group)
print(io, "Group(:$(g.name))")
Expand Down Expand Up @@ -117,13 +142,13 @@ function Base.read(io::IO, ::Type{Group{END}}) where {END<:AbstractEndian}

pointer = pos + np + abs(nl) + 2
@debug "wrong pointer in $name" position(io), pointer maxlog=(position(io) != pointer)
return Group{END}(pos, gid, locked, _name, name, np, desc, Dict{Symbol,Parameter}())
return Group{END}(pos, gid, locked, np, _name, name, desc, LittleDict{Symbol,Parameter,Vector{Symbol},Vector{Parameter}}())
end

function Base.write(io::IO, g::Group{END}; last::Bool=false) where {END}
nb = 0
nb += write(io, flipsign(UInt8(length(g._name)), -1*g.locked))
nb += write(io, g.gid)
nb += write(io, copysign(g.gid, -1))
nb += write(io, g._name)
nb += write(io, last ? 0x0000 : END(Int16)(UInt16(length(g._desc) + 3)))
nb += write(io, UInt8(length(g._desc)))
Expand Down
Loading
Loading