Skip to content

Commit 27c787b

Browse files
authored
Merge pull request #40 from GasChromatographyToolbox/merge_duplicates_slice
Update ThermalModulator.jl
2 parents 495611e + 3387912 commit 27c787b

File tree

1 file changed

+44
-0
lines changed

1 file changed

+44
-0
lines changed

src/ThermalModulator.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,56 @@ function slicing(pl, PM, ratio, shift, par::GasChromatographySimulator.Parameter
106106

107107
#df_A_foc = DataFrame(Name=Name, CAS=CAS, Annotations=Ann_focussed, A=A_focussed+A_unfocussed, t0=t0_foc)
108108
df_A_foc = DataFrame(Name=Name, CAS=CAS, Annotations=Ann_focussed, A=A_focussed, t0=t0_foc)
109+
# merge duplicates, add the areas of the duplicates
110+
df_A_foc = merge_duplicates(df_A_foc)
109111
# t0 is the start time of the slice
110112
# A the area of the sliced peak (including the section during hot-jet [source of error])
111113

112114
return newpar_focussed, df_A_foc
113115
end
114116

117+
"""
118+
merge_duplicates(slice_df)
119+
120+
Remove/merge duplicate entries from a chromatogram slice DataFrame by combining their areas.
121+
122+
# Arguments
123+
- `slice_df`: DataFrame containing chromatogram slice data with columns:
124+
- `CAS`: CAS numbers of compounds
125+
- `t0`: Initial retention times
126+
- `A`: Peak areas
127+
128+
# Returns
129+
- DataFrame with duplicate entries removed, where:
130+
- Duplicate entries (same CAS and t0) are combined
131+
- Areas of duplicate entries are summed
132+
- Only unique combinations of CAS and t0 are kept
133+
134+
# Notes
135+
- Duplicates are identified by matching both CAS numbers and initial retention times (t0)
136+
- When duplicates are found, their areas are summed and assigned to the first occurrence
137+
- The function preserves all other columns in the DataFrame
138+
- Useful for cleaning up chromatogram data where the same compound appears multiple times
139+
with the same initial retention time
140+
"""
141+
function merge_duplicates(slice_df)
    # Rows are grouped by the key (CAS, t0). A Dict compares keys with `isequal`, the
    # same notion of equality DataFrames uses for `unique`/`nonunique`, so `missing` or
    # `NaN` keys are grouped instead of failing as an `==`-based comparison would.
    row_keys = zip(slice_df.CAS, slice_df.t0)
    group_of_key = Dict{eltype(row_keys),Int}()
    # row in `slice_df` of the first entry of every group, in order of appearance
    first_rows = Int[]
    # `group => row` for every further (duplicated) entry of a group
    later_rows = Pair{Int,Int}[]
    for (row, key) in enumerate(row_keys)
        group = get(group_of_key, key, 0)
        if group == 0
            # first time this (CAS, t0) combination is seen: open a new group
            push!(first_rows, row)
            group_of_key[key] = length(first_rows)
        else
            push!(later_rows, group => row)
        end
    end

    # Keep the first entry of every group with all of its columns. Indexing with
    # `[rows, :]` copies the data, so the input `slice_df` is not modified.
    merged_df = slice_df[first_rows, :]

    # Add the areas of the duplicated entries onto the first entry of their group.
    for (group, row) in later_rows
        merged_df.A[group] += slice_df.A[row]
    end
    return merged_df
end
158+
115159
"""
116160
simplifiedTM(T, par, df_A, PM, ratio, shift, Thot)
117161

0 commit comments

Comments
 (0)