@@ -106,12 +106,56 @@ function slicing(pl, PM, ratio, shift, par::GasChromatographySimulator.Parameter
106
106
107
107
# df_A_foc = DataFrame(Name=Name, CAS=CAS, Annotations=Ann_focussed, A=A_focussed+A_unfocussed, t0=t0_foc)
108
108
df_A_foc = DataFrame (Name= Name, CAS= CAS, Annotations= Ann_focussed, A= A_focussed, t0= t0_foc)
109
+ # merge duplicates, add the areas of the duplicates
110
+ df_A_foc = merge_duplicates (df_A_foc)
109
111
# t0 is the start time of the slice
110
112
# A the area of the sliced peak (including the section during hot-jet [source of error])
111
113
112
114
return newpar_focussed, df_A_foc
113
115
end
114
116
117
"""
    merge_duplicates(slice_df)

Merge the rows of a chromatogram slice DataFrame that share the same `CAS` and `t0` by summing their areas.

# Arguments
- `slice_df`: DataFrame containing chromatogram slice data with (at least) the columns:
    - `CAS`: CAS numbers of the compounds
    - `t0`: start times of the slices
    - `A`: areas of the sliced peaks

# Returns
- A new DataFrame with one row per unique combination of `CAS` and `t0`, where:
    - the first occurrence of every combination is kept, in the original row order
    - `A` of the kept row is the sum of the areas of all rows with this combination
    - all other columns hold the values of the first occurrence

# Notes
- Rows are matched with `isequal` semantics, i.e. the same rule `unique` and `nonunique` apply.
  Keys containing `NaN` or `missing` are therefore merged like any other key.
- The areas are accumulated in the result of `unique(slice_df, ...)`; `slice_df` itself is not
  written to by this function.
"""
function merge_duplicates(slice_df)
    key_columns = [:CAS, :t0]
    # `unique` keeps the first row of every (CAS, t0) combination
    merged_df = unique(slice_df, key_columns)
    # rows of `slice_df` which repeat an earlier (CAS, t0) combination
    duplicate_rows = findall(nonunique(slice_df, key_columns))
    isempty(duplicate_rows) && return merged_df

    # Row of every (CAS, t0) combination in the merged DataFrame. `Dict` compares keys with
    # `isequal`/`hash`, which is consistent with how `unique`/`nonunique` identify duplicates.
    row_of_key = Dict(key => row for (row, key) in enumerate(zip(merged_df.CAS, merged_df.t0)))

    # add the area of every duplicate to the area of its first occurrence
    for index in duplicate_rows
        target = row_of_key[(slice_df.CAS[index], slice_df.t0[index])]
        merged_df.A[target] += slice_df.A[index]
    end
    return merged_df
end
158
+
115
159
"""
116
160
simplifiedTM(T, par, df_A, PM, ratio, shift, Thot)
117
161
0 commit comments