Skip to content

Commit 8ec58f7

Browse files
authored
Merge pull request #1425 from samuelgarcia/fix_neuralynx_gaps
Neuralynx : new approach gap tolerance
2 parents 6cd8158 + 1ed52bd commit 8ec58f7

File tree

3 files changed

+118
-263
lines changed

3 files changed

+118
-263
lines changed

neo/rawio/neuralynxrawio/ncssections.py

Lines changed: 95 additions & 257 deletions
Original file line numberDiff line numberDiff line change
@@ -172,264 +172,64 @@ def calc_sample_time(sampFr, startTime, posn):
172172
return round(startTime + NcsSectionsFactory.get_micros_per_samp_for_freq(sampFr) * posn)
173173

174174
@staticmethod
175-
def _parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime):
175+
def _buildNcsSections(ncsMemMap, sampFreq, gapTolerance=0):
176176
"""
177-
Parse sections in memory mapped file when microsPerSampUsed and sampFreqUsed are known,
178-
filling in an NcsSections object.
179-
180-
Parameters
181-
----------
182-
ncsMemMap:
183-
memmap of Ncs file
184-
ncsSections:
185-
NcsSections with actual sampFreqUsed correct, first NcsSection with proper startSect
186-
and startTime already added.
187-
chanNum:
188-
channel number that should be present in all records
189-
reqFreq:
190-
rounded frequency that all records should contain
191-
blkOnePredTime:
192-
predicted starting time of second record in block
193-
194-
Returns
195-
-------
196-
NcsSections object with block locations marked
177+
Construct NcsSections with fast mode when no gaps or parsing the file to detect gaps.
197178
"""
198-
199-
# New code numpy vector based (speedup X50)
200-
delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64)
201-
delta_prediction = ((ncsMemMap["nb_valid"][:-1] / ncsSects.sampFreqUsed) * 1e6).astype(np.int64)
202-
gap_inds = np.flatnonzero((delta - delta_prediction) != 0)
203-
gap_inds += 1
204-
sections_limits = [0] + gap_inds.tolist() + [len(ncsMemMap)]
205-
206-
for i in range(len(gap_inds) + 1):
207-
start = sections_limits[i]
208-
stop = sections_limits[i + 1]
209-
ncsSects.sects.append(
210-
NcsSection(
211-
startRec=start,
212-
startTime=ncsMemMap["timestamp"][start],
213-
endRec=stop - 1,
214-
endTime=ncsMemMap["timestamp"][stop - 1]
215-
+ np.uint64(ncsMemMap["nb_valid"][stop - 1] / ncsSects.sampFreqUsed * 1e6),
216-
n_samples=np.sum(ncsMemMap["nb_valid"][start:stop]),
217-
)
218-
)
219-
220-
return ncsSects
221-
222-
@staticmethod
223-
def _buildGivenActualFrequency(ncsMemMap, actualSampFreq, reqFreq):
224-
"""
225-
Build NcsSections object for file given actual sampling frequency.
226-
227-
Requires that frequency in each record agrees with requested frequency. This is
228-
normally obtained by rounding the header frequency; however, this value may be different
229-
from the rounded actual frequency used in the recording, since the underlying
230-
requirement in older Ncs files was that the number of microseconds per sample in the
231-
records is the inverse of the sampling frequency stated in the header truncated to
232-
whole microseconds.
233-
234-
Parameters
235-
----------
236-
ncsMemMap:
237-
memmap of Ncs file
238-
actualSampFreq:
239-
actual sampling frequency used
240-
reqFreq:
241-
frequency to require in records
242-
243-
Returns
244-
-------
245-
NcsSections object
246-
"""
247-
# check frequency in first record
248-
if ncsMemMap["sample_rate"][0] != reqFreq:
249-
raise IOError("Sampling frequency in first record doesn't agree with header.")
250-
chanNum = ncsMemMap["channel_id"][0]
179+
channel_id = ncsMemMap["channel_id"][0]
251180

252181
ncsSects = NcsSections()
253-
ncsSects.sampFreqUsed = actualSampFreq
254-
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(actualSampFreq)
255182

256183
# check if file is one block of records, which is often the case, and avoid full parse
257-
lastBlkI = ncsMemMap.shape[0] - 1
258-
ts0 = ncsMemMap["timestamp"][0]
259-
nb0 = ncsMemMap["nb_valid"][0]
184+
# need that last timestamp match the predicted one.
260185
predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(
261-
actualSampFreq, ts0, NcsSection._RECORD_SIZE * lastBlkI
186+
sampFreq, ncsMemMap["timestamp"][0], NcsSection._RECORD_SIZE * (ncsMemMap.shape[0] - 1)
262187
)
263-
lts = ncsMemMap["timestamp"][lastBlkI]
264-
lnb = ncsMemMap["nb_valid"][lastBlkI]
265188
if (
266-
ncsMemMap["channel_id"][lastBlkI] == chanNum
267-
and ncsMemMap["sample_rate"][lastBlkI] == reqFreq
268-
and lts == predLastBlockStartTime
189+
channel_id == ncsMemMap["channel_id"][-1]
190+
and ncsMemMap["sample_rate"][0] == ncsMemMap["sample_rate"][-1]
191+
and ncsMemMap["timestamp"][-1] == predLastBlockStartTime
269192
):
270-
lastBlkEndTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, lts, lnb)
271-
n_samples = NcsSection._RECORD_SIZE * lastBlkI
272-
curBlock = NcsSection(0, ts0, lastBlkI, lastBlkEndTime, n_samples)
273-
274-
ncsSects.sects.append(curBlock)
275-
return ncsSects
276-
193+
lastBlkEndTime = NcsSectionsFactory.calc_sample_time(sampFreq, ncsMemMap["timestamp"][-1], ncsMemMap["nb_valid"][-1])
194+
n_samples = NcsSection._RECORD_SIZE * (ncsMemMap.size - 1) + ncsMemMap["nb_valid"][-1]
195+
section0 = NcsSection(
196+
startRec=0,
197+
startTime=ncsMemMap["timestamp"][0],
198+
endRec=ncsMemMap.size - 1,
199+
endTime=lastBlkEndTime,
200+
n_samples=n_samples
201+
)
202+
ncsSects.sects.append(section0)
203+
277204
else:
278-
# otherwise need to scan looking for data gaps
279-
blkOnePredTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, ts0, nb0)
280-
# curBlock = NcsSection(0, ts0, -1, -1, -1)
281-
# ncsSects.sects.append(curBlock)
282-
ncsSects = NcsSectionsFactory._parseGivenActualFrequency(
283-
ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime
284-
)
285-
return ncsSects
286-
287-
@staticmethod
288-
def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen):
289-
"""
290-
Parse blocks of records from file, allowing a maximum gap in timestamps between records
291-
in sections. Estimates frequency being used based on timestamps.
292-
293-
Parameters
294-
----------
295-
ncsMemMap:
296-
memmap of Ncs file
297-
ncsSects:
298-
NcsSections object with sampFreqUsed set to nominal frequency to use in computing time
299-
for samples (Hz)
300-
maxGapLen:
301-
maximum difference within a block between predicted time of start of record and
302-
recorded time
303-
304-
Returns
305-
-------
306-
NcsSections object with sampFreqUsed and microsPerSamp set based on estimate from
307-
largest block
308-
"""
309-
310-
chanNum = ncsMemMap["channel_id"][0]
311-
recFreq = ncsMemMap["sample_rate"][0]
312-
313-
# check for consistent channel_ids and sampling rates
314-
ncsMemMap["channel_id"]
315-
if not (ncsMemMap["channel_id"] == chanNum).all():
316-
raise IOError("Channel number changed in records within file")
317-
318-
if not all(ncsMemMap["sample_rate"] == recFreq):
319-
raise IOError("Sampling frequency changed in records within file")
320-
321-
# find most frequent number of samples
322-
exp_nb_valid = np.argmax(np.bincount(ncsMemMap["nb_valid"]))
323-
# detect records with incomplete number of samples
324-
gap_rec_ids = list(np.where(ncsMemMap["nb_valid"] != exp_nb_valid)[0])
325-
326-
rec_duration = 1e6 / ncsSects.sampFreqUsed * ncsMemMap["nb_valid"]
327-
pred_times = np.rint(ncsMemMap["timestamp"] + rec_duration).astype(np.int64)
328-
max_pred_times = pred_times + maxGapLen
329-
# data records that start later than the predicted time (including the
330-
# maximal accepted gap length) are considered delayed and a gap is
331-
# registered.
332-
delayed_recs = list(np.where(max_pred_times[:-1] < ncsMemMap["timestamp"][1:])[0])
333-
gap_rec_ids.extend(delayed_recs)
334-
335-
# cleaning extracted gap ids
336-
# last record can not be the beginning of a gap
337-
last_rec_id = len(ncsMemMap["timestamp"]) - 1
338-
if last_rec_id in gap_rec_ids:
339-
gap_rec_ids.remove(last_rec_id)
340-
341-
# gap ids can only be listed once
342-
gap_rec_ids = sorted(set(gap_rec_ids))
343-
344-
# create recording segments from identified gaps
345-
ncsSects.sects.append(NcsSection(0, ncsMemMap["timestamp"][0], -1, -1, -1))
346-
for gap_rec_id in gap_rec_ids:
347-
curr_sec = ncsSects.sects[-1]
348-
curr_sec.endRec = gap_rec_id
349-
curr_sec.endTime = pred_times[gap_rec_id]
350-
n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec : gap_rec_id + 1])
351-
curr_sec.n_samples = n_samples
352-
353-
next_sec = NcsSection(gap_rec_id + 1, ncsMemMap["timestamp"][gap_rec_id + 1], -1, -1, -1)
354-
ncsSects.sects.append(next_sec)
355-
356-
curr_sec = ncsSects.sects[-1]
357-
curr_sec.endRec = len(ncsMemMap["timestamp"]) - 1
358-
curr_sec.endTime = pred_times[-1]
359-
n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec :])
360-
curr_sec.n_samples = n_samples
361-
362-
# calculate the estimated frequency of the block with the most samples
363-
max_blk_idx = np.argmax([bl.endRec - bl.startRec for bl in ncsSects.sects])
364-
max_blk = ncsSects.sects[max_blk_idx]
365-
366-
maxBlkFreqEstimate = (
367-
(max_blk.n_samples - ncsMemMap["nb_valid"][max_blk.endRec])
368-
* 1e6
369-
/ (ncsMemMap["timestamp"][max_blk.endRec] - max_blk.startTime)
370-
)
205+
# need to parse all data block to detect gaps
206+
# check when the predicted timestamp is outside the tolerance
207+
delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64)
208+
delta_prediction = ((ncsMemMap["nb_valid"][:-1] / sampFreq) * 1e6).astype(np.int64)
209+
210+
gap_inds = np.flatnonzero(np.abs(delta - delta_prediction) > gapTolerance)
211+
gap_inds += 1
212+
213+
sections_limits = [ 0 ] + gap_inds.tolist() + [len(ncsMemMap)]
214+
215+
for i in range(len(gap_inds) + 1):
216+
start = sections_limits[i]
217+
stop = sections_limits[i + 1]
218+
duration = np.uint64(1e6 / sampFreq * ncsMemMap["nb_valid"][stop-1])
219+
ncsSects.sects.append(
220+
NcsSection(
221+
startRec=start,
222+
startTime=ncsMemMap["timestamp"][start],
223+
endRec=stop-1,
224+
endTime=ncsMemMap["timestamp"][stop-1] + duration,
225+
n_samples=np.sum(ncsMemMap["nb_valid"][start:stop])
226+
)
227+
)
371228

372-
ncsSects.sampFreqUsed = maxBlkFreqEstimate
373-
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(maxBlkFreqEstimate)
374-
# free memory that is unnecessarily occupied by the memmap
375-
# (see https://github.com/numpy/numpy/issues/19340)
376-
del ncsMemMap
377229
return ncsSects
378230

379231
@staticmethod
380-
def _buildForMaxGap(ncsMemMap, nomFreq):
381-
"""
382-
Determine sections of records in memory mapped Ncs file given a nominal frequency of
383-
the file, using the default values of frequency tolerance and maximum gap between blocks.
384-
385-
Parameters
386-
----------
387-
ncsMemMap:
388-
memmap of Ncs file
389-
nomFreq:
390-
nominal sampling frequency used, normally from header of file
391-
392-
Returns
393-
-------
394-
NcsSections object
395-
"""
396-
nb = NcsSections()
397-
398-
numRecs = ncsMemMap.shape[0]
399-
if numRecs < 1:
400-
return nb
401-
402-
chanNum = ncsMemMap["channel_id"][0]
403-
ts0 = ncsMemMap["timestamp"][0]
404-
405-
lastBlkI = numRecs - 1
406-
lts = ncsMemMap["timestamp"][lastBlkI]
407-
lcid = ncsMemMap["channel_id"][lastBlkI]
408-
lnb = ncsMemMap["nb_valid"][lastBlkI]
409-
lsr = ncsMemMap["sample_rate"][lastBlkI]
410-
411-
# check if file is one block of records, with exact timestamp match, which may be the case
412-
numSampsForPred = NcsSection._RECORD_SIZE * lastBlkI
413-
predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(nomFreq, ts0, numSampsForPred)
414-
freqInFile = math.floor(nomFreq)
415-
if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile:
416-
endTime = NcsSectionsFactory.calc_sample_time(nomFreq, lts, lnb)
417-
curBlock = NcsSection(0, ts0, lastBlkI, endTime, numSampsForPred)
418-
nb.sects.append(curBlock)
419-
nb.sampFreqUsed = (numSampsForPred + lnb) / (endTime - ts0) * 1e6
420-
nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed)
421-
422-
# otherwise parse records to determine blocks using default maximum gap length
423-
else:
424-
nb.sampFreqUsed = nomFreq
425-
nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed)
426-
maxGapToAllow = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / nomFreq)
427-
nb = NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow)
428-
429-
return nb
430-
431-
@staticmethod
432-
def build_for_ncs_file(ncsMemMap, nlxHdr):
232+
def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=True):
433233
"""
434234
Build an NcsSections object for an NcsFile, given as a memmap and NlxHeader,
435235
handling gap detection appropriately given the file type as specified by the header.
@@ -446,32 +246,70 @@ def build_for_ncs_file(ncsMemMap, nlxHdr):
446246
An NcsSections corresponding to the provided ncsMemMap and nlxHdr
447247
"""
448248
acqType = nlxHdr.type_of_recording()
249+
freq = nlxHdr["sampling_rate"]
449250

450-
# Old Neuralynx style with truncated whole microseconds for actual sampling. This
451-
# restriction arose from the sampling being based on a master 1 MHz clock.
452251
if acqType == "PRE4":
453-
freq = nlxHdr["sampling_rate"]
252+
# Old Neuralynx style with truncated whole microseconds for actual sampling. This
253+
# restriction arose from the sampling being based on a master 1 MHz clock.
454254
microsPerSampUsed = math.floor(NcsSectionsFactory.get_micros_per_samp_for_freq(freq))
455255
sampFreqUsed = NcsSectionsFactory.get_freq_for_micros_per_samp(microsPerSampUsed)
456-
nb = NcsSectionsFactory._buildGivenActualFrequency(ncsMemMap, sampFreqUsed, math.floor(freq))
457-
nb.sampFreqUsed = sampFreqUsed
458-
nb.microsPerSampUsed = microsPerSampUsed
256+
if gapTolerance is None:
257+
if strict_gap_mode:
258+
# this is the old behavior, maybe we could put 0.9 sample interval no ?
259+
gapTolerance = 0
260+
else:
261+
gapTolerance = 0
262+
263+
ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, sampFreqUsed, gapTolerance=gapTolerance)
264+
ncsSects.sampFreqUsed = sampFreqUsed
265+
ncsSects.microsPerSampUsed = microsPerSampUsed
459266

460-
# digital lynx style with fractional frequency and micros per samp determined from
461-
# block times
462267
elif acqType in ["DIGITALLYNX", "DIGITALLYNXSX", "CHEETAH64", "CHEETAH560", "RAWDATAFILE"]:
463-
nomFreq = nlxHdr["sampling_rate"]
464-
nb = NcsSectionsFactory._buildForMaxGap(ncsMemMap, nomFreq)
268+
# digital lynx style with fractional frequency and micros per samp determined from block times
269+
if gapTolerance is None:
270+
if strict_gap_mode:
271+
# this is the old behavior
272+
gapTolerance = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / freq)
273+
else:
274+
# quarter of paquet size is tolerate
275+
gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq)
276+
ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, freq, gapTolerance=gapTolerance)
277+
278+
279+
# take longer data block to compute reaal sampling rate
280+
# ind_max = np.argmax([section.n_samples for section in ncsSects.sects])
281+
ind_max = np.argmax([section.endRec - section.startRec for section in ncsSects.sects])
282+
section = ncsSects.sects[ind_max]
283+
if section.endRec != section.startRec:
284+
# need several data block
285+
sampFreqUsed = (
286+
(section.n_samples - ncsMemMap["nb_valid"][section.endRec])
287+
* 1e6
288+
/ (ncsMemMap["timestamp"][section.endRec] - section.startTime)
289+
)
290+
else:
291+
sampFreqUsed = freq
292+
ncsSects.sampFreqUsed = sampFreqUsed
293+
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(sampFreqUsed)
465294

466-
# BML & ATLAS style with fractional frequency and micros per samp
295+
467296
elif acqType == "BML" or acqType == "ATLAS":
468-
sampFreqUsed = nlxHdr["sampling_rate"]
469-
nb = NcsSectionsFactory._buildGivenActualFrequency(ncsMemMap, sampFreqUsed, math.floor(sampFreqUsed))
297+
# BML & ATLAS style with fractional frequency and micros per samp
298+
if strict_gap_mode:
299+
# this is the old behavior, maybe we could put 0.9 sample interval no ?
300+
gapTolerance = 0
301+
else:
302+
# quarter of paquet size is tolerate
303+
gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq)
304+
ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, freq, gapTolerance=gapTolerance)
305+
ncsSects.sampFreqUsed = freq
306+
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(freq)
307+
470308

471309
else:
472310
raise TypeError("Unknown Ncs file type from header.")
473311

474-
return nb
312+
return ncsSects
475313

476314
@staticmethod
477315
def _verifySectionsStructure(ncsMemMap, ncsSects):

0 commit comments

Comments
 (0)