Merge pull request #13 from biomarkersParkinson/ppg_toolbox_deliverab…

…les_until_predictions Ppg toolbox deliverables until predictions
biomarkersParkinson · Mar 26, 2024 · d23a938 · d23a938
2 parents 7b8db1a + 79d3955
commit d23a938
Show file tree

Hide file tree

Showing 52 changed files with 874 additions and 0 deletions.
diff --git a/docs/matlab_scripts/main_sqa_toolbox.m b/docs/matlab_scripts/main_sqa_toolbox.m
diff --git a/src/dbpd/ppg/classifier/LR_model.mat b/src/dbpd/ppg/classifier/LR_model.mat
diff --git a/src/dbpd/ppg/feat_extraction/imu_psd_feature.m b/src/dbpd/ppg/feat_extraction/imu_psd_feature.m
@@ -0,0 +1,40 @@
+function ratio_imu = imu_psd_feature(imu_segment, ppg_segment, fs_imu, fs_ppg)
+
+    % Initalize parameters for psd feature IMU
+    pwelchwin_imu = 3*fs_imu;       % Using pwelch for 3 seconds
+    pwelchwin_ppg = 3*fs_ppg;
+    perc_overlap = 0.5;
+    noverlap_imu = perc_overlap*pwelchwin_imu;   % overlap between windows is 50 %
+    noverlap_ppg = perc_overlap*pwelchwin_ppg;
+
+    f_bin_res = 0.05;   % the treshold is set based on this binning --> so range of 0.1 Hz for calculating the PSD feature 
+    nfft_ppg = 0:f_bin_res:fs_ppg/2;        % create equal binning for ppg and imu
+    nfft_imu = 0:f_bin_res:fs_imu/2;
+
+    [pxx1,f1] = pwelch(imu_segment,hann(pwelchwin_imu), noverlap_imu, nfft_imu, fs_imu);    % calculate psd using pwelch
+    PSD_imu = sum(pxx1,2);     % sum over the three axis
+    [pxx2,f2] = pwelch(ppg_segment,hann(pwelchwin_ppg), noverlap_ppg, nfft_ppg, fs_ppg);
+    PSD_ppg = sum(pxx2,2);      % not relevant ...
+
+    [~, max_PPG_psd_idx] = max(PSD_ppg);
+    max_PPG_freq_psd = f2(max_PPG_psd_idx);
+
+    %%---check dominant frequency (df) indices----%%
+    [~, corr_imu_psd_df_idx] = min(abs(max_PPG_freq_psd-f1)); 
+
+    df_idx = corr_imu_psd_df_idx-1:corr_imu_psd_df_idx+1;
+
+    %%---check first harmonic (fh) frequency indices----%%
+    [~, corr_imu_psd_fh_idx] = min(abs(max_PPG_freq_psd*2-f1)); 
+    fh_idx = corr_imu_psd_fh_idx-1:corr_imu_psd_fh_idx+1;
+
+    %%---check half dominant frequency----%%  Sometimes this is the dominant frequency and the first harmonic is related to the PPG!
+    [~, corr_imu_psd_fdom_idx] = min(abs(max_PPG_freq_psd/2-f1)); 
+    fdom_idx = corr_imu_psd_fdom_idx-1:corr_imu_psd_fdom_idx+1;
+
+    acc_power_PPG_range = trapz(f1(df_idx), PSD_imu(df_idx)) + trapz(f1(fh_idx), PSD_imu(fh_idx)) + trapz(f1(fdom_idx), PSD_imu(fdom_idx));   % Add the correct indices and calculate power using trapezoidal integration
+    acc_power_total = trapz(f1, PSD_imu);
+
+    ratio_imu = acc_power_PPG_range/acc_power_total;        % calculate the relative power between power in IMU using the PPG range and total power in IMU
+
+end
diff --git a/src/dbpd/ppg/feat_extraction/ppg_features.m b/src/dbpd/ppg/feat_extraction/ppg_features.m
@@ -0,0 +1,53 @@
+function [FeaturesPPG] = ppg_features(PPG,fs)
+
+    N_feat = 10;
+    FeaturesPPG = zeros(1, N_feat);
+    % Time-domain features
+    absPPG = abs(PPG);
+    FeaturesPPG(1) = var(PPG); % Feature 1: variance
+    FeaturesPPG(2) = mean(absPPG); % Feature 2: mean
+    FeaturesPPG(3) = median(absPPG); % Feature 3: median
+    FeaturesPPG(4) = kurtosis(PPG); % Feature 4: kurtosis
+    FeaturesPPG(5) = skewness(PPG); % Feature 5: skewness
+
+    window = 3*fs;   % 90 samples for Welch's method => fr = 2/3 = 0.67 Hz --> not an issue with a clear distinct frequency
+    overlap = 0.5*window; % 45 samples overlap for Welch's Method
+
+    [P, f] = pwelch(PPG, window, overlap, [], fs);
+
+    % Find the dominant frequency
+    [~, maxIndex] = max(P);
+    FeaturesPPG(6) = f(maxIndex);    % Feature 6: dominant frequency
+
+    ph_idx = find(f >= 0.75 & f <= 3); % find indices of f in relevant physiological heart range 45-180 bpm
+    [~, maxIndex_ph] = max(P(ph_idx)); %  Index of dominant frequency
+    dominantFrequency_ph = f(ph_idx(maxIndex_ph)); % Extract dominant frequency 
+    f_dom_band = find(f >= dominantFrequency_ph - 0.2 & f <= dominantFrequency_ph + 0.2); %
+    FeaturesPPG(7) = trapz(P(f_dom_band))/trapz(P);  % Feature 7 = relative power
+
+
+    % Normalize the power spectrum
+    pxx_norm = P / sum(P);
+
+    % Compute spectral entropy
+    FeaturesPPG(8) = -sum(pxx_norm .* log2(pxx_norm))/log2(length(PPG)); % Feature 8 = spectral entropy --> normalize between 0 and 1! Or should we perform this operation at the min-max normalization! No because the values can come from different lengths!
+
+    % Signal to noise ratio
+    Signal = var(PPG);
+    Noise = var(absPPG);
+    FeaturesPPG(9) = Signal/Noise; % Feature 9 = surrogate of signal to noise ratio
+
+    %% Autocorrelation features
+
+    [acf, ~] = autocorr(PPG, 'NumLags', fs*3);
+    [peakValues, ~] = peakdet(acf, 0.01);
+    sortedValues = sort(peakValues(:,2), 'descend');     % sort the peaks found in the corellogram
+    if length(sortedValues) > 1
+        FeaturesPPG(10) = sortedValues(2); % determine the second peak as the highest peak after the peak at lag=0
+    else
+        FeaturesPPG(10) = 0;              % Set at 0 if there is no clear second peak
+    end
+
+
+
+
diff --git a/src/dbpd/ppg/glob_functions/extract_overlapping_segments.m b/src/dbpd/ppg/glob_functions/extract_overlapping_segments.m
@@ -0,0 +1,26 @@
+function [ppg_indices, imu_indices] = extract_overlapping_segments(ts_ppg, ts_imu, t_unix_ppg, t_unix_imu)
+% K.I. Veldkamp, PhD student AI4P, 29-02-24
+% Function to extract indices indicating overlapping data between IMU and
+% PPG segment
+    % Convert Unix timestamps to absolute time
+    ppg_absolute_time = datetime(ts_ppg / 1000, 'ConvertFrom', 'posixtime');
+    imu_absolute_time = datetime(ts_imu / 1000, 'ConvertFrom', 'posixtime');
+
+    % Convert UNIX milliseconds to seconds
+    ppg_time = t_unix_ppg / 1000; % Convert milliseconds to seconds
+    imu_time = t_unix_imu / 1000; % Convert milliseconds to seconds
+
+    % Determine the overlapping time interval
+    start_time = max(ppg_absolute_time, imu_absolute_time);
+    end_time = min(ppg_absolute_time + seconds(ppg_time(end) - ppg_time(1)), imu_absolute_time + seconds(imu_time(end)-imu_time(1)));
+
+    % Convert overlapping time interval to indices
+    ppg_start_index = find(ppg_time >= posixtime(start_time), 1);
+    ppg_end_index = find(ppg_time <= posixtime(end_time), 1, 'last');
+    imu_start_index = find(imu_time >= posixtime(start_time), 1);
+    imu_end_index = find(imu_time <= posixtime(end_time), 1, 'last');
+
+    % Extract overlapping segments
+    ppg_indices = [ppg_start_index, ppg_end_index];
+    imu_indices = [imu_start_index, imu_end_index];
+end
diff --git a/src/dbpd/ppg/glob_functions/tsdf_scan_meta.m b/src/dbpd/ppg/glob_functions/tsdf_scan_meta.m
@@ -0,0 +1,22 @@
+function tsdf = tsdf_scan_meta(tsdf_data_full_path)
+% K.I. Veldkamp, PhD student AI4P, 29-02-24
+% For each given TSDB directory, transcribe TSDF metadata contents to SQL
+% table --> function specific for toolbox data structure
+tsdf = [];
+irow = 1;
+
+meta_list = dir(fullfile(tsdf_data_full_path, '*_meta.json'));
+meta_filenames = {meta_list.name};
+
+jsonobj = {};
+for n = 1:length(meta_filenames)
+    tsdb_meta_fullpath = fullfile(tsdf_data_full_path, meta_filenames{n});
+    jsonstr = fileread(tsdb_meta_fullpath);
+    jsonobj{n} = loadjson(jsonstr);
+    tsdf(irow).tsdf_meta_fullpath = tsdb_meta_fullpath;
+    tsdf(irow).subject_id = jsonobj{n}.subject_id;
+    tsdf(irow).start_iso8601 = jsonobj{n}.start_iso8601;
+    tsdf(irow).end_iso8601 = jsonobj{n}.end_iso8601;
+    irow = irow + 1;
+end
+
diff --git a/src/dbpd/ppg/hr_functions/Long_TFD_JOT.m b/src/dbpd/ppg/hr_functions/Long_TFD_JOT.m
@@ -0,0 +1,37 @@
+%% Implementation using the toolbox of J. O'Toole
+
+% Only the essential code is used. The function nonsep_gdtfd is code from
+% the toolbox of J. O'Toole: Copyright © 2014, John M. O  Toole, University College Cork. All rights reserved.
+function HR_smooth_tfd = Long_TFD_JOT(rel_ppg_tfd, MA, fs, kern_type, kern_params)
+
+tfd = nonsep_gdtfd(rel_ppg_tfd, kern_type, kern_params);   % for now the wigner ville distribution but one could also use the smoothed pseudo WVD --> returns matrix of size NxN 
+
+if MA.value == 1
+    input = tfd';            
+    tfd = filtfilt(MA.FC,1,input);
+    tfd = tfd.';
+end
+
+%%----- Get time and frequency axis for tfd-----%%
+[~,M]=size(tfd);
+Ntime=size(tfd,1);   
+
+ntime=1:Ntime; ntime=ntime./fs-1/fs;       % time array
+Mh=ceil(M);
+k=linspace(0,0.5,Mh);
+k=k.*fs;                                   % frequency array
+
+%%---Estimate HR using same approach as for given tfd----%%
+[~, k_idx] = max(tfd, [], 1);
+
+
+count = 0;
+
+for i = 2:2:length(rel_ppg_tfd)/fs-4            % starting at 2 and ending at length -4 to discard the first and last 2 sec of the WVD which are influenced by boundary effects
+    count = count + 1;
+    rel_wvd_idx = ntime>=i & ntime<i+2;
+    HR_smooth_tfd(count) = 60*mean(k(k_idx(rel_wvd_idx))); 
+
+end
+
+end
diff --git a/src/dbpd/ppg/hr_functions/PPG_TFD_HR.m b/src/dbpd/ppg/hr_functions/PPG_TFD_HR.m
@@ -0,0 +1,58 @@
+function HR_smooth_tfd = PPG_TFD_HR(rel_ppg_wvd, tfd_length, MA, fs, kern_type, kern_params)
+% Function to estimate the HR using a particular TFD method, such as WVD,
+% SPWVD or other kernel methods over segments of 30 s. If the remaining segment
+% length is longer than 30 s it is calculated over the entire length to
+% include all data into the analysis. 
+
+% Input:
+% - rel_ppg_wvd: ppg signal with an addition of 2s on both sides to
+% overcome boundary effects
+% - tfd_length: set the length to calculate time frequency distribution
+% - MA: struct containing the information to perform a moving average
+% filter over the TFD to overcome fundamental frequency switching
+% - fs: sample frequency
+% - kern_type: specified as a string such as 'wvd' for the wigner ville
+% distribution
+% - kern_params: kernel specifications, not relevant for 'wvd', but they
+% are for 'spwvd', 'swvd' etc.
+
+% Output:
+% - HR_smooth_tfd: array containing an estimation of the HR for every 2 s
+% of the relevant epoch (without the additional 2 s on both sides)
+
+if ~exist('kern_params', 'var')
+    kern_params = {};     % Function also works without having imu_label --> then based on ppg_prob alone!
+end
+
+edge_add = 4;       % adding an additional 4 sec (2 s to both sides) to overcome boundary effects/discontinuities of the WVD, in SPWVD this effect is already diminished due to a double kernel function
+epoch_length = tfd_length + edge_add;
+segment_length = (length(rel_ppg_wvd)-edge_add*fs)/fs;              % substract the 4 added sec to obtain the original segment length
+
+if segment_length > epoch_length
+    n_segments = floor(segment_length/tfd_length); %% Dit moet aangepast worden!!
+else 
+    n_segments = 1;  % for HR segments which are shorter than 30 s due to uneven start and end of the segment which can make the segment 28 s if 1 s is substracted
+end
+
+ppg_segments = cell(n_segments,1);
+
+for i = 1:n_segments                        % Split segments in 30 s PPG epochs 
+
+    if i ~= n_segments          
+        ppg_segments{i} = rel_ppg_wvd(1 + (i-1)*tfd_length*fs: (i*tfd_length+edge_add)*fs);
+
+    else 
+
+        ppg_segments{i} = rel_ppg_wvd(1 + (i-1)*tfd_length*fs:end);
+
+    end
+
+end
+
+HR_smooth_tfd = [];
+
+for j = 1:n_segments                    % Calculate the HR
+    ppg = ppg_segments{j};
+    HR_tfd = Long_TFD_JOT(ppg, MA, fs, kern_type, kern_params);
+    HR_smooth_tfd = [HR_smooth_tfd HR_tfd];
+end
diff --git a/src/dbpd/ppg/hr_functions/sample_prob_final.m b/src/dbpd/ppg/hr_functions/sample_prob_final.m
@@ -0,0 +1,47 @@
+function data_prob = sample_prob_final(ppg_prob, imu_label)
+% K.I. Veldkamp, PhD student AI4P, 29-02-24
+
+%%--Assign probability to every individual data point!--%%
+% Inputs:
+%   - ppg_prob
+%   - imu label
+% Output:
+% - data_prob: containing of an array with for every sample a probability.
+% This can be mapped to the samples of the synced PPG-IMU
+
+if ~exist('imu_label', 'var')
+    imu_label = ones(length(ppg_prob));     % Function also works without having imu_label --> then based on ppg_prob alone!
+end
+
+epoch_length = 6; % in seconds
+overlap = 5; % in seconds
+fs = 30;
+% Number of samples in epoch
+samples_per_epoch = epoch_length * fs;
+
+% Calculate number of samples to shift for each epoch
+samples_shift = (epoch_length - overlap) * fs;
+
+fs_ppg = 30;
+n_samples = (length(ppg_prob) + overlap) * fs_ppg;
+data_prob = zeros(n_samples,1);
+
+prob_array = ppg_prob;
+prob_array(imu_label == 0) = 0;
+
+for i = 1:n_samples
+    start_idx = ceil((i-(samples_per_epoch-samples_shift))/fs);  %start_idx for the non starting and ending epochs is equal to ceil((data idx - n_overlap)/fs)
+    end_idx = ceil(i/fs);
+
+    %%-----Correct for first and last 6s epochs (those have less than 6 epochs to calculate labels and prob)-----%%
+    if start_idx < 1 
+        start_idx = 1;     % The first 5 epochs indices start at 1
+    elseif end_idx>length(prob_array) 
+        end_idx = length(prob_array); % The last 5 epochs indices end at the last label
+    end
+
+    prob = prob_array(start_idx:end_idx);
+    data_prob(i) = mean(prob);
+end
+
+end
diff --git a/src/dbpd/ppg/preprocessing/preprocessing_imu.m b/src/dbpd/ppg/preprocessing/preprocessing_imu.m
@@ -0,0 +1,11 @@
+function [IMU_filt, t_tar] = preprocessing_imu(t_orig, IMU_sample, fs_imu) 
+%%----Preprocessing of the IMU pipeline----%%                        
+    % Resampling to a uniform sampling rate at 30 Hz                                  
+    t_tar = t_orig(1):1/fs_imu:t_orig(end);         % Correct target for resampling (by usage of t_start = t_orig(1) and t_end = t_orig(end));
+    signal_resampled = spline(t_orig, IMU_sample', t_tar);
+
+    % High-pass filter for detrending
+    [b,a]=butter(6,0.4/(fs_imu/2),'high');  % high-pass filter for gravity removal
+    IMU_filt = filtfilt(b,a,double(signal_resampled'));
+
+end
diff --git a/src/dbpd/ppg/preprocessing/preprocessing_ppg.m b/src/dbpd/ppg/preprocessing/preprocessing_ppg.m
@@ -0,0 +1,13 @@
+function [PPG_data, t_tar] = preprocessing_ppg(t_orig, PPG_sample, fs_ppg)         
+
+    % Resampling to a uniform sampling rate at 30 Hz                  
+    t_tar = t_orig(1):1/fs_ppg:t_orig(end);         % Correct target for resampling (by usage of t_start and t_end);
+    signal_resampled = spline(t_orig, PPG_sample, t_tar); % Spline interpolation used for resampling
+
+    % Band-pass filter for detrending
+
+    [b,a]=butter(6,[0.4, 3.5]/(fs_ppg/2),'bandpass');  % band-pass filter for detrending
+    PPG_filt = filtfilt(b,a,double(signal_resampled)); % Apply filter
+    PPG_data = PPG_filt';   % In case of a sample package error where PPG_filt consists of multiple "parts"
+
+end
diff --git a/tests/data/0.classifiers/asd_classifier.pkl → ...ata/0.classifiers/gait/asd_classifier.pkl b/tests/data/0.classifiers/asd_classifier.pkl → ...ata/0.classifiers/gait/asd_classifier.pkl
diff --git a/tests/data/0.classifiers/gd_classifier.pkl → ...data/0.classifiers/gait/gd_classifier.pkl b/tests/data/0.classifiers/gd_classifier.pkl → ...data/0.classifiers/gait/gd_classifier.pkl
diff --git a/tests/data/0.classifiers/gd_thresholds.txt → ...data/0.classifiers/gait/gd_thresholds.txt b/tests/data/0.classifiers/gd_thresholds.txt → ...data/0.classifiers/gait/gd_thresholds.txt
diff --git a/tests/data/0.classifiers/ppg/LR_model.mat b/tests/data/0.classifiers/ppg/LR_model.mat
diff --git a/tests/data/2.preprocessed_data/PPG/PPG_meta.json b/tests/data/2.preprocessed_data/PPG/PPG_meta.json
@@ -0,0 +1,35 @@
+{
+    "study_id": "PPP",
+    "device_id": "Verily Study Watch",
+    "subject_id": "0A0B82C94960D6DCABC1F597EC0BA657F4B0EDC320702BCEE3B6955CE924DE05",
+    "ppp_source_protobuf": "WatchData.PPG.Week104.raw",
+    "metadata_version": "0.1",
+    "start_iso8601": "27-Jun-2021 16:52:20",
+    "end_iso8601": "27-Jun-2021 17:03:04",
+    "scale_factors": 1,
+    "rows": 19338,
+    "data_type": "float",
+    "endianness": "little",
+    "bits": 64,
+    "freq_sampling_original": 100.49069555614177,
+    "sensors": [
+        {
+            "file_name": "PPG_time.bin",
+            "channels": [
+                "time"
+            ],
+            "units": [
+                "ms"
+            ]
+        },
+        {
+            "file_name": "PPG_samples.bin",
+            "channels": [
+                "green"
+            ],
+            "units": [
+                "none"
+            ]
+        }
+    ]
+}
diff --git a/tests/data/2.preprocessed_data/PPG/PPG_samples.bin b/tests/data/2.preprocessed_data/PPG/PPG_samples.bin
diff --git a/tests/data/2.preprocessed_data/PPG/PPG_time.bin b/tests/data/2.preprocessed_data/PPG/PPG_time.bin
diff --git a/tests/data/2.preprocessed_data/PPG/acceleration_meta.json b/tests/data/2.preprocessed_data/PPG/acceleration_meta.json
@@ -0,0 +1,40 @@
+{
+    "study_id": "PPP",
+    "device_id": "Verily Study Watch",
+    "subject_id": "0A0B82C94960D6DCABC1F597EC0BA657F4B0EDC320702BCEE3B6955CE924DE05",
+    "ppp_source_protobuf": "WatchData.IMU.Week104.raw",
+    "metadata_version": "0.1",
+    "start_iso8601": "27-Jun-2021 16:52:20",
+    "end_iso8601": "27-Jun-2021 17:03:04",
+    "scale_factors": 1,
+    "rows": 64459,
+    "data_type": "float",
+    "endianness": "little",
+    "bits": 64,
+    "sensors": [
+        {
+            "file_name": "acceleration_time.bin",
+            "channels": [
+                "time"
+            ],
+            "units": [
+                "ms"
+            ],
+            "freq_sampling_original": 100.49069555614177
+        },
+        {
+            "file_name": "acceleration_samples.bin",
+            "channels": [
+                "acceleration_x",
+                "acceleration_y",
+                "acceleration_z"
+            ],
+            "units": [
+                "m/s/s",
+                "m/s/s",
+                "m/s/s"
+            ],
+            "freq_sampling_original": 99.82007835743657
+        }
+    ]
+}
diff --git a/tests/data/2.preprocessed_data/PPG/acceleration_samples.bin b/tests/data/2.preprocessed_data/PPG/acceleration_samples.bin
diff --git a/tests/data/2.preprocessed_data/PPG/acceleration_time.bin b/tests/data/2.preprocessed_data/PPG/acceleration_time.bin
diff --git a/....preprocessed_data/acceleration_meta.json → ...rocessed_data/gait/acceleration_meta.json b/....preprocessed_data/acceleration_meta.json → ...rocessed_data/gait/acceleration_meta.json
diff --git a/...reprocessed_data/acceleration_samples.bin → ...cessed_data/gait/acceleration_samples.bin b/...reprocessed_data/acceleration_samples.bin → ...cessed_data/gait/acceleration_samples.bin
diff --git a/...2.preprocessed_data/acceleration_time.bin → ...processed_data/gait/acceleration_time.bin b/...2.preprocessed_data/acceleration_time.bin → ...processed_data/gait/acceleration_time.bin
diff --git a/...ta/2.preprocessed_data/rotation_meta.json → ...preprocessed_data/gait/rotation_meta.json b/...ta/2.preprocessed_data/rotation_meta.json → ...preprocessed_data/gait/rotation_meta.json
diff --git a/.../2.preprocessed_data/rotation_samples.bin → ...eprocessed_data/gait/rotation_samples.bin b/.../2.preprocessed_data/rotation_samples.bin → ...eprocessed_data/gait/rotation_samples.bin
diff --git a/...ata/2.preprocessed_data/rotation_time.bin → ....preprocessed_data/gait/rotation_time.bin b/...ata/2.preprocessed_data/rotation_time.bin → ....preprocessed_data/gait/rotation_time.bin
diff --git a/.../3.extracted_features/arm_swing_meta.json → ...tracted_features/gait/arm_swing_meta.json b/.../3.extracted_features/arm_swing_meta.json → ...tracted_features/gait/arm_swing_meta.json
diff --git a/...a/3.extracted_features/arm_swing_time.bin → ...xtracted_features/gait/arm_swing_time.bin b/...a/3.extracted_features/arm_swing_time.bin → ...xtracted_features/gait/arm_swing_time.bin
diff --git a/...3.extracted_features/arm_swing_values.bin → ...racted_features/gait/arm_swing_values.bin b/...3.extracted_features/arm_swing_values.bin → ...racted_features/gait/arm_swing_values.bin
diff --git a/.../data/3.extracted_features/gait_meta.json → .../3.extracted_features/gait/gait_meta.json b/.../data/3.extracted_features/gait_meta.json → .../3.extracted_features/gait/gait_meta.json
diff --git a/...s/data/3.extracted_features/gait_time.bin → ...a/3.extracted_features/gait/gait_time.bin b/...s/data/3.extracted_features/gait_time.bin → ...a/3.extracted_features/gait/gait_time.bin
diff --git a/...data/3.extracted_features/gait_values.bin → ...3.extracted_features/gait/gait_values.bin b/...data/3.extracted_features/gait_values.bin → ...3.extracted_features/gait/gait_values.bin