Skip to content

Commit

Permalink
Merge pull request #13 from biomarkersParkinson/ppg_toolbox_deliverab…
Browse files Browse the repository at this point in the history
…les_until_predictions

Ppg toolbox deliverables until predictions
  • Loading branch information
vedran-kasalica authored Mar 26, 2024
2 parents 7b8db1a + 79d3955 commit d23a938
Show file tree
Hide file tree
Showing 52 changed files with 874 additions and 0 deletions.
349 changes: 349 additions & 0 deletions docs/matlab_scripts/main_sqa_toolbox.m

Large diffs are not rendered by default.

Binary file added src/dbpd/ppg/classifier/LR_model.mat
Binary file not shown.
40 changes: 40 additions & 0 deletions src/dbpd/ppg/feat_extraction/imu_psd_feature.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
function ratio_imu = imu_psd_feature(imu_segment, ppg_segment, fs_imu, fs_ppg)

% Initalize parameters for psd feature IMU
pwelchwin_imu = 3*fs_imu; % Using pwelch for 3 seconds
pwelchwin_ppg = 3*fs_ppg;
perc_overlap = 0.5;
noverlap_imu = perc_overlap*pwelchwin_imu; % overlap between windows is 50 %
noverlap_ppg = perc_overlap*pwelchwin_ppg;

f_bin_res = 0.05; % the treshold is set based on this binning --> so range of 0.1 Hz for calculating the PSD feature
nfft_ppg = 0:f_bin_res:fs_ppg/2; % create equal binning for ppg and imu
nfft_imu = 0:f_bin_res:fs_imu/2;

[pxx1,f1] = pwelch(imu_segment,hann(pwelchwin_imu), noverlap_imu, nfft_imu, fs_imu); % calculate psd using pwelch
PSD_imu = sum(pxx1,2); % sum over the three axis
[pxx2,f2] = pwelch(ppg_segment,hann(pwelchwin_ppg), noverlap_ppg, nfft_ppg, fs_ppg);
PSD_ppg = sum(pxx2,2); % not relevant ...

[~, max_PPG_psd_idx] = max(PSD_ppg);
max_PPG_freq_psd = f2(max_PPG_psd_idx);

%%---check dominant frequency (df) indices----%%
[~, corr_imu_psd_df_idx] = min(abs(max_PPG_freq_psd-f1));

df_idx = corr_imu_psd_df_idx-1:corr_imu_psd_df_idx+1;

%%---check first harmonic (fh) frequency indices----%%
[~, corr_imu_psd_fh_idx] = min(abs(max_PPG_freq_psd*2-f1));
fh_idx = corr_imu_psd_fh_idx-1:corr_imu_psd_fh_idx+1;

%%---check half dominant frequency----%% Sometimes this is the dominant frequency and the first harmonic is related to the PPG!
[~, corr_imu_psd_fdom_idx] = min(abs(max_PPG_freq_psd/2-f1));
fdom_idx = corr_imu_psd_fdom_idx-1:corr_imu_psd_fdom_idx+1;

acc_power_PPG_range = trapz(f1(df_idx), PSD_imu(df_idx)) + trapz(f1(fh_idx), PSD_imu(fh_idx)) + trapz(f1(fdom_idx), PSD_imu(fdom_idx)); % Add the correct indices and calculate power using trapezoidal integration
acc_power_total = trapz(f1, PSD_imu);

ratio_imu = acc_power_PPG_range/acc_power_total; % calculate the relative power between power in IMU using the PPG range and total power in IMU

end
53 changes: 53 additions & 0 deletions src/dbpd/ppg/feat_extraction/ppg_features.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
function [FeaturesPPG] = ppg_features(PPG,fs)

N_feat = 10;
FeaturesPPG = zeros(1, N_feat);
% Time-domain features
absPPG = abs(PPG);
FeaturesPPG(1) = var(PPG); % Feature 1: variance
FeaturesPPG(2) = mean(absPPG); % Feature 2: mean
FeaturesPPG(3) = median(absPPG); % Feature 3: median
FeaturesPPG(4) = kurtosis(PPG); % Feature 4: kurtosis
FeaturesPPG(5) = skewness(PPG); % Feature 5: skewness

window = 3*fs; % 90 samples for Welch's method => fr = 2/3 = 0.67 Hz --> not an issue with a clear distinct frequency
overlap = 0.5*window; % 45 samples overlap for Welch's Method

[P, f] = pwelch(PPG, window, overlap, [], fs);

% Find the dominant frequency
[~, maxIndex] = max(P);
FeaturesPPG(6) = f(maxIndex); % Feature 6: dominant frequency

ph_idx = find(f >= 0.75 & f <= 3); % find indices of f in relevant physiological heart range 45-180 bpm
[~, maxIndex_ph] = max(P(ph_idx)); % Index of dominant frequency
dominantFrequency_ph = f(ph_idx(maxIndex_ph)); % Extract dominant frequency
f_dom_band = find(f >= dominantFrequency_ph - 0.2 & f <= dominantFrequency_ph + 0.2); %
FeaturesPPG(7) = trapz(P(f_dom_band))/trapz(P); % Feature 7 = relative power


% Normalize the power spectrum
pxx_norm = P / sum(P);

% Compute spectral entropy
FeaturesPPG(8) = -sum(pxx_norm .* log2(pxx_norm))/log2(length(PPG)); % Feature 8 = spectral entropy --> normalize between 0 and 1! Or should we perform this operation at the min-max normalization! No because the values can come from different lengths!

% Signal to noise ratio
Signal = var(PPG);
Noise = var(absPPG);
FeaturesPPG(9) = Signal/Noise; % Feature 9 = surrogate of signal to noise ratio

%% Autocorrelation features

[acf, ~] = autocorr(PPG, 'NumLags', fs*3);
[peakValues, ~] = peakdet(acf, 0.01);
sortedValues = sort(peakValues(:,2), 'descend'); % sort the peaks found in the corellogram
if length(sortedValues) > 1
FeaturesPPG(10) = sortedValues(2); % determine the second peak as the highest peak after the peak at lag=0
else
FeaturesPPG(10) = 0; % Set at 0 if there is no clear second peak
end




26 changes: 26 additions & 0 deletions src/dbpd/ppg/glob_functions/extract_overlapping_segments.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
function [ppg_indices, imu_indices] = extract_overlapping_segments(ts_ppg, ts_imu, t_unix_ppg, t_unix_imu)
% K.I. Veldkamp, PhD student AI4P, 29-02-24
% Function to extract indices indicating overlapping data between IMU and
% PPG segment
% Convert Unix timestamps to absolute time
ppg_absolute_time = datetime(ts_ppg / 1000, 'ConvertFrom', 'posixtime');
imu_absolute_time = datetime(ts_imu / 1000, 'ConvertFrom', 'posixtime');

% Convert UNIX milliseconds to seconds
ppg_time = t_unix_ppg / 1000; % Convert milliseconds to seconds
imu_time = t_unix_imu / 1000; % Convert milliseconds to seconds

% Determine the overlapping time interval
start_time = max(ppg_absolute_time, imu_absolute_time);
end_time = min(ppg_absolute_time + seconds(ppg_time(end) - ppg_time(1)), imu_absolute_time + seconds(imu_time(end)-imu_time(1)));

% Convert overlapping time interval to indices
ppg_start_index = find(ppg_time >= posixtime(start_time), 1);
ppg_end_index = find(ppg_time <= posixtime(end_time), 1, 'last');
imu_start_index = find(imu_time >= posixtime(start_time), 1);
imu_end_index = find(imu_time <= posixtime(end_time), 1, 'last');

% Extract overlapping segments
ppg_indices = [ppg_start_index, ppg_end_index];
imu_indices = [imu_start_index, imu_end_index];
end
22 changes: 22 additions & 0 deletions src/dbpd/ppg/glob_functions/tsdf_scan_meta.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
function tsdf = tsdf_scan_meta(tsdf_data_full_path)
% K.I. Veldkamp, PhD student AI4P, 29-02-24
% For each given TSDB directory, transcribe TSDF metadata contents to SQL
% table --> function specific for toolbox data structure
tsdf = [];
irow = 1;

meta_list = dir(fullfile(tsdf_data_full_path, '*_meta.json'));
meta_filenames = {meta_list.name};

jsonobj = {};
for n = 1:length(meta_filenames)
tsdb_meta_fullpath = fullfile(tsdf_data_full_path, meta_filenames{n});
jsonstr = fileread(tsdb_meta_fullpath);
jsonobj{n} = loadjson(jsonstr);
tsdf(irow).tsdf_meta_fullpath = tsdb_meta_fullpath;
tsdf(irow).subject_id = jsonobj{n}.subject_id;
tsdf(irow).start_iso8601 = jsonobj{n}.start_iso8601;
tsdf(irow).end_iso8601 = jsonobj{n}.end_iso8601;
irow = irow + 1;
end

37 changes: 37 additions & 0 deletions src/dbpd/ppg/hr_functions/Long_TFD_JOT.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
%% Implementation using the toolbox of J. O'Toole

% Only the essential code is used. The function nonsep_gdtfd is code from
% the toolbox of J. O'Toole: Copyright © 2014, John M. O Toole, University College Cork. All rights reserved.
function HR_smooth_tfd = Long_TFD_JOT(rel_ppg_tfd, MA, fs, kern_type, kern_params)

tfd = nonsep_gdtfd(rel_ppg_tfd, kern_type, kern_params); % for now the wigner ville distribution but one could also use the smoothed pseudo WVD --> returns matrix of size NxN

if MA.value == 1
input = tfd';
tfd = filtfilt(MA.FC,1,input);
tfd = tfd.';
end

%%----- Get time and frequency axis for tfd-----%%
[~,M]=size(tfd);
Ntime=size(tfd,1);

ntime=1:Ntime; ntime=ntime./fs-1/fs; % time array
Mh=ceil(M);
k=linspace(0,0.5,Mh);
k=k.*fs; % frequency array

%%---Estimate HR using same approach as for given tfd----%%
[~, k_idx] = max(tfd, [], 1);


count = 0;

for i = 2:2:length(rel_ppg_tfd)/fs-4 % starting at 2 and ending at length -4 to discard the first and last 2 sec of the WVD which are influenced by boundary effects
count = count + 1;
rel_wvd_idx = ntime>=i & ntime<i+2;
HR_smooth_tfd(count) = 60*mean(k(k_idx(rel_wvd_idx)));

end

end
58 changes: 58 additions & 0 deletions src/dbpd/ppg/hr_functions/PPG_TFD_HR.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
function HR_smooth_tfd = PPG_TFD_HR(rel_ppg_wvd, tfd_length, MA, fs, kern_type, kern_params)
% Function to estimate the HR using a particular TFD method, such as WVD,
% SPWVD or other kernel methods over segments of 30 s. If the remaining segment
% length is longer than 30 s it is calculated over the entire length to
% include all data into the analysis.

% Input:
% - rel_ppg_wvd: ppg signal with an addition of 2s on both sides to
% overcome boundary effects
% - tfd_length: set the length to calculate time frequency distribution
% - MA: struct containing the information to perform a moving average
% filter over the TFD to overcome fundamental frequency switching
% - fs: sample frequency
% - kern_type: specified as a string such as 'wvd' for the wigner ville
% distribution
% - kern_params: kernel specifications, not relevant for 'wvd', but they
% are for 'spwvd', 'swvd' etc.

% Output:
% - HR_smooth_tfd: array containing an estimation of the HR for every 2 s
% of the relevant epoch (without the additional 2 s on both sides)

if ~exist('kern_params', 'var')
kern_params = {}; % Function also works without having imu_label --> then based on ppg_prob alone!
end

edge_add = 4; % adding an additional 4 sec (2 s to both sides) to overcome boundary effects/discontinuities of the WVD, in SPWVD this effect is already diminished due to a double kernel function
epoch_length = tfd_length + edge_add;
segment_length = (length(rel_ppg_wvd)-edge_add*fs)/fs; % substract the 4 added sec to obtain the original segment length

if segment_length > epoch_length
n_segments = floor(segment_length/tfd_length); %% Dit moet aangepast worden!!
else
n_segments = 1; % for HR segments which are shorter than 30 s due to uneven start and end of the segment which can make the segment 28 s if 1 s is substracted
end

ppg_segments = cell(n_segments,1);

for i = 1:n_segments % Split segments in 30 s PPG epochs

if i ~= n_segments
ppg_segments{i} = rel_ppg_wvd(1 + (i-1)*tfd_length*fs: (i*tfd_length+edge_add)*fs);

else

ppg_segments{i} = rel_ppg_wvd(1 + (i-1)*tfd_length*fs:end);

end

end

HR_smooth_tfd = [];

for j = 1:n_segments % Calculate the HR
ppg = ppg_segments{j};
HR_tfd = Long_TFD_JOT(ppg, MA, fs, kern_type, kern_params);
HR_smooth_tfd = [HR_smooth_tfd HR_tfd];
end
47 changes: 47 additions & 0 deletions src/dbpd/ppg/hr_functions/sample_prob_final.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
function data_prob = sample_prob_final(ppg_prob, imu_label)
% K.I. Veldkamp, PhD student AI4P, 29-02-24

%%--Assign probability to every individual data point!--%%
% Inputs:
% - ppg_prob
% - imu label
% Output:
% - data_prob: containing of an array with for every sample a probability.
% This can be mapped to the samples of the synced PPG-IMU

if ~exist('imu_label', 'var')
imu_label = ones(length(ppg_prob)); % Function also works without having imu_label --> then based on ppg_prob alone!
end

epoch_length = 6; % in seconds
overlap = 5; % in seconds
fs = 30;
% Number of samples in epoch
samples_per_epoch = epoch_length * fs;

% Calculate number of samples to shift for each epoch
samples_shift = (epoch_length - overlap) * fs;

fs_ppg = 30;
n_samples = (length(ppg_prob) + overlap) * fs_ppg;
data_prob = zeros(n_samples,1);

prob_array = ppg_prob;
prob_array(imu_label == 0) = 0;

for i = 1:n_samples
start_idx = ceil((i-(samples_per_epoch-samples_shift))/fs); %start_idx for the non starting and ending epochs is equal to ceil((data idx - n_overlap)/fs)
end_idx = ceil(i/fs);

%%-----Correct for first and last 6s epochs (those have less than 6 epochs to calculate labels and prob)-----%%
if start_idx < 1
start_idx = 1; % The first 5 epochs indices start at 1
elseif end_idx>length(prob_array)
end_idx = length(prob_array); % The last 5 epochs indices end at the last label
end

prob = prob_array(start_idx:end_idx);
data_prob(i) = mean(prob);
end

end
11 changes: 11 additions & 0 deletions src/dbpd/ppg/preprocessing/preprocessing_imu.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
function [IMU_filt, t_tar] = preprocessing_imu(t_orig, IMU_sample, fs_imu)
%%----Preprocessing of the IMU pipeline----%%
% Resampling to a uniform sampling rate at 30 Hz
t_tar = t_orig(1):1/fs_imu:t_orig(end); % Correct target for resampling (by usage of t_start = t_orig(1) and t_end = t_orig(end));
signal_resampled = spline(t_orig, IMU_sample', t_tar);

% High-pass filter for detrending
[b,a]=butter(6,0.4/(fs_imu/2),'high'); % high-pass filter for gravity removal
IMU_filt = filtfilt(b,a,double(signal_resampled'));

end
13 changes: 13 additions & 0 deletions src/dbpd/ppg/preprocessing/preprocessing_ppg.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
function [PPG_data, t_tar] = preprocessing_ppg(t_orig, PPG_sample, fs_ppg)

% Resampling to a uniform sampling rate at 30 Hz
t_tar = t_orig(1):1/fs_ppg:t_orig(end); % Correct target for resampling (by usage of t_start and t_end);
signal_resampled = spline(t_orig, PPG_sample, t_tar); % Spline interpolation used for resampling

% Band-pass filter for detrending

[b,a]=butter(6,[0.4, 3.5]/(fs_ppg/2),'bandpass'); % band-pass filter for detrending
PPG_filt = filtfilt(b,a,double(signal_resampled)); % Apply filter
PPG_data = PPG_filt'; % In case of a sample package error where PPG_filt consists of multiple "parts"

end
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added tests/data/0.classifiers/ppg/LR_model.mat
Binary file not shown.
35 changes: 35 additions & 0 deletions tests/data/2.preprocessed_data/PPG/PPG_meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"study_id": "PPP",
"device_id": "Verily Study Watch",
"subject_id": "0A0B82C94960D6DCABC1F597EC0BA657F4B0EDC320702BCEE3B6955CE924DE05",
"ppp_source_protobuf": "WatchData.PPG.Week104.raw",
"metadata_version": "0.1",
"start_iso8601": "27-Jun-2021 16:52:20",
"end_iso8601": "27-Jun-2021 17:03:04",
"scale_factors": 1,
"rows": 19338,
"data_type": "float",
"endianness": "little",
"bits": 64,
"freq_sampling_original": 100.49069555614177,
"sensors": [
{
"file_name": "PPG_time.bin",
"channels": [
"time"
],
"units": [
"ms"
]
},
{
"file_name": "PPG_samples.bin",
"channels": [
"green"
],
"units": [
"none"
]
}
]
}
Binary file not shown.
Binary file added tests/data/2.preprocessed_data/PPG/PPG_time.bin
Binary file not shown.
40 changes: 40 additions & 0 deletions tests/data/2.preprocessed_data/PPG/acceleration_meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"study_id": "PPP",
"device_id": "Verily Study Watch",
"subject_id": "0A0B82C94960D6DCABC1F597EC0BA657F4B0EDC320702BCEE3B6955CE924DE05",
"ppp_source_protobuf": "WatchData.IMU.Week104.raw",
"metadata_version": "0.1",
"start_iso8601": "27-Jun-2021 16:52:20",
"end_iso8601": "27-Jun-2021 17:03:04",
"scale_factors": 1,
"rows": 64459,
"data_type": "float",
"endianness": "little",
"bits": 64,
"sensors": [
{
"file_name": "acceleration_time.bin",
"channels": [
"time"
],
"units": [
"ms"
],
"freq_sampling_original": 100.49069555614177
},
{
"file_name": "acceleration_samples.bin",
"channels": [
"acceleration_x",
"acceleration_y",
"acceleration_z"
],
"units": [
"m/s/s",
"m/s/s",
"m/s/s"
],
"freq_sampling_original": 99.82007835743657
}
]
}
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit d23a938

Please sign in to comment.