Skip to content

Commit e64e9fa

Browse files
author
Mikkel Roald-Arbøl
committed
The rest
1 parent 0492954 commit e64e9fa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1920
-1171
lines changed

Diff for: NAMESPACE

+17-8
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,32 @@ export(classify_by_stability)
1414
export(classify_by_threshold)
1515
export(classify_high_periods)
1616
export(classify_low_periods)
17-
export(clean_kinematics)
1817
export(does_file_have_expected_headers)
1918
export(ensure_file_has_expected_headers)
2019
export(ensure_file_has_headers)
21-
export(filter_by_confidence)
22-
export(filter_by_speed)
2320
export(filter_highpass)
2421
export(filter_highpass_fft)
2522
export(filter_kalman)
2623
export(filter_kalman_irregular)
2724
export(filter_lowpass)
2825
export(filter_lowpass_fft)
26+
export(filter_movement)
27+
export(filter_na_confidence)
28+
export(filter_na_roi)
29+
export(filter_na_speed)
30+
export(filter_rollmean)
31+
export(filter_rollmedian)
32+
export(filter_sgolay)
33+
export(find_lag)
2934
export(find_peaks)
30-
export(find_time_lag)
3135
export(find_troughs)
36+
export(get_example_data)
3237
export(get_metadata)
3338
export(ggplot_na_gapsize)
3439
export(group_every)
3540
export(init_metadata)
3641
export(map_to_cartesian)
3742
export(map_to_polar)
38-
export(na_interpolation)
3943
export(plot_position_timeseries)
4044
export(plot_speed_timeseries)
4145
export(read_animalta)
@@ -48,7 +52,6 @@ export(read_sleap)
4852
export(read_trackball)
4953
export(read_treadmill)
5054
export(read_trex)
51-
export(replace_missing)
5255
export(replace_na)
5356
export(replace_na_linear)
5457
export(replace_na_locf)
@@ -60,8 +63,6 @@ export(set_framerate)
6063
export(set_individual)
6164
export(set_start_datetime)
6265
export(set_uuid)
63-
export(smooth_by_savgol)
64-
export(smooth_movement)
6566
export(transform_to_egocentric)
6667
export(translate_coords)
6768
export(validate_animalta)
@@ -132,7 +133,15 @@ importFrom(signal,butter)
132133
importFrom(signal,filtfilt)
133134
importFrom(signal,sgolayfilt)
134135
importFrom(stats,approx)
136+
importFrom(stats,ccf)
137+
importFrom(stats,complete.cases)
138+
importFrom(stats,fft)
139+
importFrom(stats,mad)
140+
importFrom(stats,median)
141+
importFrom(stats,qnorm)
142+
importFrom(stats,quantile)
135143
importFrom(stats,spline)
136144
importFrom(stinepack,stinterp)
137145
importFrom(stringi,stri_rand_strings)
146+
importFrom(utils,download.file)
138147
importFrom(vroom,vroom)

Diff for: R/add_centroid.R

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#' Add Centroid to Movement Data
2+
#'
3+
#' @description
4+
#' Calculates and adds a centroid point to movement tracking data. The centroid
5+
#' represents the mean position of selected keypoints at each time point.
6+
#'
7+
#' @param data A data frame containing movement tracking data with the following
8+
#' required columns:
9+
#' - `individual`: Identifier for each tracked subject
10+
#' - `keypoint`: Factor specifying tracked points
11+
#' - `time`: Time values
12+
#' - `x`: x-coordinates
13+
#' - `y`: y-coordinates
14+
#' - `confidence`: Confidence values for tracked points
15+
#' @param include_keypoints Optional character vector specifying which keypoints
16+
#' to use for centroid calculation. If NULL (default), all keypoints are used
17+
#' unless `exclude_keypoints` is specified.
18+
#' @param exclude_keypoints Optional character vector specifying which keypoints
19+
#' to exclude from centroid calculation. If NULL (default), no keypoints are
20+
#' excluded unless `include_keypoints` is specified.
21+
#' @param centroid_name Character string specifying the name for the centroid
22+
#' keypoint (default: "centroid")
23+
#'
24+
#' @return A data frame with the same structure as the input, but with an
25+
#' additional keypoint representing the centroid. The centroid's confidence
26+
#' values are set to NA.
27+
#'
28+
#' @details
29+
#' The function calculates the centroid as the mean x and y position of the
30+
#' selected keypoints at each time point for each individual. Keypoints can be
31+
#' selected either by specifying which ones to include (`include_keypoints`) or
32+
#' which ones to exclude (`exclude_keypoints`). The resulting centroid is added
33+
#' as a new keypoint to the data frame.
34+
#'
35+
#' @examples
36+
#' \dontrun{
37+
#' # Add centroid using all keypoints
38+
#' add_centroid(movement_data)
39+
#'
40+
#' # Calculate centroid using only specific keypoints
41+
#' add_centroid(movement_data,
42+
#' include_keypoints = c("head", "thorax", "abdomen"))
43+
#'
44+
#' # Calculate centroid excluding certain keypoints
45+
#' add_centroid(movement_data,
46+
#' exclude_keypoints = c("antenna_left", "antenna_right"),
47+
#' centroid_name = "body_centroid")
48+
#' }
49+
#'
50+
#' @seealso
51+
#' `convert_nan_to_na()` for NaN handling in the centroid calculation
52+
#'
53+
#' @importFrom dplyr filter group_by summarise mutate arrange bind_rows
54+
#'
55+
#' @export
56+
add_centroid <- function(data,
57+
include_keypoints=NULL,
58+
exclude_keypoints=NULL,
59+
centroid_name="centroid"){
60+
# Check that centroid isn't there
61+
# Check that it's a movement data frame
62+
# To be optimised with collapse later on
63+
if (!is.null(include_keypoints)){
64+
df_centroid <- data |>
65+
dplyr::filter(.data$keypoint %in% include_keypoints)
66+
} else if (!is.null(exclude_keypoints)){
67+
df_centroid <- data |>
68+
dplyr::filter(!.data$keypoint %in% exclude_keypoints)
69+
} else {
70+
df_centroid <- data
71+
}
72+
73+
df_centroid <- df_centroid |>
74+
dplyr::group_by(.data$individual, .data$time) |>
75+
dplyr::summarise(x = mean(.data$x, na.rm=TRUE),
76+
y = mean(.data$y, na.rm=TRUE),
77+
confidence = NA,
78+
.groups = "keep") |>
79+
dplyr::mutate(keypoint = factor(as.character(centroid_name))) |>
80+
convert_nan_to_na()
81+
82+
data <- bind_rows(data, df_centroid) |>
83+
dplyr::arrange(.data$time, .data$individual, .data$keypoint)
84+
85+
return(data)
86+
}

Diff for: R/align_timeseries.R

+10-8
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@
2121
#' t <- seq(0, 10, 0.1)
2222
#' reference <- sin(t)
2323
#' signal <- sin(t - 0.5) # Signal delayed by 0.5 units
24-
#' lag <- find_time_lag(signal, reference)
24+
#' lag <- find_lag(signal, reference)
2525
#' print(lag) # Should be approximately 5 samples (0.5 units)
2626
#'
27-
#' @seealso \code{\link{align_time_series}} for applying the computed lag
27+
#' @seealso \code{\link{align_timeseries}} for applying the computed lag
28+
#'
29+
#' @importFrom stats complete.cases ccf
2830
#'
2931
#' @export
30-
find_time_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
31-
complete_cases <- complete.cases(signal, reference)
32+
find_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
33+
complete_cases <- stats::complete.cases(signal, reference)
3234
signal <- signal[complete_cases]
3335
reference <- reference[complete_cases]
3436

@@ -42,7 +44,7 @@ find_time_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
4244
max_lag = length(signal) - 1
4345
}
4446

45-
ccf_result <- ccf(signal, reference, plot = FALSE, lag.max = max_lag)
47+
ccf_result <- stats::ccf(signal, reference, plot = FALSE, lag.max = max_lag)
4648
best_lag <- ccf_result$lag[which.max(abs(ccf_result$acf))]
4749

4850
# Subtract one observation, which seems to be needed in tests
@@ -55,10 +57,10 @@ find_time_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
5557
#'
5658
#' This function aligns two time series by shifting one series relative to the
5759
#' reference based on their cross-correlation. It first finds the optimal lag
58-
#' using \code{find_time_lag}, then applies the shift by padding with NA values
60+
#' using \code{find_lag}, then applies the shift by padding with NA values
5961
#' as needed.
6062
#'
61-
#' @inheritParams find_time_lag
63+
#' @inheritParams find_lag
6264
#' @param signal Time series to align (numeric vector)
6365
#' @param reference Reference time series to align against (numeric vector)
6466
#'
@@ -81,7 +83,7 @@ find_time_lag <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
8183
#'
8284
#' @export
8385
align_timeseries <- function(signal, reference, max_lag = 5000, normalize = TRUE) {
84-
lag <- find_time_lag(signal, reference, max_lag, normalize)
86+
lag <- find_lag(signal, reference, max_lag, normalize)
8587

8688
if (lag > 0) {
8789
aligned <- c(rep(NA, lag), signal[1:(length(signal)-lag)])

Diff for: R/classify_by_high_periods.R

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#' Classifies Periods of High Activity in Time Series Using Peaks and Troughs
2+
#'
3+
#' @description
4+
#' Identifies periods of high activity in a time series by analyzing peaks and troughs,
5+
#' returning a logical vector marking these periods. The function handles special cases
6+
#' like adjacent peaks and the initial/final sequences.
7+
#'
8+
#' @param x numeric vector; the time series values
9+
#' @param peaks logical vector; same length as x, TRUE indicates peak positions
10+
#' @param troughs logical vector; same length as x, TRUE indicates trough positions
11+
#'
12+
#' @return logical vector; TRUE indicates periods of high activity
13+
#'
14+
#' @details
15+
#' The function performs the following steps:
16+
#' 1. Resolves adjacent peaks by keeping only the highest
17+
#' 2. Handles the initial sequence before the first trough
18+
#' 3. Handles the final sequence after the last event
19+
#' 4. Identifies regions between troughs containing exactly one peak
20+
#'
21+
#' @examples
22+
#' \dontrun{
23+
#' x <- c(1, 3, 2, 1, 4, 2, 1)
24+
#' peaks <- c(FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE)
25+
#' troughs <- c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE)
26+
#' classify_high_periods(x, peaks, troughs)
27+
#' }
28+
#'
29+
#' @export
30+
classify_high_periods <- function(x, peaks, troughs) {
31+
cli::cli_abort("Doesn't currently work")
32+
# # Input validation
33+
# if (length(peaks) != length(troughs) || length(x) != length(peaks)) {
34+
# cli::cli_abort("Lengths of x, peaks, and troughs must match")
35+
# }
36+
#
37+
# n <- length(x)
38+
# result <- logical(n)
39+
#
40+
# # First handle adjacent peaks - keep only highest
41+
# peak_indices <- which(peaks)
42+
# for(i in 1:(length(peak_indices)-1)) {
43+
# # Look at all peaks until we find a trough
44+
# for(j in (i+1):length(peak_indices)) {
45+
# if(any(troughs[peak_indices[i]:peak_indices[j]])) break
46+
# # Keep highest peak, remove others
47+
# if(x[peak_indices[i]] <= x[peak_indices[j]]) {
48+
# peaks[peak_indices[i]] <- FALSE
49+
# break
50+
# } else {
51+
# peaks[peak_indices[j]] <- FALSE
52+
# }
53+
# }
54+
# }
55+
#
56+
# # Handle start sequence
57+
# first_event <- min(c(peak_indices[1], trough_indices[1]))
58+
# result[1:first_event] <- ifelse(first_event == peak_indices[1], TRUE, FALSE)
59+
#
60+
# # End sequence
61+
# last_event <- max(c(peak_indices[length(peak_indices)],
62+
# trough_indices[length(trough_indices)]))
63+
# result[last_event:n] <- ifelse(last_event == peak_indices[length(peak_indices)],
64+
# TRUE, FALSE)
65+
#
66+
# # Find regions between troughs that have exactly one peak
67+
# for(i in 1:(length(trough_indices)-1)) {
68+
# current_trough <- trough_indices[i]
69+
# next_trough <- trough_indices[i+1]
70+
# peaks_between <- which(peaks[current_trough:next_trough])
71+
#
72+
# if(length(peaks_between) == 1) {
73+
# result[(current_trough+1):(next_trough-1)] <- TRUE
74+
# }
75+
# }
76+
#
77+
# return(result)
78+
}

Diff for: R/classify_by_low_periods.R

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#' Classifies Periods of Low Activity in Time Series Using Peaks and Troughs
2+
#'
3+
#' @description
4+
#' Identifies periods of low activity in a time series by analyzing peaks and troughs,
5+
#' returning a logical vector marking these periods. Low activity periods are defined
6+
#' as regions between consecutive troughs that contain no peaks.
7+
#'
8+
#' @param peaks logical vector; TRUE indicates peak positions
9+
#' @param troughs logical vector; same length as peaks, TRUE indicates trough positions
10+
#'
11+
#' @return logical vector; TRUE indicates periods of low activity
12+
#'
13+
#' @details
14+
#' The function performs the following steps:
15+
#' 1. Validates input lengths
16+
#' 2. Initializes all periods as potentially low activity (TRUE)
17+
#' 3. For each pair of consecutive troughs:
18+
#' - If no peaks exist between them, maintains TRUE for that period
19+
#' - If any peaks exist, marks that period as FALSE (not low activity)
20+
#'
21+
#' @examples
22+
#' peaks <- c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)
23+
#' troughs <- c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE)
24+
#' classify_low_periods(peaks, troughs)
25+
#'
26+
#' @export
27+
classify_low_periods <- function(peaks, troughs) {
28+
# Input validation
29+
if (length(peaks) != length(troughs)) {
30+
cli::cli_abort("Lengths of peaks and troughs must match")
31+
}
32+
33+
# Initialize output vector
34+
result <- rep(TRUE, length(peaks))
35+
36+
# Find indices of troughs
37+
trough_indices <- which(troughs)
38+
39+
# For each consecutive pair of troughs
40+
for (i in seq_len(length(trough_indices) - 1)) {
41+
start_idx <- trough_indices[i]
42+
end_idx <- trough_indices[i + 1]
43+
44+
# Check if there are any peaks between these troughs
45+
between_slice <- peaks[(start_idx + 1):(end_idx - 1)]
46+
47+
if (length(between_slice) > 0 && !any(between_slice)) {
48+
# If no peaks between troughs, set those positions to FALSE
49+
result[(start_idx + 1):(end_idx - 1)] <- FALSE
50+
}
51+
}
52+
53+
return(result)
54+
}

Diff for: R/classify_by_stability.R

+7-4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@
4242
#' - 1: High activity state
4343
#' - 0: Low activity state
4444
#' - NA: Unable to classify (usually due to missing data)
45+
#'
46+
#' @importFrom stats quantile qnorm median mad
47+
#'
4548
#' @export
4649
classify_by_stability <- function(speed,
4750
window_size = 30,
@@ -78,7 +81,7 @@ classify_by_stability <- function(speed,
7881
))
7982

8083
# Find baseline statistics using stable periods
81-
var_threshold <- quantile(roll_var, 0.75, na.rm = TRUE)
84+
var_threshold <- stats::quantile(roll_var, 0.75, na.rm = TRUE)
8285
stable_periods <- !is.na(roll_var) & roll_var < var_threshold
8386

8487
rle_obj <- rle(stable_periods)
@@ -109,7 +112,7 @@ classify_by_stability <- function(speed,
109112
baseline_sd <- sd(speed[baseline_start:baseline_end], na.rm = TRUE)
110113

111114
# Convert tolerance to threshold using inverse normal CDF
112-
threshold_multiplier <- qnorm(1 - tolerance)
115+
threshold_multiplier <- stats::qnorm(1 - tolerance)
113116
threshold <- baseline_mean + threshold_multiplier * baseline_sd
114117

115118
# Initial classification
@@ -154,8 +157,8 @@ classify_by_stability <- function(speed,
154157

155158
stable_means <- roll_mean[stable_mask]
156159
list(
157-
level = median(stable_means, na.rm = TRUE),
158-
spread = mad(stable_means, na.rm = TRUE)
160+
level = stats::median(stable_means, na.rm = TRUE),
161+
spread = stats::mad(stable_means, na.rm = TRUE)
159162
)
160163
}
161164

0 commit comments

Comments
 (0)