|
| 1 | +# This file defines a private helper: calc_single_interval_metrics() that |
| 2 | +# works on a single interval and a public function `calc_interval_metrics()` |
| 3 | +# that process BirdFlowIntervals. |
| 4 | + |
| 5 | + |
| 6 | +#' Calculate interval metrics for a single interval |
| 7 | +#' |
| 8 | +#' This internal function evaluates model performance using as single interval |
| 9 | +#' which is a pair of observations of a real bird separated by at least |
| 10 | +#' a week. This is a helper function for `[calc_interval_metrics()` |
| 11 | +#' |
| 12 | +#' @param birdflow_interval_row A row of data in the `BirdFlowIntervals` object |
| 13 | +#' @param bf BirdFlow model |
| 14 | +#' @param gcd Matrix of great circle distance |
| 15 | +#' @param st_dists Matrix of S&T distribution with weeks as columns, |
| 16 | +#' location as rows, probability as values. |
| 17 | +#' @return A named vector with various metrics |
| 18 | +#' \describe{ |
| 19 | +#' \item{pred}{Weighted average great-circle distance (km) from |
| 20 | +#' the BF prediction distribution to the actual encounter cell} |
| 21 | +#' \item{st}{Weighted average great-circle distance (km) from the S&T |
| 22 | +#' empirical distribution to the actual encounter cell} |
| 23 | +#' \item{win_prob}{Probability that BF is closer than S&T |
| 24 | +#' (i.e.“win” probability of BF vs. S&T)} |
| 25 | +#' \item{win_distance}{Absolute distance improvement (km): \code{st – pred}} |
| 26 | +#' \item{win_distance_fraction}{Normalized distance improvement: |
| 27 | +#' \code{(st – pred) / st}} |
| 28 | +#' \item{global_prob_of_the_starting}{Probability (relative abundance) of the |
| 29 | +#' starting cell in the BF distribution at the start date} |
| 30 | +#' \item{elapsed_days}{Elapsed time of the interval (days) between banding |
| 31 | +#' (\code{date1}) and encounter (\code{date2})} |
| 32 | +#' \item{elapsed_km}{Observed great-circle distance (km) between banding |
| 33 | +#' and encounter locations} |
| 34 | +#' \item{null_ll}{Log-likelihood of the encounter cell under the S&T |
| 35 | +#' distribution: \code{log(final_st_distr[i_final])}} |
| 36 | +#' \item{ll}{Log-likelihood of the encounter cell under the BF prediction: |
| 37 | +#' \code{log(preds_final[i_final])}} |
| 38 | +#' \item{energy_score_bf}{Energy score of the BF predictive distribution |
| 39 | +#' (with \eqn{\beta=1}{beta=1})} |
| 40 | +#' \item{energy_score_st}{Energy score of the S&T empirical distribution |
| 41 | +#' (with \eqn{\beta=1}{beta=1})} |
| 42 | +#' \item{energy_improvement}{Difference in energy score: |
| 43 | +#' \code{energy_score_st – energy_score_bf}} |
| 44 | +#' \item{pred_elapsed_dist_by_pred}{Predicted elapsed distance (km) from |
| 45 | +#' starting cell, weighted by BF predictions} |
| 46 | +#' \item{pred_elapsed_dist_by_st}{Predicted elapsed distance (km) from |
| 47 | +#' starting cell, weighted by S&T distribution} |
| 48 | +#' } |
| 49 | +#' @seealso [calc_interval_metrics()] |
| 50 | +#' @keywords internal |
| 51 | +calc_single_interval_metrics <- function( |
| 52 | + birdflow_interval_row, bf, gcd, st_dists) { |
| 53 | + # latlong data for banding and encounter location |
| 54 | + point_df_initial <- data.frame( |
| 55 | + x = birdflow_interval_row$lon1, y = birdflow_interval_row$lat1 |
| 56 | + ) |
| 57 | + point_df_final <- data.frame( |
| 58 | + x = birdflow_interval_row$lon2, y = birdflow_interval_row$lat2 |
| 59 | + ) |
| 60 | + # birdflow one-hot distributions for banding and encounter locations |
| 61 | + d_initial <- as_distr(x = point_df_initial, bf = bf, crs = "EPSG:4326") |
| 62 | + # same as birdflow_interval_row$i1 |
| 63 | + d_final <- as_distr(x = point_df_final, bf = bf, crs = "EPSG:4326") |
| 64 | + # same as birdflow_interval_row$i2 |
| 65 | + # get s&t distribution for final timestep |
| 66 | + final_timestep <- birdflow_interval_row$timestep2 |
| 67 | + final_st_distr <- st_dists[, final_timestep] |
| 68 | + # birdflow cell index for encounter location |
| 69 | + i_final <- which(d_final == 1) |
| 70 | + # birdflow predictions from banding one-hot, for encounter date |
| 71 | + preds <- predict(bf, d_initial, |
| 72 | + start = birdflow_interval_row$date1, |
| 73 | + end = birdflow_interval_row$date2 |
| 74 | + ) |
| 75 | + preds_final <- preds[, ncol(preds), drop = FALSE] |
| 76 | + preds_final <- as.vector(preds_final) |
| 77 | + # subset great circle distances for cell of actual encounter location |
| 78 | + gcd_final <- gcd[, i_final] |
| 79 | + # weighted average distance from predicted encounter |
| 80 | + # distribution to actual encounter location |
| 81 | + |
| 82 | + # Dave's distance metric |
| 83 | + dist_mean_pred <- sum(preds_final * gcd_final) |
| 84 | + dist_mean_st <- sum(final_st_distr * gcd_final) |
| 85 | + win_distance <- dist_mean_st - dist_mean_pred |
| 86 | + pred_elapsed_dist_by_pred <- sum(preds_final * gcd[, which(d_initial == 1)]) |
| 87 | + pred_elapsed_dist_by_st <- sum(final_st_distr * gcd[, which(d_initial == 1)]) |
| 88 | + |
| 89 | + # Normalized distance metric |
| 90 | + win_distance_fraction <- (dist_mean_st - dist_mean_pred) / dist_mean_st |
| 91 | + |
| 92 | + ## YK's function |
| 93 | + # For each predicted location, calculate the win probability. |
| 94 | + # Average the win probability based on predicted probability. |
| 95 | + M <- outer(gcd_final, gcd_final, FUN = function(x, y) y > x) |
| 96 | + win_prob_each <- rowSums(M * rep(final_st_distr, each = length(gcd_final))) |
| 97 | + win_prob <- sum(win_prob_each * preds_final) |
| 98 | + |
| 99 | + # get location index of banding starting point |
| 100 | + loc_i_starting <- birdflow_interval_row$i1 |
| 101 | + date_starting <- birdflow_interval_row$timestep1 |
| 102 | + |
| 103 | + # |
| 104 | + elapsed_days <- as.numeric( |
| 105 | + birdflow_interval_row$date2 - birdflow_interval_row$date1, |
| 106 | + unit = "days" |
| 107 | + ) |
| 108 | + elapsed_km <- great_circle_distance_lonlat_input( |
| 109 | + birdflow_interval_row$lat1, birdflow_interval_row$lon1, |
| 110 | + birdflow_interval_row$lat2, birdflow_interval_row$lon2 |
| 111 | + ) |
| 112 | + |
| 113 | + # LL |
| 114 | + null_ll <- log(final_st_distr[i_final] + 1e-8) |
| 115 | + ll <- log(preds_final[i_final] + 1e-8) |
| 116 | + |
| 117 | + ## Energy Score Calculations (with beta = 1) |
| 118 | + beta <- 1 |
| 119 | + # For the predicted distribution: |
| 120 | + first_term_pred <- sum(preds_final * (gcd_final^beta)) |
| 121 | + second_term_pred <- 0.5 * sum(outer(preds_final, preds_final) * (gcd^beta)) |
| 122 | + # Second term: weighted average of pairwise distances |
| 123 | + # (using full distance matrix gcd) |
| 124 | + energy_score_pred <- first_term_pred - second_term_pred |
| 125 | + # For the s&t distribution: |
| 126 | + first_term_st <- sum(final_st_distr * (gcd_final^beta)) |
| 127 | + second_term_st <- 0.5 * |
| 128 | + sum(outer(final_st_distr, final_st_distr) * (gcd^beta)) |
| 129 | + energy_score_st <- first_term_st - second_term_st |
| 130 | + |
| 131 | + |
| 132 | + # return |
| 133 | + return(c( |
| 134 | + pred = dist_mean_pred, st = dist_mean_st, |
| 135 | + win_prob = win_prob, |
| 136 | + win_distance = win_distance, |
| 137 | + win_distance_fraction = win_distance_fraction, |
| 138 | + global_prob_of_the_starting = as.numeric( |
| 139 | + bf$distr[loc_i_starting, date_starting] / 52 |
| 140 | + ), |
| 141 | + elapsed_days = elapsed_days, |
| 142 | + elapsed_km = elapsed_km, |
| 143 | + null_ll = null_ll, |
| 144 | + ll = ll, |
| 145 | + energy_score_bf = energy_score_pred, |
| 146 | + energy_score_st = energy_score_st, |
| 147 | + energy_improvement = energy_score_st - energy_score_pred, |
| 148 | + pred_elapsed_dist_by_pred = pred_elapsed_dist_by_pred, |
| 149 | + pred_elapsed_dist_by_st = pred_elapsed_dist_by_st |
| 150 | + )) |
| 151 | +} |
| 152 | + |
| 153 | + |
| 154 | +#' Calculate interval metrics |
| 155 | +#' |
| 156 | +#' Calculate interval‐based validation metrics—including distance, likelihood, |
| 157 | +#' and energy‐score metrics—for all transition pairs in a BirdFlowIntervals |
| 158 | +#' object. |
| 159 | +#' |
| 160 | +#' @param birdflow_intervals A `BirdFlowIntervals` object containing |
| 161 | +#' transition data |
| 162 | +#' @param bf A fitted `BirdFlow` model |
| 163 | +#' |
| 164 | +#' @return A list with two elements: |
| 165 | +#' \describe{ |
| 166 | +#' \item{metrics}{A named numeric vector of summary metrics across all |
| 167 | +#' intervals: |
| 168 | +#' \describe{ |
| 169 | +#' \item{mean_pred}{Mean weighted average distance (km) from |
| 170 | +#' BF predictions} |
| 171 | +#' \item{mean_st}{Mean weighted average distance (km) from S&T |
| 172 | +#' distributions} |
| 173 | +#' \item{mean_win_prob}{Mean win probability (BF vs. S&T)} |
| 174 | +#' \item{mean_win_distance}{Mean absolute distance improvement (km)} |
| 175 | +#' \item{mean_win_distance_fraction}{Mean normalized distance improvement} |
| 176 | +#' \item{mean_global_prob_of_the_starting}{Mean relative abundance at |
| 177 | +#' start cells} |
| 178 | +#' \item{mean_elapsed_days}{Mean elapsed days per interval} |
| 179 | +#' \item{mean_elapsed_km}{Mean observed great‐circle distance (km)} |
| 180 | +#' \item{mean_null_ll}{Mean log‐likelihood under the S&T null |
| 181 | +#' distribution} |
| 182 | +#' \item{mean_ll}{Mean log‐likelihood under the BF prediction} |
| 183 | +#' \item{mean_energy_score_bf}{Mean energy score of BF predictions} |
| 184 | +#' \item{mean_energy_score_st}{Mean energy score of S&T distributions} |
| 185 | +#' \item{mean_energy_improvement}{Mean difference in energy score} |
| 186 | +#' \item{mean_pred_elapsed_dist_by_pred}{Mean predicted elapsed distance |
| 187 | +#' by BF} |
| 188 | +#' \item{mean_pred_elapsed_dist_by_st}{Mean predicted elapsed distance |
| 189 | +#' by S&T} |
| 190 | +#' \item{weighted_mean_win_prob}{Global‐abundance‐weighted mean win |
| 191 | +#' probability} |
| 192 | +#' \item{weighted_mean_win_distance}{Global‐abundance‐weighted mean win |
| 193 | +#' distance} |
| 194 | +#' \item{weighted_mean_win_distance_fraction}{Global‐abundance‐weighted |
| 195 | +#' mean distance fraction} |
| 196 | +#' \item{weighted_mean_null_ll}{Global‐abundance‐weighted mean null |
| 197 | +#' log‐likelihood} |
| 198 | +#' \item{weighted_mean_ll}{Global‐abundance‐weighted mean log‐likelihood} |
| 199 | +#' \item{weighted_energy_improvement}{Global‐abundance‐weighted mean |
| 200 | +#' energy improvement} |
| 201 | +#' \item{n_intervals}{Number of transition pairs evaluated} |
| 202 | +#' } |
| 203 | +#' } |
| 204 | +#' \item{per_interval}{A `data.frame` of the raw, per‐transition metrics |
| 205 | +#' (same fields as above without the “mean_” prefix)} |
| 206 | +#' } |
| 207 | +#' @export |
| 208 | +#' @examples |
| 209 | +#' route_df <- data.frame( |
| 210 | +#' route_id = c("001", "001", "001", "001", "001", "003", "003", "003", "004"), |
| 211 | +#' date = as.Date(c("2025-01-01", "2025-01-08", "2025-01-15", "2025-01-21", |
| 212 | +#' "2025-02-10", "2025-03-01", "2025-05-01", "2025-06-01", |
| 213 | +#' "2025-05-01")), |
| 214 | +#' lon = c(-75.0060, -75.0060, -74.0060, -87.6298, -87.6298, -87.6298, |
| 215 | +#' -89.6298, -85.6298, -95.3698), |
| 216 | +#' lat = c(39.7128, 39.7128, 40.7128, 41.8781, 41.8781, 41.8781, |
| 217 | +#' 42.8781, 40.8781, 29.7604), |
| 218 | +#' route_type = c("tracking", "tracking", "tracking", "tracking", |
| 219 | +#' "tracking", "motus", "motus", "motus", "motus") |
| 220 | +#' ) |
| 221 | +#' |
| 222 | +#' bf <- BirdFlowModels::amewoo |
| 223 | +#' species1 <- bf$species |
| 224 | +#' source1 <- "Testing" |
| 225 | +#' |
| 226 | +#' my_routes <- Routes(route_df, |
| 227 | +#' species = species1, |
| 228 | +#' source = source1 |
| 229 | +#' ) |
| 230 | +#' my_bfroutes <- as_BirdFlowRoutes(my_routes, bf = bf) |
| 231 | +#' |
| 232 | +#' # Constraints |
| 233 | +#' min_day <- 7 |
| 234 | +#' max_day <- 180 |
| 235 | +#' min_km <- 200 |
| 236 | +#' max_km <- 8000 |
| 237 | +#' |
| 238 | +#' my_intervals <- as_BirdFlowIntervals(my_bfroutes, |
| 239 | +#' max_n = 1000, |
| 240 | +#' min_day_interval = min_day, |
| 241 | +#' max_day_interval = max_day, |
| 242 | +#' min_km_interval = min_km, |
| 243 | +#' max_km_interval = max_km |
| 244 | +#' ) |
| 245 | +#' |
| 246 | +#' eval_res <- calc_interval_metrics(my_intervals, bf) |
| 247 | +#' single_value_outputs <- eval_res[[1]] |
| 248 | +#' transition_level_outputs <- eval_res[[2]] |
| 249 | +calc_interval_metrics <- function(birdflow_intervals, bf) { |
| 250 | + # weekly distributions directly from S&T |
| 251 | + st_dists <- get_distr(bf, which = "all", from_marginals = FALSE) |
| 252 | + |
| 253 | + # Great circle distances between cells |
| 254 | + gcd <- great_circle_distances(bf) |
| 255 | + |
| 256 | + # Calculate distance metric & ll |
| 257 | + dists <- sapply( |
| 258 | + split(birdflow_intervals$data, seq_len(nrow(birdflow_intervals$data))), |
| 259 | + calc_single_interval_metrics, bf, gcd, st_dists |
| 260 | + ) |
| 261 | + dists <- t(dists) |
| 262 | + dists <- as.data.frame(dists) |
| 263 | + |
| 264 | + n_intervals <- nrow(birdflow_intervals$data) |
| 265 | + |
| 266 | + output <- colMeans(dists) |
| 267 | + names(output) <- paste0("mean_", names(output)) |
| 268 | + |
| 269 | + output <- |
| 270 | + c( |
| 271 | + output, |
| 272 | + c( |
| 273 | + weighted_mean_win_prob = sum( |
| 274 | + (dists$global_prob_of_the_starting / |
| 275 | + sum(dists$global_prob_of_the_starting) |
| 276 | + ) * dists$win_prob |
| 277 | + ), |
| 278 | + weighted_mean_win_distance = sum( |
| 279 | + ( |
| 280 | + dists$global_prob_of_the_starting / |
| 281 | + sum(dists$global_prob_of_the_starting) |
| 282 | + ) * dists$win_distance |
| 283 | + ), |
| 284 | + weighted_mean_win_distance_fraction = sum( |
| 285 | + ( |
| 286 | + dists$global_prob_of_the_starting / |
| 287 | + sum(dists$global_prob_of_the_starting) |
| 288 | + ) * dists$win_distance_fraction |
| 289 | + ), |
| 290 | + weighted_mean_null_ll = sum( |
| 291 | + (dists$global_prob_of_the_starting / |
| 292 | + sum(dists$global_prob_of_the_starting) |
| 293 | + ) * dists$null_ll |
| 294 | + ), |
| 295 | + weighted_mean_ll = sum( |
| 296 | + (dists$global_prob_of_the_starting / |
| 297 | + sum(dists$global_prob_of_the_starting) |
| 298 | + ) * dists$ll |
| 299 | + ), |
| 300 | + weighted_energy_improvement = sum( |
| 301 | + (dists$global_prob_of_the_starting / |
| 302 | + sum(dists$global_prob_of_the_starting) |
| 303 | + ) * dists$energy_improvement |
| 304 | + ), |
| 305 | + n_intervals = n_intervals |
| 306 | + ) |
| 307 | + ) |
| 308 | + |
| 309 | + return(list(output, dists)) |
| 310 | +} |
0 commit comments