birdflow-science
diff --git a/‎DESCRIPTION‎
Lines changed: 1 addition & 1 deletion b/‎DESCRIPTION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎NAMESPACE‎
Lines changed: 1 addition & 1 deletion b/‎NAMESPACE‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎NEWS.md‎
Lines changed: 10 additions & 3 deletions b/‎NEWS.md‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎R/calc_interval_metrics.R‎
Lines changed: 310 additions & 0 deletions b/‎R/calc_interval_metrics.R‎
Lines changed: 310 additions & 0 deletions
diff --git a/‎R/distribution_performance.R‎
Lines changed: 5 additions & 0 deletions b/‎R/distribution_performance.R‎
Lines changed: 5 additions & 0 deletions
@@ -1,6 +1,6 @@
 Package: BirdFlowR
 Title: Predict and Visualize Bird Movement
-Version: 0.1.0.9073
+Version: 0.1.0.9075
 Authors@R: 
     c(person("Ethan", "Plunkett",  email = "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-4405-2251")),
 
@@ -30,8 +30,8 @@ export(as_transitions)
 export(birdflow_options)
 export(build_transitions)
 export(calc_flux)
+export(calc_interval_metrics)
 export(calc_movement_vectors)
-export(calculate_interval_metrics)
 export(col_to_x)
 export(combine_transitions)
 export(distribution_performance)
 
@@ -1,7 +1,14 @@
+# BirdFlowR 0.1.0.9075
+2025-05-21
+
+* Finalized the interval-based validation metrics in `calc_interval_metrics()`.
+* Fixed some small documentation bugs where arguments were described but
+  not used.
+
 # BirdFlowR 0.1.0.9074
 2025-05-13
 
-Update for compatabality with **ebirdst** 3.2023.0, released 2025-05-07.
+Update for compatibility with **ebirdst** 3.2023.0, released 2025-05-07.
 
 **BirdFlowR** will work with either the 2022 or 2023 versions of the package.
 BirdFlow models retain all the species metadata and distributions they were
@@ -23,7 +30,7 @@ changed a lot as well.
   **ebirdst** version is supported by different components of **BirdFlowR** and 
   optionally throws an error if they aren't.  This is now called from
   `preprocess_species()` and `lookup_species_metadata()`.  This is mainly for
-  convience when maintaining the package.
+  convenience when maintaining the package.
 
 * Testing of routes and intervals now uses the full species object from a 
   reference BirdFlow model. This means the tests aren't calling 
@@ -38,7 +45,7 @@ changed a lot as well.
 
 * Cleaned up Notes generated while checking the package.
 
-## Changes in **eBirdst**
+## Changes in **ebirdst**
 
 This is NOT a complete list but here are the changes I encountered
 while updating the package.
 
@@ -0,0 +1,310 @@
+# This file defines a private helper, `calc_single_interval_metrics()`, that
+# works on a single interval, and a public function, `calc_interval_metrics()`,
+# that processes a BirdFlowIntervals object.
+
+
+#' Calculate interval metrics for a single interval
+#'
+#' This internal function evaluates model performance using a single interval,
+#' which is a pair of observations of a real bird separated by at least
+#' a week.  This is a helper function for [calc_interval_metrics()].
+#'
+#' @param birdflow_interval_row A row of data in the `BirdFlowIntervals`
+#' object. The columns `lon1`, `lat1`, `lon2`, `lat2`, `date1`, `date2`,
+#' `timestep1`, `timestep2`, and `i1` are used.
+#' @param bf BirdFlow model
+#' @param gcd Matrix of great circle distances between model cells, as
+#' returned by `great_circle_distances(bf)`.
+#' @param st_dists Matrix of S&T distribution with weeks as columns,
+#' location as rows, probability as values.
+#' @return A named vector with various metrics
+#' \describe{
+#'   \item{pred}{Weighted average great-circle distance (km) from
+#'   the BF prediction distribution to the actual encounter cell}
+#'   \item{st}{Weighted average great-circle distance (km) from the S&T
+#'   empirical distribution to the actual encounter cell}
+#'   \item{win_prob}{Probability that a location drawn from the BF prediction
+#'   is strictly closer to the encounter cell than a location drawn from the
+#'   S&T distribution (i.e. the "win" probability of BF vs. S&T)}
+#'   \item{win_distance}{Absolute distance improvement (km): \code{st - pred}}
+#'   \item{win_distance_fraction}{Normalized distance improvement:
+#'   \code{(st - pred) / st}}
+#'   \item{global_prob_of_the_starting}{Relative abundance of the starting
+#'   cell at the starting timestep, taken from \code{bf$distr} and divided
+#'   by 52}
+#'   \item{elapsed_days}{Elapsed time of the interval (days) between banding
+#'   (\code{date1}) and encounter (\code{date2})}
+#'   \item{elapsed_km}{Observed great-circle distance (km) between banding
+#'   and encounter locations}
+#'   \item{null_ll}{Log-likelihood of the encounter cell under the S&T
+#'   distribution, with a small offset to avoid \code{log(0)}:
+#'   \code{log(final_st_distr[i_final] + 1e-8)}}
+#'   \item{ll}{Log-likelihood of the encounter cell under the BF prediction,
+#'   with the same offset: \code{log(preds_final[i_final] + 1e-8)}}
+#'   \item{energy_score_bf}{Energy score of the BF predictive distribution
+#'   (with \eqn{\beta=1}{beta=1})}
+#'   \item{energy_score_st}{Energy score of the S&T empirical distribution
+#'   (with \eqn{\beta=1}{beta=1})}
+#'   \item{energy_improvement}{Difference in energy score:
+#'   \code{energy_score_st - energy_score_bf}}
+#'   \item{pred_elapsed_dist_by_pred}{Predicted elapsed distance (km) from
+#'   starting cell, weighted by BF predictions}
+#'   \item{pred_elapsed_dist_by_st}{Predicted elapsed distance (km) from
+#'   starting cell, weighted by S&T distribution}
+#' }
+#' @seealso [calc_interval_metrics()]
+#' @keywords internal
+calc_single_interval_metrics <- function(
+    birdflow_interval_row, bf, gcd, st_dists) {
+  # Lon/lat of the first (banding) and second (encounter) observations
+  point_df_initial <- data.frame(
+    x = birdflow_interval_row$lon1, y = birdflow_interval_row$lat1
+  )
+  point_df_final <- data.frame(
+    x = birdflow_interval_row$lon2, y = birdflow_interval_row$lat2
+  )
+
+  # One-hot distributions marking the model cell of each observation
+  d_initial <- as_distr(x = point_df_initial, bf = bf, crs = "EPSG:4326")
+  d_final <- as_distr(x = point_df_final, bf = bf, crs = "EPSG:4326")
+
+  # Cell indices of the two observations.
+  # NOTE(review): these are expected to equal birdflow_interval_row$i1 and
+  # birdflow_interval_row$i2 respectively - confirm against as_distr().
+  i_initial <- which(d_initial == 1)
+  i_final <- which(d_final == 1)
+
+  # S&T distribution for the timestep of the second observation
+  final_st_distr <- st_dists[, birdflow_interval_row$timestep2]
+
+  # BirdFlow prediction started from the one-hot banding distribution;
+  # only the last column (the encounter date) is kept.
+  preds <- predict(bf, d_initial,
+    start = birdflow_interval_row$date1,
+    end = birdflow_interval_row$date2
+  )
+  preds_final <- as.vector(preds[, ncol(preds), drop = FALSE])
+
+  # Distances from every cell to the encounter cell and to the starting cell
+  gcd_final <- gcd[, i_final]
+  gcd_initial <- gcd[, i_initial]
+
+  # Dave's distance metric: probability-weighted mean distance from each
+  # distribution to the actual encounter location
+  dist_mean_pred <- sum(preds_final * gcd_final)
+  dist_mean_st <- sum(final_st_distr * gcd_final)
+  win_distance <- dist_mean_st - dist_mean_pred
+
+  # Probability-weighted mean distance from the starting cell
+  pred_elapsed_dist_by_pred <- sum(preds_final * gcd_initial)
+  pred_elapsed_dist_by_st <- sum(final_st_distr * gcd_initial)
+
+  # Normalized distance metric
+  win_distance_fraction <- win_distance / dist_mean_st
+
+  ## YK's function
+  # For each candidate predicted cell i, the win probability is the S&T
+  # probability mass on cells strictly farther from the encounter than i.
+  # farther[i, j] is TRUE when cell j is farther than cell i; rep(each = )
+  # lays the S&T probabilities out column-wise so column j is scaled by the
+  # S&T probability of cell j.
+  farther <- outer(gcd_final, gcd_final, FUN = function(x, y) y > x)
+  win_prob_each <- rowSums(
+    farther * rep(final_st_distr, each = length(gcd_final))
+  )
+  # Average the per-cell win probability over the BF prediction
+  win_prob <- sum(win_prob_each * preds_final)
+
+  # Location index and timestep of the banding (starting) observation
+  loc_i_starting <- birdflow_interval_row$i1
+  timestep_starting <- birdflow_interval_row$timestep1
+
+  # Observed elapsed time and distance of the interval
+  elapsed_days <- as.numeric(
+    birdflow_interval_row$date2 - birdflow_interval_row$date1,
+    units = "days"
+  )
+  elapsed_km <- great_circle_distance_lonlat_input(
+    birdflow_interval_row$lat1, birdflow_interval_row$lon1,
+    birdflow_interval_row$lat2, birdflow_interval_row$lon2
+  )
+
+  # Log-likelihoods of the encounter cell; the small offset avoids log(0)
+  null_ll <- log(final_st_distr[i_final] + 1e-8)
+  ll <- log(preds_final[i_final] + 1e-8)
+
+  ## Energy Score Calculations (with beta = 1)
+  # First term: expected distance from the distribution to the observation.
+  # Second term: half the expected pairwise distance within the distribution
+  # (using the full distance matrix gcd).
+  beta <- 1
+  # For the predicted distribution:
+  first_term_pred <- sum(preds_final * (gcd_final^beta))
+  second_term_pred <- 0.5 * sum(outer(preds_final, preds_final) * (gcd^beta))
+  energy_score_pred <- first_term_pred - second_term_pred
+  # For the s&t distribution:
+  first_term_st <- sum(final_st_distr * (gcd_final^beta))
+  second_term_st <- 0.5 *
+    sum(outer(final_st_distr, final_st_distr) * (gcd^beta))
+  energy_score_st <- first_term_st - second_term_st
+
+  # NOTE(review): the divisor 52 is presumably the number of weekly
+  # timesteps in a year - confirm for models with a different number.
+  global_prob_of_the_starting <- as.numeric(
+    bf$distr[loc_i_starting, timestep_starting] / 52
+  )
+
+  c(
+    pred = dist_mean_pred, st = dist_mean_st,
+    win_prob = win_prob,
+    win_distance = win_distance,
+    win_distance_fraction = win_distance_fraction,
+    global_prob_of_the_starting = global_prob_of_the_starting,
+    elapsed_days = elapsed_days,
+    elapsed_km = elapsed_km,
+    null_ll = null_ll,
+    ll = ll,
+    energy_score_bf = energy_score_pred,
+    energy_score_st = energy_score_st,
+    energy_improvement = energy_score_st - energy_score_pred,
+    pred_elapsed_dist_by_pred = pred_elapsed_dist_by_pred,
+    pred_elapsed_dist_by_st = pred_elapsed_dist_by_st
+  )
+}
+
+
+#' Calculate interval metrics
+#'
+#' Calculate interval-based validation metrics, including distance,
+#' likelihood, and energy-score metrics, for all transition pairs in a
+#' BirdFlowIntervals object.
+#'
+#' @param birdflow_intervals A `BirdFlowIntervals` object containing
+#' transition data. It must contain at least one interval.
+#' @param bf A fitted `BirdFlow` model
+#'
+#' @return A list with two elements, named `metrics` and `per_interval`
+#' (in that order):
+#' \describe{
+#'   \item{metrics}{A named numeric vector of summary metrics across all
+#'   intervals:
+#'     \describe{
+#'       \item{mean_pred}{Mean weighted average distance (km) from
+#'       BF predictions}
+#'       \item{mean_st}{Mean weighted average distance (km) from S&T
+#'       distributions}
+#'       \item{mean_win_prob}{Mean win probability (BF vs. S&T)}
+#'       \item{mean_win_distance}{Mean absolute distance improvement (km)}
+#'       \item{mean_win_distance_fraction}{Mean normalized distance improvement}
+#'       \item{mean_global_prob_of_the_starting}{Mean relative abundance at
+#'       start cells}
+#'       \item{mean_elapsed_days}{Mean elapsed days per interval}
+#'       \item{mean_elapsed_km}{Mean observed great-circle distance (km)}
+#'       \item{mean_null_ll}{Mean log-likelihood under the S&T null
+#'       distribution}
+#'       \item{mean_ll}{Mean log-likelihood under the BF prediction}
+#'       \item{mean_energy_score_bf}{Mean energy score of BF predictions}
+#'       \item{mean_energy_score_st}{Mean energy score of S&T distributions}
+#'       \item{mean_energy_improvement}{Mean difference in energy score}
+#'       \item{mean_pred_elapsed_dist_by_pred}{Mean predicted elapsed distance
+#'       by BF}
+#'       \item{mean_pred_elapsed_dist_by_st}{Mean predicted elapsed distance
+#'       by S&T}
+#'       \item{weighted_mean_win_prob}{Global-abundance-weighted mean win
+#'       probability}
+#'       \item{weighted_mean_win_distance}{Global-abundance-weighted mean win
+#'       distance}
+#'       \item{weighted_mean_win_distance_fraction}{Global-abundance-weighted
+#'       mean distance fraction}
+#'       \item{weighted_mean_null_ll}{Global-abundance-weighted mean null
+#'       log-likelihood}
+#'       \item{weighted_mean_ll}{Global-abundance-weighted mean log-likelihood}
+#'       \item{weighted_energy_improvement}{Global-abundance-weighted mean
+#'       energy improvement}
+#'       \item{n_intervals}{Number of transition pairs evaluated}
+#'     }
+#'   }
+#'   \item{per_interval}{A `data.frame` of the raw, per-transition metrics
+#'   (same fields as above without the "mean_" prefix), one row per interval}
+#' }
+#' @export
+#' @examples
+#' route_df <- data.frame(
+#' route_id = c("001", "001", "001", "001", "001", "003", "003", "003", "004"),
+#' date = as.Date(c("2025-01-01", "2025-01-08", "2025-01-15", "2025-01-21",
+#'                 "2025-02-10", "2025-03-01", "2025-05-01", "2025-06-01",
+#'                 "2025-05-01")),
+#' lon = c(-75.0060, -75.0060, -74.0060, -87.6298, -87.6298, -87.6298,
+#'         -89.6298, -85.6298, -95.3698),
+#' lat = c(39.7128, 39.7128, 40.7128, 41.8781, 41.8781, 41.8781,
+#'         42.8781, 40.8781, 29.7604),
+#' route_type = c("tracking", "tracking", "tracking", "tracking",
+#'                "tracking", "motus", "motus", "motus", "motus")
+#' )
+#'
+#' bf <- BirdFlowModels::amewoo
+#' species1 <- bf$species
+#' source1 <- "Testing"
+#'
+#' my_routes <- Routes(route_df,
+#'                     species = species1,
+#'                     source = source1
+#' )
+#' my_bfroutes <- as_BirdFlowRoutes(my_routes, bf = bf)
+#'
+#' # Constraints
+#' min_day <- 7
+#' max_day <- 180
+#' min_km <- 200
+#' max_km <- 8000
+#'
+#' my_intervals <- as_BirdFlowIntervals(my_bfroutes,
+#'                                      max_n = 1000,
+#'                                      min_day_interval = min_day,
+#'                                      max_day_interval = max_day,
+#'                                      min_km_interval = min_km,
+#'                                      max_km_interval = max_km
+#' )
+#'
+#' eval_res <- calc_interval_metrics(my_intervals, bf)
+#' single_value_outputs <- eval_res$metrics
+#' transition_level_outputs <- eval_res$per_interval
+calc_interval_metrics <- function(birdflow_intervals, bf) {
+  intervals <- birdflow_intervals$data
+  n_intervals <- nrow(intervals)
+
+  # Fail early with a clear message; the summaries below are undefined
+  # without at least one interval.
+  if (is.null(n_intervals) || n_intervals == 0) {
+    stop("birdflow_intervals does not contain any intervals.", call. = FALSE)
+  }
+
+  # weekly distributions directly from S&T
+  st_dists <- get_distr(bf, which = "all", from_marginals = FALSE)
+
+  # Great circle distances between cells
+  gcd <- great_circle_distances(bf)
+
+  # Calculate the metrics one interval (row) at a time. lapply() plus rbind
+  # gives one row per interval regardless of how many intervals there are.
+  per_interval <- lapply(
+    split(intervals, seq_len(n_intervals)),
+    calc_single_interval_metrics,
+    bf = bf, gcd = gcd, st_dists = st_dists
+  )
+  dists <- as.data.frame(do.call(rbind, per_interval))
+
+  # Unweighted means of every per-interval metric
+  output <- colMeans(dists)
+  names(output) <- paste0("mean_", names(output))
+
+  # Weights proportional to the relative abundance at each starting cell
+  start_abundance <- dists$global_prob_of_the_starting
+  weights <- start_abundance / sum(start_abundance)
+  weighted_mean <- function(x) sum(weights * x)
+
+  output <- c(
+    output,
+    weighted_mean_win_prob = weighted_mean(dists$win_prob),
+    weighted_mean_win_distance = weighted_mean(dists$win_distance),
+    weighted_mean_win_distance_fraction =
+      weighted_mean(dists$win_distance_fraction),
+    weighted_mean_null_ll = weighted_mean(dists$null_ll),
+    weighted_mean_ll = weighted_mean(dists$ll),
+    weighted_energy_improvement = weighted_mean(dists$energy_improvement),
+    n_intervals = n_intervals
+  )
+
+  # Element order is unchanged, so positional access ([[1]], [[2]]) still
+  # works; the names match the documented return value.
+  list(metrics = output, per_interval = dists)
+}
@@ -57,6 +57,8 @@
 #'  states  (locations in space and time) that can be reached in the model.}
 #'
 #' }
+#' @seealso [calc_interval_metrics()] to evaluate a BirdFlow model using
+#' movement data from real birds.
 #'
 #' @examples
 #'  bf <- BirdFlowModels::amewoo
@@ -128,6 +130,7 @@ distribution_performance <- function(x, metrics = NULL, ...) {
       marginal_start_distr <- get_distr(x, from, from_marginals = TRUE)
       start_dm <- get_dynamic_mask(x, from)
       distr_cor[i] <- cor(start_distr[start_dm], marginal_start_distr[start_dm])
+
       distr_states[i] <- sum(marginal_start_distr != 0)
 
       # Calculate single step projection correlations
@@ -164,11 +167,13 @@ distribution_performance <- function(x, metrics = NULL, ...) {
 
     projected <- projected[, , dim(projected)[3]] # subset to last timestep
     end_dm <- get_dynamic_mask(x, end)
+
     # Two traverse correlations
     # "st_" starts with eBird S&T distribution
     st_traverse_cor <- cor(end_distr[end_dm], projected[end_dm, 1])
     # "md_" starts with marginal distribution
     md_traverse_cor <- cor(end_distr[end_dm], projected[end_dm, 2])
+
   } # end traverse
 
   result <- list(mean_step_cor = mean_step_cor,