Skip to content

Commit 4a9ee02

Browse files
Merge pull request #218 from chenyangkang/main
Update interval-based validation metrics
2 parents c294094 + 8c1c96d commit 4a9ee02

25 files changed

+567
-396
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: BirdFlowR
22
Title: Predict and Visualize Bird Movement
3-
Version: 0.1.0.9073
3+
Version: 0.1.0.9075
44
Authors@R:
55
c(person("Ethan", "Plunkett", email = "[email protected]", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-4405-2251")),

NAMESPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ export(as_transitions)
3030
export(birdflow_options)
3131
export(build_transitions)
3232
export(calc_flux)
33+
export(calc_interval_metrics)
3334
export(calc_movement_vectors)
34-
export(calculate_interval_metrics)
3535
export(col_to_x)
3636
export(combine_transitions)
3737
export(distribution_performance)

NEWS.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
1+
# BirdFlowR 0.1.0.9075
2+
2025-05-21
3+
4+
* Finalize the interval based validation metrics in `calc_interval_metrics()`
5+
* Fixed some small documentation bugs where the arguments are not used but
6+
described.
7+
18
# BirdFlowR 0.1.0.9074
29
2025-05-13
310

4-
Update for compatabality with **ebirdst** 3.2023.0, released 2025-05-07.
11+
Update for compatibility with **ebirdst** 3.2023.0, released 2025-05-07.
512

613
**BirdFlowR** will work with either the 2022 or 2023 versions of the package.
714
BirdFlow models retain all the species metadata and distributions they were
@@ -23,7 +30,7 @@ changed a lot as well.
2330
**ebirdst** version is supported by different components of **BirdFlowR** and
2431
optionally throws an error if they aren't. This is now called from
2532
`preprocess_species()` and `lookup_species_metadata()`. This is mainly for
26-
convience when maintaining the package.
33+
convenience when maintaining the package.
2734

2835
* Testing of routes and intervals now uses the full species object from a
2936
reference BirdFlow model. This means the tests aren't calling
@@ -38,7 +45,7 @@ changed a lot as well.
3845

3946
* Cleaned up Notes generated while checking the package.
4047

41-
## Changes in **eBirdst**
48+
## Changes in **ebirdst**
4249

4350
This is NOT a complete list but here are the changes I encountered
4451
while updating the package.

R/calc_interval_metrics.R

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
# This file defines a private helper: calc_single_interval_metrics() that
2+
# works on a single interval and a public function `calc_interval_metrics()`
3+
# that process BirdFlowIntervals.
4+
5+
6+
#' Calculate interval metrics for a single interval
7+
#'
8+
#' This internal function evaluates model performance using as single interval
9+
#' which is a pair of observations of a real bird separated by at least
10+
#' a week. This is a helper function for `[calc_interval_metrics()`
11+
#'
12+
#' @param birdflow_interval_row A row of data in the `BirdFlowIntervals` object
13+
#' @param bf BirdFlow model
14+
#' @param gcd Matrix of great circle distance
15+
#' @param st_dists Matrix of S&T distribution with weeks as columns,
16+
#' location as rows, probability as values.
17+
#' @return A named vector with various metrics
18+
#' \describe{
19+
#' \item{pred}{Weighted average great-circle distance (km) from
20+
#' the BF prediction distribution to the actual encounter cell}
21+
#' \item{st}{Weighted average great-circle distance (km) from the S&T
22+
#' empirical distribution to the actual encounter cell}
23+
#' \item{win_prob}{Probability that BF is closer than S&T
24+
#' (i.e.“win” probability of BF vs. S&T)}
25+
#' \item{win_distance}{Absolute distance improvement (km): \code{st – pred}}
26+
#' \item{win_distance_fraction}{Normalized distance improvement:
27+
#' \code{(st – pred) / st}}
28+
#' \item{global_prob_of_the_starting}{Probability (relative abundance) of the
29+
#' starting cell in the BF distribution at the start date}
30+
#' \item{elapsed_days}{Elapsed time of the interval (days) between banding
31+
#' (\code{date1}) and encounter (\code{date2})}
32+
#' \item{elapsed_km}{Observed great-circle distance (km) between banding
33+
#' and encounter locations}
34+
#' \item{null_ll}{Log-likelihood of the encounter cell under the S&T
35+
#' distribution: \code{log(final_st_distr[i_final])}}
36+
#' \item{ll}{Log-likelihood of the encounter cell under the BF prediction:
37+
#' \code{log(preds_final[i_final])}}
38+
#' \item{energy_score_bf}{Energy score of the BF predictive distribution
39+
#' (with \eqn{\beta=1}{beta=1})}
40+
#' \item{energy_score_st}{Energy score of the S&T empirical distribution
41+
#' (with \eqn{\beta=1}{beta=1})}
42+
#' \item{energy_improvement}{Difference in energy score:
43+
#' \code{energy_score_st – energy_score_bf}}
44+
#' \item{pred_elapsed_dist_by_pred}{Predicted elapsed distance (km) from
45+
#' starting cell, weighted by BF predictions}
46+
#' \item{pred_elapsed_dist_by_st}{Predicted elapsed distance (km) from
47+
#' starting cell, weighted by S&T distribution}
48+
#' }
49+
#' @seealso [calc_interval_metrics()]
50+
#' @keywords internal
51+
calc_single_interval_metrics <- function(
52+
birdflow_interval_row, bf, gcd, st_dists) {
53+
# latlong data for banding and encounter location
54+
point_df_initial <- data.frame(
55+
x = birdflow_interval_row$lon1, y = birdflow_interval_row$lat1
56+
)
57+
point_df_final <- data.frame(
58+
x = birdflow_interval_row$lon2, y = birdflow_interval_row$lat2
59+
)
60+
# birdflow one-hot distributions for banding and encounter locations
61+
d_initial <- as_distr(x = point_df_initial, bf = bf, crs = "EPSG:4326")
62+
# same as birdflow_interval_row$i1
63+
d_final <- as_distr(x = point_df_final, bf = bf, crs = "EPSG:4326")
64+
# same as birdflow_interval_row$i2
65+
# get s&t distribution for final timestep
66+
final_timestep <- birdflow_interval_row$timestep2
67+
final_st_distr <- st_dists[, final_timestep]
68+
# birdflow cell index for encounter location
69+
i_final <- which(d_final == 1)
70+
# birdflow predictions from banding one-hot, for encounter date
71+
preds <- predict(bf, d_initial,
72+
start = birdflow_interval_row$date1,
73+
end = birdflow_interval_row$date2
74+
)
75+
preds_final <- preds[, ncol(preds), drop = FALSE]
76+
preds_final <- as.vector(preds_final)
77+
# subset great circle distances for cell of actual encounter location
78+
gcd_final <- gcd[, i_final]
79+
# weighted average distance from predicted encounter
80+
# distribution to actual encounter location
81+
82+
# Dave's distance metric
83+
dist_mean_pred <- sum(preds_final * gcd_final)
84+
dist_mean_st <- sum(final_st_distr * gcd_final)
85+
win_distance <- dist_mean_st - dist_mean_pred
86+
pred_elapsed_dist_by_pred <- sum(preds_final * gcd[, which(d_initial == 1)])
87+
pred_elapsed_dist_by_st <- sum(final_st_distr * gcd[, which(d_initial == 1)])
88+
89+
# Normalized distance metric
90+
win_distance_fraction <- (dist_mean_st - dist_mean_pred) / dist_mean_st
91+
92+
## YK's function
93+
# For each predicted location, calculate the win probability.
94+
# Average the win probability based on predicted probability.
95+
M <- outer(gcd_final, gcd_final, FUN = function(x, y) y > x)
96+
win_prob_each <- rowSums(M * rep(final_st_distr, each = length(gcd_final)))
97+
win_prob <- sum(win_prob_each * preds_final)
98+
99+
# get location index of banding starting point
100+
loc_i_starting <- birdflow_interval_row$i1
101+
date_starting <- birdflow_interval_row$timestep1
102+
103+
#
104+
elapsed_days <- as.numeric(
105+
birdflow_interval_row$date2 - birdflow_interval_row$date1,
106+
unit = "days"
107+
)
108+
elapsed_km <- great_circle_distance_lonlat_input(
109+
birdflow_interval_row$lat1, birdflow_interval_row$lon1,
110+
birdflow_interval_row$lat2, birdflow_interval_row$lon2
111+
)
112+
113+
# LL
114+
null_ll <- log(final_st_distr[i_final] + 1e-8)
115+
ll <- log(preds_final[i_final] + 1e-8)
116+
117+
## Energy Score Calculations (with beta = 1)
118+
beta <- 1
119+
# For the predicted distribution:
120+
first_term_pred <- sum(preds_final * (gcd_final^beta))
121+
second_term_pred <- 0.5 * sum(outer(preds_final, preds_final) * (gcd^beta))
122+
# Second term: weighted average of pairwise distances
123+
# (using full distance matrix gcd)
124+
energy_score_pred <- first_term_pred - second_term_pred
125+
# For the s&t distribution:
126+
first_term_st <- sum(final_st_distr * (gcd_final^beta))
127+
second_term_st <- 0.5 *
128+
sum(outer(final_st_distr, final_st_distr) * (gcd^beta))
129+
energy_score_st <- first_term_st - second_term_st
130+
131+
132+
# return
133+
return(c(
134+
pred = dist_mean_pred, st = dist_mean_st,
135+
win_prob = win_prob,
136+
win_distance = win_distance,
137+
win_distance_fraction = win_distance_fraction,
138+
global_prob_of_the_starting = as.numeric(
139+
bf$distr[loc_i_starting, date_starting] / 52
140+
),
141+
elapsed_days = elapsed_days,
142+
elapsed_km = elapsed_km,
143+
null_ll = null_ll,
144+
ll = ll,
145+
energy_score_bf = energy_score_pred,
146+
energy_score_st = energy_score_st,
147+
energy_improvement = energy_score_st - energy_score_pred,
148+
pred_elapsed_dist_by_pred = pred_elapsed_dist_by_pred,
149+
pred_elapsed_dist_by_st = pred_elapsed_dist_by_st
150+
))
151+
}
152+
153+
154+
#' Calculate interval metrics
155+
#'
156+
#' Calculate interval‐based validation metrics—including distance, likelihood,
157+
#' and energy‐score metrics—for all transition pairs in a BirdFlowIntervals
158+
#' object.
159+
#'
160+
#' @param birdflow_intervals A `BirdFlowIntervals` object containing
161+
#' transition data
162+
#' @param bf A fitted `BirdFlow` model
163+
#'
164+
#' @return A list with two elements:
165+
#' \describe{
166+
#' \item{metrics}{A named numeric vector of summary metrics across all
167+
#' intervals:
168+
#' \describe{
169+
#' \item{mean_pred}{Mean weighted average distance (km) from
170+
#' BF predictions}
171+
#' \item{mean_st}{Mean weighted average distance (km) from S&T
172+
#' distributions}
173+
#' \item{mean_win_prob}{Mean win probability (BF vs. S&T)}
174+
#' \item{mean_win_distance}{Mean absolute distance improvement (km)}
175+
#' \item{mean_win_distance_fraction}{Mean normalized distance improvement}
176+
#' \item{mean_global_prob_of_the_starting}{Mean relative abundance at
177+
#' start cells}
178+
#' \item{mean_elapsed_days}{Mean elapsed days per interval}
179+
#' \item{mean_elapsed_km}{Mean observed great‐circle distance (km)}
180+
#' \item{mean_null_ll}{Mean log‐likelihood under the S&T null
181+
#' distribution}
182+
#' \item{mean_ll}{Mean log‐likelihood under the BF prediction}
183+
#' \item{mean_energy_score_bf}{Mean energy score of BF predictions}
184+
#' \item{mean_energy_score_st}{Mean energy score of S&T distributions}
185+
#' \item{mean_energy_improvement}{Mean difference in energy score}
186+
#' \item{mean_pred_elapsed_dist_by_pred}{Mean predicted elapsed distance
187+
#' by BF}
188+
#' \item{mean_pred_elapsed_dist_by_st}{Mean predicted elapsed distance
189+
#' by S&T}
190+
#' \item{weighted_mean_win_prob}{Global‐abundance‐weighted mean win
191+
#' probability}
192+
#' \item{weighted_mean_win_distance}{Global‐abundance‐weighted mean win
193+
#' distance}
194+
#' \item{weighted_mean_win_distance_fraction}{Global‐abundance‐weighted
195+
#' mean distance fraction}
196+
#' \item{weighted_mean_null_ll}{Global‐abundance‐weighted mean null
197+
#' log‐likelihood}
198+
#' \item{weighted_mean_ll}{Global‐abundance‐weighted mean log‐likelihood}
199+
#' \item{weighted_energy_improvement}{Global‐abundance‐weighted mean
200+
#' energy improvement}
201+
#' \item{n_intervals}{Number of transition pairs evaluated}
202+
#' }
203+
#' }
204+
#' \item{per_interval}{A `data.frame` of the raw, per‐transition metrics
205+
#' (same fields as above without the “mean_” prefix)}
206+
#' }
207+
#' @export
208+
#' @examples
209+
#' route_df <- data.frame(
210+
#' route_id = c("001", "001", "001", "001", "001", "003", "003", "003", "004"),
211+
#' date = as.Date(c("2025-01-01", "2025-01-08", "2025-01-15", "2025-01-21",
212+
#' "2025-02-10", "2025-03-01", "2025-05-01", "2025-06-01",
213+
#' "2025-05-01")),
214+
#' lon = c(-75.0060, -75.0060, -74.0060, -87.6298, -87.6298, -87.6298,
215+
#' -89.6298, -85.6298, -95.3698),
216+
#' lat = c(39.7128, 39.7128, 40.7128, 41.8781, 41.8781, 41.8781,
217+
#' 42.8781, 40.8781, 29.7604),
218+
#' route_type = c("tracking", "tracking", "tracking", "tracking",
219+
#' "tracking", "motus", "motus", "motus", "motus")
220+
#' )
221+
#'
222+
#' bf <- BirdFlowModels::amewoo
223+
#' species1 <- bf$species
224+
#' source1 <- "Testing"
225+
#'
226+
#' my_routes <- Routes(route_df,
227+
#' species = species1,
228+
#' source = source1
229+
#' )
230+
#' my_bfroutes <- as_BirdFlowRoutes(my_routes, bf = bf)
231+
#'
232+
#' # Constraints
233+
#' min_day <- 7
234+
#' max_day <- 180
235+
#' min_km <- 200
236+
#' max_km <- 8000
237+
#'
238+
#' my_intervals <- as_BirdFlowIntervals(my_bfroutes,
239+
#' max_n = 1000,
240+
#' min_day_interval = min_day,
241+
#' max_day_interval = max_day,
242+
#' min_km_interval = min_km,
243+
#' max_km_interval = max_km
244+
#' )
245+
#'
246+
#' eval_res <- calc_interval_metrics(my_intervals, bf)
247+
#' single_value_outputs <- eval_res[[1]]
248+
#' transition_level_outputs <- eval_res[[2]]
249+
calc_interval_metrics <- function(birdflow_intervals, bf) {
250+
# weekly distributions directly from S&T
251+
st_dists <- get_distr(bf, which = "all", from_marginals = FALSE)
252+
253+
# Great circle distances between cells
254+
gcd <- great_circle_distances(bf)
255+
256+
# Calculate distance metric & ll
257+
dists <- sapply(
258+
split(birdflow_intervals$data, seq_len(nrow(birdflow_intervals$data))),
259+
calc_single_interval_metrics, bf, gcd, st_dists
260+
)
261+
dists <- t(dists)
262+
dists <- as.data.frame(dists)
263+
264+
n_intervals <- nrow(birdflow_intervals$data)
265+
266+
output <- colMeans(dists)
267+
names(output) <- paste0("mean_", names(output))
268+
269+
output <-
270+
c(
271+
output,
272+
c(
273+
weighted_mean_win_prob = sum(
274+
(dists$global_prob_of_the_starting /
275+
sum(dists$global_prob_of_the_starting)
276+
) * dists$win_prob
277+
),
278+
weighted_mean_win_distance = sum(
279+
(
280+
dists$global_prob_of_the_starting /
281+
sum(dists$global_prob_of_the_starting)
282+
) * dists$win_distance
283+
),
284+
weighted_mean_win_distance_fraction = sum(
285+
(
286+
dists$global_prob_of_the_starting /
287+
sum(dists$global_prob_of_the_starting)
288+
) * dists$win_distance_fraction
289+
),
290+
weighted_mean_null_ll = sum(
291+
(dists$global_prob_of_the_starting /
292+
sum(dists$global_prob_of_the_starting)
293+
) * dists$null_ll
294+
),
295+
weighted_mean_ll = sum(
296+
(dists$global_prob_of_the_starting /
297+
sum(dists$global_prob_of_the_starting)
298+
) * dists$ll
299+
),
300+
weighted_energy_improvement = sum(
301+
(dists$global_prob_of_the_starting /
302+
sum(dists$global_prob_of_the_starting)
303+
) * dists$energy_improvement
304+
),
305+
n_intervals = n_intervals
306+
)
307+
)
308+
309+
return(list(output, dists))
310+
}

R/distribution_performance.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
#' states (locations in space and time) that can be reached in the model.}
5858
#'
5959
#' }
60+
#' @seealso [calc_interval_metrics()] to evaluate a BirdFlow model using
61+
#' movement data from real birds.
6062
#'
6163
#' @examples
6264
#' bf <- BirdFlowModels::amewoo
@@ -128,6 +130,7 @@ distribution_performance <- function(x, metrics = NULL, ...) {
128130
marginal_start_distr <- get_distr(x, from, from_marginals = TRUE)
129131
start_dm <- get_dynamic_mask(x, from)
130132
distr_cor[i] <- cor(start_distr[start_dm], marginal_start_distr[start_dm])
133+
131134
distr_states[i] <- sum(marginal_start_distr != 0)
132135

133136
# Calculate single step projection correlations
@@ -164,11 +167,13 @@ distribution_performance <- function(x, metrics = NULL, ...) {
164167

165168
projected <- projected[, , dim(projected)[3]] # subset to last timestep
166169
end_dm <- get_dynamic_mask(x, end)
170+
167171
# Two traverse correlations
168172
# "st_" starts with eBird S&T distribution
169173
st_traverse_cor <- cor(end_distr[end_dm], projected[end_dm, 1])
170174
# "md_" starts with marginal distribution
171175
md_traverse_cor <- cor(end_distr[end_dm], projected[end_dm, 2])
176+
172177
} # end traverse
173178

174179
result <- list(mean_step_cor = mean_step_cor,

0 commit comments

Comments
 (0)