Skip to content

Commit 3866325

Browse files
authored
handle missing values when calculating confidence intervals (#521)
1 parent e5095f0 commit 3866325

File tree

4 files changed

+52
-5
lines changed

4 files changed

+52
-5
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# infer v1.0.5.9000 (development version)
22

3+
* Fixed bug where `get_confidence_interval()` would error uninformatively when the supplied distribution of estimates contained missing values. The function will now warn and return a confidence interval calculated using the non-missing estimates.
4+
35
* Updated infrastructure for errors, warnings, and messages (#513). Most of these changes will not be visible to users, though:
46
- Many longer error messages are now broken up into several lines.
57
- For references to help-files, users can now click on the error message's text to navigate to the cited documentation.

R/get_confidence_interval.R

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,23 @@ switch_ci <- function(type, x, level, point_estimate) {
227227
)
228228
}
229229

230+
# Drop missing values from a vector of estimates before an interval is
# computed from it.
#
# `estimates` is the vector of statistics to clean. Every entry for which
# `is.na()` is TRUE is removed. When at least one entry is removed, a warning
# stating how many were dropped is signalled with `cli_warn()`.
#
# Returns `estimates` with the missing entries removed.
remove_missing_estimates <- function(estimates) {
  na_estimates <- is.na(estimates)
  na_estimates_n <- sum(na_estimates)

  if (na_estimates_n > 0) {
    # cli pluralization markup keeps the message grammatical for a single
    # missing estimate ("1 estimate was ...") as well as for several
    # ("4 estimates were ...").
    cli_warn("{na_estimates_n} estimate{?s} {?was/were} missing and \\
              {?was/were} removed when calculating the confidence interval.")
  }

  estimates[!na_estimates]
}
241+
230242
ci_percentile <- function(x, level) {
231243
# x[[ncol(x)]] pulls out the stat or estimate column
232-
ci_vec <- stats::quantile(x[[ncol(x)]], probs = (1 + c(-level, level)) / 2)
244+
estimates <- remove_missing_estimates(x[[ncol(x)]])
245+
246+
ci_vec <- stats::quantile(estimates, probs = (1 + c(-level, level)) / 2)
233247

234248
make_ci_df(ci_vec)
235249
}
@@ -247,7 +261,9 @@ ci_se <- function(x, level, point_estimate) {
247261
}
248262
} else {
249263
# x[[ncol(x)]] pulls out the stat or estimate column
250-
se <- stats::sd(x[[ncol(x)]])
264+
estimates <- remove_missing_estimates(x[[ncol(x)]])
265+
se <- stats::sd(estimates)
266+
251267
qfn <- "qnorm"
252268
}
253269

@@ -269,14 +285,16 @@ ci_bias_corrected <- function(x, level, point_estimate) {
269285
point_estimate <- check_obs_stat(point_estimate)
270286

271287
# x[[ncol(x)]] pulls out the stat or estimate column
272-
p <- mean(x[[ncol(x)]] <= point_estimate)
288+
estimates <- remove_missing_estimates(x[[ncol(x)]])
289+
290+
p <- mean(estimates <= point_estimate)
291+
273292
z0 <- stats::qnorm(p)
274293
# z_alpha_2 is z_(alpha/2)
275294
z_alpha_2 <- stats::qnorm((1 + c(-level, level)) / 2)
276295
new_probs <- stats::pnorm(2 * z0 + z_alpha_2)
277296

278-
# x[[ncol(x)]] pulls out the stat or estimate column
279-
ci_vec <- stats::quantile(x[[ncol(x)]], probs = new_probs)
297+
ci_vec <- stats::quantile(estimates, probs = new_probs)
280298

281299
make_ci_df(ci_vec)
282300
}

tests/testthat/_snaps/get_confidence_interval.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,3 +175,11 @@
175175
Error in `get_confidence_interval()`:
176176
! Confidence intervals using a `z` distribution for `stat = mean` are not implemented.
177177

178+
# handles missing values gracefully (#520)
179+
180+
Code
181+
res <- get_confidence_interval(boot_dist, 0.95)
182+
Condition
183+
Warning:
184+
4 estimates were missing and were removed when calculating the confidence interval.
185+

tests/testthat/test-get_confidence_interval.R

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,3 +471,22 @@ test_that("theoretical CIs check arguments properly", {
471471
)
472472
)
473473
})
474+
475+
test_that("handles missing values gracefully (#520)", {
  # Small two-group data set. The recorded snapshot for this test shows that,
  # with this seed, some of the bootstrap estimates come out missing.
  data <- data.frame(
    prop = seq(0, 1, length.out = 10),
    group = rep(c("a", "b"), each = 5L)
  )

  set.seed(1)
  boot_dist <-
    data %>%
    specify(prop ~ group) %>%
    hypothesize(null = "independence") %>%
    generate(reps = 1000, type = "bootstrap") %>%
    calculate(stat = "diff in medians", order = c("b", "a"))

  # The snapshot pins the warning about the removed estimates.
  expect_snapshot(res <- get_confidence_interval(boot_dist, .95))

  expect_s3_class(res, "data.frame")
  # The interval must be computed from the non-missing estimates, so neither
  # endpoint may itself be missing.
  expect_false(anyNA(res))
})

0 commit comments

Comments
 (0)