Skip to content

Commit 8a22cd6

Browse files
authored
merge pr #502: fix bug in prop.test() wrapper
2 parents d93c3dc + 9dc4990 commit 8a22cd6

File tree

5 files changed

+133
-16
lines changed

5 files changed

+133
-16
lines changed

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77

88
* Newly accommodates variables with spaces in names in the wrapper functions `t_test()` and `prop_test()` (#472).
99

10+
* Fixed bug in two-sample `prop_test()` where the response and explanatory
11+
variable were passed in place of each other to `prop.test()`. This enables
12+
using `prop_test()` with explanatory variables with greater than 2 levels and,
13+
in the process, addresses a bug where `prop_test()` collapsed levels other than
14+
the `success` when the response variable had more than 2 levels.
15+
1016
# infer v1.0.4
1117

1218
* Fixed bug in p-value shading where shaded regions no longer correctly overlaid

R/wrappers.R

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,21 @@ check_conf_level <- function(conf_level, call = caller_env()) {
417417
#' to see this connection.
418418
#' @param ... Additional arguments for [prop.test()][stats::prop.test()].
419419
#'
420+
#' @details
421+
#' When testing with an explanatory variable with more than two levels, the
422+
#' `order` argument as used in the package is no longer well-defined. The function
423+
#' will thus raise a warning and ignore the value if supplied a non-NULL `order`
424+
#' argument.
425+
#'
426+
#' The columns present in the output depend on the output of both [prop.test()]
427+
#' and [broom::glance.htest()]. See the latter's documentation for column
428+
#' definitions; columns have been renamed with the following mapping:
429+
#'
430+
#' * `chisq_df` = `parameter`
431+
#' * `p_value` = `p.value`
432+
#' * `lower_ci` = `conf.low`
433+
#' * `upper_ci` = `conf.high`
434+
#'
420435
#' @examples
421436
#' # two-sample proportion test for difference in proportions of
422437
#' # college completion by respondent sex
@@ -483,6 +498,11 @@ prop_test <- function(x, formula,
483498
# process "success" arg
484499
lvls <- levels(factor(response_variable(x)))
485500

501+
if (length(lvls) > 2) {
502+
abort(glue("This test is not defined for response variables \\
503+
with more than 2 levels."))
504+
}
505+
486506
if (!is.null(success)) {
487507
check_type(success, rlang::is_string)
488508

@@ -497,16 +517,25 @@ prop_test <- function(x, formula,
497517

498518
# two sample
499519
if (has_explanatory(x)) {
500-
501-
order <- check_order(x, order, in_calculate = FALSE, stat = NULL)
502-
503520
# make a summary table to supply to prop.test
504521
sum_table <- x %>%
505-
select(response_name(x), explanatory_name(x)) %>%
506-
table()
507-
508-
# reorder according to the order and success arguments
509-
sum_table <- sum_table[lvls, order]
522+
select(explanatory_name(x), response_name(x)) %>%
523+
table()
524+
525+
length_exp_levels <- length(levels(explanatory_variable(x)))
526+
if (length_exp_levels == 2) {
527+
order <- check_order(x, order, in_calculate = FALSE, stat = NULL)
528+
# reorder according to the order and success arguments
529+
sum_table <- sum_table[order, lvls]
530+
} else if (length_exp_levels >= 3 && !is.null(order)) {
531+
warn(glue(
532+
"The `order` argument will be ignored as it is not well-defined \\
533+
for explanatory variables with more than 2 levels. ",
534+
"To silence this message, avoid passing the `order` argument."
535+
))
536+
# reorder according to the success argument
537+
sum_table <- sum_table[, lvls]
538+
}
510539

511540
prelim <- stats::prop.test(x = sum_table,
512541
alternative = alternative,

man/prop_test.Rd

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/_snaps/wrappers.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,30 @@
199199
Error in `prop_test()`:
200200
! b is not a valid level of resp.
201201

202+
# prop_test handles >2 explanatory levels gracefully
203+
204+
Code
205+
res_2 <- prop_test(dfr, resp ~ exp, order = c("a", "b"))
206+
Condition
207+
Warning:
208+
The `order` argument will be ignored as it is not well-defined for explanatory variables with more than 2 levels. To silence this message, avoid passing the `order` argument.
209+
210+
---
211+
212+
Code
213+
res_3 <- prop_test(dfr, resp ~ exp, order = c("a", "b", "c"))
214+
Condition
215+
Warning:
216+
The `order` argument will be ignored as it is not well-defined for explanatory variables with more than 2 levels. To silence this message, avoid passing the `order` argument.
217+
218+
# prop_test errors with >2 response levels
219+
220+
Code
221+
res_1 <- prop_test(dfr, resp ~ exp)
222+
Condition
223+
Error in `prop_test()`:
224+
! This test is not defined for response variables with more than 2 levels.
225+
202226
# wrappers can handled ordered factors
203227

204228
Code

tests/testthat/test-wrappers.R

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -258,11 +258,11 @@ test_that("conf_int argument works", {
258258
})
259259

260260
# generate some data to test the prop.test wrapper
261-
df <- data.frame(resp = c(rep("c", 450),
261+
df <- data.frame(exp = rep(c("a", "b"), each = 500),
262+
resp = c(rep("c", 450),
262263
rep("d", 50),
263264
rep("c", 400),
264265
rep("d", 100)),
265-
exp = rep(c("a", "b"), each = 500),
266266
stringsAsFactors = FALSE)
267267

268268
sum_df <- table(df)
@@ -384,17 +384,59 @@ test_that("prop_test output dimensionality is correct", {
384384
conf_int = FALSE)
385385
infer_2_sample_z <- prop_test(df, resp ~ exp, order = c("a", "b"), z = TRUE)
386386

387-
# introduce a third response level
388-
df$resp[c(1:10, 490:510, 990:1000)] <- "e"
389-
390-
infer_3_sample <- prop_test(df, resp ~ exp, order = c("a", "b"))
391-
392387
expect_length(infer_1_sample, 4)
393388
expect_length(infer_1_sample, length(infer_1_sample_z) + 1)
394389
expect_length(infer_2_sample, 6)
395390
expect_length(infer_2_sample_no_int, 4)
396391
expect_length(infer_2_sample_z, length(infer_2_sample) - 1)
397-
expect_length(infer_3_sample, 3)
392+
})
393+
394+
test_that("prop_test handles >2 explanatory levels gracefully", {
395+
set.seed(1)
396+
dfr <-
397+
tibble::tibble(
398+
exp = sample(c("a", "b", "c"), 100, replace = TRUE),
399+
resp = sample(c("d", "e"), 100, replace = TRUE)
400+
)
401+
402+
res_old <- prop.test(table(dfr))
403+
404+
# don't pass order
405+
expect_silent(
406+
res_1 <- prop_test(dfr, resp ~ exp)
407+
)
408+
409+
# pass 2-length order
410+
expect_snapshot(
411+
res_2 <- prop_test(dfr, resp ~ exp, order = c("a", "b"))
412+
)
413+
414+
# pass 3-length order
415+
expect_snapshot(
416+
res_3 <- prop_test(dfr, resp ~ exp, order = c("a", "b", "c"))
417+
)
418+
419+
expect_equal(res_1, res_2)
420+
expect_equal(res_2, res_3)
421+
422+
expect_named(res_1, c("statistic", "chisq_df", "p_value"))
423+
expect_equal(res_1$statistic, res_old$statistic)
424+
expect_equal(res_1$chisq_df, res_old$parameter)
425+
expect_equal(res_1$p_value, res_old$p.value)
426+
})
427+
428+
test_that("prop_test errors with >2 response levels", {
429+
set.seed(1)
430+
dfr <-
431+
tibble::tibble(
432+
exp = sample(c("a", "b"), 100, replace = TRUE),
433+
resp = sample(c("c", "d", "e"), 100, replace = TRUE)
434+
)
435+
436+
expect_snapshot(
437+
error = TRUE,
438+
res_1 <- prop_test(dfr, resp ~ exp)
439+
)
398440
})
399441

400442
test_that("prop_test z argument works as expected", {

0 commit comments

Comments
 (0)