merge pr #502: fix bug in prop.test() wrapper

simonpcouch · web-flow · commit 8a22cd6baae1 · 2023-09-05T13:12:49.000-04:00
diff --git a/NEWS.md b/NEWS.md
@@ -7,6 +7,12 @@
 
 * Newly accommodates variables with spaces in names in the wrapper functions `t_test()` and `prop_test()` (#472).
 
+* Fixed bug in two-sample `prop_test()` where the response and explanatory
+  variables were passed in place of each other to `prop.test()`. This enables
+  using `prop_test()` with explanatory variables with more than 2 levels and,
+  in the process, addresses a bug where `prop_test()` collapsed levels other than
+  the `success` level when the response variable had more than 2 levels.
+
 # infer v1.0.4
 
 * Fixed bug in p-value shading where shaded regions no longer correctly overlaid
diff --git a/R/wrappers.R b/R/wrappers.R
@@ -417,6 +417,21 @@ check_conf_level <- function(conf_level, call = caller_env()) {
 #'   to see this connection.
 #' @param ... Additional arguments for [prop.test()][stats::prop.test()].
 #'
+#' @details
+#' When testing with an explanatory variable with more than two levels, the
+#' `order` argument as used in the package is no longer well-defined. If a
+#' non-NULL `order` is supplied in that case, the function raises a warning
+#' and ignores the value.
+#'
+#' The columns present in the output depend on the output of both [prop.test()]
+#' and [broom::glance.htest()]. See the latter's documentation for column
+#' definitions; columns have been renamed with the following mapping:
+#'
+#' * `chisq_df` = `parameter`
+#' * `p_value` = `p.value`
+#' * `lower_ci` = `conf.low`
+#' * `upper_ci` = `conf.high`
+#'
 #' @examples
 #' # two-sample proportion test for difference in proportions of
 #' # college completion by respondent sex
@@ -483,6 +498,11 @@ prop_test <- function(x, formula,
   # process "success" arg
   lvls <- levels(factor(response_variable(x)))
 
+  if (length(lvls) > 2) {
+     abort(glue("This test is not defined for response variables \\
+                 with more than 2 levels."))
+  }
+
   if (!is.null(success)) {
     check_type(success, rlang::is_string)
 
@@ -497,16 +517,25 @@ prop_test <- function(x, formula,
 
   # two sample
   if (has_explanatory(x)) {
-
-    order <- check_order(x, order, in_calculate = FALSE, stat = NULL)
-
     # make a summary table to supply to prop.test
     sum_table <- x %>%
-      select(response_name(x), explanatory_name(x)) %>%
-      table()
-
-    # reorder according to the order and success arguments
-    sum_table <- sum_table[lvls, order]
+       select(explanatory_name(x), response_name(x)) %>%
+       table()
+
+    length_exp_levels <- length(levels(explanatory_variable(x)))
+    if (length_exp_levels == 2) {
+       order <- check_order(x, order, in_calculate = FALSE, stat = NULL)
+       # reorder according to the order and success arguments
+       sum_table <- sum_table[order, lvls]
+    } else if (length_exp_levels >= 3 && !is.null(order)) {
+       warn(glue(
+            "The `order` argument will be ignored as it is not well-defined \\
+             for explanatory variables with more than 2 levels. ",
+            "To silence this message, avoid passing the `order` argument."
+       ))
+       # reorder according to the success argument
+       sum_table <- sum_table[, lvls]
+    }
 
     prelim <- stats::prop.test(x = sum_table,
                                alternative = alternative,
diff --git a/man/prop_test.Rd b/man/prop_test.Rd
diff --git a/tests/testthat/_snaps/wrappers.md b/tests/testthat/_snaps/wrappers.md
@@ -199,6 +199,30 @@
       Error in `prop_test()`:
       ! b is not a valid level of resp.
 
+# prop_test handles >2 explanatory levels gracefully
+
+    Code
+      res_2 <- prop_test(dfr, resp ~ exp, order = c("a", "b"))
+    Condition
+      Warning:
+      The `order` argument will be ignored as it is not well-defined for explanatory variables with more than 2 levels. To silence this message, avoid passing the `order` argument.
+
+---
+
+    Code
+      res_3 <- prop_test(dfr, resp ~ exp, order = c("a", "b", "c"))
+    Condition
+      Warning:
+      The `order` argument will be ignored as it is not well-defined for explanatory variables with more than 2 levels. To silence this message, avoid passing the `order` argument.
+
+# prop_test errors with >2 response levels
+
+    Code
+      res_1 <- prop_test(dfr, resp ~ exp)
+    Condition
+      Error in `prop_test()`:
+      ! This test is not defined for response variables with more than 2 levels.
+
 # wrappers can handled ordered factors
 
     Code
diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R
@@ -258,11 +258,11 @@ test_that("conf_int argument works", {
 })
 
 # generate some data to test the prop.test wrapper
-df <- data.frame(resp = c(rep("c", 450),
+df <- data.frame(exp = rep(c("a", "b"), each = 500),
+                 resp = c(rep("c", 450),
                           rep("d", 50),
                           rep("c", 400),
                           rep("d", 100)),
-                 exp = rep(c("a", "b"), each = 500),
                  stringsAsFactors = FALSE)
 
 sum_df <- table(df)
@@ -384,17 +384,59 @@ test_that("prop_test output dimensionality is correct", {
                                      conf_int = FALSE)
   infer_2_sample_z <- prop_test(df, resp ~ exp, order = c("a", "b"), z = TRUE)
 
-  # introduce a third response level
-  df$resp[c(1:10, 490:510, 990:1000)] <- "e"
-
-  infer_3_sample <- prop_test(df, resp ~ exp, order = c("a", "b"))
-
   expect_length(infer_1_sample, 4)
   expect_length(infer_1_sample, length(infer_1_sample_z) + 1)
   expect_length(infer_2_sample, 6)
   expect_length(infer_2_sample_no_int, 4)
   expect_length(infer_2_sample_z, length(infer_2_sample) - 1)
-  expect_length(infer_3_sample, 3)
+})
+
+test_that("prop_test handles >2 explanatory levels gracefully", {
+   set.seed(1)
+   dfr <-
+      tibble::tibble(
+         exp = sample(c("a", "b", "c"), 100, replace = TRUE),
+         resp = sample(c("d", "e"), 100, replace = TRUE)
+      )
+
+   res_old <- prop.test(table(dfr))
+
+   # don't pass order
+   expect_silent(
+      res_1 <- prop_test(dfr, resp ~ exp)
+   )
+
+   # pass 2-length order
+   expect_snapshot(
+      res_2 <- prop_test(dfr, resp ~ exp, order = c("a", "b"))
+   )
+
+   # pass 3-length order
+   expect_snapshot(
+      res_3 <- prop_test(dfr, resp ~ exp, order = c("a", "b", "c"))
+   )
+
+   expect_equal(res_1, res_2)
+   expect_equal(res_2, res_3)
+
+   expect_named(res_1, c("statistic", "chisq_df", "p_value"))
+   expect_equal(res_1$statistic, res_old$statistic)
+   expect_equal(res_1$chisq_df, res_old$parameter)
+   expect_equal(res_1$p_value, res_old$p.value)
+})
+
+test_that("prop_test errors with >2 response levels", {
+   set.seed(1)
+   dfr <-
+      tibble::tibble(
+         exp = sample(c("a", "b"), 100, replace = TRUE),
+         resp = sample(c("c", "d", "e"), 100, replace = TRUE)
+      )
+
+   expect_snapshot(
+      error = TRUE,
+      res_1 <- prop_test(dfr, resp ~ exp)
+   )
 })
 
 test_that("prop_test z argument works as expected", {