Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DO NOT MERGE: Remove subset and subassign methods #1469

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
# Generated by roxygen2: do not edit by hand

S3method("$",tbl_df)
S3method("$<-",tbl_df)
S3method("[",tbl_df)
S3method("[<-",tbl_df)
S3method("[[",tbl_df)
S3method("[[<-",tbl_df)
S3method("names<-",tbl_df)
S3method("row.names<-",tbl_df)
S3method(as.data.frame,tbl_df)
Expand Down
301 changes: 0 additions & 301 deletions R/subsetting.R
Original file line number Diff line number Diff line change
@@ -1,304 +1,3 @@
#' Subsetting tibbles
#'
#' @description
#' Accessing columns, rows, or cells via `$`, `[[`, or `[` is mostly similar to
#' [regular data frames][base::Extract]. However, the
#' behavior is different for tibbles and data frames in some cases:
#' * `[` always returns a tibble by default, even if
#' only one column is accessed.
#' * Partial matching of column names with `$` and `[[` is not supported, and
#' `NULL` is returned.
#' For `$`, a warning is given.
#' * Only scalars (vectors of length one) or vectors with the
#' same length as the number of rows can be used for assignment.
#' * Rows outside of the tibble's boundaries cannot be accessed.
#' * When updating with `[[<-` and `[<-`, type changes of entire columns are
#' supported, but updating a part of a column requires that the new value is
#' coercible to the existing type.
#' See [vec_slice()] for the underlying implementation.
#'
#' Unstable return type and implicit partial matching can lead to surprises and
#' bugs that are hard to catch. If you rely on code that requires the original
#' data frame behavior, coerce to a data frame via [as.data.frame()].
#'
#' @details
#' For better compatibility with older code written for regular data frames,
#' `[` supports a `drop` argument which defaults to `FALSE`.
#' New code should use `[[` to turn a column into a vector.
#'
#' @param x A tibble.
#' @param value A value to store in a row, column, range or cell.
#' Tibbles are stricter than data frames in what is accepted here.
#'
#' @name subsetting
#' @examples
#' df <- data.frame(a = 1:3, bc = 4:6)
#' tbl <- tibble(a = 1:3, bc = 4:6)
#'
#' # Subsetting single columns:
#' df[, "a"]
#' tbl[, "a"]
#' tbl[, "a", drop = TRUE]
#' as.data.frame(tbl)[, "a"]
#'
#' # Subsetting single rows with the drop argument:
#' df[1, , drop = TRUE]
#' tbl[1, , drop = TRUE]
#' as.list(tbl[1, ])
#'
#' @examplesIf (Sys.getenv("NOT_CRAN") != "true" || Sys.getenv("IN_PKGDOWN") == "true")
#' # Accessing non-existent columns:
#' df$b
#' tbl$b
#'
#' df[["b", exact = FALSE]]
#' tbl[["b", exact = FALSE]]
#'
#' df$bd <- c("n", "e", "w")
#' tbl$bd <- c("n", "e", "w")
#' df$b
#' tbl$b
#' @examples
#'
#' df$b <- 7:9
#' tbl$b <- 7:9
#' df$b
#' tbl$b
#'
#' # Identical behavior:
#' tbl[1, ]
#' tbl[1, c("bc", "a")]
#' tbl[, c("bc", "a")]
#' tbl[c("bc", "a")]
#' tbl["a"]
#' tbl$a
#' tbl[["a"]]
NULL

#' @rdname subsetting
#' @param name A [name] or a string.
#' @export
`$.tbl_df` <- function(x, name) {
out <- .subset2(x, name)
if (is.null(out)) {
warn(paste0("Unknown or uninitialised column: ", tick(name), "."))
}
out
}


#' @rdname subsetting
#' @export
`$<-.tbl_df` <- function(x, name, value) {
tbl_subassign(
x,
i = NULL, as_string(name), list(value),
i_arg = NULL, j_arg = name, value_arg = substitute(value)
)
}

#' @rdname subsetting
#' @param i,j Row and column indices. If `j` is omitted, `i` is used as column index.
#' @param ... Ignored.
#' @param exact Ignored, with a warning.
#' @export
`[[.tbl_df` <- function(x, i, j, ..., exact = TRUE) {
if (!exact) {
warn("`exact` ignored.")
}

n_dots <- dots_n(...)
if (n_dots > 0) {
warn("Extra arguments ignored.")
}

# Ignore exact as an argument for counting
n_real_args <- nargs() - !missing(exact) - n_dots

# Column subsetting if nargs() == 2L
if (n_real_args <= 2L) {
if (missing(i)) {
abort_subset_columns_non_missing_only()
}
tbl_subset2(x, j = i, j_arg = substitute(i))
} else if (missing(j) || missing(i)) {
abort_subset_columns_non_missing_only()
} else {
i_arg <- substitute(i)
i <- vectbl_as_row_location2(i, fast_nrow(x), i_arg)
x <- tbl_subset2(x, j = j, j_arg = substitute(j))

if (is.null(x)) {
x
} else {
# Drop inner names with double subscript
vectbl_set_names(vec_slice(x, i), NULL)
}
}
}

#' @rdname subsetting
#' @export
`[[<-.tbl_df` <- function(x, i, j, ..., value) {
i_arg <- substitute(i)
j_arg <- substitute(j)
value_arg <- substitute(value)

if (missing(i)) {
abort_assign_columns_non_missing_only()
}

if (missing(j)) {
if (nargs() < 4) {
j <- i
i <- NULL
j_arg <- i_arg
i_arg <- NULL
} else {
abort_assign_columns_non_missing_only()
}
}

if (!is.null(i)) {
i <- vectbl_as_row_location2(i, fast_nrow(x), i_arg, assign = TRUE)
}

if (is.object(j)) {
j <- vectbl_as_col_subscript2(j, j_arg, assign = TRUE)
}

# Side effect: check scalar
if (is.symbol(j)) {
# FIXME: as_utf8_character() needs rlang > 0.4.11
j <- chr_unserialise_unicode(as.character(j))
} else {
if (!is.vector(j) || length(j) != 1L || is.na(j) || (is.numeric(j) && j < 0) || is.logical(j)) {
vectbl_as_col_location2(j, length(x), j_arg = j_arg, assign = TRUE)
}
}

j <- vectbl_as_new_col_index(j, x, j_arg)

# New columns are added to the end, provide index to avoid matching column
# names again
value <- list(value)

# j is already pretty
tbl_subassign(x, i, j, value, i_arg = i_arg, j_arg = NULL, value_arg = value_arg)
}


#' @rdname subsetting
#' @param drop Coerce to a vector if fetching one column via `tbl[, j]` .
#' Default `FALSE`, ignored when accessing a column via `tbl[j]` .
#' @export
`[.tbl_df` <- function(x, i, j, drop = FALSE, ...) {
i_arg <- substitute(i)
j_arg <- substitute(j)

if (missing(i)) {
i <- NULL
i_arg <- NULL
} else if (is.null(i)) {
i <- integer()
}

if (missing(j)) {
j <- NULL
j_arg <- NULL
} else if (is.null(j)) {
j <- integer()
}

# Ignore drop as an argument for counting
n_real_args <- nargs() - !missing(drop)

# Column or matrix subsetting if nargs() == 2L
if (n_real_args <= 2L) {
if (!missing(drop)) {
warn("`drop` argument ignored for subsetting a tibble with `x[j]`, it has an effect only for `x[i, j]`.")
drop <- FALSE
}

j <- i
i <- NULL
j_arg <- i_arg
i_arg <- NULL

# Special case, returns a vector:
if (is.matrix(j)) {
return(tbl_subset_matrix(x, j, j_arg))
}
}

# From here on, i, j and drop contain correct values:
if (is.null(j)) {
xo <- x
} else {
j <- vectbl_as_col_location(j, length(x), names(x), j_arg = j_arg, assign = FALSE)

if (anyNA(j)) {
abort_na_column_index(which(is.na(j)))
}

xo <- .subset(x, j)

if (anyDuplicated.default(j)) {
xo <- set_repaired_names(xo, repair_hint = FALSE, .name_repair = "minimal")
}
}

if (is.null(i)) {
nrow <- fast_nrow(x)
} else {
i <- vectbl_as_row_index(i, x, i_arg)
xo <- lapply(xo, vec_slice, i = i)
nrow <- length(i)
}

if (drop && length(xo) == 1L) {
tbl_subset2(xo, 1L, j_arg)
} else {
attr(xo, "row.names") <- .set_row_names(nrow)
vectbl_restore(xo, x)
}
}

#' @rdname subsetting
#' @export
`[<-.tbl_df` <- function(x, i, j, ..., value) {
i_arg <- substitute(i)
j_arg <- substitute(j)

if (missing(i)) {
i <- NULL
i_arg <- NULL
} else if (is.null(i)) {
i <- integer()
}

if (missing(j)) {
j <- NULL
j_arg <- NULL
} else if (is.null(j)) {
j <- integer()
}

if (is.null(j) && nargs() < 4) {
j <- i
i <- NULL
j_arg <- i_arg
i_arg <- NULL

# Special case:
if (is.matrix(j)) {
return(tbl_subassign_matrix(x, j, value, j_arg, substitute(value)))
}
}

tbl_subassign(x, i, j, value, i_arg, j_arg, substitute(value))
}

vectbl_as_row_index <- function(i, x, i_arg, assign = FALSE) {
stopifnot(!is.null(i))

Expand Down
Loading