diff --git a/NAMESPACE b/NAMESPACE index baf27afad..e903775af 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method("$",tbl_df) -S3method("$<-",tbl_df) -S3method("[",tbl_df) -S3method("[<-",tbl_df) -S3method("[[",tbl_df) -S3method("[[<-",tbl_df) S3method("names<-",tbl_df) S3method("row.names<-",tbl_df) S3method(as.data.frame,tbl_df) diff --git a/R/subsetting.R b/R/subsetting.R index 088b952b9..7cb923570 100644 --- a/R/subsetting.R +++ b/R/subsetting.R @@ -1,304 +1,3 @@ -#' Subsetting tibbles -#' -#' @description -#' Accessing columns, rows, or cells via `$`, `[[`, or `[` is mostly similar to -#' [regular data frames][base::Extract]. However, the -#' behavior is different for tibbles and data frames in some cases: -#' * `[` always returns a tibble by default, even if -#' only one column is accessed. -#' * Partial matching of column names with `$` and `[[` is not supported, and -#' `NULL` is returned. -#' For `$`, a warning is given. -#' * Only scalars (vectors of length one) or vectors with the -#' same length as the number of rows can be used for assignment. -#' * Rows outside of the tibble's boundaries cannot be accessed. -#' * When updating with `[[<-` and `[<-`, type changes of entire columns are -#' supported, but updating a part of a column requires that the new value is -#' coercible to the existing type. -#' See [vec_slice()] for the underlying implementation. -#' -#' Unstable return type and implicit partial matching can lead to surprises and -#' bugs that are hard to catch. If you rely on code that requires the original -#' data frame behavior, coerce to a data frame via [as.data.frame()]. -#' -#' @details -#' For better compatibility with older code written for regular data frames, -#' `[` supports a `drop` argument which defaults to `FALSE`. -#' New code should use `[[` to turn a column into a vector. -#' -#' @param x A tibble. -#' @param value A value to store in a row, column, range or cell. -#' Tibbles are stricter than data frames in what is accepted here. -#' -#' @name subsetting -#' @examples -#' df <- data.frame(a = 1:3, bc = 4:6) -#' tbl <- tibble(a = 1:3, bc = 4:6) -#' -#' # Subsetting single columns: -#' df[, "a"] -#' tbl[, "a"] -#' tbl[, "a", drop = TRUE] -#' as.data.frame(tbl)[, "a"] -#' -#' # Subsetting single rows with the drop argument: -#' df[1, , drop = TRUE] -#' tbl[1, , drop = TRUE] -#' as.list(tbl[1, ]) -#' -#' @examplesIf (Sys.getenv("NOT_CRAN") != "true" || Sys.getenv("IN_PKGDOWN") == "true") -#' # Accessing non-existent columns: -#' df$b -#' tbl$b -#' -#' df[["b", exact = FALSE]] -#' tbl[["b", exact = FALSE]] -#' -#' df$bd <- c("n", "e", "w") -#' tbl$bd <- c("n", "e", "w") -#' df$b -#' tbl$b -#' @examples -#' -#' df$b <- 7:9 -#' tbl$b <- 7:9 -#' df$b -#' tbl$b -#' -#' # Identical behavior: -#' tbl[1, ] -#' tbl[1, c("bc", "a")] -#' tbl[, c("bc", "a")] -#' tbl[c("bc", "a")] -#' tbl["a"] -#' tbl$a -#' tbl[["a"]] -NULL - -#' @rdname subsetting -#' @param name A [name] or a string. -#' @export -`$.tbl_df` <- function(x, name) { - out <- .subset2(x, name) - if (is.null(out)) { - warn(paste0("Unknown or uninitialised column: ", tick(name), ".")) - } - out -} - - -#' @rdname subsetting -#' @export -`$<-.tbl_df` <- function(x, name, value) { - tbl_subassign( - x, - i = NULL, as_string(name), list(value), - i_arg = NULL, j_arg = name, value_arg = substitute(value) - ) -} - -#' @rdname subsetting -#' @param i,j Row and column indices. If `j` is omitted, `i` is used as column index. -#' @param ... Ignored. -#' @param exact Ignored, with a warning. -#' @export -`[[.tbl_df` <- function(x, i, j, ..., exact = TRUE) { - if (!exact) { - warn("`exact` ignored.") - } - - n_dots <- dots_n(...) - if (n_dots > 0) { - warn("Extra arguments ignored.") - } - - # Ignore exact as an argument for counting - n_real_args <- nargs() - !missing(exact) - n_dots - - # Column subsetting if nargs() == 2L - if (n_real_args <= 2L) { - if (missing(i)) { - abort_subset_columns_non_missing_only() - } - tbl_subset2(x, j = i, j_arg = substitute(i)) - } else if (missing(j) || missing(i)) { - abort_subset_columns_non_missing_only() - } else { - i_arg <- substitute(i) - i <- vectbl_as_row_location2(i, fast_nrow(x), i_arg) - x <- tbl_subset2(x, j = j, j_arg = substitute(j)) - - if (is.null(x)) { - x - } else { - # Drop inner names with double subscript - vectbl_set_names(vec_slice(x, i), NULL) - } - } -} - -#' @rdname subsetting -#' @export -`[[<-.tbl_df` <- function(x, i, j, ..., value) { - i_arg <- substitute(i) - j_arg <- substitute(j) - value_arg <- substitute(value) - - if (missing(i)) { - abort_assign_columns_non_missing_only() - } - - if (missing(j)) { - if (nargs() < 4) { - j <- i - i <- NULL - j_arg <- i_arg - i_arg <- NULL - } else { - abort_assign_columns_non_missing_only() - } - } - - if (!is.null(i)) { - i <- vectbl_as_row_location2(i, fast_nrow(x), i_arg, assign = TRUE) - } - - if (is.object(j)) { - j <- vectbl_as_col_subscript2(j, j_arg, assign = TRUE) - } - - # Side effect: check scalar - if (is.symbol(j)) { - # FIXME: as_utf8_character() needs rlang > 0.4.11 - j <- chr_unserialise_unicode(as.character(j)) - } else { - if (!is.vector(j) || length(j) != 1L || is.na(j) || (is.numeric(j) && j < 0) || is.logical(j)) { - vectbl_as_col_location2(j, length(x), j_arg = j_arg, assign = TRUE) - } - } - - j <- vectbl_as_new_col_index(j, x, j_arg) - - # New columns are added to the end, provide index to avoid matching column - # names again - value <- list(value) - - # j is already pretty - tbl_subassign(x, i, j, value, i_arg = i_arg, j_arg = NULL, value_arg = value_arg) -} - - -#' @rdname subsetting -#' @param drop Coerce to a vector if fetching one column via `tbl[, j]` . -#' Default `FALSE`, ignored when accessing a column via `tbl[j]` . -#' @export -`[.tbl_df` <- function(x, i, j, drop = FALSE, ...) { - i_arg <- substitute(i) - j_arg <- substitute(j) - - if (missing(i)) { - i <- NULL - i_arg <- NULL - } else if (is.null(i)) { - i <- integer() - } - - if (missing(j)) { - j <- NULL - j_arg <- NULL - } else if (is.null(j)) { - j <- integer() - } - - # Ignore drop as an argument for counting - n_real_args <- nargs() - !missing(drop) - - # Column or matrix subsetting if nargs() == 2L - if (n_real_args <= 2L) { - if (!missing(drop)) { - warn("`drop` argument ignored for subsetting a tibble with `x[j]`, it has an effect only for `x[i, j]`.") - drop <- FALSE - } - - j <- i - i <- NULL - j_arg <- i_arg - i_arg <- NULL - - # Special case, returns a vector: - if (is.matrix(j)) { - return(tbl_subset_matrix(x, j, j_arg)) - } - } - - # From here on, i, j and drop contain correct values: - if (is.null(j)) { - xo <- x - } else { - j <- vectbl_as_col_location(j, length(x), names(x), j_arg = j_arg, assign = FALSE) - - if (anyNA(j)) { - abort_na_column_index(which(is.na(j))) - } - - xo <- .subset(x, j) - - if (anyDuplicated.default(j)) { - xo <- set_repaired_names(xo, repair_hint = FALSE, .name_repair = "minimal") - } - } - - if (is.null(i)) { - nrow <- fast_nrow(x) - } else { - i <- vectbl_as_row_index(i, x, i_arg) - xo <- lapply(xo, vec_slice, i = i) - nrow <- length(i) - } - - if (drop && length(xo) == 1L) { - tbl_subset2(xo, 1L, j_arg) - } else { - attr(xo, "row.names") <- .set_row_names(nrow) - vectbl_restore(xo, x) - } -} - -#' @rdname subsetting -#' @export -`[<-.tbl_df` <- function(x, i, j, ..., value) { - i_arg <- substitute(i) - j_arg <- substitute(j) - - if (missing(i)) { - i <- NULL - i_arg <- NULL - } else if (is.null(i)) { - i <- integer() - } - - if (missing(j)) { - j <- NULL - j_arg <- NULL - } else if (is.null(j)) { - j <- integer() - } - - if (is.null(j) && nargs() < 4) { - j <- i - i <- NULL - j_arg <- i_arg - i_arg <- NULL - - # Special case: - if (is.matrix(j)) { - return(tbl_subassign_matrix(x, j, value, j_arg, substitute(value))) - } - } - - tbl_subassign(x, i, j, value, i_arg, j_arg, substitute(value)) -} - vectbl_as_row_index <- function(i, x, i_arg, assign = FALSE) { stopifnot(!is.null(i)) diff --git a/man/subsetting.Rd b/man/subsetting.Rd deleted file mode 100644 index 7c5c343d1..000000000 --- a/man/subsetting.Rd +++ /dev/null @@ -1,112 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/subsetting.R -\name{subsetting} -\alias{subsetting} -\alias{$.tbl_df} -\alias{$<-.tbl_df} -\alias{[[.tbl_df} -\alias{[[<-.tbl_df} -\alias{[.tbl_df} -\alias{[<-.tbl_df} -\title{Subsetting tibbles} -\usage{ -\method{$}{tbl_df}(x, name) - -\method{$}{tbl_df}(x, name) <- value - -\method{[[}{tbl_df}(x, i, j, ..., exact = TRUE) - -\method{[[}{tbl_df}(x, i, j, ...) <- value - -\method{[}{tbl_df}(x, i, j, drop = FALSE, ...) - -\method{[}{tbl_df}(x, i, j, ...) <- value -} -\arguments{ -\item{x}{A tibble.} - -\item{name}{A \link{name} or a string.} - -\item{value}{A value to store in a row, column, range or cell. -Tibbles are stricter than data frames in what is accepted here.} - -\item{i, j}{Row and column indices. If \code{j} is omitted, \code{i} is used as column index.} - -\item{...}{Ignored.} - -\item{exact}{Ignored, with a warning.} - -\item{drop}{Coerce to a vector if fetching one column via \code{tbl[, j]} . -Default \code{FALSE}, ignored when accessing a column via \code{tbl[j]} .} -} -\description{ -Accessing columns, rows, or cells via \code{$}, \code{[[}, or \code{[} is mostly similar to -\link[base:Extract]{regular data frames}. However, the -behavior is different for tibbles and data frames in some cases: -\itemize{ -\item \code{[} always returns a tibble by default, even if -only one column is accessed. -\item Partial matching of column names with \code{$} and \code{[[} is not supported, and -\code{NULL} is returned. -For \code{$}, a warning is given. -\item Only scalars (vectors of length one) or vectors with the -same length as the number of rows can be used for assignment. -\item Rows outside of the tibble's boundaries cannot be accessed. -\item When updating with \verb{[[<-} and \verb{[<-}, type changes of entire columns are -supported, but updating a part of a column requires that the new value is -coercible to the existing type. -See \code{\link[=vec_slice]{vec_slice()}} for the underlying implementation. -} - -Unstable return type and implicit partial matching can lead to surprises and -bugs that are hard to catch. If you rely on code that requires the original -data frame behavior, coerce to a data frame via \code{\link[=as.data.frame]{as.data.frame()}}. -} -\details{ -For better compatibility with older code written for regular data frames, -\code{[} supports a \code{drop} argument which defaults to \code{FALSE}. -New code should use \code{[[} to turn a column into a vector. -} -\examples{ -df <- data.frame(a = 1:3, bc = 4:6) -tbl <- tibble(a = 1:3, bc = 4:6) - -# Subsetting single columns: -df[, "a"] -tbl[, "a"] -tbl[, "a", drop = TRUE] -as.data.frame(tbl)[, "a"] - -# Subsetting single rows with the drop argument: -df[1, , drop = TRUE] -tbl[1, , drop = TRUE] -as.list(tbl[1, ]) - -\dontshow{if ((Sys.getenv("NOT_CRAN") != "true" || Sys.getenv("IN_PKGDOWN") == "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -# Accessing non-existent columns: -df$b -tbl$b - -df[["b", exact = FALSE]] -tbl[["b", exact = FALSE]] - -df$bd <- c("n", "e", "w") -tbl$bd <- c("n", "e", "w") -df$b -tbl$b -\dontshow{\}) # examplesIf} - -df$b <- 7:9 -tbl$b <- 7:9 -df$b -tbl$b - -# Identical behavior: -tbl[1, ] -tbl[1, c("bc", "a")] -tbl[, c("bc", "a")] -tbl[c("bc", "a")] -tbl["a"] -tbl$a -tbl[["a"]] -}