diff --git a/DESCRIPTION b/DESCRIPTION index 3a9acd7..d065ad5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: nichenetr Type: Package Title: NicheNet: Modeling Intercellular Communication by Linking Ligands to Target Genes -Version: 2.0.4 +Version: 2.0.5 Authors@R: c(person("Robin", "Browaeys", role = c("aut")), person("Chananchida", "Sang-aram", role = c("aut", "cre"), email = "chananchida.sangaram@ugent.be")) Description: This package allows you the investigate intercellular communication from a computational perspective. More specifically, it allows to investigate how interacting cells influence each other's gene expression. Functionalities of this package (e.g. including predicting extracellular upstream regulators and their affected target genes) build upon a probabilistic model of ligand-target links that was inferred by data-integration. diff --git a/NAMESPACE b/NAMESPACE index 4c815ef..0a8c292 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(get_expressed_genes,Seurat) +S3method(get_expressed_genes,default) export(add_hyperparameters_parameter_settings) export(add_ligand_popularity_measures_to_perfs) export(add_new_datasource) diff --git a/R/application_prediction.R b/R/application_prediction.R index c8d8e9c..bb91ae7 100644 --- a/R/application_prediction.R +++ b/R/application_prediction.R @@ -1078,159 +1078,142 @@ nichenet_seuratobj_aggregate = function(receiver, seurat_obj, condition_colname, background_expressed_genes = background_expressed_genes )) } -#' @title Determine expressed genes of a cell type from a Seurat object single-cell RNA seq dataset or Seurat spatial transcriptomics dataset -#' -#' @description \code{get_expressed_genes} Return the genes that are expressed in a given cell cluster based on the fraction of cells in that cluster that should express the cell. -#' @usage -#' get_expressed_genes(ident, seurat_obj, pct = 0.10, assay_oi = NULL) -#' -#' @param ident Name of cluster identity/identities of cells -#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. -#' @param pct We consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10. Choice of this parameter is important and depends largely on the used sequencing platform. We recommend to require a lower fraction (like the default 0.10) for 10X data than for e.g. Smart-seq2 data. -#' @param assay_oi If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes. +#' @title Determine expressed genes of a cell type from an input object +#' @description Return the genes that are expressed in given cell cluster(s) based on the fraction of cells in the cluster(s) that should express the cell. +#' @param celltype_oi Character vector of cell type(s) to be considered. If input is a Seurat object, these must correspond to the cell identities from \code{Idents}. +#' @param object Input matrix with rows as genes and columns as cells +#' @param pct We consider genes expressed if they are expressed in at least a specific fraction of cells of the given cluster(s). This number indicates this fraction. Default: 0.10. Choice of this parameter is important and depends largely on the used sequencing platform. We recommend to require a lower fraction (like the default 0.10) for 10X data than for e.g. Smart-seq2 data. +#' @return A vector containing gene symbols of the expressed genes +#' +#' @rdname get_expressed_genes +#' @export +get_expressed_genes <- function(celltype_oi, object, ...) { + UseMethod("get_expressed_genes", object) +} + +#' @param celltype_annot Vector of cell type annotations +#' @import dplyr +#' @rdname get_expressed_genes +#' @examples +#' \dontrun{ +#' # For sparse matrix +#' get_expressed_genes("CD8 T", GetAssayData(seuratObj), seuratObj$celltype, pct = 0.10) +#' } #' -#' @return A character vector with the gene symbols of the expressed genes +#' @export +get_expressed_genes.default <- function(celltype_oi, object, celltype_annot, pct = 0.1) { + requireNamespace("dplyr") + + # Check that length of metadata is equal to number of cells + if (length(celltype_annot) != ncol(object)) { + stop("Length of metadata is not equal to number of cells") + } + + # Check that celltype_oi is in metadata + if (!celltype_oi %in% celltype_annot) { + stop("Cell type of interest is not in celltype_annot") + } + + exprs_mat <- object %>% .[, celltype_annot == celltype_oi] + + # Get cells of interest in matrix + n_cells_oi_in_matrix <- ncol(exprs_mat) + + if (n_cells_oi_in_matrix < 5000) { + genes <- exprs_mat %>% apply(1, function(x) { + sum(x > 0)/n_cells_oi_in_matrix + }) %>% .[. >= pct] %>% names() + } + else { + splits <- split(1:nrow(exprs_mat), ceiling(seq_along(1:nrow(exprs_mat))/100)) + genes <- splits %>% lapply(function(genes_indices) { + begin_i <- genes_indices[1] + end_i <- genes_indices[length(genes_indices)] + exprs <- exprs_mat[begin_i:end_i, , drop = FALSE] + genes <- exprs %>% apply(1, function(x) { + sum(x > 0)/n_cells_oi_in_matrix + }) %>% .[. >= pct] %>% names() + }) %>% unlist() %>% + unname() + } + return(genes) + +} + +#' @param seurat_obj Single-cell expression or spatial dataset as Seurat object +#' @param assay_oi If wanted: specify yourself which assay to look for. If not NULL, the \code{DefaultAssay} of the Seurat object is used. +#' @param ... additional parameters passed to \code{GetAssayData} (in case the slot/layer needs to be specified) #' #' @import Seurat #' @import dplyr #' #' @examples #' \dontrun{ -#' get_expressed_genes(ident = "CD8 T", seurat_obj = seuratObj, pct = 0.10) +#' # For Seurat object +#' get_expressed_genes(celltype_oi = "CD8 T", seurat_obj = seuratObj, pct = 0.10) #' } #' +#' @rdname get_expressed_genes #' @export #' -get_expressed_genes = function(ident, seurat_obj, pct = 0.1, assay_oi = NULL){ +get_expressed_genes.Seurat = function(celltype_oi, seurat_obj, pct = 0.1, assay_oi = NULL, ...){ requireNamespace("Seurat") requireNamespace("dplyr") - # input check - - - if (!"RNA" %in% names(seurat_obj@assays)) { - if ("Spatial" %in% names(seurat_obj@assays)) { - if (class(seurat_obj@assays$Spatial@data) != "matrix" & - class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { - warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") - } - if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") - } - } - } - else { - if (class(seurat_obj@assays$RNA@data) != "matrix" & - class(seurat_obj@assays$RNA@data) != "dgCMatrix") { - warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") - } - if ("integrated" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == - 0) - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") - } - else if ("SCT" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == - 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") - } - } - else { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") - } + # If assay_oi is not specified, use DefaultAssay + if (is.null(assay_oi)){ + assay_oi <- DefaultAssay(seurat_obj) + } else { + # Check that assay exists in Seurat object + if (!assay_oi %in% Seurat::Assays(seurat_obj)){ + stop("assay_oi not found in Seurat object") } + # Set assay_oi to DefaultAssay + DefaultAssay(seurat_obj) <- assay_oi + } - if (sum(ident %in% unique(Idents(seurat_obj))) != length(ident)) { - stop("One or more provided cell clusters is not part of the 'Idents' of your Seurat object") + + # If assay_oi is Spatial, give warning + if (assay_oi == "Spatial"){ + warning("The Spatial assay will be used to define expressed gene. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on because you want to look at cell-cell interactions, and not at spot-spot interactions! ;-) ") } - if(!is.null(assay_oi)){ - if(! assay_oi %in% Seurat::Assays(seurat_obj)){ - stop("assay_oi should be an assay of your Seurat object") - } + # Check that celltype_oi is in Seurat object + if (!all(celltype_oi %in% unique(Idents(seurat_obj)))) { + stop("One or more provided cell clusters is not part of the 'Idents' of your Seurat object") } # Get cell identities of cluster of interest + cells_oi <- Idents(seurat_obj) %>% .[. %in% celltype_oi] %>% names() + # Get expression matrix from assay_oi + #cells_oi_in_matrix <- intersect(colnames(seurat_obj[[assay_oi]]@data), cells_oi) + #exprs_mat = seurat_obj[[assay_oi]]@data %>% .[, cells_oi_in_matrix] - cells_oi = Idents(seurat_obj) %>% .[Idents(seurat_obj) %in% - ident] %>% names() - - # Get exprs matrix: from assay oi or from most advanced assay if assay oi not specifcied - - if(!is.null(assay_oi)){ - cells_oi_in_matrix = intersect(colnames(seurat_obj[[assay_oi]]@data), cells_oi) - exprs_mat = seurat_obj[[assay_oi]]@data %>% .[, cells_oi_in_matrix] - } else { - if ("integrated" %in% names(seurat_obj@assays)) { - warning("Seurat object is result from the Seurat integration workflow. The expressed genes are now defined based on the integrated slot. You can change this via the assay_oi parameter of the get_expressed_genes() functions. Recommended assays: RNA or SCT") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$integrated@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$integrated@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$integrated@data %>% .[, - cells_oi_in_matrix] - } - else if ("SCT" %in% names(seurat_obj@assays) & !"Spatial" %in% - names(seurat_obj@assays)) { - warning("Seurat object is result from the Seurat single-cell transform workflow. The expressed genes are defined based on the SCT slot. You can change this via the assay_oi parameter of the get_expressed_genes() functions. Recommended assays: RNA or SCT") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$SCT@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$SCT@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$SCT@data %>% .[, cells_oi_in_matrix] - } - else if ("Spatial" %in% names(seurat_obj@assays) & - !"SCT" %in% names(seurat_obj@assays)) { - warning("Seurat object is result from the Seurat spatial object. The expressed genes are defined based on the Spatial slot. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on spot-based data (because you want to look at cell-cell interactions, and not at spot-spot interactions! ;-) )") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$Spatial@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$Spatial@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$Spatial@data %>% .[, cells_oi_in_matrix] - } - else if ("Spatial" %in% names(seurat_obj@assays) & - "SCT" %in% names(seurat_obj@assays)) { - warning("Seurat object is result from the Seurat spatial object, followed by the SCT workflow. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on spot-based data (because you want to look at cell-cell interactions, and not at spot-spot interactions! The expressed genes are defined based on the SCT slot, but this can be changed via the assay_oi parameter.") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$SCT@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$Spatial@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$SCT@data %>% .[, cells_oi_in_matrix] - } - else { - if (sum(cells_oi %in% colnames(seurat_obj@assays$RNA@data)) == - 0) - stop("None of the cells are in colnames of 'seurat_obj@assays$RNA@data'. The expression matrix should contain cells in columns and genes in rows.") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$RNA@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$RNA@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$RNA@data %>% .[, cells_oi_in_matrix] - } + exprs_mat <- subset(seurat_obj, idents = celltype_oi) %>% + GetAssayData(assay = assay_oi, ...) + if (length(cells_oi) != ncol(exprs_mat)){ + warning("Not all cells of interest are in your expression matrix.") } - # use defined cells and exprs matrix to get expressed genes - - n_cells_oi_in_matrix = length(cells_oi_in_matrix) + # Use defined cells and exprs matrix to get expressed genes + n_cells_oi_in_matrix <- ncol(exprs_mat) if (n_cells_oi_in_matrix < 5000) { - genes = exprs_mat %>% apply(1, function(x) { + genes <- exprs_mat %>% apply(1, function(x) { sum(x > 0)/n_cells_oi_in_matrix }) %>% .[. >= pct] %>% names() } else { - splits = split(1:nrow(exprs_mat), ceiling(seq_along(1:nrow(exprs_mat))/100)) - genes = splits %>% lapply(function(genes_indices, exprs, - pct, n_cells_oi_in_matrix) { - begin_i = genes_indices[1] - end_i = genes_indices[length(genes_indices)] - exprs = exprs[begin_i:end_i, , drop = FALSE] - genes = exprs %>% apply(1, function(x) { + splits <- split(1:nrow(exprs_mat), ceiling(seq_along(1:nrow(exprs_mat))/100)) + genes <- splits %>% lapply(function(genes_indices) { + begin_i <- genes_indices[1] + end_i <- genes_indices[length(genes_indices)] + exprs <- exprs_mat[begin_i:end_i, , drop = FALSE] + genes <- exprs %>% apply(1, function(x) { sum(x > 0)/n_cells_oi_in_matrix }) %>% .[. >= pct] %>% names() - }, exprs_mat, pct, n_cells_oi_in_matrix) %>% unlist() %>% + }) %>% unlist() %>% unname() } return(genes) diff --git a/man/get_expressed_genes.Rd b/man/get_expressed_genes.Rd index c64c437..b3f121e 100644 --- a/man/get_expressed_genes.Rd +++ b/man/get_expressed_genes.Rd @@ -2,28 +2,46 @@ % Please edit documentation in R/application_prediction.R \name{get_expressed_genes} \alias{get_expressed_genes} -\title{Determine expressed genes of a cell type from a Seurat object single-cell RNA seq dataset or Seurat spatial transcriptomics dataset} +\alias{get_expressed_genes.default} +\alias{get_expressed_genes.Seurat} +\title{Determine expressed genes of a cell type from an input object} \usage{ -get_expressed_genes(ident, seurat_obj, pct = 0.10, assay_oi = NULL) +get_expressed_genes(celltype_oi, object, ...) + +\method{get_expressed_genes}{default}(celltype_oi, object, celltype_annot, pct = 0.1) + +\method{get_expressed_genes}{Seurat}(celltype_oi, seurat_obj, pct = 0.1, assay_oi = NULL, ...) } \arguments{ -\item{ident}{Name of cluster identity/identities of cells} +\item{celltype_oi}{Character vector of cell type(s) to be considered. If input is a Seurat object, these must correspond to the cell identities from \code{Idents}.} + +\item{object}{Input matrix with rows as genes and columns as cells} + +\item{...}{additional parameters passed to \code{GetAssayData} (in case the slot/layer needs to be specified)} -\item{seurat_obj}{Single-cell expression dataset as Seurat object https://satijalab.org/seurat/.} +\item{celltype_annot}{Vector of cell type annotations} -\item{pct}{We consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10. Choice of this parameter is important and depends largely on the used sequencing platform. We recommend to require a lower fraction (like the default 0.10) for 10X data than for e.g. Smart-seq2 data.} +\item{pct}{We consider genes expressed if they are expressed in at least a specific fraction of cells of the given cluster(s). This number indicates this fraction. Default: 0.10. Choice of this parameter is important and depends largely on the used sequencing platform. We recommend to require a lower fraction (like the default 0.10) for 10X data than for e.g. Smart-seq2 data.} -\item{assay_oi}{If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes.} +\item{seurat_obj}{Single-cell expression or spatial dataset as Seurat object} + +\item{assay_oi}{If wanted: specify yourself which assay to look for. If not NULL, the \code{DefaultAssay} of the Seurat object is used.} } \value{ -A character vector with the gene symbols of the expressed genes +A vector containing gene symbols of the expressed genes } \description{ -\code{get_expressed_genes} Return the genes that are expressed in a given cell cluster based on the fraction of cells in that cluster that should express the cell. +Return the genes that are expressed in given cell cluster(s) based on the fraction of cells in the cluster(s) that should express the cell. } \examples{ \dontrun{ -get_expressed_genes(ident = "CD8 T", seurat_obj = seuratObj, pct = 0.10) +# For sparse matrix +get_expressed_genes("CD8 T", GetAssayData(seuratObj), seuratObj$celltype, pct = 0.10) +} + +\dontrun{ +# For Seurat object +get_expressed_genes(celltype_oi = "CD8 T", seurat_obj = seuratObj, pct = 0.10) } } diff --git a/man/nichenet_seuratobj_aggregate.Rd b/man/nichenet_seuratobj_aggregate.Rd index d616437..050af68 100644 --- a/man/nichenet_seuratobj_aggregate.Rd +++ b/man/nichenet_seuratobj_aggregate.Rd @@ -41,7 +41,7 @@ nichenet_seuratobj_aggregate(receiver, seurat_obj, condition_colname, condition_ \item{verbose}{Print out the current analysis stage. Default: TRUE.} -\item{assay_oi}{If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes.} +\item{assay_oi}{If wanted: specify yourself which assay to look for. If not NULL, the \code{DefaultAssay} of the Seurat object is used.} } \value{ A list with the following elements: diff --git a/man/nichenet_seuratobj_aggregate_cluster_de.Rd b/man/nichenet_seuratobj_aggregate_cluster_de.Rd index 23f66c0..d81070e 100644 --- a/man/nichenet_seuratobj_aggregate_cluster_de.Rd +++ b/man/nichenet_seuratobj_aggregate_cluster_de.Rd @@ -43,7 +43,7 @@ nichenet_seuratobj_aggregate_cluster_de(seurat_obj, receiver_affected, receiver_ \item{verbose}{Print out the current analysis stage. Default: TRUE.} -\item{assay_oi}{If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes.} +\item{assay_oi}{If wanted: specify yourself which assay to look for. If not NULL, the \code{DefaultAssay} of the Seurat object is used.} } \value{ A list with the following elements: diff --git a/man/nichenet_seuratobj_cluster_de.Rd b/man/nichenet_seuratobj_cluster_de.Rd index c7aad0e..0b2b8f0 100644 --- a/man/nichenet_seuratobj_cluster_de.Rd +++ b/man/nichenet_seuratobj_cluster_de.Rd @@ -37,7 +37,7 @@ nichenet_seuratobj_cluster_de(seurat_obj, receiver_affected, receiver_reference, \item{verbose}{Print out the current analysis stage. Default: TRUE.} -\item{assay_oi}{If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes.} +\item{assay_oi}{If wanted: specify yourself which assay to look for. If not NULL, the \code{DefaultAssay} of the Seurat object is used.} } \value{ A list with the following elements: