Skip to content

Commit

Permalink
Merge pull request #1797 from lldelisle/fix_annotations
Browse files Browse the repository at this point in the history
Be more flexible for annotation
  • Loading branch information
timoast authored Oct 21, 2024
2 parents f870f90 + 50670ef commit 4fe0f8b
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 7 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Signac
Title: Analysis of Single-Cell Chromatin Data
Version: 1.14.9000
Version: 1.14.9001
Date: 2024-10-21
Authors@R: c(
person(given = 'Tim', family = 'Stuart', email = '[email protected]', role = c('aut', 'cre'), comment = c(ORCID = '0000-0002-3044-0897')),
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Other changes:

* Improve error messages for `FindMotifs()` ([#1788](https://github.com/stuart-lab/signac/issues/1788))
* Add documentation about the required format for gene annotations, and ensure this format is present when creating the assay ([#1797](https://github.com/stuart-lab/signac/pull/1797); [@lldelisle](https://github.com/lldelisle))

# Signac 1.14.0

Expand Down
39 changes: 35 additions & 4 deletions R/objects.R
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,14 @@ ChromatinAssay <- setClass(
#' information about the genome used. Alternatively, the name of a UCSC genome
#' can be provided and the sequence information will be downloaded from UCSC.
#' @param annotation A set of \code{\link[GenomicRanges]{GRanges}} containing
#' annotations for the genome used
#' annotations for the genome used. It must have the following metadata
#' columns (accessible via \code{mcols()}):
#' \itemize{
#' \item{tx_id or transcript_id: Transcript ID}
#' \item{gene_name: Gene name}
#' \item{gene_id: Gene ID}
#' \item{gene_biotype: Gene biotype (e.g. "protein_coding", "lincRNA")}
#' \item{type: Annotation type (e.g. "exon", "gap")}
#' }
#' @param bias A Tn5 integration bias matrix
#' @param positionEnrichment A named list of matrices containing positional
#' signal enrichment information for each cell. Should be a cell x position
Expand All @@ -173,6 +180,7 @@ ChromatinAssay <- setClass(
#' @importFrom SeuratObject CreateAssayObject
#' @importFrom Matrix rowSums colSums
#' @importFrom GenomicRanges isDisjoint
#' @importFrom S4Vectors mcols
#' @concept assay
#'
#' @export
Expand Down Expand Up @@ -217,6 +225,12 @@ CreateChromatinAssay <- function(
if (!is.null(x = annotation) & !inherits(x = annotation, what = "GRanges")) {
stop("Annotation must be a GRanges object.")
}
if (!any(c("tx_id", "transcript_id") %in% colnames(x = mcols(x = annotation)))) {
stop("Annotation must have transcript id stored in `tx_id` or `transcript_id`.")
}
if (any(!c("gene_name", "gene_id", "gene_biotype", "type") %in% colnames(x = mcols(x = annotation)))) {
stop("Annotation must have `gene_name`, `gene_id`, `gene_biotype` and `type`.")
}
# remove low-count cells
ncount.cell <- colSums(x = data.use > 0)
data.use <- data.use[, ncount.cell >= min.features]
Expand Down Expand Up @@ -349,7 +363,14 @@ CreateChromatinAssay <- function(
#' @param seqinfo A \code{\link[GenomeInfoDb]{Seqinfo}} object containing basic
#' information about the genome used. Alternatively, the name of a UCSC genome
#' can be provided and the sequence information will be downloaded from UCSC.
#' @param annotation Genomic annotation
#' @param annotation Genomic annotation. It must have the following metadata
#' columns (accessible via \code{mcols()}):
#' \itemize{
#' \item{tx_id or transcript_id: Transcript ID}
#' \item{gene_name: Gene name}
#' \item{gene_id: Gene ID}
#' \item{gene_biotype: Gene biotype (e.g. "protein_coding", "lincRNA")}
#' \item{type: Annotation type (e.g. "exon", "gap")}
#' }
#' @param motifs A \code{\link{Motif}} object
#' @param fragments A list of \code{\link{Fragment}} objects
#' @param bias Tn5 integration bias matrix
Expand Down Expand Up @@ -790,6 +811,7 @@ RenameCells.Fragment <- function(object, new.names, ...) {
#' @importFrom SeuratObject SetAssayData
#' @importFrom GenomeInfoDb genome Seqinfo
#' @importFrom lifecycle deprecated is_present
#' @importFrom S4Vectors mcols
#' @method SetAssayData ChromatinAssay
#' @concept assay
#' @export
Expand Down Expand Up @@ -867,9 +889,18 @@ SetAssayData.ChromatinAssay <- function(
annotation.genome <- unique(x = genome(x = new.data))
if (!is.null(x = current.genome)) {
if (!is.na(x = annotation.genome) &
(current.genome != annotation.genome)) {
(current.genome != annotation.genome)) {
stop("Annotation genome does not match genome of the object")
}
}
}
if (!any(c("tx_id", "transcript_id") %in% colnames(x = mcols(x = new.data)))) {
stop("Annotation must have transcript id stored in `tx_id` or `transcript_id`.")
}
if (any(!c("gene_name", "gene_id", "gene_biotype", "type") %in% colnames(x = mcols(x = new.data)))) {
stop("Annotation must have `gene_name`, `gene_id`, `gene_biotype` and `type`.")
}
if (!"tx_id" %in% colnames(x = mcols(x = new.data))) {
new.data$tx_id <- new.data$transcript_id
}
methods::slot(object = object, name = layer) <- new.data
} else if (layer == "bias") {
Expand Down
9 changes: 8 additions & 1 deletion man/CreateChromatinAssay.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion man/as.ChromatinAssay.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4fe0f8b

Please sign in to comment.