Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:stuart-lab/signac-private into d…
Browse files Browse the repository at this point in the history
…evelop
  • Loading branch information
timoast committed Jun 10, 2024
2 parents 0d4f276 + 22884f8 commit f483dff
Show file tree
Hide file tree
Showing 7 changed files with 220 additions and 10 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: Signac
Title: Analysis of Single-Cell Chromatin Data
Version: 1.13.9001
Date: 2024-04-17
Version: 1.13.9002
Date: 2024-06-06
Authors@R: c(
person(given = 'Tim', family = 'Stuart', email = '[email protected]', role = c('aut', 'cre'), comment = c(ORCID = '0000-0002-3044-0897')),
person(given = 'Avi', family = 'Srivastava', email = '[email protected]', role = 'aut', comment = c(ORCID = '0000-0001-9798-2079')),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ export(RunChromVAR)
export(RunSVD)
export(RunTFIDF)
export(SetMotifData)
export(SortIdents)
export(SplitFragments)
export(StringToGRanges)
export(SubsetMatrix)
Expand Down Expand Up @@ -320,6 +321,7 @@ importFrom(SeuratObject,GetAssayData)
importFrom(SeuratObject,Idents)
importFrom(SeuratObject,Key)
importFrom(SeuratObject,LayerData)
importFrom(SeuratObject,Layers)
importFrom(SeuratObject,Project)
importFrom(SeuratObject,RenameCells)
importFrom(SeuratObject,RowMergeSparseMatrices)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Unreleased

New features:

* Added `SortIdents()` function to automatically order cell metadata according
to similarity ([@JavenTyr](https://github.com/JavenTyr))

Other changes:

* Change to SeuratObject v5.0.2 dependency
* Increase R dependency to 4.1
* Deprecate `CoverageBrowser()`
Expand Down
130 changes: 130 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,136 @@ CellsPerGroup <- function(
return(lut)
}

#' Sorts cell metadata variable by similarity using hierarchical clustering
#'
#' Compute distance matrix from a feature/variable matrix and
#' perform hierarchical clustering to order variables (for example, cell types)
#' according to their similarity.
#'
#' @param object A Seurat object containing single-cell data.
#' @param layer The layer of the data to use (default is "data").
#' @param assay Name of assay to use. If NULL, use the default assay
#' @param label Metadata attribute to sort. If NULL,
#' uses the active identities.
#' @param dendrogram Logical, whether to plot the dendrogram (default is FALSE).
#' @param method The distance method to use for hierarchical clustering
#' (default is 'euclidean', other options from dist{stats} are 'maximum',
#' 'manhattan', 'canberra', 'binary' and 'minkowski').
#' @param verbose Display messages
#'
#' @return The Seurat object with metadata variable reordered by similarity.
#' If the metadata variable was a character vector, it will be converted to a
#' factor and the factor levels set according to the similarity ordering. If
#' active identities were used (label=NULL), the levels will be updated according
#' to similarity ordering.
#'
#' @examples
#' atac_small$test <- sample(1:10, ncol(atac_small), replace = TRUE)
#' atac_small <- SortIdents(object = atac_small, label = 'test')
#' print(levels(atac_small$test))
#'
#' @importFrom stats dist hclust
#' @importFrom SeuratObject Layers Idents Idents<- DefaultAssay LayerData
#
#' @export
SortIdents <- function(
object,
layer = "data",
assay = NULL,
label = NULL,
dendrogram = FALSE,
method = 'euclidean',
verbose = TRUE
){
assay <- SetIfNull(x = assay, y = DefaultAssay(object = object))
if (!(layer %in% Layers(object = object[[assay]]))) {
stop("Requested layer is not present in ", assay)
}
allowed.methods <- c("euclidean", "maximum", "manhattan",
"canberra", "binary", "minkowski")
if (!(method %in% allowed.methods)) {
stop("Selected method must be one of: ",
paste(allowed.methods, collapse = ", "))
}
feat_cell_matrix <- LayerData(object, layer = layer, assay = assay)

if (is.null(x = label)) {
cell_types <- Idents(object = object)
uniq_cell_types = unique(x = cell_types)
} else {
if (length(x = label) > 1) {
stop("Label must be a single character vector or NULL")
}
if (!(label %in% colnames(x = object[[]]))) {
stop("Requested metadata '", label, "' not present in object")
}
cell_types <- object[[label]]
uniq_cell_types = unique(x = cell_types[, 1])
}

if (length(x = uniq_cell_types) / ncol(x = object) > 0.7) {
stop("Most cells have a different value for the requested metadata variable.
Are you sure this is a categorical variable?")
}
if (length(x = uniq_cell_types) < 3) {
stop("Must have more than three different variables")
}

# Initialize a one-hot matrix with rows representing cells
# and columns representing cell types
one_hot_matrix <- matrix(data = 0,
nrow = ncol(x = feat_cell_matrix),
ncol = length(x = uniq_cell_types),
dimnames = list(colnames(x = feat_cell_matrix),
uniq_cell_types)
)

# Fill in the one-hot matrix
for (i in seq_along(along.with = uniq_cell_types)) {
# convert to character in case the level of a factor is returned
cell_type <- as.character(x = uniq_cell_types[i])
one_hot_matrix[cell_types == cell_type, i] <- 1
}
cell_type_counts = colSums(x = one_hot_matrix)

if (verbose) {
message("Creating pseudobulk profiles for ", ncol(x = one_hot_matrix),
" variables across ", nrow(x = feat_cell_matrix), " features")
}

# Aggregate (sum) features by label
# Normalize by number of cells per label
bulk_matrix <- sweep(x = feat_cell_matrix %*% one_hot_matrix, MARGIN = 2,
cell_type_counts, FUN = "/")

# Calculate distance matrix and perform hierarchical clustering
if (verbose) {
message("Computing ", method, " distance between pseudobulk profiles")
}
distance_matrix <- dist(x = t(x = bulk_matrix), method = method)
if (verbose) {
message("Clustering distance matrix")
}
hc <- hclust(d = distance_matrix)

if (dendrogram){
plot(hc, main = paste0("Assay: ", assay, " Layer: ", layer),
xlab = SetIfNull(x = label, y = "Idents"),
sub = "", cex = 0.9)
}

ordered_cell_types <- uniq_cell_types[hc$order]
if (is.null(x = label)) {
Idents(object = object) <- factor(x = Idents(object = object),
levels = ordered_cell_types)
} else {
object[[label]] <- factor(x = object[[label]][, 1],
levels = ordered_cell_types)
}

return(object)
}

#' Closest Feature
#'
#' Find the closest feature to a given set of genomic regions
Expand Down
52 changes: 52 additions & 0 deletions man/SortIdents.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,22 @@ test_that("CellsPerGroup works", {
)
})

test_that("SortIdents works",{
set.seed(1)
atac_small$test <- sample(1:10, ncol(atac_small), replace = TRUE)
atac_small <- SortIdents(object = atac_small, label = "test")
expect_equal(
object = levels(atac_small$test),
expected = c("10", "6", "3", "7", "9", "5", "4", "8", "1", "2")
)
Idents(atac_small) <- sample(1:10, ncol(atac_small), replace = TRUE)
atac_small <- SortIdents(object = atac_small)
expect_equal(
object = levels(Idents(atac_small)),
expected = c("1", "9", "10", "2", "3", "5", "4", "7", "6", "8")
)
})

test_that("GRanges conversion works", {
correct_string <- c("chr1-1-10", "chr2-12-3121")
correct_ranges <- GRanges(
Expand Down
19 changes: 11 additions & 8 deletions vignettes/pbmc_vignette.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -717,15 +717,18 @@ important information including gene annotation, peak coordinates, and genomic
links (if they're stored in the object). See the
[visualization vignette](visualization.html) for more information.

We can then visualize the DA peaks open in CD4 naive cells and CD14 monocytes,
near some key marker genes for these cell types, CD4 and FCN1 respectively.
Here we'll highlight the DA peaks regions in grey.
For plotting purposes, it's nice to have related cell types grouped together. We
can automatically sort the plotting order according to similarities across the
annotated cell types by running the `SortIdents()` function:

```{r message=FALSE, warning=FALSE}
# set plotting order
levels(pbmc) <- sort(levels(pbmc))
pbmc <- SortIdents(pbmc)
```

We can then visualize the DA peaks open in CD4 naive cells and CD14 monocytes,
near some key marker genes for these cell types, CD4 and LYZ respectively.
Here we'll highlight the DA peaks regions in grey.

```{r message=FALSE, warning=FALSE, out.width='90%', fig.height=6}
# find DA peaks overlapping gene of interest
regions_highlight <- subsetByOverlaps(StringToGRanges(open_cd4naive), LookupGeneCoords(pbmc, "CD4"))
Expand All @@ -740,14 +743,14 @@ CoveragePlot(
```

```{r message=FALSE, warning=FALSE, out.width='90%', fig.height=6}
regions_highlight <- subsetByOverlaps(StringToGRanges(open_cd14mono), LookupGeneCoords(pbmc, "FCN1"))
regions_highlight <- subsetByOverlaps(StringToGRanges(open_cd14mono), LookupGeneCoords(pbmc, "LYZ"))
CoveragePlot(
object = pbmc,
region = "FCN1",
region = "LYZ",
region.highlight = regions_highlight,
extend.upstream = 1000,
extend.downstream = 1000
extend.downstream = 5000
)
```

Expand Down

0 comments on commit f483dff

Please sign in to comment.