
Commit

Merge branch 'release/v1.1.0' into master
pinin4fjords committed Jul 22, 2021
2 parents 0a2366e + 83cdfae commit c9bbbf9
Showing 5 changed files with 79 additions and 6 deletions.
35 changes: 32 additions & 3 deletions scanpy-scripts-tests.bats
@@ -8,15 +8,19 @@ setup() {
output_dir="${test_dir}/outputs"
raw_matrix="${data_dir}/matrix.mtx"
singlet_obs="${data_dir}/singlet_obs.txt"
batch_obs="${data_dir}/batch_obs.txt"
read_opt="-x $data_dir --show-obj stdout"
read_obj="${output_dir}/read.h5ad"
filter_opt="--save-raw -p n_genes 200 2500 -p c:n_counts 0 50000 -p n_cells 3 inf -p pct_counts_mito 0 0.2 -c mito '!True' --show-obj stdout"
filter_obj="${output_dir}/filter.h5ad"
test_clustering='louvain_k10_r0_5'
scrublet_tsv="${output_dir}/scrublet.tsv"
scrublet_png="${output_dir}/scrublet.png"
scrublet_obj="${output_dir}/scrublet.h5ad"
scrublet_batched_obj="${output_dir}/scrublet_batched.h5ad"
scrublet_simulate_obj="${output_dir}/scrublet_simulate.h5ad"
scrublet_opt="--input-obj-sim ${scrublet_simulate_obj} --filter --export-table ${scrublet_tsv}"
scrublet_batched_opt="--filter --batch-key batch"
norm_mtx="${output_dir}/norm"
norm_opt="--save-layer filtered -t 10000 -l all -n after -X ${norm_mtx} --show-obj stdout"
norm_obj="${output_dir}/norm.h5ad"
@@ -41,12 +45,11 @@ setup() {
fdg_opt="--neighbors-key k10 --layout fr -E ${fdg_embed} --init-pos paga"
fdg_obj="${output_dir}/fdg.h5ad"
louvain_tsv="${output_dir}/louvain.tsv"
louvain_opt="-r 0.5,1 --neighbors-key k10 --key-added k10 --export-cluster ${louvain_tsv}"
louvain_opt="-r 0.1,0.5,1 --neighbors-key k10 --key-added k10 --export-cluster ${louvain_tsv}"
louvain_obj="${output_dir}/louvain.h5ad"
leiden_tsv="${output_dir}/leiden.tsv"
leiden_opt="-r 0.3,0.7 --neighbors-key k10 --key-added k10 -F loom --loom-write-obsm-varm --export-cluster ${leiden_tsv}"
leiden_obj="${output_dir}/leiden.loom"
test_clustering='louvain_k10_r0_5'
diffexp_tsv="${output_dir}/diffexp.tsv"
diffexp_opt="-g ${test_clustering} --reference rest --filter-params min_in_group_fraction:0.25,min_fold_change:1.5 --save ${diffexp_tsv}"
diffexp_obj="${output_dir}/diffexp.h5ad"
@@ -126,14 +129,26 @@ setup() {
[ -f "$singlet_obs" ]
}

@test "Make a batch variable" {

if [ "$resume" = 'true' ] && [ -f "$batch_obs" ]; then
skip "$singlet_obs exists"
fi

run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" > $batch_obs

[ "$status" -eq 0 ]
[ -f "$batch_obs" ]
}

# Read 10x dataset

@test "Scanpy object creation from 10x" {
if [ "$resume" = 'true' ] && [ -f "$read_obj" ]; then
skip "$read_obj exists and resume is set to 'true'"
fi

run rm -f $read_obj && eval "$scanpy read --extra-obs $singlet_obs $read_opt $read_obj"
run rm -f $read_obj && eval "paste -d $'\t' $singlet_obs $batch_obs > obs.txt && $scanpy read --extra-obs obs.txt $read_opt $read_obj"

[ "$status" -eq 0 ]
[ -f "$read_obj" ]
@@ -218,6 +233,20 @@ setup() {
[ -f "$scrublet_png" ]
}

# Detect multiplets with Scrublet (batched)

@test "Run Scrublet for multiplet detection (batched)" {
if [ "$resume" = 'true' ] && [ -f "$scrublet_batched_obj" ]; then
skip "$scrublet_batched_obj exists and resume is set to 'true'"
fi

run rm -f $scrublet_batched_obj && eval "$scanpy multiplet scrublet $scrublet_batched_opt $read_obj $scrublet_batched_obj"

[ "$status" -eq 0 ]
[ -f "$scrublet_batched_obj" ]
}


# Regress out variables

@test "Regress out unwanted variable" {
7 changes: 7 additions & 0 deletions scanpy_scripts/cmd_options.py
@@ -1671,6 +1671,13 @@
'scrublet': [
*COMMON_OPTIONS['input'],
*COMMON_OPTIONS['output'],
click.option(
'--batch-key', 'batch_key',
type=click.STRING,
default=None,
help='The name of the column in adata.obs that differentiates among '
'experiments/batches. Doublets will be detected in each batch separately.'
),
click.option(
'--input-obj-sim', 'adata_sim',
type=click.Path(exists=True, dir_okay=False),
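For reference, a rough sketch of how the new --batch-key option maps onto the Python wrapper added below; this is not part of the commit, the input file name and the "batch" column are assumptions based on the bats tests above, and the .obs/.uns names follow scanpy's sce.pp.scrublet output:

# Sketch only: exercising the new batch_key argument from Python rather than the CLI.
# Assumes an AnnData object with a "batch" column in .obs, as created in the tests above.
import scanpy as sc
from scanpy_scripts.lib._scrublet import scrublet

adata = sc.read("read.h5ad")          # hypothetical input path
scrublet(adata, batch_key="batch")    # runs Scrublet per batch, annotating adata in place

# Doublet calls land in .obs; per-batch Scrublet parameters are keyed by batch name in .uns
print(adata.obs["predicted_doublet"].value_counts())
print(list(adata.uns["scrublet"].keys()))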
7 changes: 7 additions & 0 deletions scanpy_scripts/lib/_filter.py
@@ -66,8 +66,15 @@ def filter_anndata(
logging.warning('`pct_counts_%s` exists, not overwriting '
'without --force-recalc', pt)
pct_top.remove(pt)

# Calculate mito stats if we can, even if we're not filtering by them

if 'mito' not in qc_vars and 'mito' in adata.var.keys():
qc_vars.append('mito')

sc.pp.calculate_qc_metrics(
adata, layer=layer, qc_vars=qc_vars, percent_top=pct_top, inplace=True)

adata.obs['n_counts'] = adata.obs['total_counts']
adata.obs['n_genes'] = adata.obs['n_genes_by_counts']
adata.var['n_counts'] = adata.var['total_counts']
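For context, a minimal standalone sketch of what that calculate_qc_metrics call adds when 'mito' is included in qc_vars; the toy matrix and the mito flag are invented, the API call is standard scanpy:

# Sketch only: with qc_vars=['mito'], calculate_qc_metrics adds pct_counts_mito to .obs
# even when mito is not among the filtering parameters.
import numpy as np
import anndata
import scanpy as sc

adata = anndata.AnnData(np.random.poisson(1.0, (50, 10)).astype(np.float32))
adata.var["mito"] = [i < 2 for i in range(adata.n_vars)]   # pretend the first two genes are mitochondrial

sc.pp.calculate_qc_metrics(adata, qc_vars=["mito"], percent_top=None, inplace=True)
print(adata.obs[["total_counts", "n_genes_by_counts", "pct_counts_mito"]].head())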
34 changes: 32 additions & 2 deletions scanpy_scripts/lib/_scrublet.py
@@ -6,10 +6,12 @@
import scanpy.external as sce
import numpy as np
from ..obj_utils import write_obs
import anndata
import pandas as pd

# Wrapper for scrublet allowing text export and filtering

def scrublet(adata, adata_sim=None, filter=False, export_table=None, **kwargs):
def scrublet(adata, adata_sim=None, filter=False, batch_key=None, export_table=None, **kwargs):
"""
Wrapper function for sce.pp.scrublet(), to allow filtering of resulting object
"""
@@ -19,8 +21,36 @@ def scrublet(adata, adata_sim=None, filter=False, export_table=None, **kwargs):
if adata_sim:
adata_sim = sc.read(adata_sim)

sce.pp.scrublet(adata, adata_sim=adata_sim, **kwargs)
# Scrublet shouldn't be run on multi-batch data, so we run the batches
# separately and copy the stats back to the input object

alldata = []
if batch_key is not None:
if batch_key not in adata.obs.keys():
raise ValueError('`batch_key` must be a column of .obs in the input AnnData object.')

batches = np.unique(adata.obs[batch_key])

# Run Scrublet independently on batches and return just the
# scrublet-relevant parts of the objects to add to the input object

def get_adata_scrub_parts(ad):
return {'obs': ad.obs, 'uns': ad.uns['scrublet']}

scrubbed = [ get_adata_scrub_parts(sce.pp.scrublet(adata[adata.obs[batch_key] == batch,], adata_sim=adata_sim, copy = True, **kwargs)) for batch in batches ]
scrubbed_obs = pd.concat([ scrub['obs'] for scrub in scrubbed])

# Now reset the obs to get the scrublet scores

adata.obs = scrubbed_obs.loc[adata.obs_names.values]

# Save the .uns from each batch separately

adata.uns['scrublet'] = dict(zip(batches, [ scrub['uns'] for scrub in scrubbed ]))

else:
sce.pp.scrublet(adata, adata_sim=adata_sim, **kwargs)

# Do any export before optional filtering

if export_table:
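The subtle step above is re-aligning the concatenated per-batch .obs back to the input cell order before assigning it to adata.obs; a pandas-only toy illustration of that re-alignment, with invented cell names, batch labels and doublet_score values:

# Sketch only: pd.concat groups rows by batch, so .loc[original_index] is needed
# to restore the input object's cell order before writing back to adata.obs.
import pandas as pd

obs = pd.DataFrame({"batch": ["b2", "b1", "b2", "b1"]},
                   index=["cell1", "cell2", "cell3", "cell4"])

# Stand-ins for the .obs frames returned by the per-batch Scrublet runs
per_batch = [obs[obs["batch"] == b].assign(doublet_score=0.1) for b in ["b1", "b2"]]

scrubbed_obs = pd.concat(per_batch)       # order is now cell2, cell4, cell1, cell3
realigned = scrubbed_obs.loc[obs.index]   # back to cell1..cell4, matching adata.obs_names
print(realigned)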
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@

setup(
name='scanpy-scripts',
version='1.0.1',
version='1.1.0',
author='nh3',
author_email='[email protected]',
description='Scripts for using scanpy from the command line',
