Skip to content

Commit c9bbbf9

Browse files
committed
Merge branch 'release/v1.1.0' into master
2 parents 0a2366e + 83cdfae commit c9bbbf9

File tree

5 files changed

+79
-6
lines changed

5 files changed

+79
-6
lines changed

scanpy-scripts-tests.bats

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,19 @@ setup() {
88
output_dir="${test_dir}/outputs"
99
raw_matrix="${data_dir}/matrix.mtx"
1010
singlet_obs="${data_dir}/singlet_obs.txt"
11+
batch_obs="${data_dir}/batch_obs.txt"
1112
read_opt="-x $data_dir --show-obj stdout"
1213
read_obj="${output_dir}/read.h5ad"
1314
filter_opt="--save-raw -p n_genes 200 2500 -p c:n_counts 0 50000 -p n_cells 3 inf -p pct_counts_mito 0 0.2 -c mito '!True' --show-obj stdout"
1415
filter_obj="${output_dir}/filter.h5ad"
16+
test_clustering='louvain_k10_r0_5'
1517
scrublet_tsv="${output_dir}/scrublet.tsv"
1618
scrublet_png="${output_dir}/scrublet.png"
1719
scrublet_obj="${output_dir}/scrublet.h5ad"
20+
scrublet_batched_obj="${output_dir}/scrublet_batched.h5ad"
1821
scrublet_simulate_obj="${output_dir}/scrublet_simulate.h5ad"
1922
scrublet_opt="--input-obj-sim ${scrublet_simulate_obj} --filter --export-table ${scrublet_tsv}"
23+
scrublet_batched_opt="--filter --batch-key batch"
2024
norm_mtx="${output_dir}/norm"
2125
norm_opt="--save-layer filtered -t 10000 -l all -n after -X ${norm_mtx} --show-obj stdout"
2226
norm_obj="${output_dir}/norm.h5ad"
@@ -41,12 +45,11 @@ setup() {
4145
fdg_opt="--neighbors-key k10 --layout fr -E ${fdg_embed} --init-pos paga"
4246
fdg_obj="${output_dir}/fdg.h5ad"
4347
louvain_tsv="${output_dir}/louvain.tsv"
44-
louvain_opt="-r 0.5,1 --neighbors-key k10 --key-added k10 --export-cluster ${louvain_tsv}"
48+
louvain_opt="-r 0.1,0.5,1 --neighbors-key k10 --key-added k10 --export-cluster ${louvain_tsv}"
4549
louvain_obj="${output_dir}/louvain.h5ad"
4650
leiden_tsv="${output_dir}/leiden.tsv"
4751
leiden_opt="-r 0.3,0.7 --neighbors-key k10 --key-added k10 -F loom --loom-write-obsm-varm --export-cluster ${leiden_tsv}"
4852
leiden_obj="${output_dir}/leiden.loom"
49-
test_clustering='louvain_k10_r0_5'
5053
diffexp_tsv="${output_dir}/diffexp.tsv"
5154
diffexp_opt="-g ${test_clustering} --reference rest --filter-params min_in_group_fraction:0.25,min_fold_change:1.5 --save ${diffexp_tsv}"
5255
diffexp_obj="${output_dir}/diffexp.h5ad"
@@ -126,14 +129,26 @@ setup() {
126129
[ -f "$singlet_obs" ]
127130
}
128131

132+
@test "Make a batch variable" {
    # Writes a one-column obs table: a "batch" header followed by 1350 rows
    # of "batch1" and 1350 rows of "batch2".

    # In resume mode, keep a batch file left behind by an earlier run.
    if [ "$resume" = 'true' ] && [ -f "$batch_obs" ]; then
        skip "$batch_obs exists and resume is set to 'true'"
    fi

    # printf re-applies its format once per argument; "%0.s" consumes the
    # argument while printing nothing, so each printf emits 1350 identical
    # rows. Command substitution strips the trailing newline, hence the
    # explicit "\n" between the two groups.
    # Note: `run` wraps only the rm, so $status below reflects the rm.
    run rm -rf "$batch_obs" && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" > "$batch_obs"

    [ "$status" -eq 0 ]
    [ -f "$batch_obs" ]
}
143+
129144
# Read 10x dataset
130145

131146
@test "Scanpy object creation from 10x" {
132147
if [ "$resume" = 'true' ] && [ -f "$read_obj" ]; then
133148
skip "$read_obj exists and resume is set to 'true'"
134149
fi
135150

136-
run rm -f $read_obj && eval "$scanpy read --extra-obs $singlet_obs $read_opt $read_obj"
151+
run rm -f $read_obj && eval "paste -d $'\t' $singlet_obs $batch_obs > obs.txt && $scanpy read --extra-obs obs.txt $read_opt $read_obj"
137152

138153
[ "$status" -eq 0 ]
139154
[ -f "$read_obj" ]
@@ -218,6 +233,20 @@ setup() {
218233
[ -f "$scrublet_png" ]
219234
}
220235

236+
# Detect multiplets with Scrublet (batched)
237+
238+
@test "Run Scrublet for multiplet detection (batched)" {
    # In resume mode, reuse the batched Scrublet output from an earlier run.
    if [[ "$resume" = 'true' && -f "$scrublet_batched_obj" ]]; then
        skip "$scrublet_batched_obj exists and resume is set to 'true'"
    fi

    # Remove any stale output, then run the scrublet sub-command on the
    # object produced by the read step. eval re-splits the option string
    # into separate arguments. `run` wraps only the rm, so $status below
    # reflects the rm; a failing eval fails the test directly.
    run rm -f $scrublet_batched_obj \
        && eval "$scanpy multiplet scrublet $scrublet_batched_opt $read_obj $scrublet_batched_obj"

    [ "$status" -eq 0 ]
    [ -f "$scrublet_batched_obj" ]
}
248+
249+
221250
# Regress out variables
222251

223252
@test "Regress out unwanted variable" {

scanpy_scripts/cmd_options.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,6 +1671,13 @@
16711671
'scrublet': [
16721672
*COMMON_OPTIONS['input'],
16731673
*COMMON_OPTIONS['output'],
1674+
click.option(
1675+
'--batch-key', 'batch_key',
1676+
type=click.STRING,
1677+
default=None,
1678+
help='The name of the column in adata.obs that differentiates among '
1679+
'experiments/batches. Doublets will be detected in each batch separately.'
1680+
),
16741681
click.option(
16751682
'--input-obj-sim', 'adata_sim',
16761683
type=click.Path(exists=True, dir_okay=False),

scanpy_scripts/lib/_filter.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,15 @@ def filter_anndata(
6666
logging.warning('`pct_counts_%s` exists, not overwriting '
6767
'without --force-recalc', pt)
6868
pct_top.remove(pt)
69+
70+
# Calculate mito stats if we can, even if we're not filtering by them
71+
72+
if 'mito' not in qc_vars and 'mito' in adata.var.keys():
73+
qc_vars.append('mito')
74+
6975
sc.pp.calculate_qc_metrics(
7076
adata, layer=layer, qc_vars=qc_vars, percent_top=pct_top, inplace=True)
77+
7178
adata.obs['n_counts'] = adata.obs['total_counts']
7279
adata.obs['n_genes'] = adata.obs['n_genes_by_counts']
7380
adata.var['n_counts'] = adata.var['total_counts']

scanpy_scripts/lib/_scrublet.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
import scanpy.external as sce
77
import numpy as np
88
from ..obj_utils import write_obs
9+
import anndata
10+
import pandas as pd
911

1012
# Wrapper for scrublet allowing text export and filtering
1113

12-
def scrublet(adata, adata_sim=None, filter=False, export_table=None, **kwargs):
14+
def scrublet(adata, adata_sim=None, filter=False, batch_key=None, export_table=None, **kwargs):
1315
"""
1416
Wrapper function for sce.pp.scrublet(), to allow filtering of resulting object
1517
"""
@@ -19,8 +21,36 @@ def scrublet(adata, adata_sim=None, filter=False, export_table=None, **kwargs):
1921
if adata_sim:
2022
adata_sim = sc.read(adata_sim)
2123

22-
sce.pp.scrublet(adata, adata_sim=adata_sim, **kwargs)
24+
# Scrublet shouldn't be run on multi-batch data, so we run the batches
25+
# separately and copy the stats back to the input object
2326

27+
alldata = []
28+
if batch_key is not None:
29+
if batch_key not in adata.obs.keys():
30+
raise ValueError('`batch_key` must be a column of .obs in the input annData object.')
31+
32+
batches = np.unique(adata.obs[batch_key])
33+
34+
# Run Scrublet independently on batches and return just the
35+
# scrublet-relevant parts of the objects to add to the input object
36+
37+
def get_adata_scrub_parts(ad):
38+
return {'obs': ad.obs, 'uns': ad.uns['scrublet']}
39+
40+
scrubbed = [ get_adata_scrub_parts(sce.pp.scrublet(adata[adata.obs[batch_key] == batch,], adata_sim=adata_sim, copy = True, **kwargs)) for batch in batches ]
41+
scrubbed_obs = pd.concat([ scrub['obs'] for scrub in scrubbed])
42+
43+
# Now reset the obs to get the scrublet scores
44+
45+
adata.obs = scrubbed_obs.loc[adata.obs_names.values]
46+
47+
# Save the .uns from each batch separately
48+
49+
adata.uns['scrublet'] = dict(zip(batches, [ scrub['uns'] for scrub in scrubbed ]))
50+
51+
else:
52+
sce.pp.scrublet(adata, adata_sim=adata_sim, **kwargs)
53+
2454
# Do any export before optional filtering
2555

2656
if export_table:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name='scanpy-scripts',
8-
version='1.0.1',
8+
version='1.1.0',
99
author='nh3',
1010
author_email='[email protected]',
1111
description='Scripts for using scanpy from the command line',

0 commit comments

Comments
 (0)