Skip to content

Commit

Permalink
Merge branch 'release/v1.0.0' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
pinin4fjords committed Jul 2, 2021
2 parents 38554ab + 4bc7499 commit 3431fe9
Show file tree
Hide file tree
Showing 14 changed files with 358 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: [3.7, 3.8]

steps:
- uses: actions/checkout@v2
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,6 @@ Commands:
paga Trajectory inference by abstract graph analysis.
dpt Calculate diffusion pseudotime relative to the root cells.
integrate Integrate cells from different experimental batches.
multiplet Execute methods for multiplet removal.
plot Visualise data.
```
45 changes: 45 additions & 0 deletions scanpy-scripts-tests.bats
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ setup() {
read_obj="${output_dir}/read.h5ad"
filter_opt="--save-raw -p n_genes 200 2500 -p c:n_counts 0 50000 -p n_cells 3 inf -p pct_counts_mito 0 0.2 -c mito '!True' --show-obj stdout"
filter_obj="${output_dir}/filter.h5ad"
scrublet_tsv="${output_dir}/scrublet.tsv"
scrublet_png="${output_dir}/scrublet.png"
scrublet_obj="${output_dir}/scrublet.h5ad"
scrublet_simulate_obj="${output_dir}/scrublet_simulate.h5ad"
scrublet_opt="--input-obj-sim ${scrublet_simulate_obj} --filter --export-table ${scrublet_tsv}"
norm_mtx="${output_dir}/norm"
norm_opt="--save-layer filtered -t 10000 -l all -n after -X ${norm_mtx} --show-obj stdout"
norm_obj="${output_dir}/norm.h5ad"
Expand Down Expand Up @@ -173,6 +178,46 @@ setup() {
[ -f "$hvg_obj" ]
}

# Do separate doublet simulation step (normally we'd just let the main scrublet
# process do this).

@test "Run Scrublet doublet simulation" {
    # Honour resume mode: keep an existing simulation result rather than recomputing it.
    if [ "$resume" = 'true' ] && [ -f "$scrublet_simulate_obj" ]; then
        skip "$scrublet_simulate_obj exists and resume is set to 'true'"
    fi

    # Remove any previous output before running. The variable name was
    # previously misspelled here, so it expanded to nothing and a stale file
    # was left in place.
    run rm -f $scrublet_simulate_obj && eval "$scanpy multiplet scrublet_simulate_doublets $hvg_obj $scrublet_simulate_obj"

    [ "$status" -eq 0 ]
    [ -f "$scrublet_simulate_obj" ]
}

# Detect multiplets with Scrublet

@test "Run Scrublet for multiplet detection" {
# Honour resume mode: keep an existing result rather than recomputing it.
if [ "$resume" = 'true' ] && [ -f "$scrublet_obj" ]; then
skip "$scrublet_obj exists and resume is set to 'true'"
fi

# $scrublet_opt (built in setup) passes the pre-simulated doublets via
# --input-obj-sim, turns on --filter and requests a table via --export-table.
run rm -f $scrublet_obj && eval "$scanpy multiplet scrublet $scrublet_opt $hvg_obj $scrublet_obj"

[ "$status" -eq 0 ]
# Both the output object and the exported table must exist.
[ -f "$scrublet_obj" ] && [ -f "$scrublet_tsv" ]
}

# Run the doublet plot from Scrublet

@test "Run Scrublet score distribution plot" {
# Honour resume mode: keep an existing plot rather than redrawing it.
if [ "$resume" = 'true' ] && [ -f "$scrublet_png" ]; then
skip "$scrublet_png exists and resume is set to 'true'"
fi

# Input is $scrublet_obj, the object written by the multiplet detection test.
run rm -f $scrublet_png && eval "$scanpy plot scrublet $scrublet_obj $scrublet_png"

[ "$status" -eq 0 ]
[ -f "$scrublet_png" ]
}

# Regress out variables

@test "Regress out unwanted variable" {
Expand Down
11 changes: 11 additions & 0 deletions scanpy_scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
PLOT_MATRIX_CMD,
PLOT_HEATMAP_CMD,
HARMONY_INTEGRATE_CMD,
SCRUBLET_MULTIPLET_CMD,
SCRUBLET_MULTIPLET_SIMULATE_CMD,
SCRUBLET_MULTIPLET_PLOT_CMD,
BBKNN_CMD,
MNN_CORRECT_CMD,
COMBAT_CMD,
Expand Down Expand Up @@ -114,6 +117,13 @@ def integrate():
integrate.add_command(MNN_CORRECT_CMD)
integrate.add_command(COMBAT_CMD)

# Command group collecting the multiplet-removal subcommands.
@cli.group(cls=NaturalOrderGroup)
def multiplet():
"""Execute methods for multiplet removal."""

# Only the Scrublet run and simulation commands live here; the Scrublet plot
# command is registered on the `plot` group instead.
multiplet.add_command(SCRUBLET_MULTIPLET_CMD)
multiplet.add_command(SCRUBLET_MULTIPLET_SIMULATE_CMD)


@cli.group(cls=NaturalOrderGroup)
def plot():
Expand All @@ -126,3 +136,4 @@ def plot():
plot.add_command(PLOT_DOT_CMD)
plot.add_command(PLOT_MATRIX_CMD)
plot.add_command(PLOT_HEATMAP_CMD)
plot.add_command(SCRUBLET_MULTIPLET_PLOT_CMD)
220 changes: 201 additions & 19 deletions scanpy_scripts/cmd_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,14 @@
),
],


# Shared `--metric` option: defined once here and referenced from more than one
# command's option list instead of being repeated in each.
'neighbor_metric': click.option(
'--metric', '-t',
type=click.Choice(['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']),
default='euclidean',
show_default=True,
help='A known metric’s name.'
),

'layer':click.option(
'--layer',
type=CommaSeparatedText(simplify=True),
Expand Down Expand Up @@ -232,7 +239,7 @@

'use_raw': click.option(
'--use-raw/--no-raw', 'use_raw',
default=True,
default=None,
show_default=True,
help='Use expression values in `.raw` if present.',
),
Expand Down Expand Up @@ -537,6 +544,28 @@
show_default=True,
help="Layer to batch correct. By default corrects the contents of .X."
),

'scrublet': [
click.option(
'--sim-doublet-ratio',
type=click.FLOAT,
default=2.0,
show_default=True,
help='Number of doublets to simulate relative to the number of '
'observed transcriptomes.',
),
click.option(
'--synthetic-doublet-umi-subsampling',
type=click.FLOAT,
default=1.0,
show_default=True,
help='Where input_obj_sim not suplied, rate for sampling UMIs when '
'creating synthetic doublets. If 1.0, each doublet is created by '
'simply adding the UMI counts from two randomly sampled observed '
'transcriptomes. For values less than 1, the UMI counts are added '
'and then randomly sampled at the specified rate.'
),
],
}

COMMON_OPTIONS['opt_output'] = [
Expand Down Expand Up @@ -899,13 +928,7 @@
'connectivities. Use rapids for the RAPIDS implementation of UMAP '
'(experimental, GPU only).'
),
click.option(
'--metric', '-t',
type=click.Choice(['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']),
default='euclidean',
show_default=True,
help='A known metric’s name.'
),
COMMON_OPTIONS['neighbor_metric'],
],

'umap': [
Expand Down Expand Up @@ -1585,13 +1608,7 @@
'potentially increasing the degree of batch correction. Use this flag to disable '
'that behaviour.',
),
click.option(
'--metric', '-t',
type=click.Choice(['angular', 'cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']),
default='angular',
show_default=True,
help='A known metric’s name.'
),
COMMON_OPTIONS['neighbor_metric'],
click.option(
'--neighbors-within-batch',
type=click.INT,
Expand All @@ -1613,7 +1630,7 @@
'each cell. Set to 0 to skip.'
),
click.option(
'--n-trees',
'--annoy-n-trees',
type=click.INT,
default=10,
show_default=True,
Expand Down Expand Up @@ -1650,6 +1667,171 @@
'connectivity value of 1)'
),
],

'scrublet': [
*COMMON_OPTIONS['input'],
*COMMON_OPTIONS['output'],
click.option(
'--input-obj-sim', 'adata_sim',
type=click.Path(exists=True, dir_okay=False),
default=None,
help='(Advanced use case) Optional annData object generated by '
'sc.external.pp.scrublet_simulate_doublets(), with same number of '
'vars as adata. This should have been built from input_obj after '
'filtering genes and cells and selcting highly-variable genes.'
),
click.option(
'--threshold',
type=click.FLOAT,
default=None,
show_default=True,
help='Doublet score threshold for calling a transcriptome a '
'doublet. If not set, this is set automatically by looking for the '
'minimum between the two modes of the doublet_scores_sim_ histogram. '
'It is best practice to check the threshold visually using the '
'doublet_scores_sim_ histogram and/or based on co-localization of '
'predicted doublets in a 2-D embedding.'
),
*COMMON_OPTIONS['scrublet'],
click.option(
'--expected-doublet-rate',
type=click.FLOAT,
default=0.05,
show_default=True,
help='Where input_obj_sim not suplied, the estimated doublet rate '
'for the experiment.'
),
click.option(
'--stdev-doublet-rate',
type=click.FLOAT,
default=0.02,
show_default=True,
help='Where input_obje_sim not suplied, uncertainty in the expected '
'doublet rate.'
),
click.option(
'--knn-dist-metric', '-t',
type=click.Choice(['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']),
default='euclidean',
show_default=True,
help='A known metric’s name.'
),
click.option(
'--no-normalize-variance', 'normalize_variance',
is_flag=True,
default=True,
help='Default is to normalize the data such that each gene has a '
'variance of 1. sklearn.decomposition.TruncatedSVD will be used for '
'dimensionality reduction, if --no-mean-center is set. Use this flag '
'to disable that behaviour.'
),
click.option(
'--log-transform',
is_flag=True,
default=False,
show_default=True,
help='Whether to use :func:~scanpy.pp.log1p to log-transform the '
'data prior to PCA.'
),
click.option(
'--no-mean-center', 'mean_center',
is_flag=True,
default=True,
help='If True, center the data such that each gene has a mean of 0. '
'sklearn.decomposition.PCA will be used for dimensionality '
'reduction.'
),
click.option(
'--n-pcs', 'n_prin_comps',
type=click.INT,
default=30,
show_default=True,
help='Number of principal components used to embed the '
'transcriptomes prior to k-nearest-neighbor graph construction.'
),
click.option(
'--no-approx', 'use_approx_neighbors',
is_flag=True,
default=True,
help='Default behaviour is to use the approximate nearest neighbor '
'method (annoy) for the KNN classifier. Use this flag to disable '
'that behaviour.'
),
click.option(
'--get-doublet-neighbor-parents',
is_flag=True,
default=False,
show_default=True,
help='If set, return (in .uns) the parent transcriptomes that '
'generated the doublet neighbors of each observed transcriptome. '
'This information can be used to infer the cell states that '
'generated a given doublet state.'
),
click.option(
'--n-neighbors', '-k',
type=CommaSeparatedText(click.INT, simplify=True),
default=None,
show_default=True,
help='Number of neighbors used to construct the KNN graph of '
'observed transcriptomes and simulated doublets. If not set, this is '
'automatically set to np.round(0.5 * np.sqrt(n_obs)).'
),
click.option(
'--filter', 'filter',
is_flag=True,
default=False,
help='By default, the output object is annotated but not filtered '
'according to the scrublet status. Setting this flag will cause '
'predicted multiplet elements to be removed.'
),
click.option(
'--no-verbose', 'verbose',
is_flag=True,
default=True,
help='Default behaviour is to print progress updates. Use this flag '
'to disable that.'
),
click.option(
'--export-table',
type=click.Path(dir_okay=False, writable=True),
default=None,
show_default=True,
help='Export a table of double scores and calls to the specified file.',
),
COMMON_OPTIONS['random_state'],
],

'plot_scrublet': [
*COMMON_OPTIONS['input'],
*COMMON_OPTIONS['plot'],
click.option(
'--scale-hist-obs', '-b',
type=click.Choice(['linear', 'log', 'symlog', 'logit']),
default='log',
show_default=True,
help='Set y axis scale transformation in matplotlib for the plot of observed transcriptomes.'
),
click.option(
'--scale-hist-sim', '-s',
type=click.Choice(['linear', 'log', 'symlog', 'logit']),
default='linear',
show_default=True,
help='Set y axis scale transformation in matplotlib for the plot of observed transcriptomes.'
),
],

# Options for the `multiplet scrublet_simulate_doublets` command: standard
# input/output object options, the simulation options shared with `scrublet`,
# and the layer to read values from.
'scrublet_simulate_doublets': [
*COMMON_OPTIONS['input'],
*COMMON_OPTIONS['output'],
*COMMON_OPTIONS['scrublet'],
click.option(
'--layer', '-l',
type=click.STRING,
default=None,
help="Layer of adata where raw values are stored, or ‘X’ if values "
"are in .X."
),
],

'embed': [
*COMMON_OPTIONS['input'],
Expand Down Expand Up @@ -1818,10 +2000,10 @@
),
click.option(
'--color',
type=click.STRING,
type=CommaSeparatedText(simplify=True),
default=None,
show_default=True,
help='Key for annotation of observations/cells or variables/genes.',
help='Key(s) for annotation of observations/cells or variables/genes. Comma-separated if more than one',
),
click.option(
'--legend-loc',
Expand Down
2 changes: 2 additions & 0 deletions scanpy_scripts/cmd_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import click
import pandas as pd
import scanpy as sc
import scanpy.external as sce
from .cmd_options import CMD_OPTIONS
from .lib._paga import plot_paga
from .obj_utils import _save_matrix
from .lib._scrublet import plot_scrublet

def make_subcmd(cmd_name, func, cmd_desc, arg_desc, opt_set = None):
"""
Expand Down
Loading

0 comments on commit 3431fe9

Please sign in to comment.