diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 3bbfddb5..a04f041e 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -11,10 +11,15 @@ jobs: python-version: [3.7, 3.8] steps: + - uses: actions/checkout@v2 with: path: scanpy-scripts - + + - uses: psf/black@stable + with: + options: '--check --verbose --include="\.pyi?$" .' + - uses: actions/checkout@v2 with: repository: theislab/scanpy @@ -38,11 +43,13 @@ jobs: popd sudo apt-get install libhdf5-dev - pip install -U setuptools>=40.1 wheel 'cmake<3.20' + pip install -U setuptools>=40.1 wheel 'cmake<3.20' pytest pip install $(pwd)/scanpy-scripts python -m pip install $(pwd)/scanpy --no-deps --ignore-installed -vv + - name: Run unit tests + run: pytest --doctest-modules -v ./scanpy-scripts + - name: Test with bats run: | ./scanpy-scripts/scanpy-scripts-tests.bats - diff --git a/scanpy_scripts/__init__.py b/scanpy_scripts/__init__.py index 6ae586f2..d408a2cc 100644 --- a/scanpy_scripts/__init__.py +++ b/scanpy_scripts/__init__.py @@ -3,13 +3,15 @@ """ import pkg_resources -__version__ = pkg_resources.get_distribution('scanpy-scripts').version +__version__ = pkg_resources.get_distribution("scanpy-scripts").version -__author__ = ', '.join([ - 'Ni Huang', - 'Pablo Moreno', - 'Jonathan Manning', - 'Philipp Angerer', -]) +__author__ = ", ".join( + [ + "Ni Huang", + "Pablo Moreno", + "Jonathan Manning", + "Philipp Angerer", + ] +) from . import lib diff --git a/scanpy_scripts/cli.py b/scanpy_scripts/cli.py index c521ac95..f44b3f60 100755 --- a/scanpy_scripts/cli.py +++ b/scanpy_scripts/cli.py @@ -42,20 +42,20 @@ @click.group(cls=NaturalOrderGroup) @click.option( - '--debug', + "--debug", is_flag=True, default=False, - help='Print debug information', + help="Print debug information", ) @click.option( - '--verbosity', + "--verbosity", type=click.INT, default=3, - help='Set scanpy verbosity', + help="Set scanpy verbosity", ) @click.version_option( - version='0.2.0', - prog_name='scanpy', + version="0.2.0", + prog_name="scanpy", ) def cli(debug=False, verbosity=3): """ @@ -64,11 +64,12 @@ def cli(debug=False, verbosity=3): log_level = logging.DEBUG if debug else logging.INFO logging.basicConfig( level=log_level, - format=('%(asctime)s; %(levelname)s; %(filename)s; ' - '%(funcName)s(): %(message)s'), - datefmt='%y-%m-%d %H:%M:%S', + format=( + "%(asctime)s; %(levelname)s; %(filename)s; " "%(funcName)s(): %(message)s" + ), + datefmt="%y-%m-%d %H:%M:%S", ) - logging.debug('debugging') + logging.debug("debugging") sc.settings.verbosity = verbosity return 0 @@ -112,15 +113,18 @@ def cluster(): def integrate(): """Integrate cells from different experimental batches.""" + integrate.add_command(HARMONY_INTEGRATE_CMD) integrate.add_command(BBKNN_CMD) integrate.add_command(MNN_CORRECT_CMD) integrate.add_command(COMBAT_CMD) + @cli.group(cls=NaturalOrderGroup) def multiplet(): """Execute methods for multiplet removal.""" + multiplet.add_command(SCRUBLET_MULTIPLET_CMD) multiplet.add_command(SCRUBLET_MULTIPLET_SIMULATE_CMD) diff --git a/scanpy_scripts/click_utils.py b/scanpy_scripts/click_utils.py index 58fcbe96..ba6ac48e 100644 --- a/scanpy_scripts/click_utils.py +++ b/scanpy_scripts/click_utils.py @@ -3,6 +3,7 @@ """ import click +import sys class NaturalOrderGroup(click.Group): @@ -12,6 +13,7 @@ class NaturalOrderGroup(click.Group): @click.group(cls=NaturalOrderGroup) """ + def list_commands(self, ctx): """List command names as they are in commands dict. @@ -25,38 +27,67 @@ class CommaSeparatedText(click.ParamType): """ Comma separated text """ + def __init__(self, dtype=click.STRING, simplify=False, length=None): self.dtype = dtype self.dtype_name = _get_type_name(dtype) self.simplify = simplify self.length = length if length and length <= 3: - self.name = ','.join([f'{self.dtype_name}'] * length) + self.name = ",".join([f"{self.dtype_name}"] * length) else: - self.name = '{}[,{}...]'.format(self.dtype_name, self.dtype_name) + self.name = "{}[,{}...]".format(self.dtype_name, self.dtype_name) def convert(self, value, param, ctx): + """ + >>> @click.command() + ... @click.option('--test-param') + ... def test_cmd(): + ... pass + ... + >>> ctx = click.Context(test_cmd) + >>> param = test_cmd.params[0] + >>> test_cst1 = CommaSeparatedText() + >>> test_cst2 = CommaSeparatedText(click.INT, length=2) + >>> test_cst3 = CommaSeparatedText(click.FLOAT, simplify=True) + >>> + >>> test_cst1.convert(None, param, ctx) + >>> test_cst2.convert('7,2', param, ctx) + [7, 2] + >>> test_cst2.convert('7.2', param, ctx) + Traceback (most recent call last): + ... + click.exceptions.BadParameter: 7.2 is not a valid integer + >>> test_cst2.convert('7', param, ctx) + Traceback (most recent call last): + ... + click.exceptions.BadParameter: 7 is not a valid comma separated list of length 2 + >>> test_cst3.convert('7.2', param, ctx) + 7.2 + """ try: if value is None: converted = None else: - converted = list(map(self.dtype, str(value).split(','))) + converted = list(map(self.dtype, str(value).split(","))) if self.simplify and len(converted) == 1: converted = converted[0] except ValueError: self.fail( - '{} is not a valid comma separated list of {}'.format( - value, self.dtype_name), + "{} is not a valid comma separated list of {}".format( + value, self.dtype_name + ), param, - ctx + ctx, ) if self.length: if len(converted) != self.length: self.fail( - '{} is not a valid comma separated list of length {}'.format( - value, self.length), + "{} is not a valid comma separated list of length {}".format( + value, self.length + ), param, - ctx + ctx, ) return converted @@ -65,26 +96,50 @@ class Dictionary(click.ParamType): """ Text to be parsed as a python dict definition """ + def __init__(self, keys=None): - self.name = 'TEXT:VAL[,TEXT:VAL...]' + self.name = "TEXT:VAL[,TEXT:VAL...]" self.keys = keys def convert(self, value, param, ctx): + """ + >>> @click.command() + ... @click.option('--my-param', type=Dictionary(keys=('abc', 'def', 'ghi', 'jkl', 'mno'))) + ... def test_cmd(): + ... pass + ... + >>> ctx = click.Context(test_cmd) + >>> param = test_cmd.params[0] + >>> dict_param = param.type + >>> dict_str1 = 'abc:0.1,def:TRUE,ghi:False,jkl:None,mno:some_string' + >>> dict_str2 = 'abc:0.1,def:TRUE,ghi:False,jkl:None,mnp:some_string' + >>> dict_str3 = '' + >>> dict_param.convert(dict_str1, param, ctx) + {'abc': 0.1, 'def': True, 'ghi': False, 'jkl': None, 'mno': 'some_string'} + >>> dict_param.convert(dict_str2, param, ctx) + Traceback (most recent call last): + ... + click.exceptions.BadParameter: mnp is not a valid key (('abc', 'def', 'ghi', 'jkl', 'mno')) + >>> dict_param.convert(dict_str3, param, ctx) + Traceback (most recent call last): + ... + click.exceptions.BadParameter: is not a valid python dict definition + """ try: converted = dict() - for token in value.split(','): - if ':' not in token: + for token in value.split(","): + if ":" not in token: raise ValueError - key, _, value = token.partition(':') + key, _, value = token.partition(":") if not key: raise ValueError if isinstance(self.keys, (list, tuple)) and key not in self.keys: - self.fail(f'{key} is not a valid key ({self.keys})') - if value == 'None': + self.fail(f"{key} is not a valid key ({self.keys})") + if value == "None": value = None - elif value.lower() == 'true': + elif value.lower() == "true": value = True - elif value.lower() == 'false': + elif value.lower() == "false": value = False else: try: @@ -94,39 +149,76 @@ def convert(self, value, param, ctx): converted[key] = value return converted except ValueError: - self.fail( - f'{value} is not a valid python dict definition', - param, - ctx - ) + self.fail(f"{value} is not a valid python dict definition", param, ctx) def _get_type_name(obj): - name = 'text' + name = "text" try: - name = getattr(obj, 'name') + name = getattr(obj, "name") except AttributeError: - name = getattr(obj, '__name__') + name = getattr(obj, "__name__") return name def valid_limit(ctx, param, value): + """ + Callback function that checks order of numeric inputs + + >>> @click.command() + ... @click.option('--test-param', help='Sample help') + ... def test_cmd(): + ... pass + ... + >>> ctx = click.Context(test_cmd) + >>> param = test_cmd.params[0] + >>> valid_limit(ctx, param, value=[0.0125, 3]) + [0.0125, 3] + >>> valid_limit(ctx, param, value=[0.0125, -0.0125]) + Traceback (most recent call last): + ... + click.exceptions.BadParameter: lower limit must not exceed upper limit + >>> valid_limit(ctx, param, value=[0.0125, 0.0125]) + [0.0125, 0.0125] + """ if value[0] > value[1]: - param.type.fail( - 'lower limit must not exceed upper limit', param, ctx) + param.type.fail("lower limit must not exceed upper limit", param, ctx) return value def valid_parameter_limits(ctx, param, value): + """ + Callback function that checks order of multiple numeric inputs + + >>> @click.command() + ... @click.option('--test-param', type=(click.STRING, click.FLOAT, click.FLOAT), multiple=True) + ... def test_cmd(): + ... pass + ... + >>> ctx = click.Context(test_cmd) + >>> param = test_cmd.params[0] + >>> valid_parameter_limits(ctx, param, [['a', 0.0, 2.0]]) + [['a', 0.0, 2.0]] + >>> valid_parameter_limits(ctx, param, [['b', 0.0, 0.0]]) + [['b', 0.0, 0.0]] + >>> valid_parameter_limits(ctx, param, [['c', 0.0, -1.0]]) + Traceback (most recent call last): + ... + click.exceptions.BadParameter: lower limit must not exceed upper limit + >>> valid_parameter_limits(ctx, param, [['a', 0.0, 2.0], ['c', 0.0, -1.0]]) + Traceback (most recent call last): + ... + click.exceptions.BadParameter: lower limit must not exceed upper limit + """ for val in value: if val[1] > val[2]: - param.type.fail( - 'lower limit must not exceed upper limit', param, ctx) + param.type.fail("lower limit must not exceed upper limit", param, ctx) return value def mutually_exclusive_with(param_name): - internal_name = param_name.strip('-').replace('-', '_').lower() + internal_name = param_name.strip("-").replace("-", "_").lower() + def valid_mutually_exclusive(ctx, param, value): try: other_value = ctx.params[internal_name] @@ -135,22 +227,35 @@ def valid_mutually_exclusive(ctx, param, value): if (value is None) == (other_value is None): param.type.fail( 'mutually exclusive with "{}", one and only one must be ' - 'specified.'.format(param_name), + "specified.".format(param_name), param, ctx, ) return value + return valid_mutually_exclusive def required_by(param_name): - internal_name = param_name.strip('-').replace('-', '_').lower() + internal_name = param_name.strip("-").replace("-", "_").lower() + def required(ctx, param, value): try: other_value = ctx.params[internal_name] except KeyError: return value if other_value and not value: - param.type.fail('required by "{}".'.format(param_name), param, ctx,) + param.type.fail( + 'required by "{}".'.format(param_name), + param, + ctx, + ) return value + return required + + +if __name__ == "__main__": + import doctest + + sys.exit(doctest.testmod(verbose=True)[0]) diff --git a/scanpy_scripts/cmd_options.py b/scanpy_scripts/cmd_options.py index 031e47b8..4d741637 100644 --- a/scanpy_scripts/cmd_options.py +++ b/scanpy_scripts/cmd_options.py @@ -13,723 +13,755 @@ ) COMMON_OPTIONS = { - 'input': [ + "input": [ click.argument( - 'input_obj', - metavar='', + "input_obj", + metavar="", type=click.Path(exists=True, dir_okay=False), ), click.option( - '--input-format', '-f', - type=click.Choice(['anndata', 'loom']), - default='anndata', + "--input-format", + "-f", + type=click.Choice(["anndata", "loom"]), + default="anndata", show_default=True, - help='Input object format.', + help="Input object format.", ), ], - - 'output': [ + "output": [ click.argument( - 'output_obj', - metavar='', + "output_obj", + metavar="", type=click.Path(dir_okay=False, writable=True), ), click.option( - '--output-format', '-F', - type=click.Choice(['anndata', 'loom', 'zarr']), - default='anndata', + "--output-format", + "-F", + type=click.Choice(["anndata", "loom", "zarr"]), + default="anndata", show_default=True, - help='Output object format.', + help="Output object format.", ), click.option( - '--zarr-chunk-size', '-z', + "--zarr-chunk-size", + "-z", type=click.INT, default=1000, show_default=True, - help='Chunk size for writing output in zarr format.', + help="Chunk size for writing output in zarr format.", ), click.option( - '--loom-write-obsm-varm', '-b', + "--loom-write-obsm-varm", + "-b", is_flag=True, default=False, show_default=True, - help='Write obsm and varm to the Loom file?', + help="Write obsm and varm to the Loom file?", ), click.option( - '--export-mtx', '-X', + "--export-mtx", + "-X", type=click.Path(dir_okay=True, writable=True), default=None, show_default=True, - help='When specified, using it as prefix for exporting mtx files. ' + help="When specified, using it as prefix for exporting mtx files. " 'If not empty and not ending with "/" or "_", a "_" will be ' - 'appended.', + "appended.", ), click.option( - '--show-obj', - type=click.Choice(['stdout', 'stderr']), + "--show-obj", + type=click.Choice(["stdout", "stderr"]), default=None, show_default=True, - help='Print output object summary info to specified stream.', + help="Print output object summary info to specified stream.", ), ], - - 'save': [ + "save": [ click.option( - '--save-raw', '-r', + "--save-raw", + "-r", is_flag=True, default=False, show_default=True, - help='Save adata to adata.raw before processing.', + help="Save adata to adata.raw before processing.", ), click.option( - '--save-layer', '-y', + "--save-layer", + "-y", type=click.STRING, default=None, show_default=True, - help='Save adata.X to the specified layer before processing.', + help="Save adata.X to the specified layer before processing.", ), ], - - 'plot': [ + "plot": [ click.argument( - 'output_fig', - metavar='', + "output_fig", + metavar="", type=click.Path(dir_okay=False, writable=True), ), click.option( - '--fig-size', + "--fig-size", type=CommaSeparatedText(click.INT, length=2), default="7,7", show_default=True, - help='Figure size.', + help="Figure size.", ), click.option( - '--fig-dpi', + "--fig-dpi", type=click.INT, default=80, show_default=True, - help='Figure DPI.', + help="Figure DPI.", ), click.option( - '--fig-fontsize', + "--fig-fontsize", type=click.INT, default=15, show_default=True, - help='Figure font size.', + help="Figure font size.", ), ], - - 'frame_title': [ + "frame_title": [ click.option( - '--frameon/--frameoff', 'frameon', + "--frameon/--frameoff", + "frameon", default=True, show_default=True, - help='Draw a frame around the plot', + help="Draw a frame around the plot", ), click.option( - '--title', + "--title", type=CommaSeparatedText(simplify=True), default=None, show_default=True, - help='Provide title for the plot or panels.', + help="Provide title for the plot or panels.", ), ], - - 'use_pc': [ + "use_pc": [ click.option( - '--n-pcs', '-n', + "--n-pcs", + "-n", type=click.INT, default=None, show_default=True, - help='Use this many PCs. Use `.X` if --n-pcs is 0 when --use-rep is ' - 'None.', + help="Use this many PCs. Use `.X` if --n-pcs is 0 when --use-rep is " + "None.", ), - click.option( - '--use-rep', '-u', + "--use-rep", + "-u", type=click.STRING, default=None, show_default=True, - help='Use the indicated representation. If None, the representation is ' - 'chosen automatically: for `.n_vars` < 50, `.X` is used, otherwise ' - '`X_pca` is used. If `X_pca` is not present, it\'s computed with ' - 'default parameters.' + help="Use the indicated representation. If None, the representation is " + "chosen automatically: for `.n_vars` < 50, `.X` is used, otherwise " + "`X_pca` is used. If `X_pca` is not present, it's computed with " + "default parameters.", ), ], - - 'knn_graph': [ + "knn_graph": [ click.option( - '--neighbors-key', + "--neighbors-key", type=click.STRING, default=None, show_default=False, - help='If not specified, look in .uns[‘neighbors’] for neighbors ' - 'settings and .obsp[‘connectivities’], .obsp[‘distances’] for connectivities and ' - 'distances respectively (default storage places for pp.neighbors). If specified, ' - 'look in .uns[neighbors_key] for neighbors settings and ' - '.obsp[.uns[neighbors_key][‘connectivities_key’]], ' - '.obsp[.uns[neighbors_key][‘distances_key’]] for connectivities and distances ' - 'respectively.' + help="If not specified, look in .uns[‘neighbors’] for neighbors " + "settings and .obsp[‘connectivities’], .obsp[‘distances’] for connectivities and " + "distances respectively (default storage places for pp.neighbors). If specified, " + "look in .uns[neighbors_key] for neighbors settings and " + ".obsp[.uns[neighbors_key][‘connectivities_key’]], " + ".obsp[.uns[neighbors_key][‘distances_key’]] for connectivities and distances " + "respectively.", ), click.option( - '--obsp', + "--obsp", type=click.STRING, default=None, show_default=True, - help='Use .obsp[obsp] as adjacency. You can’t specify both obsp and ' - 'neighbors_key at the same time.' + help="Use .obsp[obsp] as adjacency. You can’t specify both obsp and " + "neighbors_key at the same time.", ), click.option( - '--directed/--undirected', 'directed', + "--directed/--undirected", + "directed", default=True, show_default=True, - help='Interpret the adjacency matrix as directed graph.', + help="Interpret the adjacency matrix as directed graph.", ), click.option( - '--use-weights', + "--use-weights", is_flag=True, default=False, show_default=True, - help='Use weights from KNN graph.', + help="Use weights from KNN graph.", ), ], - - 'neighbor_metric': click.option( - '--metric', '-t', - type=click.Choice(['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']), - default='euclidean', + "neighbor_metric": click.option( + "--metric", + "-t", + type=click.Choice( + [ + "cityblock", + "cosine", + "euclidean", + "l1", + "l2", + "manhattan", + "braycurtis", + "canberra", + "chebyshev", + "correlation", + "dice", + "hamming", + "jaccard", + "kulsinski", + "mahalanobis", + "minkowski", + "rogerstanimoto", + "russellrao", + "seuclidean", + "sokalmichener", + "sokalsneath", + "sqeuclidean", + "yule", + ] + ), + default="euclidean", show_default=True, - help='A known metric’s name.' + help="A known metric’s name.", ), - - 'layer':click.option( - '--layer', + "layer": click.option( + "--layer", type=CommaSeparatedText(simplify=True), default=None, show_default=True, - help='Name of the AnnData object layer that wants to be plotted. By ' - 'default adata.raw.X is plotted. If use_raw=False is set, then adata.X ' - 'is plotted. If layer is set to a valid layer name, then the layer is ' - 'plotted. layer takes precedence over use_raw.', + help="Name of the AnnData object layer that wants to be plotted. By " + "default adata.raw.X is plotted. If use_raw=False is set, then adata.X " + "is plotted. If layer is set to a valid layer name, then the layer is " + "plotted. layer takes precedence over use_raw.", ), - - 'n_comps': click.option( - '--n-comps', + "n_comps": click.option( + "--n-comps", type=click.INT, default=None, show_default=True, - help='Number of components to compute', + help="Number of components to compute", ), - - 'key_added': click.option( - '--key-added', + "key_added": click.option( + "--key-added", type=CommaSeparatedText(simplify=True), default=None, show_default=True, - help='Key under which to add the computed results', + help="Key under which to add the computed results", ), - - 'random_state': click.option( - '--random-state', '-S', + "random_state": click.option( + "--random-state", + "-S", type=click.INT, default=0, show_default=True, - help='Seed for random number generator.', + help="Seed for random number generator.", ), - - 'use_raw': click.option( - '--use-raw/--no-raw', 'use_raw', + "use_raw": click.option( + "--use-raw/--no-raw", + "use_raw", default=None, show_default=True, - help='Use expression values in `.raw` if present.', + help="Use expression values in `.raw` if present.", ), - - 'zero_center': click.option( - '--no-zero-center', 'zero_center', + "zero_center": click.option( + "--no-zero-center", + "zero_center", is_flag=True, flag_value=False, default=True, - help='When set, omit zero-centering variables to allow efficient ' - 'handling of sparse input.', + help="When set, omit zero-centering variables to allow efficient " + "handling of sparse input.", ), - - 'n_jobs': click.option( - '--n-jobs', '-J', + "n_jobs": click.option( + "--n-jobs", + "-J", type=click.INT, default=None, show_default=True, - help='Number of jobs for parallel computation.', + help="Number of jobs for parallel computation.", ), - - 'restrict_to': click.option( - '--restrict-to', + "restrict_to": click.option( + "--restrict-to", type=(click.STRING, CommaSeparatedText()), default=(None, None), show_default=True, - help='Restrict the clustering to the categories within the key for ' + help="Restrict the clustering to the categories within the key for " 'sample annotation, in the form of "obs_key list_of_categories".', ), - - 'export_embedding': click.option( - '--export-embedding', '-E', + "export_embedding": click.option( + "--export-embedding", + "-E", type=click.Path(dir_okay=False, writable=True), default=None, show_default=True, - help='Export embeddings in a tab-separated text table.', + help="Export embeddings in a tab-separated text table.", ), - - 'export_cluster': click.option( - '--export-cluster', + "export_cluster": click.option( + "--export-cluster", type=click.Path(dir_okay=False, writable=True), default=None, show_default=True, - help='Export embeddings in a tab-separated text table.', + help="Export embeddings in a tab-separated text table.", ), - - 'var_names': click.option( - '--var-names', - type=(CommaSeparatedText()), - show_default=True, - help='var_names should be a valid subset of adata.var_names.', + "var_names": click.option( + "--var-names", + type=(CommaSeparatedText()), + show_default=True, + help="var_names should be a valid subset of adata.var_names.", ), - - 'gene_symbols': click.option( - '--gene-symbols', - type=CommaSeparatedText(simplify=True), - default=None, - show_default=True, - help='Column name in .var DataFrame that stores gene symbols. By ' - 'default this is assumed to be the index column of the .var ' - 'DataFrame. Setting this option allows alternative names to be ' - 'used.', + "gene_symbols": click.option( + "--gene-symbols", + type=CommaSeparatedText(simplify=True), + default=None, + show_default=True, + help="Column name in .var DataFrame that stores gene symbols. By " + "default this is assumed to be the index column of the .var " + "DataFrame. Setting this option allows alternative names to be " + "used.", ), - - 'diffexp_plot': [ + "diffexp_plot": [ click.option( - '--rgg', + "--rgg", is_flag=True, default=False, show_default=True, - help='When set, use the rank_genes_groups_ form of the function, ' - 'where gene lists are automatically selected.', + help="When set, use the rank_genes_groups_ form of the function, " + "where gene lists are automatically selected.", ), click.option( - '--groupby', + "--groupby", type=CommaSeparatedText(simplify=True), default=None, show_default=True, - help='The key of the observation grouping to consider.', + help="The key of the observation grouping to consider.", ), click.option( - '--log', + "--log", is_flag=True, default=False, show_default=True, - help='Plot on logarithmic axis.', + help="Plot on logarithmic axis.", ), click.option( - '--num-categories', + "--num-categories", type=click.INT, default=7, show_default=True, - help='Only used if groupby observation is not categorical. This value ' - 'determines the number of groups into which the groupby observation ' - 'should be subdivided.', + help="Only used if groupby observation is not categorical. This value " + "determines the number of groups into which the groupby observation " + "should be subdivided.", ), click.option( - '--dendrogram', + "--dendrogram", is_flag=True, default=False, show_default=False, - help='If True, a dendrogram based on the hierarchical clustering ' - 'between the groupby categories is added. The dendrogram information is ' - 'computed using scanpy.tl.dendrogram(). If tl.dendrogram has not been ' - 'called previously the function is called with default parameters.', + help="If True, a dendrogram based on the hierarchical clustering " + "between the groupby categories is added. The dendrogram information is " + "computed using scanpy.tl.dendrogram(). If tl.dendrogram has not been " + "called previously the function is called with default parameters.", ), click.option( - '--standard-scale', - type=click.Choice(['var', 'obs']), + "--standard-scale", + type=click.Choice(["var", "obs"]), default=None, show_default=True, - help='Whether or not to standardize that dimension between 0 and 1, ' - 'meaning for each variable or group, subtract the minimum and divide ' - 'each by its maximum.' + help="Whether or not to standardize that dimension between 0 and 1, " + "meaning for each variable or group, subtract the minimum and divide " + "each by its maximum.", ), - ], - - 'sviol': [ + "sviol": [ click.option( - '--no-stripplot', 'stripplot', + "--no-stripplot", + "stripplot", is_flag=True, default=True, show_default=True, - help='When set, do not add a stripplot on top of the violin plot.', + help="When set, do not add a stripplot on top of the violin plot.", ), click.option( - '--no-jitter', 'jitter', + "--no-jitter", + "jitter", is_flag=True, default=True, show_default=True, - help='Suppress jitter in the stripplot (only when stripplot is True)' + help="Suppress jitter in the stripplot (only when stripplot is True)", ), click.option( - '--size', + "--size", type=click.INT, default=1, show_default=True, - help='Size of the jitter points.' + help="Size of the jitter points.", ), click.option( - '--order', + "--order", type=CommaSeparatedText(), default=None, show_default=True, - help='Order in which to show the categories.' + help="Order in which to show the categories.", ), click.option( - '--scale', - type=click.Choice(['area', 'count', 'width']), - default='width', + "--scale", + type=click.Choice(["area", "count", "width"]), + default="width", show_default=True, - help='The method used to scale the width of each violin. If ‘area’, ' - 'each violin will have the same area. If ‘count’, the width of the ' - 'violins will be scaled by the number of observations in that bin. If ' - '‘width’, each violin will have the same width.' + help="The method used to scale the width of each violin. If ‘area’, " + "each violin will have the same area. If ‘count’, the width of the " + "violins will be scaled by the number of observations in that bin. If " + "‘width’, each violin will have the same width.", ), click.option( - '--row-palette', + "--row-palette", type=CommaSeparatedText(simplify=True), - default='muted', + default="muted", show_default=True, - help='The row palette determines the colors to use in each of the ' - 'stacked violin plots. The value should be a valid seaborn palette name ' - 'or a valic matplotlib colormap (see ' - 'https://seaborn.pydata.org/generated/seaborn.color_palette.html). ' - 'Alternatively, a single color name or hex value can be passed. E.g. ' - '‘red’ or ‘#cc33ff’.' + help="The row palette determines the colors to use in each of the " + "stacked violin plots. The value should be a valid seaborn palette name " + "or a valic matplotlib colormap (see " + "https://seaborn.pydata.org/generated/seaborn.color_palette.html). " + "Alternatively, a single color name or hex value can be passed. E.g. " + "‘red’ or ‘#cc33ff’.", ), ], - - - 'dot': [ + "dot": [ click.option( - '--expression-cutoff', + "--expression-cutoff", type=click.FLOAT, default=0, show_default=True, - help='Expression cutoff that is used for binarizing the gene expression ' - 'and determining the fraction of cells expressing given genes. A gene is ' - 'expressed only if the expression value is greater than this threshold.' + help="Expression cutoff that is used for binarizing the gene expression " + "and determining the fraction of cells expressing given genes. A gene is " + "expressed only if the expression value is greater than this threshold.", ), click.option( - '--mean-only-expressed', + "--mean-only-expressed", is_flag=True, default=False, show_default=True, - help='If True, gene expression is averaged only over the cells ' - 'expressing the given genes.', + help="If True, gene expression is averaged only over the cells " + "expressing the given genes.", ), click.option( - '--color-map', + "--color-map", type=CommaSeparatedText(simplify=True), - default='Reds', + default="Reds", show_default=True, - help='String denoting matplotlib color map.', + help="String denoting matplotlib color map.", ), click.option( - '--dot-max', + "--dot-max", type=click.FLOAT, default=None, show_default=True, - help='If none, the maximum dot size is set to the maximum fraction ' - 'value found (e.g. 0.6). If given, the value should be a number between ' - '0 and 1. All fractions larger than dot_max are clipped to this value.' + help="If none, the maximum dot size is set to the maximum fraction " + "value found (e.g. 0.6). If given, the value should be a number between " + "0 and 1. All fractions larger than dot_max are clipped to this value.", ), click.option( - '--dot-min', + "--dot-min", type=click.FLOAT, default=None, show_default=True, - help='If none, the minimum dot size is set to 0. If given, the value ' - 'should be a number between 0 and 1. All fractions smaller than dot_min ' - 'are clipped to this value.' + help="If none, the minimum dot size is set to 0. If given, the value " + "should be a number between 0 and 1. All fractions smaller than dot_min " + "are clipped to this value.", ), click.option( - '--smallest-dot', + "--smallest-dot", type=click.FLOAT, default=0, show_default=True, - help='If none, the smallest dot has size 0. All expression levels with ' - 'dot_min are potted with smallest_dot dot size.' + help="If none, the smallest dot has size 0. All expression levels with " + "dot_min are potted with smallest_dot dot size.", ), ], - - 'heat': [ + "heat": [ click.option( - '--show-gene-labels', + "--show-gene-labels", is_flag=True, default=None, show_default=True, - help='By default gene labels are shown when there are 50 or less ' - 'genes. Otherwise the labels are removed.' + help="By default gene labels are shown when there are 50 or less " + "genes. Otherwise the labels are removed.", ), ], - - 'swap_axes': click.option( - '--swap-axes', + "swap_axes": click.option( + "--swap-axes", is_flag=True, default=False, show_default=True, - help='By default, the x axis contains var_names (e.g. genes) and the y ' - 'axis the groupby categories. By setting swap_axes then x are the ' - 'groupby categories and y the var_names. When swapping axes ' - 'var_group_positions are no longer used.', + help="By default, the x axis contains var_names (e.g. genes) and the y " + "axis the groupby categories. By setting swap_axes then x are the " + "groupby categories and y the var_names. When swapping axes " + "var_group_positions are no longer used.", ), - - 'rank_genes_groups_plots': [ + "rank_genes_groups_plots": [ click.option( - '--groups', + "--groups", type=CommaSeparatedText(), default=None, show_default=True, - help='The groups for which to show the gene ranking.' + help="The groups for which to show the gene ranking.", ), click.option( - '--n-genes', '-n', + "--n-genes", + "-n", type=click.INT, default=10, show_default=True, - help='Number of genes to show.' + help="Number of genes to show.", ), ], - - 'root': click.option( - '--root', + "root": click.option( + "--root", type=click.INT, default=0, show_default=True, - help='If choosing a tree layout, this is the index of the root node.', + help="If choosing a tree layout, this is the index of the root node.", ), - - 'plot_embed': [ + "plot_embed": [ click.option( - '--use-raw/--no-raw', + "--use-raw/--no-raw", default=None, show_default=True, - help='Use `.raw` attribute for coloring with gene expression. If ' - '`None`, uses `.raw` if present.', + help="Use `.raw` attribute for coloring with gene expression. If " + "`None`, uses `.raw` if present.", ), click.option( - '--groups', + "--groups", type=click.STRING, default=None, - help='Key for categorical in `.obs`. You can pass your predefined ' - 'groups by choosing any categorical annotation of observations.', + help="Key for categorical in `.obs`. You can pass your predefined " + "groups by choosing any categorical annotation of observations.", ), ], - - 'batch_key': click.option( - '--batch-key', 'key', + "batch_key": click.option( + "--batch-key", + "key", type=click.STRING, required=True, - help='The name of the column in adata.obs that differentiates among ' - 'experiments/batches.' + help="The name of the column in adata.obs that differentiates among " + "experiments/batches.", ), - - 'batch_layer': click.option( - '--layer', '-l', + "batch_layer": click.option( + "--layer", + "-l", type=click.STRING, default=None, show_default=True, - help="Layer to batch correct. By default corrects the contents of .X." + help="Layer to batch correct. By default corrects the contents of .X.", ), - - 'scrublet': [ + "scrublet": [ click.option( - '--sim-doublet-ratio', + "--sim-doublet-ratio", type=click.FLOAT, default=2.0, show_default=True, - help='Number of doublets to simulate relative to the number of ' - 'observed transcriptomes.', + help="Number of doublets to simulate relative to the number of " + "observed transcriptomes.", ), click.option( - '--synthetic-doublet-umi-subsampling', + "--synthetic-doublet-umi-subsampling", type=click.FLOAT, default=1.0, show_default=True, - help='Where input_obj_sim not suplied, rate for sampling UMIs when ' - 'creating synthetic doublets. If 1.0, each doublet is created by ' - 'simply adding the UMI counts from two randomly sampled observed ' - 'transcriptomes. For values less than 1, the UMI counts are added ' - 'and then randomly sampled at the specified rate.' + help="Where input_obj_sim not suplied, rate for sampling UMIs when " + "creating synthetic doublets. If 1.0, each doublet is created by " + "simply adding the UMI counts from two randomly sampled observed " + "transcriptomes. For values less than 1, the UMI counts are added " + "and then randomly sampled at the specified rate.", ), ], } -COMMON_OPTIONS['opt_output'] = [ +COMMON_OPTIONS["opt_output"] = [ click.option( - '--output-obj', + "--output-obj", type=click.Path(dir_okay=False, writable=True), - help='Optionally output an object to the specified path.', + help="Optionally output an object to the specified path.", ), - *COMMON_OPTIONS['output'][1:], + *COMMON_OPTIONS["output"][1:], ] CMD_OPTIONS = { - 'read': [ + "read": [ click.option( - '--input-10x-h5', '-i', + "--input-10x-h5", + "-i", type=click.Path(exists=True, dir_okay=False), - callback=mutually_exclusive_with('--input-10x-mtx'), - help='Input 10x data in Cell-Ranger hdf5 format.', + callback=mutually_exclusive_with("--input-10x-mtx"), + help="Input 10x data in Cell-Ranger hdf5 format.", ), click.option( - '--input-10x-mtx', '-x', + "--input-10x-mtx", + "-x", type=click.Path(exists=True, file_okay=False), - callback=mutually_exclusive_with('--input-10x-h5'), - help='Path of input folder containing 10x data in mtx format.', + callback=mutually_exclusive_with("--input-10x-h5"), + help="Path of input folder containing 10x data in mtx format.", ), - *COMMON_OPTIONS['output'], + *COMMON_OPTIONS["output"], click.option( - '--genome', '-g', - callback=required_by('--input-10x-h5'), - default='hg19', + "--genome", + "-g", + callback=required_by("--input-10x-h5"), + default="hg19", show_default=True, - help='Name of the genome group in hdf5 file, required by ' + help="Name of the genome group in hdf5 file, required by " '"--input-10x-h5".', ), click.option( - '--var-names', '-v', - type=click.Choice(['gene_symbols', 'gene_ids']), - callback=required_by('--input-10x-mtx'), - default='gene_symbols', + "--var-names", + "-v", + type=click.Choice(["gene_symbols", "gene_ids"]), + callback=required_by("--input-10x-mtx"), + default="gene_symbols", show_default=True, - help='Attribute to be used as the index of the variable table, ' + help="Attribute to be used as the index of the variable table, " 'required by "--input-10x-mtx".', ), click.option( - '--extra-obs', + "--extra-obs", type=click.Path(exists=True, dir_okay=False), default=None, show_default=True, - help='Extra cell metadata table, must be tab-separated with a header ' - 'row and an index column, and with matched dimension.', + help="Extra cell metadata table, must be tab-separated with a header " + "row and an index column, and with matched dimension.", ), click.option( - '--extra-var', + "--extra-var", type=click.Path(exists=True, dir_okay=False), default=None, show_default=True, - help='Extra gene metadata table, must be tab-separated with a header ' - 'row and an index column, and with matched dimension.', + help="Extra gene metadata table, must be tab-separated with a header " + "row and an index column, and with matched dimension.", ), ], - - 'filter': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['save'][0], # --save-raw + "filter": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["save"][0], # --save-raw click.option( - '--gene-name', '-g', + "--gene-name", + "-g", type=click.STRING, - default='index', + default="index", show_default=True, - help='Name of the variable that contains gene names, used for flagging ' + help="Name of the variable that contains gene names, used for flagging " 'mitochondria genes when column "mito" is absent from `.var`.', ), click.option( - '--list-attr', '-l', + "--list-attr", + "-l", is_flag=True, default=False, - help='When set, list attributes that can be filtered on.', + help="When set, list attributes that can be filtered on.", ), click.option( - '--param', '-p', + "--param", + "-p", type=(click.STRING, click.FLOAT, click.FLOAT), multiple=True, callback=valid_parameter_limits, - help='Numerical parameters used to filter the data, ' + help="Numerical parameters used to filter the data, " 'in the format of "-p name min max". ' - 'Multiple -p entries allowed.', + "Multiple -p entries allowed.", ), click.option( - '--category', '-c', + "--category", + "-c", type=(click.STRING, CommaSeparatedText()), multiple=True, - help='Categorical attributes used to filter the data, ' + help="Categorical attributes used to filter the data, " 'in the format of "-c ", ' - 'where entries with attribute with value in are kept. ' + "where entries with attribute with value in are kept. " 'If is preceded by "!", entries with value in are ' - 'removed. Multiple -c entries allowed.', + "removed. Multiple -c entries allowed.", ), click.option( - '--subset', '-s', + "--subset", + "-s", type=(click.STRING, click.File()), multiple=True, help='Similar to --category in the format of "-s ", ' - 'but the to be a one-column table that provides the values. ' - 'Multiple -s entries allowed.', + "but the to be a one-column table that provides the values. " + "Multiple -s entries allowed.", ), click.option( - '--force-recalc', + "--force-recalc", is_flag=True, default=False, - help='When set, re-calculate `pct_counts_` and ' - '`pct_counts_in_top__genes` even if they exist.', + help="When set, re-calculate `pct_counts_` and " + "`pct_counts_in_top__genes` even if they exist.", ), ], - - 'norm': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - *COMMON_OPTIONS['save'], - COMMON_OPTIONS['key_added'], - click.option( - '--no-log-transform', 'log_transform', + "norm": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + *COMMON_OPTIONS["save"], + COMMON_OPTIONS["key_added"], + click.option( + "--no-log-transform", + "log_transform", is_flag=True, default=True, show_default=True, - help='When set, do not apply (natural) log transform following normalisation.', + help="When set, do not apply (natural) log transform following normalisation.", ), click.option( - '--normalize-to', '-t', 'target_sum', + "--normalize-to", + "-t", + "target_sum", type=float, default=10_000, show_default=True, - help='Normalize per cell nUMI to this number.', + help="Normalize per cell nUMI to this number.", ), click.option( - '--exclude-highly-expressed', '-e', 'exclude_highly_expressed', + "--exclude-highly-expressed", + "-e", + "exclude_highly_expressed", is_flag=True, default=False, show_default=True, - help='Exclude (very) highly expressed genes for the computation of ' - 'the normalization factor (size factor) for each cell. A gene is considered ' - 'highly expressed, if it has more than max_fraction of the total counts in at ' - 'least one cell. The not-excluded genes will sum up to the number ' - 'specified by --normalize-to.' + help="Exclude (very) highly expressed genes for the computation of " + "the normalization factor (size factor) for each cell. A gene is considered " + "highly expressed, if it has more than max_fraction of the total counts in at " + "least one cell. The not-excluded genes will sum up to the number " + "specified by --normalize-to.", ), click.option( - '--max-fraction', '-m', 'max_fraction', + "--max-fraction", + "-m", + "max_fraction", type=float, default=0.05, show_default=True, - help='If exclude_highly_expressed=True, consider cells as highly ' - 'expressed that have more counts than max_fraction of the original total counts ' - 'in at least one cell.' + help="If exclude_highly_expressed=True, consider cells as highly " + "expressed that have more counts than max_fraction of the original total counts " + "in at least one cell.", ), click.option( - '--layers', '-l', + "--layers", + "-l", type=CommaSeparatedText(simplify=True), default=None, show_default=True, - help="List of layers to normalize. Set to 'all' to normalize all layers." + help="List of layers to normalize. Set to 'all' to normalize all layers.", ), click.option( - '--layer-norm', '-n', 'layer_norm', - type=click.Choice(['after', 'X']), + "--layer-norm", + "-n", + "layer_norm", + type=click.Choice(["after", "X"]), default=None, show_default=True, help="Specifies how to normalize layers: 1) If None, after " @@ -738,69 +770,74 @@ "'after', for each layer in layers each cell has a total count equal to " "target_sum. 3) If 'X', for each layer in layers each cell has a total count " "equal to the median of total counts for observations (cells) of adata.X before " - "normalization.'" + "normalization.'", ), ], - - 'hvg': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], + "hvg": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], click.option( - '--mean-limits', '-m', + "--mean-limits", + "-m", type=(click.FLOAT, click.FLOAT), callback=valid_limit, default=(0.0125, 3), show_default=True, - help='Cutoffs for the mean of expression' - 'in the format of "-m min max".', + help="Cutoffs for the mean of expression" 'in the format of "-m min max".', ), click.option( - '--disp-limits', '-d', + "--disp-limits", + "-d", type=(click.FLOAT, click.FLOAT), callback=valid_limit, - default=(0.5, float('inf')), + default=(0.5, float("inf")), show_default=True, - help='Cutoffs for the dispersion of expression' + help="Cutoffs for the dispersion of expression" 'in the format of "-d min max".', ), click.option( - '--span', + "--span", type=click.FLOAT, default=0.3, show_default=True, help="The fraction of the data (cells) used when estimating the " - "variance in the loess model fit if flavor='seurat_v3'." + "variance in the loess model fit if flavor='seurat_v3'.", ), click.option( - '--n-bins', '-b', + "--n-bins", + "-b", type=click.INT, default=20, show_default=True, - help='Number of bins for binning the mean gene expression.', + help="Number of bins for binning the mean gene expression.", ), click.option( - '--n-top-genes', '-t', + "--n-top-genes", + "-t", type=click.INT, default=None, show_default=True, - help='Number of highly-variable genes to keep.', + help="Number of highly-variable genes to keep.", ), click.option( - '--flavor', '-v', - type=click.Choice(['seurat', 'cell_ranger', 'seurat_v3']), - default='seurat', + "--flavor", + "-v", + type=click.Choice(["seurat", "cell_ranger", "seurat_v3"]), + default="seurat", show_default=True, - help='Choose the flavor for computing normalized dispersion.', + help="Choose the flavor for computing normalized dispersion.", ), click.option( - '--subset', '-s', + "--subset", + "-s", is_flag=True, default=False, - help='When set, inplace subset to highly-variable genes, otherwise ' - 'only flag highly-variable genes.', + help="When set, inplace subset to highly-variable genes, otherwise " + "only flag highly-variable genes.", ), click.option( - '--batch-key', 'batch_key', + "--batch-key", + "batch_key", type=click.STRING, default=None, help="If specified, highly-variable genes are selected within each " @@ -809,278 +846,285 @@ "flavors, genes are first sorted by how many batches they are a HVG. For " "dispersion-based flavors ties are broken by normalized dispersion. If flavor = " "'seurat_v3', ties are broken by the median (across batches) rank based on " - "within-batch normalized variance." + "within-batch normalized variance.", ), ], - - 'scale': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - *COMMON_OPTIONS['save'], - COMMON_OPTIONS['zero_center'], - click.option( - '--max-value', '-m', + "scale": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + *COMMON_OPTIONS["save"], + COMMON_OPTIONS["zero_center"], + click.option( + "--max-value", + "-m", type=click.FLOAT, default=None, show_default=True, - help='When specified, clip to this value after scaling, otherwise do ' - 'not clip', + help="When specified, clip to this value after scaling, otherwise do " + "not clip", ), click.option( - '--layer', '-l', + "--layer", + "-l", type=CommaSeparatedText(simplify=True), default=None, - help="If provided, which element of layers to scale." + help="If provided, which element of layers to scale.", ), ], - - 'regress': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - *COMMON_OPTIONS['save'], - COMMON_OPTIONS['n_jobs'], - click.option( - '--keys', '-k', + "regress": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + *COMMON_OPTIONS["save"], + COMMON_OPTIONS["n_jobs"], + click.option( + "--keys", + "-k", type=CommaSeparatedText(simplify=True), default=None, show_default=True, - help='Key(s) for observation annotation on which to regress.', + help="Key(s) for observation annotation on which to regress.", ), ], - - 'pca': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['zero_center'], - COMMON_OPTIONS['random_state'], - COMMON_OPTIONS['export_embedding'], - COMMON_OPTIONS['n_comps'], + "pca": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["zero_center"], + COMMON_OPTIONS["random_state"], + COMMON_OPTIONS["export_embedding"], + COMMON_OPTIONS["n_comps"], click.option( - '--svd-solver', '-V', - type=click.Choice(['auto', 'arpack', 'randomized']), - default='auto', + "--svd-solver", + "-V", + type=click.Choice(["auto", "arpack", "randomized"]), + default="auto", show_default=True, - help='SVD solver to use.' + help="SVD solver to use.", ), click.option( - '--use-all', '-a', 'use_highly_variable', + "--use-all", + "-a", + "use_highly_variable", is_flag=True, flag_value=False, default=True, - help='When set, use all genes for PCA, otherwise use ' - 'highly-variable genes by default.' + help="When set, use all genes for PCA, otherwise use " + "highly-variable genes by default.", ), click.option( - '--chunked', '-K', + "--chunked", + "-K", is_flag=True, default=False, - help='When set, perform an incremental PCA on segments of ' - '--chunk-size, which automatically zero centers and ignore settings of ' - '--random-state and --svd-solver.', + help="When set, perform an incremental PCA on segments of " + "--chunk-size, which automatically zero centers and ignore settings of " + "--random-state and --svd-solver.", ), click.option( - '--chunk-size', '-Z', + "--chunk-size", + "-Z", type=click.INT, - callback=required_by('--chunked'), + callback=required_by("--chunked"), default=None, show_default=True, - help='Number of observations to include in each chunk, required by ' - '--chunked.', + help="Number of observations to include in each chunk, required by " + "--chunked.", ), ], - - 'neighbor': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - *COMMON_OPTIONS['use_pc'], - COMMON_OPTIONS['key_added'], - COMMON_OPTIONS['random_state'], - click.option( - '--n-neighbors', '-k', + "neighbor": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + *COMMON_OPTIONS["use_pc"], + COMMON_OPTIONS["key_added"], + COMMON_OPTIONS["random_state"], + click.option( + "--n-neighbors", + "-k", type=CommaSeparatedText(click.INT, simplify=True), default=15, show_default=True, - help='The size of local neighborhood (in terms of number of ' - 'neighboring data points) used for manifold approximation. Larger ' - 'values result in more global views of the manifold, while smaller ' - 'values result in more local data being preserved. In general values ' - 'should be in the range 2 to 100. If --knn is set, number of nearest ' - 'neighbors to be searched, othwise a Gaussian kernel width is set to ' - 'the distance of the --n-neighbors neighbor.', + help="The size of local neighborhood (in terms of number of " + "neighboring data points) used for manifold approximation. Larger " + "values result in more global views of the manifold, while smaller " + "values result in more local data being preserved. In general values " + "should be in the range 2 to 100. If --knn is set, number of nearest " + "neighbors to be searched, othwise a Gaussian kernel width is set to " + "the distance of the --n-neighbors neighbor.", ), click.option( - '--no-knn', 'knn', + "--no-knn", + "knn", is_flag=True, flag_value=False, default=True, show_default=True, - help='When NOT set, use a hard threshold to restrict the number of ' - 'neighbors to --n-neighbors. Otherwise, use a Gaussian kernel to ' - 'assign low weights to neighbors more distant than the --n-neighbors ' - 'nearest neighbor', + help="When NOT set, use a hard threshold to restrict the number of " + "neighbors to --n-neighbors. Otherwise, use a Gaussian kernel to " + "assign low weights to neighbors more distant than the --n-neighbors " + "nearest neighbor", ), click.option( - '--method', '-m', - type=click.Choice(['umap', 'gauss', 'rapids']), - default='umap', + "--method", + "-m", + type=click.Choice(["umap", "gauss", "rapids"]), + default="umap", show_default=True, - help='Use umap or gauss with adaptive width for computing ' - 'connectivities. Use rapids for the RAPIDS implementation of UMAP ' - '(experimental, GPU only).' + help="Use umap or gauss with adaptive width for computing " + "connectivities. Use rapids for the RAPIDS implementation of UMAP " + "(experimental, GPU only).", ), - COMMON_OPTIONS['neighbor_metric'], + COMMON_OPTIONS["neighbor_metric"], ], - - 'umap': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['knn_graph'][0], # --neighbors-key - COMMON_OPTIONS['random_state'], - COMMON_OPTIONS['key_added'], - COMMON_OPTIONS['export_embedding'], - click.option( - '--init-pos', + "umap": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["knn_graph"][0], # --neighbors-key + COMMON_OPTIONS["random_state"], + COMMON_OPTIONS["key_added"], + COMMON_OPTIONS["export_embedding"], + click.option( + "--init-pos", type=click.STRING, - default='spectral', + default="spectral", show_default=True, - help='How to initialize the low dimensional embedding. Can be ' + help="How to initialize the low dimensional embedding. Can be " '"spectral", "paga" or "random", or any key of `.obsm`.', ), click.option( - '--min-dist', + "--min-dist", type=click.FLOAT, default=0.5, show_default=True, - help='The effective minimum distance between embedded points. Smaller ' - 'values will result in a more clustered embedding, while larger values ' - 'will results in a more even dispersal of points.', + help="The effective minimum distance between embedded points. Smaller " + "values will result in a more clustered embedding, while larger values " + "will results in a more even dispersal of points.", ), click.option( - '--spread', + "--spread", type=click.FLOAT, default=1.0, show_default=True, - help='The effective scale of embedded points, which determines the ' - 'scale at which embedded points will be spread out.', + help="The effective scale of embedded points, which determines the " + "scale at which embedded points will be spread out.", ), click.option( - '--n-components', + "--n-components", type=click.INT, default=2, show_default=True, - help='The number of dimensions of the embedding.', + help="The number of dimensions of the embedding.", ), click.option( - '--maxiter', + "--maxiter", type=click.INT, default=None, show_default=True, - help='The number of iterations of the optimization.', + help="The number of iterations of the optimization.", ), click.option( - '--alpha', + "--alpha", type=click.FLOAT, default=1.0, show_default=True, - help='The initial learning rate for the embedding optimization.', + help="The initial learning rate for the embedding optimization.", ), click.option( - '--gamma', + "--gamma", type=click.FLOAT, default=1.0, show_default=True, - help='Weighting applied to negative samples in low dimensional ' - 'embedding optimization.', + help="Weighting applied to negative samples in low dimensional " + "embedding optimization.", ), click.option( - '--negative-sample-rate', + "--negative-sample-rate", type=click.INT, default=5, show_default=True, - help='The number of negative edge samples to use per positive edge ' - 'sample in optimizing the low dimensional embedding.', + help="The number of negative edge samples to use per positive edge " + "sample in optimizing the low dimensional embedding.", ), click.option( - '--method', - type=click.Choice(['umap', 'rapids']), - default='umap', + "--method", + type=click.Choice(["umap", "rapids"]), + default="umap", show_default=True, - help='Use the original ‘umap’ implementation, or ‘rapids’ ' - '(experimental, GPU only).' + help="Use the original ‘umap’ implementation, or ‘rapids’ " + "(experimental, GPU only).", ), ], - - 'tsne': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - *COMMON_OPTIONS['use_pc'], - COMMON_OPTIONS['random_state'], - COMMON_OPTIONS['key_added'], - COMMON_OPTIONS['n_jobs'], - COMMON_OPTIONS['export_embedding'], - click.option( - '--perplexity', + "tsne": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + *COMMON_OPTIONS["use_pc"], + COMMON_OPTIONS["random_state"], + COMMON_OPTIONS["key_added"], + COMMON_OPTIONS["n_jobs"], + COMMON_OPTIONS["export_embedding"], + click.option( + "--perplexity", type=click.FLOAT, default=30, show_default=True, - help='The perplexity is related to the number of nearest neighbors ' - 'that is used in other manifold learning algorithms. Larger datasets ' - 'usually require a larger perplexity. Consider selecting a value ' - 'between 5 and 50. The choice is not extremely critical since t-SNE ' - 'is quite insensitive to this parameter.', + help="The perplexity is related to the number of nearest neighbors " + "that is used in other manifold learning algorithms. Larger datasets " + "usually require a larger perplexity. Consider selecting a value " + "between 5 and 50. The choice is not extremely critical since t-SNE " + "is quite insensitive to this parameter.", ), click.option( - '--early-exaggeration', + "--early-exaggeration", type=click.FLOAT, default=12, show_default=True, - help='Controls how tight natural clusters in the original space are in ' - 'the embedded space and how much space will be between them. For ' - 'larger values, the space between natural clusters will be larger in ' - 'the embedded space. Again, the choice of this parameter is not very ' - 'critical. If the cost function increases during initial optimization, ' - 'the early exaggeration factor or the learning rate might be too high.', + help="Controls how tight natural clusters in the original space are in " + "the embedded space and how much space will be between them. For " + "larger values, the space between natural clusters will be larger in " + "the embedded space. Again, the choice of this parameter is not very " + "critical. If the cost function increases during initial optimization, " + "the early exaggeration factor or the learning rate might be too high.", ), click.option( - '--learning-rate', + "--learning-rate", type=click.FLOAT, default=1000, show_default=True, help='Note that the R-package "Rtsne" uses a default of 200. The ' - 'learning rate can be a critical parameter. It should be between 100 ' - 'and 1000. If the cost function increases during initial optimization, ' - 'the early exaggeration factor or the learning rate might be too high. ' - 'If the cost function gets stuck in a bad local minimum increasing the ' - 'learning rate helps sometimes.', + "learning rate can be a critical parameter. It should be between 100 " + "and 1000. If the cost function increases during initial optimization, " + "the early exaggeration factor or the learning rate might be too high. " + "If the cost function gets stuck in a bad local minimum increasing the " + "learning rate helps sometimes.", ), click.option( - '--no-fast-tsne', 'use_fast_tsne', + "--no-fast-tsne", + "use_fast_tsne", is_flag=True, flag_value=False, default=True, show_default=True, - help='When NOT set, use the MulticoreTSNE package by D. Ulyanov if ' - 'installed.', + help="When NOT set, use the MulticoreTSNE package by D. Ulyanov if " + "installed.", ), ], - - 'fdg': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['random_state'], - COMMON_OPTIONS['export_embedding'], - COMMON_OPTIONS['root'], - click.option( - '--init-pos', + "fdg": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["random_state"], + COMMON_OPTIONS["export_embedding"], + COMMON_OPTIONS["root"], + click.option( + "--init-pos", type=click.STRING, default=None, - help='Use precomputed coordinates for initialization. Can be any key ' + help="Use precomputed coordinates for initialization. Can be any key " 'of `.obsm` or "paga" if .uns["paga"] is present', ), click.option( - '--layout', - type=click.Choice(['fa', 'fr', 'grid_fr', 'kk', 'lgl', 'drl', 'rt', 'rt_circular']), - default='fa', + "--layout", + type=click.Choice( + ["fa", "fr", "grid_fr", "kk", "lgl", "drl", "rt", "rt_circular"] + ), + default="fa", show_default=True, help='Name of any valid igraph layout, including "fa" (ForceAtlas2), ' '"fr" (Fruchterman Reingold), "grid_fr" (Grid Fruchterman Reingold, ' @@ -1089,292 +1133,292 @@ 'pretty fast) and "rt" (Reingold Tilford tree layout).', ), click.option( - '--key-added-ext', + "--key-added-ext", type=click.STRING, default=None, show_default=True, - help="By default, append 'layout'" + help="By default, append 'layout'", ), click.option( - '--init-pos', + "--init-pos", type=click.STRING, default=None, show_default=True, help='How to initialize the low dimensional embedding. Can be "paga", ' - 'or any valid key of `.obsm`.', + "or any valid key of `.obsm`.", ), - COMMON_OPTIONS['knn_graph'][0], # --neighbors-key - COMMON_OPTIONS['knn_graph'][1], # --obsp + COMMON_OPTIONS["knn_graph"][0], # --neighbors-key + COMMON_OPTIONS["knn_graph"][1], # --obsp ], - - 'louvain': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['export_cluster'], - *COMMON_OPTIONS['knn_graph'], - COMMON_OPTIONS['restrict_to'], - COMMON_OPTIONS['random_state'], - COMMON_OPTIONS['key_added'], - click.option( - '--flavor', - type=click.Choice(['vtraag', 'igraph']), - default='vtraag', - show_default=True, - help='Choose between two packages for computing the clustering. ' + "louvain": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["export_cluster"], + *COMMON_OPTIONS["knn_graph"], + COMMON_OPTIONS["restrict_to"], + COMMON_OPTIONS["random_state"], + COMMON_OPTIONS["key_added"], + click.option( + "--flavor", + type=click.Choice(["vtraag", "igraph"]), + default="vtraag", + show_default=True, + help="Choose between two packages for computing the clustering. " '"vtraag" is much powerful, and the default.', ), click.option( - '--resolution', '-r', + "--resolution", + "-r", type=CommaSeparatedText(click.FLOAT, simplify=True), default=1, show_default=True, help='For the default flavor "vtraag", you can provide a resolution. ' - 'Higher resolution means finding more and smaller clusters.', + "Higher resolution means finding more and smaller clusters.", ), ], - - 'leiden': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['export_cluster'], - *COMMON_OPTIONS['knn_graph'], - COMMON_OPTIONS['restrict_to'], - COMMON_OPTIONS['random_state'], - COMMON_OPTIONS['key_added'], - click.option( - '--resolution', '-r', + "leiden": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["export_cluster"], + *COMMON_OPTIONS["knn_graph"], + COMMON_OPTIONS["restrict_to"], + COMMON_OPTIONS["random_state"], + COMMON_OPTIONS["key_added"], + click.option( + "--resolution", + "-r", type=CommaSeparatedText(click.FLOAT, simplify=True), default=1, show_default=True, - help='A parameter value controlling the coarseness of the clustering. ' + help="A parameter value controlling the coarseness of the clustering. " 'Higher values lead to more clusters. Set to "None" if overriding ' - '--partition_type to one that doesn\'t accept `resolution_parameter`.', + "--partition_type to one that doesn't accept `resolution_parameter`.", ), click.option( - '--n-iterations', + "--n-iterations", type=click.INT, default=-1, show_default=True, - help='How many iterations of the Leiden clustering algorithm to ' - 'perform. -1 has the algorithm run until it reaches its optimal ' - 'clustering.', + help="How many iterations of the Leiden clustering algorithm to " + "perform. -1 has the algorithm run until it reaches its optimal " + "clustering.", ), ], - - 'diffexp': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['use_raw'], - COMMON_OPTIONS['key_added'], - click.option( - '--layer', '-l', + "diffexp": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["use_raw"], + COMMON_OPTIONS["key_added"], + click.option( + "--layer", + "-l", type=click.STRING, default=None, - help="Key from adata.layers whose value will be used to perform tests on." + help="Key from adata.layers whose value will be used to perform tests on.", ), click.option( - '--groupby', '-g', + "--groupby", + "-g", type=click.STRING, required=True, - help='The key of the observations grouping to consider.', + help="The key of the observations grouping to consider.", ), click.option( - '--groups', + "--groups", type=CommaSeparatedText(simplify=True), - default='all', + default="all", show_default=True, - help='Subset of groups to which comparison shall be restricted.', + help="Subset of groups to which comparison shall be restricted.", ), click.option( - '--reference', + "--reference", type=click.STRING, - default='rest', + default="rest", show_default=True, help='If "rest", compare each group to the union of the rest of the ' - 'groups. If a group identifier, compare with respect to this group.', + "groups. If a group identifier, compare with respect to this group.", ), click.option( - '--n-genes', '-n', + "--n-genes", + "-n", type=click.INT, default=None, show_default=True, - help='The number of genes that appear in the retured tables. By ' - 'default return all available genes depending on the value of ' - '--use-raw.' + help="The number of genes that appear in the retured tables. By " + "default return all available genes depending on the value of " + "--use-raw.", ), click.option( - '--method', - type=click.Choice( - ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']), - default='t-test_overestim_var', + "--method", + type=click.Choice(["logreg", "t-test", "wilcoxon", "t-test_overestim_var"]), + default="t-test_overestim_var", show_default=True, - help='Method of performing differential expression analysis.', + help="Method of performing differential expression analysis.", ), click.option( - '--corr-method', - type=click.Choice(['benjamini-hochberg', 'bonferroni']), - default='benjamini-hochberg', + "--corr-method", + type=click.Choice(["benjamini-hochberg", "bonferroni"]), + default="benjamini-hochberg", show_default=True, help='P-value correction method. Used only for "t-test", ' '"t-test_overestim_var" and "wilcoxon".', ), click.option( - '--rankby-abs', + "--rankby-abs", is_flag=True, default=False, show_default=True, - help='Rank genes by the absolute value of the score, not by the score. ' - 'The returned scores are never the absolute values.', + help="Rank genes by the absolute value of the score, not by the score. " + "The returned scores are never the absolute values.", ), click.option( - '--pts', + "--pts", is_flag=True, default=False, show_default=True, - help='Compute the fraction of cells expressing the genes.' + help="Compute the fraction of cells expressing the genes.", ), click.option( - '--tie-correct', + "--tie-correct", is_flag=True, default=False, show_default=True, help="Use tie correction for 'wilcoxon' scores. Used only for " - "'wilcoxon'." + "'wilcoxon'.", ), click.option( - '--filter-params', - type=Dictionary(keys=[ - 'min_in_group_fraction', - 'max_out_group_fraction', - 'min_fold_change', - ]), + "--filter-params", + type=Dictionary( + keys=[ + "min_in_group_fraction", + "max_out_group_fraction", + "min_fold_change", + ] + ), default=None, show_default=True, - help='Parameters for filtering DE results, valid parameters are: ' + help="Parameters for filtering DE results, valid parameters are: " '"min_in_group_fraction" (float), "max_out_group_fraction" (float), ' '"min_fold_change" (float).', ), click.option( - '--logreg-param', + "--logreg-param", type=Dictionary(), default=None, show_default=True, - help='Parameters passed to `sklearn.linear_model.LogisticRegression`.', + help="Parameters passed to `sklearn.linear_model.LogisticRegression`.", ), click.option( - '--save', + "--save", type=click.Path(dir_okay=False, writable=True), default=None, show_default=True, - help='Tab-separated table to store results of differential expression ' - 'analysis.', + help="Tab-separated table to store results of differential expression " + "analysis.", ), ], - - 'paga': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['knn_graph'][0], # --neighbors-key - COMMON_OPTIONS['key_added'], + "paga": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["knn_graph"][0], # --neighbors-key + COMMON_OPTIONS["key_added"], click.option( - '--groups', + "--groups", type=click.STRING, required=True, - help='Key for categorical in `.obs`. You can pass your predefined ' - 'groups by choosing any categorical annotation of observations.', + help="Key for categorical in `.obs`. You can pass your predefined " + "groups by choosing any categorical annotation of observations.", ), click.option( - '--model', - type=click.Choice(['v1.2', 'v1.0']), - default='v1.2', + "--model", + type=click.Choice(["v1.2", "v1.0"]), + default="v1.2", show_default=True, - help='The PAGA connectivity model.', + help="The PAGA connectivity model.", ), click.option( - '--use-rna-velocity', + "--use-rna-velocity", is_flag=True, default=False, show_default=True, - help='Use RNA velocity to orient edges in the abstracted graph and ' - 'estimate transitions. Requires that adata.uns contains a directed single-cell ' - 'graph with key velocity_graph. This feature might be subject to change in the ' - 'future.', + help="Use RNA velocity to orient edges in the abstracted graph and " + "estimate transitions. Requires that adata.uns contains a directed single-cell " + "graph with key velocity_graph. This feature might be subject to change in the " + "future.", ), ], - - 'diffmap': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['knn_graph'][0], # --neighbors-key - COMMON_OPTIONS['key_added'], - COMMON_OPTIONS['export_embedding'], - COMMON_OPTIONS['n_comps'], + "diffmap": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["knn_graph"][0], # --neighbors-key + COMMON_OPTIONS["key_added"], + COMMON_OPTIONS["export_embedding"], + COMMON_OPTIONS["n_comps"], ], - - 'dpt': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['knn_graph'][0], # --neighbors-key - COMMON_OPTIONS['key_added'], + "dpt": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["knn_graph"][0], # --neighbors-key + COMMON_OPTIONS["key_added"], click.option( - '--root', + "--root", type=(click.STRING, click.STRING), default=(None, None), show_default=True, - help='Specify a categorical annotaion of observations (`.obs`) and a ' - 'value representing the root cells.', + help="Specify a categorical annotaion of observations (`.obs`) and a " + "value representing the root cells.", ), click.option( - '--n-dcs', + "--n-dcs", type=click.INT, default=10, show_default=True, - help='The number of diffusion components to use.', + help="The number of diffusion components to use.", ), click.option( - '--n-branchings', + "--n-branchings", type=click.INT, default=0, show_default=True, - help='Number of branchings to detect.', + help="Number of branchings to detect.", ), click.option( - '--min-group-size', + "--min-group-size", type=click.FLOAT, default=0.01, show_default=True, - help='During recursive splitting of branches for --n-branchings > 1, ' - 'do not consider branches/groups that contain fewer than this fraction ' - 'of the total number of data points.', + help="During recursive splitting of branches for --n-branchings > 1, " + "do not consider branches/groups that contain fewer than this fraction " + "of the total number of data points.", ), click.option( - '--disallow-kendall-tau-shift', 'allow_kendall_tau_shift', + "--disallow-kendall-tau-shift", + "allow_kendall_tau_shift", is_flag=True, default=True, show_default=True, - help='By default: If a very small branch is detected upon ' - 'splitting, shift away from maximum correlation in Kendall tau criterion of ' - '[Haghverdi16] to stabilize the splitting. Use flag to disable this.' + help="By default: If a very small branch is detected upon " + "splitting, shift away from maximum correlation in Kendall tau criterion of " + "[Haghverdi16] to stabilize the splitting. Use flag to disable this.", ), ], - - 'combat': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['batch_key'], - COMMON_OPTIONS['batch_layer'], + "combat": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["batch_key"], + COMMON_OPTIONS["batch_layer"], click.option( - '--key-added', + "--key-added", type=click.STRING, default=None, show_default=True, help="Key under which to add the computed results. By default a new " "layer will be created called 'combat', 'combat_{layer}' or " "'combat_layer_{key_added}' where those parameters were specified. A value of 'X' " - "causes batch-corrected values to overwrite the original content of .X." + "causes batch-corrected values to overwrite the original content of .X.", ), click.option( - '--covariates', + "--covariates", type=(CommaSeparatedText()), default=None, show_default=True, @@ -1383,727 +1427,761 @@ "parameter refers to the design matrix X in Equation 2.1 in [Johnson07] and to " "the mod argument in the original combat function in the sva R package. Note " "that not including covariates may introduce bias or lead to the removal of " - "biological signal in unbalanced designs." + "biological signal in unbalanced designs.", ), - ], - - 'harmony': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['batch_key'], + "harmony": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["batch_key"], click.option( - '--basis', + "--basis", type=click.STRING, - default='X_pca', + default="X_pca", show_default=True, help="The name of the field in adata.obsm where the PCA table is " - "stored. Defaults to 'X_pca', which is the default for sc.tl.pca()." + "stored. Defaults to 'X_pca', which is the default for sc.tl.pca().", ), click.option( - '--adjusted-basis', + "--adjusted-basis", type=click.STRING, - default='X_pca_harmony', + default="X_pca_harmony", show_default=True, - help='The name of the field in adata.obsm where the adjusted PCA ' - 'table will be stored after running this function.' + help="The name of the field in adata.obsm where the adjusted PCA " + "table will be stored after running this function.", ), click.option( - '--theta', + "--theta", type=click.FLOAT, default=2, show_default=True, - help='Diversity clustering penalty parameter. theta=0 does not encourage any ' - 'diversity. Larger values of theta result in more diverse clusters.' + help="Diversity clustering penalty parameter. theta=0 does not encourage any " + "diversity. Larger values of theta result in more diverse clusters.", ), click.option( - '--lambda', 'lamb', + "--lambda", + "lamb", type=click.FLOAT, default=1, show_default=True, - help='Ridge regression penalty parameter. Lambda must be strictly ' - 'positive. Smaller values result in more aggressive correction.' + help="Ridge regression penalty parameter. Lambda must be strictly " + "positive. Smaller values result in more aggressive correction.", ), click.option( - '--sigma', + "--sigma", type=click.FLOAT, default=0.1, show_default=True, - help='Width of soft kmeans clusters. Sigma scales the distance from ' - 'a cell to cluster centroids. Larger values of sigma result in cells assigned to ' - 'more clusters. Smaller values of sigma make soft kmeans cluster approach hard ' - 'clustering.' + help="Width of soft kmeans clusters. Sigma scales the distance from " + "a cell to cluster centroids. Larger values of sigma result in cells assigned to " + "more clusters. Smaller values of sigma make soft kmeans cluster approach hard " + "clustering.", ), click.option( - '--n-clust', 'nclust', + "--n-clust", + "nclust", type=click.INT, default=None, show_default=False, - help='Number of clusters in model. nclust=1 equivalent to simple ' - 'linear regression.' + help="Number of clusters in model. nclust=1 equivalent to simple " + "linear regression.", ), click.option( - '--tau', + "--tau", type=click.INT, default=0, show_default=True, - help='Protection against overclustering small datasets with large ones. ' - 'tau is the expected number of cells per cluster.' + help="Protection against overclustering small datasets with large ones. " + "tau is the expected number of cells per cluster.", ), click.option( - '--block-size', + "--block-size", type=click.FLOAT, default=0.05, show_default=True, - help='What proportion of cells to update during clustering. Between ' - '0 to 1, default 0.05. Larger values may be faster but less accurate.' + help="What proportion of cells to update during clustering. Between " + "0 to 1, default 0.05. Larger values may be faster but less accurate.", ), click.option( - '--max-iter-cluster', 'max_iter_kmeans', + "--max-iter-cluster", + "max_iter_kmeans", type=click.INT, default=20, show_default=True, - help='Maximum number of rounds to run clustering at each round of ' - 'Harmony.' + help="Maximum number of rounds to run clustering at each round of " + "Harmony.", ), click.option( - '--max-iter-harmony', + "--max-iter-harmony", type=click.INT, default=10, show_default=True, - help='Maximum number of rounds to run Harmony. One round of Harmony ' - 'involves one clustering and one correction step.' + help="Maximum number of rounds to run Harmony. One round of Harmony " + "involves one clustering and one correction step.", ), click.option( - '--epsilon-cluster', + "--epsilon-cluster", type=click.FLOAT, default=1e-5, show_default=True, - help='Convergence tolerance for clustering round of Harmony Set to ' - '-Inf to never stop early.' + help="Convergence tolerance for clustering round of Harmony Set to " + "-Inf to never stop early.", ), click.option( - '--epsilon-harmony', + "--epsilon-harmony", type=click.FLOAT, default=1e-5, show_default=True, - help='Convergence tolerance for clustering round of Harmony Set to ' - '-Inf to never stop early.' + help="Convergence tolerance for clustering round of Harmony Set to " + "-Inf to never stop early.", ), - COMMON_OPTIONS['random_state'], + COMMON_OPTIONS["random_state"], ], - - 'mnn': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - *COMMON_OPTIONS['save'], - COMMON_OPTIONS['batch_key'], - COMMON_OPTIONS['batch_layer'], - click.option( - '--key-added', + "mnn": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + *COMMON_OPTIONS["save"], + COMMON_OPTIONS["batch_key"], + COMMON_OPTIONS["batch_layer"], + click.option( + "--key-added", type=click.STRING, default=None, show_default=True, help="Key under which to add the computed results. By default a new " "layer will be created called 'mnn', 'mnn_{layer}' or " "'mnn_layer_{key_added}' where those parameters were specified. A value of 'X' " - "causes batch-corrected values to overwrite the original content of .X." + "causes batch-corrected values to overwrite the original content of .X.", ), click.option( - '--var-subset', + "--var-subset", type=(click.STRING, CommaSeparatedText()), multiple=True, help="The subset of vars (list of str) to be used when performing " "MNN correction in the format of '--var-subset '. Typically, use " "the highly variable genes (HVGs) like '--var-subset highly_variable True'. When " - "unset, uses all vars." + "unset, uses all vars.", ), click.option( - '--n-neighbors', '-k', + "--n-neighbors", + "-k", type=CommaSeparatedText(click.INT, simplify=True), default=20, show_default=True, - help='Number of mutual nearest neighbors.' + help="Number of mutual nearest neighbors.", ), click.option( - '--sigma', + "--sigma", type=click.FLOAT, default=1.0, show_default=True, - help='The bandwidth of the Gaussian smoothing kernel used to ' - 'compute the correction vectors.' + help="The bandwidth of the Gaussian smoothing kernel used to " + "compute the correction vectors.", ), click.option( - '--no-cos_norm_in', 'cos_norm_in', + "--no-cos_norm_in", + "cos_norm_in", is_flag=True, default=True, - help='Default behaviour is to perform cosine normalization on the ' - 'input data prior to calculating distances between cells. Use this ' - 'flag to disable that behaviour.' + help="Default behaviour is to perform cosine normalization on the " + "input data prior to calculating distances between cells. Use this " + "flag to disable that behaviour.", ), click.option( - '--no-cos_norm_out', 'cos_norm_out', + "--no-cos_norm_out", + "cos_norm_out", is_flag=True, default=True, - help='Default behaviour is to perform cosine normalization prior to ' - 'computing corrected expression values. Use this flag to disable that ' - 'behaviour.' + help="Default behaviour is to perform cosine normalization prior to " + "computing corrected expression values. Use this flag to disable that " + "behaviour.", ), click.option( - '--svd-dim', + "--svd-dim", type=click.INT, default=None, show_default=True, - help='The number of dimensions to use for summarizing biological ' - 'substructure within each batch. If not set, biological components ' - 'will not be removed from the correction vectors.' + help="The number of dimensions to use for summarizing biological " + "substructure within each batch. If not set, biological components " + "will not be removed from the correction vectors.", ), click.option( - '--no-var-adj', + "--no-var-adj", is_flag=True, default=True, - help='Default behaviour is to adjust variance of the correction ' - 'vectors. Use this flag to disable that behaviour. Note this step takes most ' - 'computing time.' + help="Default behaviour is to adjust variance of the correction " + "vectors. Use this flag to disable that behaviour. Note this step takes most " + "computing time.", ), click.option( - '--compute-angle', + "--compute-angle", is_flag=True, default=False, - help='When set, compute the angle between each cell’s correction ' - 'vector and the biological subspace of the reference batch.' + help="When set, compute the angle between each cell’s correction " + "vector and the biological subspace of the reference batch.", ), click.option( - '--svd-mode', - type=click.Choice(['svd', 'rsvd', 'irlb']), - default='rsvd', + "--svd-mode", + type=click.Choice(["svd", "rsvd", "irlb"]), + default="rsvd", show_default=True, help="'svd' computes SVD using a non-randomized SVD-via-ID " "algorithm, while 'rsvd' uses a randomized version. 'irlb' performs truncated " "SVD by implicitly restarted Lanczos bidiagonalization (forked from " - "https://github.com/airysen/irlbpy)." + "https://github.com/airysen/irlbpy).", ), ], - - 'bbknn': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - COMMON_OPTIONS['key_added'], - COMMON_OPTIONS['batch_key'], - click.option( - '--use-rep', '-u', + "bbknn": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + COMMON_OPTIONS["key_added"], + COMMON_OPTIONS["batch_key"], + click.option( + "--use-rep", + "-u", type=click.STRING, - default='X_pca', + default="X_pca", show_default=True, - help='The dimensionality reduction in .obsm to use for neighbour ' - 'detection.' + help="The dimensionality reduction in .obsm to use for neighbour " + "detection.", ), - COMMON_OPTIONS['use_pc'][0], # --n-pcs + COMMON_OPTIONS["use_pc"][0], # --n-pcs click.option( - '--no-approx', 'approx', + "--no-approx", + "approx", is_flag=True, default=True, - help='Default behaviour is to use annoy’s approximate neighbour ' - 'finding. This results in a quicker run time for large datasets while also ' - 'potentially increasing the degree of batch correction. Use this flag to disable ' - 'that behaviour.', + help="Default behaviour is to use annoy’s approximate neighbour " + "finding. This results in a quicker run time for large datasets while also " + "potentially increasing the degree of batch correction. Use this flag to disable " + "that behaviour.", ), - COMMON_OPTIONS['neighbor_metric'], + COMMON_OPTIONS["neighbor_metric"], click.option( - '--neighbors-within-batch', + "--neighbors-within-batch", type=click.INT, default=3, show_default=True, - help='How many top neighbours to report for each batch; total ' - 'number of neighbours will be this number times the number of batches.' + help="How many top neighbours to report for each batch; total " + "number of neighbours will be this number times the number of batches.", ), click.option( - '--trim', + "--trim", type=click.INT, default=None, show_default=True, - help='Trim the neighbours of each cell to these many top ' - 'connectivities. May help with population independence and improve the tidiness ' - 'of clustering. The lower the value the more independent the individual ' - 'populations, at the cost of more conserved batch effect. If None, sets the ' - 'parameter value automatically to 10 times the total number of neighbours for ' - 'each cell. Set to 0 to skip.' + help="Trim the neighbours of each cell to these many top " + "connectivities. May help with population independence and improve the tidiness " + "of clustering. The lower the value the more independent the individual " + "populations, at the cost of more conserved batch effect. If None, sets the " + "parameter value automatically to 10 times the total number of neighbours for " + "each cell. Set to 0 to skip.", ), click.option( - '--annoy-n-trees', + "--annoy-n-trees", type=click.INT, default=10, show_default=True, - help='Only used when approx=True. The number of trees to construct ' - 'in the annoy forest. More trees give higher precision when querying, at the ' - 'cost of increased run time and resource intensity.' + help="Only used when approx=True. The number of trees to construct " + "in the annoy forest. More trees give higher precision when querying, at the " + "cost of increased run time and resource intensity.", ), click.option( - '--no-use-faiss', 'use_faiss', + "--no-use-faiss", + "use_faiss", is_flag=True, default=True, - help='Default behaviour If approx=False and the metric is ' - '“euclidean”, is to use the faiss package to compute nearest neighbours if ' - 'installed. This improves performance at a minor cost to numerical precision as ' - 'faiss operates on float32. Use this flag to disable that behaviour.' + help="Default behaviour If approx=False and the metric is " + "“euclidean”, is to use the faiss package to compute nearest neighbours if " + "installed. This improves performance at a minor cost to numerical precision as " + "faiss operates on float32. Use this flag to disable that behaviour.", ), click.option( - '--set-op-mix-ratio', + "--set-op-mix-ratio", type=click.FLOAT, default=1, show_default=True, - help='UMAP connectivity computation parameter, float between 0 and ' - '1, controlling the blend between a connectivity matrix formed exclusively from ' - 'mutual nearest neighbour pairs (0) and a union of all observed neighbour ' - 'relationships with the mutual pairs emphasised (1).' + help="UMAP connectivity computation parameter, float between 0 and " + "1, controlling the blend between a connectivity matrix formed exclusively from " + "mutual nearest neighbour pairs (0) and a union of all observed neighbour " + "relationships with the mutual pairs emphasised (1).", ), click.option( - '--local-connectivity', + "--local-connectivity", type=click.INT, default=1, show_default=True, - help='UMAP connectivity computation parameter, how many nearest ' - 'neighbors of each cell are assumed to be fully connected (and given a ' - 'connectivity value of 1)' + help="UMAP connectivity computation parameter, how many nearest " + "neighbors of each cell are assumed to be fully connected (and given a " + "connectivity value of 1)", ), ], - - 'scrublet': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], + "scrublet": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], click.option( - '--batch-key', 'batch_key', + "--batch-key", + "batch_key", type=click.STRING, - help='The name of the column in adata.obs that differentiates among ' - 'experiments/batches. Doublets will be detected in each batch separately.' + help="The name of the column in adata.obs that differentiates among " + "experiments/batches. Doublets will be detected in each batch separately.", ), click.option( - '--input-obj-sim', 'adata_sim', + "--input-obj-sim", + "adata_sim", type=click.Path(exists=True, dir_okay=False), default=None, - help='(Advanced use case) Optional annData object generated by ' - 'sc.external.pp.scrublet_simulate_doublets(), with same number of ' - 'vars as adata. This should have been built from input_obj after ' - 'filtering genes and cells and selcting highly-variable genes.' + help="(Advanced use case) Optional annData object generated by " + "sc.external.pp.scrublet_simulate_doublets(), with same number of " + "vars as adata. This should have been built from input_obj after " + "filtering genes and cells and selcting highly-variable genes.", ), click.option( - '--threshold', + "--threshold", type=click.FLOAT, default=None, show_default=True, - help='Doublet score threshold for calling a transcriptome a ' - 'doublet. If not set, this is set automatically by looking for the ' - 'minimum between the two modes of the doublet_scores_sim_ histogram. ' - 'It is best practice to check the threshold visually using the ' - 'doublet_scores_sim_ histogram and/or based on co-localization of ' - 'predicted doublets in a 2-D embedding.' + help="Doublet score threshold for calling a transcriptome a " + "doublet. If not set, this is set automatically by looking for the " + "minimum between the two modes of the doublet_scores_sim_ histogram. " + "It is best practice to check the threshold visually using the " + "doublet_scores_sim_ histogram and/or based on co-localization of " + "predicted doublets in a 2-D embedding.", ), - *COMMON_OPTIONS['scrublet'], + *COMMON_OPTIONS["scrublet"], click.option( - '--expected-doublet-rate', + "--expected-doublet-rate", type=click.FLOAT, default=0.05, show_default=True, - help='Where input_obj_sim not suplied, the estimated doublet rate ' - 'for the experiment.' + help="Where input_obj_sim not suplied, the estimated doublet rate " + "for the experiment.", ), click.option( - '--stdev-doublet-rate', + "--stdev-doublet-rate", type=click.FLOAT, default=0.02, show_default=True, - help='Where input_obje_sim not suplied, uncertainty in the expected ' - 'doublet rate.' - ), - click.option( - '--knn-dist-metric', '-t', - type=click.Choice(['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']), - default='euclidean', - show_default=True, - help='A known metric’s name.' + help="Where input_obje_sim not suplied, uncertainty in the expected " + "doublet rate.", ), click.option( - '--no-normalize-variance', 'normalize_variance', + "--knn-dist-metric", + "-t", + type=click.Choice( + [ + "cityblock", + "cosine", + "euclidean", + "l1", + "l2", + "manhattan", + "braycurtis", + "canberra", + "chebyshev", + "correlation", + "dice", + "hamming", + "jaccard", + "kulsinski", + "mahalanobis", + "minkowski", + "rogerstanimoto", + "russellrao", + "seuclidean", + "sokalmichener", + "sokalsneath", + "sqeuclidean", + "yule", + ] + ), + default="euclidean", + show_default=True, + help="A known metric’s name.", + ), + click.option( + "--no-normalize-variance", + "normalize_variance", is_flag=True, default=True, - help='Default is to normalize the data such that each gene has a ' - 'variance of 1. sklearn.decomposition.TruncatedSVD will be used for ' - 'dimensionality reduction, if --no-mean-center is set. Use this flag ' - 'to disable that behaviour.' + help="Default is to normalize the data such that each gene has a " + "variance of 1. sklearn.decomposition.TruncatedSVD will be used for " + "dimensionality reduction, if --no-mean-center is set. Use this flag " + "to disable that behaviour.", ), click.option( - '--log-transform', + "--log-transform", is_flag=True, default=False, show_default=True, - help='Whether to use :func:~scanpy.pp.log1p to log-transform the ' - 'data prior to PCA.' + help="Whether to use :func:~scanpy.pp.log1p to log-transform the " + "data prior to PCA.", ), click.option( - '--no-mean-center', 'mean_center', + "--no-mean-center", + "mean_center", is_flag=True, default=True, - help='If True, center the data such that each gene has a mean of 0. ' - 'sklearn.decomposition.PCA will be used for dimensionality ' - 'reduction.' + help="If True, center the data such that each gene has a mean of 0. " + "sklearn.decomposition.PCA will be used for dimensionality " + "reduction.", ), click.option( - '--n-pcs', 'n_prin_comps', + "--n-pcs", + "n_prin_comps", type=click.INT, default=30, show_default=True, - help='Number of principal components used to embed the ' - 'transcriptomes prior to k-nearest-neighbor graph construction.' + help="Number of principal components used to embed the " + "transcriptomes prior to k-nearest-neighbor graph construction.", ), click.option( - '--no-approx', 'use_approx_neighbors', + "--no-approx", + "use_approx_neighbors", is_flag=True, default=True, - help='Default behaviour is to use the approximate nearest neighbor ' - 'method (annoy) for the KNN classifier. Use this flag to disable ' - 'that behaviour.' + help="Default behaviour is to use the approximate nearest neighbor " + "method (annoy) for the KNN classifier. Use this flag to disable " + "that behaviour.", ), click.option( - '--get-doublet-neighbor-parents', + "--get-doublet-neighbor-parents", is_flag=True, default=False, show_default=True, - help='If set, return (in .uns) the parent transcriptomes that ' - 'generated the doublet neighbors of each observed transcriptome. ' - 'This information can be used to infer the cell states that ' - 'generated a given doublet state.' + help="If set, return (in .uns) the parent transcriptomes that " + "generated the doublet neighbors of each observed transcriptome. " + "This information can be used to infer the cell states that " + "generated a given doublet state.", ), click.option( - '--n-neighbors', '-k', + "--n-neighbors", + "-k", type=CommaSeparatedText(click.INT, simplify=True), default=None, show_default=True, - help='Number of neighbors used to construct the KNN graph of ' - 'observed transcriptomes and simulated doublets. If not set, this is ' - 'automatically set to np.round(0.5 * np.sqrt(n_obs)).' + help="Number of neighbors used to construct the KNN graph of " + "observed transcriptomes and simulated doublets. If not set, this is " + "automatically set to np.round(0.5 * np.sqrt(n_obs)).", ), click.option( - '--filter', 'filter', + "--filter", + "filter", is_flag=True, default=False, - help='By default, the output object is annotated but not filtered ' - 'according to the scrublet status. Setting this flag will cause ' - 'predicted multiplet elements to be removed.' + help="By default, the output object is annotated but not filtered " + "according to the scrublet status. Setting this flag will cause " + "predicted multiplet elements to be removed.", ), click.option( - '--no-verbose', 'verbose', + "--no-verbose", + "verbose", is_flag=True, default=True, - help='Default behaviour is to print progress updates. Use this flag ' - 'to disable that.' + help="Default behaviour is to print progress updates. Use this flag " + "to disable that.", ), click.option( - '--export-table', + "--export-table", type=click.Path(dir_okay=False, writable=True), default=None, show_default=True, - help='Export a table of double scores and calls to the specified file.', + help="Export a table of double scores and calls to the specified file.", ), - COMMON_OPTIONS['random_state'], + COMMON_OPTIONS["random_state"], ], - - 'plot_scrublet': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['plot'], - click.option( - '--scale-hist-obs', '-b', - type=click.Choice(['linear', 'log', 'symlog', 'logit']), - default='log', + "plot_scrublet": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["plot"], + click.option( + "--scale-hist-obs", + "-b", + type=click.Choice(["linear", "log", "symlog", "logit"]), + default="log", show_default=True, - help='Set y axis scale transformation in matplotlib for the plot of observed transcriptomes.' + help="Set y axis scale transformation in matplotlib for the plot of observed transcriptomes.", ), - click.option( - '--scale-hist-sim', '-s', - type=click.Choice(['linear', 'log', 'symlog', 'logit']), - default='linear', + click.option( + "--scale-hist-sim", + "-s", + type=click.Choice(["linear", "log", "symlog", "logit"]), + default="linear", show_default=True, - help='Set y axis scale transformation in matplotlib for the plot of observed transcriptomes.' + help="Set y axis scale transformation in matplotlib for the plot of observed transcriptomes.", ), - ], - - 'scrublet_simulate_doublets': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['output'], - *COMMON_OPTIONS['scrublet'], + ], + "scrublet_simulate_doublets": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["output"], + *COMMON_OPTIONS["scrublet"], click.option( - '--layer', '-l', + "--layer", + "-l", type=click.STRING, default=None, help="Layer of adata where raw values are stored, or ‘X’ if values " - "are in .X." + "are in .X.", ), ], - - 'embed': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['plot'], - *COMMON_OPTIONS['frame_title'], - COMMON_OPTIONS['layer'], + "embed": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["plot"], + *COMMON_OPTIONS["frame_title"], + COMMON_OPTIONS["layer"], click.option( - '--basis', + "--basis", type=click.STRING, - default='umap', + default="umap", show_default=True, - help='Name of the embedding to plot, must be a key of `.obsm` without ' + help="Name of the embedding to plot, must be a key of `.obsm` without " 'the prefix "X_".', ), click.option( - '--color', + "--color", type=CommaSeparatedText(simplify=True), default=None, show_default=True, - help='Keys for annotations of observations/cells or variables/genes.', + help="Keys for annotations of observations/cells or variables/genes.", ), click.option( - '--legend-loc', - type=click.Choice(['right margin', 'on data']), - default='right margin', + "--legend-loc", + type=click.Choice(["right margin", "on data"]), + default="right margin", show_default=True, help='Location of legend, either "on data", "right margin" or valid ' - 'keywords for `matplotlib.legend`.', + "keywords for `matplotlib.legend`.", ), click.option( - '--legend-fontsize', + "--legend-fontsize", type=click.INT, default=15, show_default=True, - help='Legend font size.', + help="Legend font size.", ), click.option( - '--size', + "--size", type=click.FLOAT, default=None, show_default=True, - help='Point size. Automatically computed if not specified.', + help="Point size. Automatically computed if not specified.", ), - COMMON_OPTIONS['gene_symbols'], + COMMON_OPTIONS["gene_symbols"], click.option( - '--edges', + "--edges", is_flag=True, default=False, show_default=True, - help='Show edges.', + help="Show edges.", ), click.option( - '--edges-width', + "--edges-width", type=click.FLOAT, default=0.1, show_default=True, - help='Width of edges.', + help="Width of edges.", ), click.option( - '--edges-color', + "--edges-color", type=click.STRING, default=None, show_default=True, - help='Color of edges. See draw_networkx_edges().', + help="Color of edges. See draw_networkx_edges().", ), - COMMON_OPTIONS['knn_graph'][0], # --neighbors-key + COMMON_OPTIONS["knn_graph"][0], # --neighbors-key click.option( - '--no-sort-order', 'sort_order', + "--no-sort-order", + "sort_order", is_flag=True, default=True, show_default=True, - help='Disable default behaviour: for continuous annotations used as ' - 'color parameter, plot data points with higher values on top of others.', + help="Disable default behaviour: for continuous annotations used as " + "color parameter, plot data points with higher values on top of others.", ), - *COMMON_OPTIONS['plot_embed'], + *COMMON_OPTIONS["plot_embed"], click.option( - '--components', + "--components", type=click.STRING, default=None, show_default=True, help="For instance, ['1,2', '2,3']. To plot all available components use 'all'.", ), click.option( - '--projection', - type=click.Choice(['2d', '3d']), - default='2d', + "--projection", + type=click.Choice(["2d", "3d"]), + default="2d", show_default=True, - help="Projection of plot." + help="Projection of plot.", ), - ], - - 'plot_paga': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['plot'], - *COMMON_OPTIONS['frame_title'], - *COMMON_OPTIONS['plot_embed'], - COMMON_OPTIONS['random_state'], - click.option( - '--use-key', + "plot_paga": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["plot"], + *COMMON_OPTIONS["frame_title"], + *COMMON_OPTIONS["plot_embed"], + COMMON_OPTIONS["random_state"], + click.option( + "--use-key", type=click.STRING, - default='paga', + default="paga", show_default=True, - help='The key in `.uns` that contains trajectory information.', + help="The key in `.uns` that contains trajectory information.", ), click.option( - '--layout', - type=click.Choice(['fa', 'fr', 'grid_fr', 'kk', 'lgl', 'drl', 'rt']), - default='fr', + "--layout", + type=click.Choice(["fa", "fr", "grid_fr", "kk", "lgl", "drl", "rt"]), + default="fr", show_default=True, - help='Plotting layout that computes positions.', + help="Plotting layout that computes positions.", ), click.option( - '--init-pos', + "--init-pos", type=click.STRING, default=None, show_default=True, - help='Plotting layout that computes positions.', + help="Plotting layout that computes positions.", ), click.option( - '--threshold', + "--threshold", type=click.FLOAT, default=0.01, show_default=True, - help='Do not draw edges for weights below this threshold. Set to 0 to ' - 'include all edges.', + help="Do not draw edges for weights below this threshold. Set to 0 to " + "include all edges.", ), - COMMON_OPTIONS['root'], + COMMON_OPTIONS["root"], click.option( - '--root', + "--root", type=click.INT, default=0, show_default=True, - help='If choosing a tree layout, this is the index of the root node.', + help="If choosing a tree layout, this is the index of the root node.", ), click.option( - '--transitions', + "--transitions", type=click.STRING, default=None, show_default=True, help='Key for `.uns["paga"]` that specifies the matrix, e.g. ' - '`transition_confidence`, that stores the arrows.', + "`transition_confidence`, that stores the arrows.", ), click.option( - '--single-component', + "--single-component", is_flag=True, default=False, show_default=True, - help='Restrict to largest connected component', + help="Restrict to largest connected component", ), click.option( - '--solid-edges', - type=click.Choice(['connectivities', 'connectivities_tree']), - default='connectivities', + "--solid-edges", + type=click.Choice(["connectivities", "connectivities_tree"]), + default="connectivities", show_default=True, help='Key for `.uns["paga"]` that specifies the matrix that stores the ' - 'edges to be drawn solid black.', + "edges to be drawn solid black.", ), click.option( - '--basis', + "--basis", type=click.STRING, default=None, show_default=True, - help='Name of the embedding to plot, must be a key of `.obsm` without ' + help="Name of the embedding to plot, must be a key of `.obsm` without " 'the prefix "X_".', ), click.option( - '--color', + "--color", type=CommaSeparatedText(simplify=True), default=None, show_default=True, - help='Key(s) for annotation of observations/cells or variables/genes. Comma-separated if more than one', + help="Key(s) for annotation of observations/cells or variables/genes. Comma-separated if more than one", ), click.option( - '--legend-loc', - type=click.Choice(['right margin', 'on data']), - default='right margin', + "--legend-loc", + type=click.Choice(["right margin", "on data"]), + default="right margin", show_default=True, help='Location of legend, either "on data", "right margin" or valid ' - 'keywords for `matplotlib.legend`.', + "keywords for `matplotlib.legend`.", ), click.option( - '--size', + "--size", type=click.FLOAT, default=None, show_default=True, - help='Point size. Automatically computed if not specified.', + help="Point size. Automatically computed if not specified.", ), click.option( - '--node-size-scale', + "--node-size-scale", type=click.FLOAT, default=1.0, show_default=True, - help='Increase of decrease the size of the nodes.', + help="Increase of decrease the size of the nodes.", ), click.option( - '--fontsize', + "--fontsize", type=click.INT, default=None, show_default=True, - help='Font size for node labels.', + help="Font size for node labels.", ), click.option( - '--edge-width-scale', + "--edge-width-scale", type=click.FLOAT, default=1.0, show_default=True, - help='Increase of decrease the width of the edges.', + help="Increase of decrease the width of the edges.", ), click.option( - '--arrowsize', + "--arrowsize", type=click.INT, default=30, show_default=True, - help='For directed graphs, specify the length and width of the arrowhead.', + help="For directed graphs, specify the length and width of the arrowhead.", ), - *COMMON_OPTIONS['opt_output'], + *COMMON_OPTIONS["opt_output"], ], - - 'sviol': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['plot'], - COMMON_OPTIONS['use_raw'], - COMMON_OPTIONS['var_names'], - *COMMON_OPTIONS['rank_genes_groups_plots'], - COMMON_OPTIONS['layer'], - *COMMON_OPTIONS['diffexp_plot'], - COMMON_OPTIONS['gene_symbols'], - *COMMON_OPTIONS['sviol'], - COMMON_OPTIONS['swap_axes'], + "sviol": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["plot"], + COMMON_OPTIONS["use_raw"], + COMMON_OPTIONS["var_names"], + *COMMON_OPTIONS["rank_genes_groups_plots"], + COMMON_OPTIONS["layer"], + *COMMON_OPTIONS["diffexp_plot"], + COMMON_OPTIONS["gene_symbols"], + *COMMON_OPTIONS["sviol"], + COMMON_OPTIONS["swap_axes"], ], - - 'dot': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['plot'], - COMMON_OPTIONS['use_raw'], - COMMON_OPTIONS['var_names'], - *COMMON_OPTIONS['rank_genes_groups_plots'], - COMMON_OPTIONS['layer'], - *COMMON_OPTIONS['diffexp_plot'], - COMMON_OPTIONS['gene_symbols'], - *COMMON_OPTIONS['dot'], + "dot": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["plot"], + COMMON_OPTIONS["use_raw"], + COMMON_OPTIONS["var_names"], + *COMMON_OPTIONS["rank_genes_groups_plots"], + COMMON_OPTIONS["layer"], + *COMMON_OPTIONS["diffexp_plot"], + COMMON_OPTIONS["gene_symbols"], + *COMMON_OPTIONS["dot"], ], - - 'matrix': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['plot'], - COMMON_OPTIONS['use_raw'], - COMMON_OPTIONS['var_names'], - *COMMON_OPTIONS['rank_genes_groups_plots'], - COMMON_OPTIONS['layer'], - *COMMON_OPTIONS['diffexp_plot'], - COMMON_OPTIONS['gene_symbols'], + "matrix": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["plot"], + COMMON_OPTIONS["use_raw"], + COMMON_OPTIONS["var_names"], + *COMMON_OPTIONS["rank_genes_groups_plots"], + COMMON_OPTIONS["layer"], + *COMMON_OPTIONS["diffexp_plot"], + COMMON_OPTIONS["gene_symbols"], ], - - 'heat': [ - *COMMON_OPTIONS['input'], - *COMMON_OPTIONS['plot'], - COMMON_OPTIONS['use_raw'], - COMMON_OPTIONS['var_names'], - *COMMON_OPTIONS['rank_genes_groups_plots'], - COMMON_OPTIONS['layer'], - *COMMON_OPTIONS['diffexp_plot'], - COMMON_OPTIONS['gene_symbols'], - *COMMON_OPTIONS['heat'], - COMMON_OPTIONS['swap_axes'], + "heat": [ + *COMMON_OPTIONS["input"], + *COMMON_OPTIONS["plot"], + COMMON_OPTIONS["use_raw"], + COMMON_OPTIONS["var_names"], + *COMMON_OPTIONS["rank_genes_groups_plots"], + COMMON_OPTIONS["layer"], + *COMMON_OPTIONS["diffexp_plot"], + COMMON_OPTIONS["gene_symbols"], + *COMMON_OPTIONS["heat"], + COMMON_OPTIONS["swap_axes"], ], - } diff --git a/scanpy_scripts/cmd_utils.py b/scanpy_scripts/cmd_utils.py index 63d09fc3..f9986485 100644 --- a/scanpy_scripts/cmd_utils.py +++ b/scanpy_scripts/cmd_utils.py @@ -11,7 +11,8 @@ from .obj_utils import _save_matrix from .lib._scrublet import plot_scrublet -def make_subcmd(cmd_name, func, cmd_desc, arg_desc, opt_set = None): + +def make_subcmd(cmd_name, func, cmd_desc, arg_desc, opt_set=None): """ Factory function that returns a sub-command function """ @@ -22,23 +23,23 @@ def make_subcmd(cmd_name, func, cmd_desc, arg_desc, opt_set = None): def add_docstring(cmd_desc, arg_desc): def docstring_dec(obj): - obj.__doc__ = obj.__doc__.format( - cmd_desc=cmd_desc, arg_desc=arg_desc) + obj.__doc__ = obj.__doc__.format(cmd_desc=cmd_desc, arg_desc=arg_desc) return obj + return docstring_dec @add_options(option_spec) @add_docstring(cmd_desc, arg_desc) def cmd( - input_obj=None, - output_obj=None, - input_format=None, - output_format=None, - zarr_chunk_size=None, - loom_write_obsm_varm=False, - export_mtx=None, - show_obj=None, - **kwargs + input_obj=None, + output_obj=None, + input_format=None, + output_format=None, + zarr_chunk_size=None, + loom_write_obsm_varm=False, + export_mtx=None, + show_obj=None, + **kwargs, ): """{cmd_desc}\n\n\b\n{arg_desc}""" if input_obj: @@ -66,61 +67,74 @@ def add_options(options): """ Returns a decorator to group multiple click decorators """ + def _add_options(func): for option in reversed(options): func = option(func) return func + return _add_options + def _fix_booleans(df): - for var in df.columns: - if (df[var].dtype.kind == 'O' and - df[var].dtype.name == 'object' and - set(pd.Categorical(df[var])).issubset(set(['True', 'False', 'nan'])) + for var in df.columns: + if ( + df[var].dtype.kind == "O" + and df[var].dtype.name == "object" + and set(pd.Categorical(df[var])).issubset(set(["True", "False", "nan"])) ): - d = {'False': True, 'False': False, 'nan': False} - df[var] = df[var].map(d).astype(bool) - return(df) + d = {"False": True, "False": False, "nan": False} + df[var] = df[var].map(d).astype(bool) + return df -def _read_obj(input_obj, input_format='anndata', **kwargs): - if input_format == 'anndata': + +def _read_obj(input_obj, input_format="anndata", **kwargs): + if input_format == "anndata": adata = sc.read(input_obj, **kwargs) - elif input_format == 'loom': + elif input_format == "loom": adata = sc.read_loom(input_obj, **kwargs) else: - raise NotImplementedError( - 'Unsupported input format: {}'.format(input_format)) + raise NotImplementedError("Unsupported input format: {}".format(input_format)) adata.var = _fix_booleans(adata.var) adata.obs = _fix_booleans(adata.obs) return adata + def _write_obj( - adata, - output_obj, - output_format='anndata', - chunk_size=None, - export_mtx=None, - show_obj=None, - write_obsm_varm=False, - **kwargs + adata, + output_obj, + output_format="anndata", + chunk_size=None, + export_mtx=None, + show_obj=None, + write_obsm_varm=False, + **kwargs, ): - if output_format == 'anndata': - adata.write(output_obj, compression='gzip') - elif output_format == 'loom': - adata.write_loom(output_obj, write_obsm_varm=write_obsm_varm ) - elif output_format == 'zarr': + if output_format == "anndata": + adata.write(output_obj, compression="gzip") + elif output_format == "loom": + adata.write_loom(output_obj, write_obsm_varm=write_obsm_varm) + elif output_format == "zarr": adata.write_zarr(output_obj, chunk_size=chunk_size, **kwargs) else: - raise NotImplementedError( - 'Unsupported output format: {}'.format(output_format)) + raise NotImplementedError("Unsupported output format: {}".format(output_format)) if export_mtx: write_mtx(adata, fname_prefix=export_mtx, **kwargs) if show_obj: - click.echo(adata, err=show_obj == 'stderr') + click.echo(adata, err=show_obj == "stderr") return 0 -def write_mtx(adata, fname_prefix='', var=None, obs=None, use_raw=False, use_layer=None): + +def write_mtx( + adata, + fname_prefix="", + var=None, + obs=None, + use_raw=False, + use_layer=None, + compression=None, +): """Export AnnData object to mtx formt * Parameters + adata : AnnData @@ -133,22 +147,49 @@ def write_mtx(adata, fname_prefix='', var=None, obs=None, use_raw=False, use_lay A list of column names to be exported to gene table + obs : list A list of column names to be exported to barcode/cell table + + use_raw : bool + Take data the matrix from .raw.X? + + use_layer: str + Specify a layer to use instead of .X (non-raw only) + + compression: None, str or dict + Compression parameter for Pandas' to_csv(). For compression, a dict + with a 'method' key, e.g. {'method': 'gzip', 'compresslevel': 1, + 'mtime': 1} + + >>> import os + >>> from pathlib import Path + >>> adata = sc.datasets.pbmc3k() + >>> # Test uncompressed write + >>> Path("uncompressed").mkdir(parents=True, exist_ok=True) + >>> write_mtx(adata, fname_prefix = 'uncompressed/', use_raw = False, use_layer = None, var = ['gene_name']) + >>> sorted(os.listdir('uncompressed')) + ['barcodes.tsv', 'genes.tsv', 'matrix.mtx'] + >>> # Test that the matrix is the same when we read it back + >>> test_readable = sc.read_10x_mtx('uncompressed') + >>> if any(test_readable.obs_names != adata.obs_names) or any(test_readable.var_names != adata.var_names) or (test_readable.X[1].sum() - adata.X[1].sum()) > 1e-5: + ... print("Re-read matrix is different to the one we stored, something is wrong with the writing") + >>> # Test compressed write + >>> Path("compressed").mkdir(parents=True, exist_ok=True) + >>> write_mtx(adata, fname_prefix = 'compressed/', use_raw = False, use_layer = None, var = ['gene_name'], compression = {'method': 'gzip'}) + >>> sorted(os.listdir('compressed')) + ['barcodes.tsv.gz', 'genes.tsv.gz', 'matrix.mtx.gz'] """ - if fname_prefix and not (fname_prefix.endswith('/') or fname_prefix.endswith('_')): - fname_prefix = fname_prefix + '_' + if fname_prefix and not (fname_prefix.endswith("/") or fname_prefix.endswith("_")): + fname_prefix = fname_prefix + "_" if var is None: var = [] if obs is None: obs = [] import scipy.sparse as sp + if use_raw: var_source = adata.raw.var mat = sp.coo_matrix(adata.raw.X) else: var_source = adata.var if use_layer is not None: - mat=sp.coo_matrix(adata.layers[use_layer]) + mat = sp.coo_matrix(adata.layers[use_layer]) else: mat = sp.coo_matrix(adata.X) @@ -157,74 +198,105 @@ def write_mtx(adata, fname_prefix='', var=None, obs=None, use_raw=False, use_lay n_obs, n_var = mat.shape n_entry = len(mat.data) - header = '%%MatrixMarket matrix coordinate real general\n%\n{} {} {}\n'.format( - n_var, n_obs, n_entry) - df = pd.DataFrame({'col': mat.col + 1, 'row': mat.row + 1, 'data': mat.data}) - mtx_fname = fname_prefix + 'matrix.mtx' - gene_fname = fname_prefix + 'genes.tsv' - barcode_fname = fname_prefix + 'barcodes.tsv' - with open(mtx_fname, 'a') as fh: - fh.write(header) - df.to_csv(fh, sep=' ', header=False, index=False) + # Define the header lines as a Pandas DataFrame so we can use the same compression + header = pd.DataFrame( + ["%%MatrixMarket matrix coordinate real general", f"{n_var} {n_obs} {n_entry}"] + ) + df = pd.DataFrame({"col": mat.col + 1, "row": mat.row + 1, "data": mat.data}) + + # Define outputs + mtx_fname = fname_prefix + "matrix.mtx" + gene_fname = fname_prefix + "genes.tsv" + barcode_fname = fname_prefix + "barcodes.tsv" + + # Write matrix with Pandas CSV and use its compression where requested + if ( + compression is not None + and type(compression) is dict + and "method" in compression + ): + compressed_exts = {"zip": "zip", "gzip": "gz", "bz2": "bz2", "zstd": "zst"} + ext = compressed_exts.get(compression["method"], "None") + + if ext is None: + errmsg = "Invalid compression method" + raise Exception(errmsg) + + mtx_fname += f".{ext}" + gene_fname += f".{ext}" + barcode_fname += f".{ext}" + else: + compression = None + + header.to_csv(mtx_fname, header=False, index=False, compression=compression) + df.to_csv( + mtx_fname, sep=" ", header=False, index=False, compression=compression, mode="a" + ) + + # Now write the obs and var, also with compression if appropriate obs_df = adata.obs[obs].reset_index(level=0) - obs_df.to_csv(barcode_fname, sep='\t', header=False, index=False) + obs_df.to_csv( + barcode_fname, sep="\t", header=False, index=False, compression=compression + ) var_df = var_source[var].reset_index(level=0) if not var: - var_df['gene'] = var_df['index'] - var_df.to_csv(gene_fname, sep='\t', header=False, index=False) + var_df["gene"] = var_df["index"] + var_df.to_csv( + gene_fname, sep="\t", header=False, index=False, compression=compression + ) def make_plot_function(func_name, kind=None): - """Make plot function that handles common plotting parameters - """ + """Make plot function that handles common plotting parameters""" # Provide a function translation plot_funcs = { - 'embedding': sc.pl.embedding, - 'scatter': sc.pl.scatter, - 'sviol': sc.pl.stacked_violin, - 'rgg_sviol': sc.pl.rank_genes_groups_stacked_violin, - 'dot': sc.pl.dotplot, - 'rgg_dot': sc.pl.rank_genes_groups_dotplot, - 'matrix': sc.pl.matrixplot, - 'rgg_matrix': sc.pl.rank_genes_groups_matrixplot, - 'heat': sc.pl.heatmap, - 'rgg_heat': sc.pl.rank_genes_groups_heatmap, + "embedding": sc.pl.embedding, + "scatter": sc.pl.scatter, + "sviol": sc.pl.stacked_violin, + "rgg_sviol": sc.pl.rank_genes_groups_stacked_violin, + "dot": sc.pl.dotplot, + "rgg_dot": sc.pl.rank_genes_groups_dotplot, + "matrix": sc.pl.matrixplot, + "rgg_matrix": sc.pl.rank_genes_groups_matrixplot, + "heat": sc.pl.heatmap, + "rgg_heat": sc.pl.rank_genes_groups_heatmap, } def plot_function( - adata, - output_fig=None, - fig_size=None, - fig_dpi=300, - fig_fontsize=15, - **kwargs, + adata, + output_fig=None, + fig_size=None, + fig_dpi=300, + fig_fontsize=15, + **kwargs, ): sc.settings.set_figure_params(dpi=fig_dpi, fontsize=fig_fontsize) if fig_size: from matplotlib import rcParams - rcParams.update({'figure.figsize': fig_size}) + + rcParams.update({"figure.figsize": fig_size}) # Choose the function to run - is_rgg=False + is_rgg = False if func_name in plot_funcs: - if 'rgg' in kwargs: - if kwargs['rgg'] == True: - is_rgg=True - func=plot_funcs[ 'rgg_' + func_name ] - kwargs.pop('var_names', None) + if "rgg" in kwargs: + if kwargs["rgg"] == True: + is_rgg = True + func = plot_funcs["rgg_" + func_name] + kwargs.pop("var_names", None) else: - func = plot_funcs[ func_name ] - kwargs.pop('groups', None) - kwargs.pop('n_genes', None) - - kwargs.pop('rgg') + func = plot_funcs[func_name] + kwargs.pop("groups", None) + kwargs.pop("n_genes", None) + + kwargs.pop("rgg") else: - func = plot_funcs[ func_name ] + func = plot_funcs[func_name] else: func = globals()[func_name] @@ -235,63 +307,64 @@ def plot_function( if output_fig: import os import matplotlib.pyplot as plt - sc.settings.figdir = os.path.dirname(output_fig) or '.' + + sc.settings.figdir = os.path.dirname(output_fig) or "." figname = os.path.basename(output_fig) showfig = False # Run the selected function - func( - adata, - save=figname, - show=showfig, - **kwargs) + func(adata, save=figname, show=showfig, **kwargs) # Rename output to the spefied file name. We need to work out what # prefix the function will have used for its output files. if output_fig: - prefix='' - if func_name == 'scatter' or func_name == 'embedding': - prefix = kwargs.get('basis', func.__name__) + prefix = "" + if func_name == "scatter" or func_name == "embedding": + prefix = kwargs.get("basis", func.__name__) elif kind: - prefix = kind + prefix = kind elif func_name in plot_funcs: - prefix = plot_funcs[ func_name ].__name__.split('.')[-1] - if func_name in [ 'sviol', 'rgg_sviol', 'dot', 'rgg_dot', 'matrix', 'rgg_matrix' ]: - prefix = prefix + '_' - - os.rename( - os.path.join(sc.settings.figdir, prefix + figname), output_fig) + prefix = plot_funcs[func_name].__name__.split(".")[-1] + if func_name in [ + "sviol", + "rgg_sviol", + "dot", + "rgg_dot", + "matrix", + "rgg_matrix", + ]: + prefix = prefix + "_" + + os.rename(os.path.join(sc.settings.figdir, prefix + figname), output_fig) plt.close() return plot_function + # Wrap matrix-processing functions in logic to back up .X or specified input # layers prior to processing -def make_matrix_function(func): +def make_matrix_function(func): def matrix_function( adata, - save_raw=True, - save_layer=None, + save_raw=True, + save_layer=None, **kwargs, ): # For the subset of matrix functions that allow layer specification, # pass that as the thing to save. - - layer=None - if 'layer' in kwargs: - layer = kwargs['layer'] + + layer = None + if "layer" in kwargs: + layer = kwargs["layer"] _save_matrix(adata, save_raw, save_layer=save_layer, layer=layer) - func( - adata, - **kwargs - ) + func(adata, **kwargs) return adata return matrix_function diff --git a/scanpy_scripts/cmds.py b/scanpy_scripts/cmds.py index c60ecf82..1b3394c5 100644 --- a/scanpy_scripts/cmds.py +++ b/scanpy_scripts/cmds.py @@ -32,237 +32,239 @@ from .lib._combat import combat from .lib._scrublet import scrublet, scrublet_simulate_doublets -LANG = os.environ.get('LANG', None) - -if LANG is None or not (LANG.endswith('UTF-8') or - LANG.endswith('UTF8') or - LANG.endswith('utf-8') or - LANG.endswith('utf8')): - print('This programme requires a UTF-8 locale, please check your $LANG setting.') +LANG = os.environ.get("LANG", None) + +if LANG is None or not ( + LANG.endswith("UTF-8") + or LANG.endswith("UTF8") + or LANG.endswith("utf-8") + or LANG.endswith("utf8") +): + print("This programme requires a UTF-8 locale, please check your $LANG setting.") sys.exit(0) -_I_DESC = ': input file in format specfied by --input-format' -_O_DESC = ': output file in format specfied by --output-format' -_P_DESC = ': output figure in pdf or png format' -_IO_DESC = '\n'.join([_I_DESC, _O_DESC]) -_IP_DESC = '\n'.join([_I_DESC, _P_DESC]) +_I_DESC = ": input file in format specfied by --input-format" +_O_DESC = ": output file in format specfied by --output-format" +_P_DESC = ": output figure in pdf or png format" +_IO_DESC = "\n".join([_I_DESC, _O_DESC]) +_IP_DESC = "\n".join([_I_DESC, _P_DESC]) READ_CMD = make_subcmd( - 'read', + "read", read_10x, - cmd_desc='Read 10x data and save in specified format.', + cmd_desc="Read 10x data and save in specified format.", arg_desc=_O_DESC, ) FILTER_CMD = make_subcmd( - 'filter', + "filter", make_matrix_function(filter_anndata), - cmd_desc='Filter data based on specified conditions.', + cmd_desc="Filter data based on specified conditions.", arg_desc=_IO_DESC, ) NORM_CMD = make_subcmd( - 'norm', + "norm", make_matrix_function(normalize), - cmd_desc='Normalise data per cell.', + cmd_desc="Normalise data per cell.", arg_desc=_IO_DESC, ) HVG_CMD = make_subcmd( - 'hvg', + "hvg", hvg, - cmd_desc='Find highly variable genes.', + cmd_desc="Find highly variable genes.", arg_desc=_IO_DESC, ) SCALE_CMD = make_subcmd( - 'scale', + "scale", make_matrix_function(sc.pp.scale), - cmd_desc='Scale data per gene.', + cmd_desc="Scale data per gene.", arg_desc=_IO_DESC, ) REGRESS_CMD = make_subcmd( - 'regress', + "regress", make_matrix_function(sc.pp.regress_out), - cmd_desc='Regress-out observation variables.', + cmd_desc="Regress-out observation variables.", arg_desc=_IO_DESC, ) PCA_CMD = make_subcmd( - 'pca', + "pca", pca, - cmd_desc='Dimensionality reduction by PCA.', + cmd_desc="Dimensionality reduction by PCA.", arg_desc=_IO_DESC, ) NEIGHBOR_CMD = make_subcmd( - 'neighbor', + "neighbor", neighbors, - cmd_desc='Compute a neighbourhood graph of observations.', + cmd_desc="Compute a neighbourhood graph of observations.", arg_desc=_IO_DESC, ) UMAP_CMD = make_subcmd( - 'umap', + "umap", umap, - cmd_desc='Embed the neighborhood graph using UMAP.', + cmd_desc="Embed the neighborhood graph using UMAP.", arg_desc=_IO_DESC, ) TSNE_CMD = make_subcmd( - 'tsne', + "tsne", tsne, - cmd_desc='Embed the cells using t-SNE.', + cmd_desc="Embed the cells using t-SNE.", arg_desc=_IO_DESC, ) FDG_CMD = make_subcmd( - 'fdg', + "fdg", fdg, - cmd_desc='Embed the neighborhood graph using force-directed graph.', + cmd_desc="Embed the neighborhood graph using force-directed graph.", arg_desc=_IO_DESC, ) DIFFMAP_CMD = make_subcmd( - 'diffmap', + "diffmap", diffmap, - cmd_desc='Embed the neighborhood graph using diffusion map.', + cmd_desc="Embed the neighborhood graph using diffusion map.", arg_desc=_IO_DESC, ) LOUVAIN_CMD = make_subcmd( - 'louvain', + "louvain", louvain, - cmd_desc='Find clusters by Louvain algorithm.', + cmd_desc="Find clusters by Louvain algorithm.", arg_desc=_IO_DESC, ) LEIDEN_CMD = make_subcmd( - 'leiden', + "leiden", leiden, - cmd_desc='Find clusters by Leiden algorithm.', + cmd_desc="Find clusters by Leiden algorithm.", arg_desc=_IO_DESC, ) DIFFEXP_CMD = make_subcmd( - 'diffexp', + "diffexp", diffexp, - cmd_desc='Find markers for each clusters.', + cmd_desc="Find markers for each clusters.", arg_desc=_IO_DESC, ) PAGA_CMD = make_subcmd( - 'paga', + "paga", paga, - cmd_desc='Trajectory inference by abstract graph analysis.', + cmd_desc="Trajectory inference by abstract graph analysis.", arg_desc=_IO_DESC, ) DPT_CMD = make_subcmd( - 'dpt', + "dpt", dpt, - cmd_desc='Calculate diffusion pseudotime relative to the root cells.', + cmd_desc="Calculate diffusion pseudotime relative to the root cells.", arg_desc=_IO_DESC, ) PLOT_EMBED_CMD = make_subcmd( - 'embed', - make_plot_function('embedding'), - cmd_desc='Plot cell embeddings.', + "embed", + make_plot_function("embedding"), + cmd_desc="Plot cell embeddings.", arg_desc=_IP_DESC, ) PLOT_STACKED_VIOLIN_CMD = make_subcmd( - 'sviol', - make_plot_function('sviol'), - cmd_desc='Plot stacked violin plots.', + "sviol", + make_plot_function("sviol"), + cmd_desc="Plot stacked violin plots.", arg_desc=_IP_DESC, ) PLOT_DOT_CMD = make_subcmd( - 'dot', - make_plot_function('dot'), - cmd_desc='Plot a dot plot of expression values.', + "dot", + make_plot_function("dot"), + cmd_desc="Plot a dot plot of expression values.", arg_desc=_IP_DESC, ) PLOT_MATRIX_CMD = make_subcmd( - 'matrix', - make_plot_function('matrix'), - cmd_desc='Plot a heatmap of the mean expression values per cluster.', + "matrix", + make_plot_function("matrix"), + cmd_desc="Plot a heatmap of the mean expression values per cluster.", arg_desc=_IP_DESC, ) PLOT_HEATMAP_CMD = make_subcmd( - 'heat', - make_plot_function('heat'), - cmd_desc='Plot a heatmap of the expression values of genes.', + "heat", + make_plot_function("heat"), + cmd_desc="Plot a heatmap of the expression values of genes.", arg_desc=_IP_DESC, ) PLOT_PAGA_CMD = make_subcmd( - 'paga', - make_plot_function('plot_paga', kind='paga'), - cmd_desc='Plot PAGA trajectories.', + "paga", + make_plot_function("plot_paga", kind="paga"), + cmd_desc="Plot PAGA trajectories.", arg_desc=_IP_DESC, - opt_set='plot_paga' + opt_set="plot_paga", ) COMBAT_CMD = make_subcmd( - 'combat', + "combat", combat, - cmd_desc='ComBat function for batch effect correction', - arg_desc=_IO_DESC + cmd_desc="ComBat function for batch effect correction", + arg_desc=_IO_DESC, ) HARMONY_INTEGRATE_CMD = make_subcmd( - 'harmony', + "harmony", sce.pp.harmony_integrate, - cmd_desc='Use harmonypy [Korunsky19] to integrate different experiments.', + cmd_desc="Use harmonypy [Korunsky19] to integrate different experiments.", arg_desc=_IO_DESC, ) BBKNN_CMD = make_subcmd( - 'bbknn', + "bbknn", bbknn, - cmd_desc='Batch balanced kNN [Polanski19].', + cmd_desc="Batch balanced kNN [Polanski19].", arg_desc=_IO_DESC, ) MNN_CORRECT_CMD = make_subcmd( - 'mnn', + "mnn", make_matrix_function(mnn_correct), - cmd_desc='Correct batch effects by matching mutual nearest neighbors [Haghverdi18] [Kang18].', + cmd_desc="Correct batch effects by matching mutual nearest neighbors [Haghverdi18] [Kang18].", arg_desc=_IO_DESC, ) SCRUBLET_MULTIPLET_CMD = make_subcmd( - 'scrublet', + "scrublet", scrublet, - cmd_desc='Filter out likely multiplets from droplet data using Scrublet [Wolock2019].', + cmd_desc="Filter out likely multiplets from droplet data using Scrublet [Wolock2019].", arg_desc=_IO_DESC, ) SCRUBLET_MULTIPLET_SIMULATE_CMD = make_subcmd( - 'scrublet_simulate_doublets', + "scrublet_simulate_doublets", scrublet_simulate_doublets, - cmd_desc='Simulate doublets with random transcriptome pairs for Scrublet [Wolock2019].', + cmd_desc="Simulate doublets with random transcriptome pairs for Scrublet [Wolock2019].", arg_desc=_IO_DESC, ) SCRUBLET_MULTIPLET_PLOT_CMD = make_subcmd( - 'scrublet', - make_plot_function('plot_scrublet', 'scrublet_score_distribution'), - cmd_desc='Plot histogram of doublet scores for observed transcriptomes and simulated doublets..', + "scrublet", + make_plot_function("plot_scrublet", "scrublet_score_distribution"), + cmd_desc="Plot histogram of doublet scores for observed transcriptomes and simulated doublets..", arg_desc=_IP_DESC, - opt_set='plot_scrublet' + opt_set="plot_scrublet", ) diff --git a/scanpy_scripts/lib/_bbknn.py b/scanpy_scripts/lib/_bbknn.py index e9cbc0bc..dfb54c21 100644 --- a/scanpy_scripts/lib/_bbknn.py +++ b/scanpy_scripts/lib/_bbknn.py @@ -12,23 +12,24 @@ # Wrapper for bbknn allowing use of non-standard slot + def bbknn(adata, key=None, key_added=None, **kwargs): """ Wrapper function for sce.pp.bbknn(), for supporting non-standard neighbors slot """ - _backup_default_key(adata.uns, 'neighbors') - _backup_default_key(adata.obsp, 'distances') - _backup_default_key(adata.obsp, 'connectivities') - sce.pp.bbknn(adata, batch_key = key, **kwargs) + _backup_default_key(adata.uns, "neighbors") + _backup_default_key(adata.obsp, "distances") + _backup_default_key(adata.obsp, "connectivities") + sce.pp.bbknn(adata, batch_key=key, **kwargs) if key_added: - _rename_default_key(adata.uns, 'neighbors', f'{key_added}') - _rename_default_key(adata.obsp, 'distances', f'{key_added}_distances') - _rename_default_key(adata.obsp, 'connectivities', f'{key_added}_connectivities') + _rename_default_key(adata.uns, "neighbors", f"{key_added}") + _rename_default_key(adata.obsp, "distances", f"{key_added}_distances") + _rename_default_key(adata.obsp, "connectivities", f"{key_added}_connectivities") else: - _delete_backup_key(adata.uns, 'neighbors') - _delete_backup_key(adata.obsp, 'distances') - _delete_backup_key(adata.obsp, 'connectivities') - + _delete_backup_key(adata.uns, "neighbors") + _delete_backup_key(adata.obsp, "distances") + _delete_backup_key(adata.obsp, "connectivities") + return adata diff --git a/scanpy_scripts/lib/_combat.py b/scanpy_scripts/lib/_combat.py index 7ef65bea..0d3e0650 100644 --- a/scanpy_scripts/lib/_combat.py +++ b/scanpy_scripts/lib/_combat.py @@ -6,6 +6,7 @@ # Wrapper for mnn allowing use of non-standard slot + def combat(adata, key=None, key_added=None, layer=None, **kwargs): """ Wrapper function for scanpy.pp.combat(), for supporting non-standard slots @@ -16,33 +17,33 @@ def combat(adata, key=None, key_added=None, layer=None, **kwargs): # specified to overwrite it anyway. if layer: - if key_added and key_added != 'X': - adata.layers['X_backup'] = adata.X + if key_added and key_added != "X": + adata.layers["X_backup"] = adata.X adata.X = adata.layers[layer] - + # If we're storing results in .X (whether from .X or from a layer), run in # place to save copying objects. - if key_added and key_added == 'X': - sc.pp.combat(adata, key = key, **kwargs) - + if key_added and key_added == "X": + sc.pp.combat(adata, key=key, **kwargs) + # If we're storing in 'layers' (key_added is not set, or is not X, then - # don't run in place, and put the matrix in the specified layer. + # don't run in place, and put the matrix in the specified layer. else: - cdata = sc.pp.combat(adata, key=key, inplace = False, **kwargs) - - combat_key = 'combat' + cdata = sc.pp.combat(adata, key=key, inplace=False, **kwargs) + + combat_key = "combat" if layer: combat_key = f"{combat_key}_{layer}" - + # If we ran from a layer, restore the .X we had to overwrite - - adata.X = adata.layers['X_backup'] - del adata.layers['X_backup'] - + + adata.X = adata.layers["X_backup"] + del adata.layers["X_backup"] + if key_added: combat_key = f"{combat_key}_{key_added}" diff --git a/scanpy_scripts/lib/_diffexp.py b/scanpy_scripts/lib/_diffexp.py index 20efecfa..c19c9ae5 100644 --- a/scanpy_scripts/lib/_diffexp.py +++ b/scanpy_scripts/lib/_diffexp.py @@ -6,18 +6,19 @@ import scanpy as sc import logging + def diffexp( - adata, - use_raw=None, - n_genes=None, - key_added='rank_genes_groups', - layer=None, - logreg_param=None, - filter_params=None, - save=None, - groupby=None, - groups=None, - **kwargs, + adata, + use_raw=None, + n_genes=None, + key_added="rank_genes_groups", + layer=None, + logreg_param=None, + filter_params=None, + save=None, + groupby=None, + groups=None, + **kwargs, ): """ Wrapper function for sc.tl.rank_genes_groups. @@ -32,27 +33,33 @@ def diffexp( for key, val in logreg_param: kwargs[key] = val - key_added = key_added if key_added else 'rank_genes_groups' - diff_key = (key_added + f'_{layer}') if layer else key_added + key_added = key_added if key_added else "rank_genes_groups" + diff_key = (key_added + f"_{layer}") if layer else key_added - if groups == 'all': + if groups == "all": - # Avoid divisions by zeros for singlet groups. See + # Avoid divisions by zeros for singlet groups. See # https://github.com/theislab/scanpy/pull/1490#issuecomment-726031442. - + groups_to_test = list( - adata.obs[groupby] - .value_counts() - .loc[lambda x: x > 1] - .index + adata.obs[groupby].value_counts().loc[lambda x: x > 1].index ) if len(groups_to_test) < len(adata.obs[groupby].cat.categories): groups = groups_to_test - logging.warning('Singlet groups removed before passing to rank_genes_groups()') + logging.warning( + "Singlet groups removed before passing to rank_genes_groups()" + ) sc.tl.rank_genes_groups( - adata, use_raw=use_raw, n_genes=n_genes, key_added=diff_key, groupby=groupby, groups = groups, **kwargs) + adata, + use_raw=use_raw, + n_genes=n_genes, + key_added=diff_key, + groupby=groupby, + groups=groups, + **kwargs, + ) de_tbl = extract_de_table(adata.uns[diff_key]) @@ -60,16 +67,16 @@ def diffexp( sc.tl.filter_rank_genes_groups( adata, key=diff_key, - key_added=diff_key + '_filtered', + key_added=diff_key + "_filtered", use_raw=use_raw, **filter_params, ) - de_tbl = extract_de_table(adata.uns[diff_key + '_filtered']) - de_tbl = de_tbl.loc[de_tbl.genes.astype(str) != 'nan', :] + de_tbl = extract_de_table(adata.uns[diff_key + "_filtered"]) + de_tbl = de_tbl.loc[de_tbl.genes.astype(str) != "nan", :] if save: - de_tbl.to_csv(save, sep='\t', header=True, index=False) + de_tbl.to_csv(save, sep="\t", header=True, index=False) return de_tbl @@ -79,7 +86,7 @@ def diffexp_paired(adata, groupby, pair, **kwargs): Restrict DE to between a pair of clusters, return both up and down genes """ test, ref = pair - de_key = f'de.{test}-{ref}' + de_key = f"de.{test}-{ref}" up_de = diffexp( adata, key_added=de_key, @@ -89,7 +96,7 @@ def diffexp_paired(adata, groupby, pair, **kwargs): **kwargs, ) ref, test = pair - de_key = f'de.{test}-{ref}' + de_key = f"de.{test}-{ref}" down_de = diffexp( adata, key_added=de_key, @@ -105,22 +112,34 @@ def extract_de_table(de_dict): """ Extract DE table from adata.uns """ - if de_dict['params']['method'] == 'logreg': - requested_fields = ('scores',) + if de_dict["params"]["method"] == "logreg": + requested_fields = ("scores",) else: - requested_fields = ('scores', 'logfoldchanges', 'pvals', 'pvals_adj',) - gene_df = _recarray_to_dataframe(de_dict['names'], 'genes')[ - ['cluster', 'rank', 'genes']] - gene_df['ref'] = de_dict['params']['reference'] - gene_df = gene_df[['cluster', 'ref', 'rank', 'genes']] - de_df = pd.DataFrame({ - field: _recarray_to_dataframe(de_dict[field], field)[field] - for field in requested_fields if field in de_dict - }) + requested_fields = ( + "scores", + "logfoldchanges", + "pvals", + "pvals_adj", + ) + gene_df = _recarray_to_dataframe(de_dict["names"], "genes")[ + ["cluster", "rank", "genes"] + ] + gene_df["ref"] = de_dict["params"]["reference"] + gene_df = gene_df[["cluster", "ref", "rank", "genes"]] + de_df = pd.DataFrame( + { + field: _recarray_to_dataframe(de_dict[field], field)[field] + for field in requested_fields + if field in de_dict + } + ) return gene_df.merge(de_df, left_index=True, right_index=True) def _recarray_to_dataframe(array, field_name): - return pd.DataFrame(array).reset_index().rename( - columns={'index': 'rank'}).melt( - id_vars='rank', var_name='cluster', value_name=field_name) + return ( + pd.DataFrame(array) + .reset_index() + .rename(columns={"index": "rank"}) + .melt(id_vars="rank", var_name="cluster", value_name=field_name) + ) diff --git a/scanpy_scripts/lib/_diffmap.py b/scanpy_scripts/lib/_diffmap.py index e3687058..28400e4b 100644 --- a/scanpy_scripts/lib/_diffmap.py +++ b/scanpy_scripts/lib/_diffmap.py @@ -10,20 +10,20 @@ def diffmap( - adata, - key_added=None, - export_embedding=None, - **kwargs, + adata, + key_added=None, + export_embedding=None, + **kwargs, ): """ Wrapper function for sc.tl.diffmap, for supporting named slot """ sc.tl.diffmap(adata, **kwargs) - - diffmap_key = 'X_diffmap' + + diffmap_key = "X_diffmap" if key_added: - diffmap_key = f'{diffmap_key}_{key_added}' - _rename_obsm_key(adata, 'X_diffmap', diffmap_key) + diffmap_key = f"{diffmap_key}_{key_added}" + _rename_obsm_key(adata, "X_diffmap", diffmap_key) if export_embedding is not None: write_embedding(adata, diffmap_key, export_embedding, key_added=key_added) diff --git a/scanpy_scripts/lib/_dpt.py b/scanpy_scripts/lib/_dpt.py index 98956d19..b68f0c54 100644 --- a/scanpy_scripts/lib/_dpt.py +++ b/scanpy_scripts/lib/_dpt.py @@ -10,27 +10,30 @@ def dpt( - adata, - root=None, - use_diffmap='X_diffmap', - key_added=None, - **kwargs, + adata, + root=None, + use_diffmap="X_diffmap", + key_added=None, + **kwargs, ): """ Wrapper function for sc.tl.dpt """ if root is None or not (isinstance(root, (list, tuple)) and len(root) == 2): root = (None, None) - if 'iroot' not in adata.uns.keys() and root[0] is None: - raise ValueError('Annotate your data with root cell first, i.e. ' - 'boolean vector `.uns["iroot"]` is required.') + if "iroot" not in adata.uns.keys() and root[0] is None: + raise ValueError( + "Annotate your data with root cell first, i.e. " + 'boolean vector `.uns["iroot"]` is required.' + ) if root[0] is not None: - adata.uns['iroot'] = np.random.choice( - np.flatnonzero(adata.obs[root[0]] == root[1])) + adata.uns["iroot"] = np.random.choice( + np.flatnonzero(adata.obs[root[0]] == root[1]) + ) sc.tl.dpt(adata, **kwargs) if key_added: - dpt_key = f'dpt_pseudotime_{key_added}' - _rename_default_key(adata.obs, 'dpt_pseudotime', dpt_key) - + dpt_key = f"dpt_pseudotime_{key_added}" + _rename_default_key(adata.obs, "dpt_pseudotime", dpt_key) + return adata diff --git a/scanpy_scripts/lib/_fdg.py b/scanpy_scripts/lib/_fdg.py index 6fbdb213..ec177935 100644 --- a/scanpy_scripts/lib/_fdg.py +++ b/scanpy_scripts/lib/_fdg.py @@ -12,12 +12,12 @@ def fdg( - adata, - layout='fa', - key_added_ext=None, - random_state=0, - export_embedding=None, - **kwargs + adata, + layout="fa", + key_added_ext=None, + random_state=0, + export_embedding=None, + **kwargs, ): """ Wrapper function for sc.tl.draw_graph, for supporting named slot of fdg @@ -31,7 +31,7 @@ def fdg( **kwargs, ) - fdg_key = f'X_draw_graph_{key_added_ext or layout}' + fdg_key = f"X_draw_graph_{key_added_ext or layout}" if export_embedding is not None: write_embedding(adata, fdg_key, export_embedding, key_added=key_added_ext) diff --git a/scanpy_scripts/lib/_filter.py b/scanpy_scripts/lib/_filter.py index 546f19f5..c3dd8bda 100644 --- a/scanpy_scripts/lib/_filter.py +++ b/scanpy_scripts/lib/_filter.py @@ -10,13 +10,13 @@ def filter_anndata( - adata, - gene_name='index', - list_attr=False, - param=None, - category=None, - subset=None, - force_recalc=False, + adata, + gene_name="index", + list_attr=False, + param=None, + category=None, + subset=None, + force_recalc=False, ): """ Wrapper function for sc.pp.filter_cells() and sc.pp.filter_genes(), mainly @@ -26,25 +26,29 @@ def filter_anndata( category = [] if category is None else category subset = [] if subset is None else subset - logging.debug('--gene-name=%s', gene_name) - logging.debug('--param=%s', param) - logging.debug('--category=%s', category) - logging.debug('--subset=%s', subset) + logging.debug("--gene-name=%s", gene_name) + logging.debug("--param=%s", param) + logging.debug("--category=%s", category) + logging.debug("--subset=%s", subset) - if 'mito' not in adata.var.keys() and gene_name: + if "mito" not in adata.var.keys() and gene_name: try: gene_names = getattr(adata.var, gene_name) - k_mito = gene_names.str.startswith('MT-') + k_mito = gene_names.str.startswith("MT-") if k_mito.sum() > 0: - adata.var['mito'] = k_mito - adata.var['mito'] = adata.var['mito'].astype('category') + adata.var["mito"] = k_mito + adata.var["mito"] = adata.var["mito"].astype("category") else: - logging.warning('No MT genes found, skip calculating ' - 'expression of mitochondria genes') + logging.warning( + "No MT genes found, skip calculating " + "expression of mitochondria genes" + ) except AttributeError: logging.warning( - 'Specified gene column [%s] not found, skip calculating ' - 'expression of mitochondria genes', gene_name) + "Specified gene column [%s] not found, skip calculating " + "expression of mitochondria genes", + gene_name, + ) attributes = _get_attributes(adata) if list_attr: @@ -52,59 +56,63 @@ def filter_anndata( return 0 conditions, qc_vars, pct_top = _get_filter_conditions( - attributes, param, category, subset) + attributes, param, category, subset + ) - layer = 'counts' if 'counts' in adata.layers.keys() else None + layer = "counts" if "counts" in adata.layers.keys() else None obs_columns = adata.obs.columns for qv in qc_vars: - if f'pct_counts_{qv}' in obs_columns and not force_recalc: - logging.warning('`pct_counts_%s` exists, not overwriting ' - 'without --force-recalc', qv) + if f"pct_counts_{qv}" in obs_columns and not force_recalc: + logging.warning( + "`pct_counts_%s` exists, not overwriting " "without --force-recalc", qv + ) qc_vars.remove(qv) for pt in pct_top: - if f'pct_counts_in_top_{pt}_genes' in obs_columns and not force_recalc: - logging.warning('`pct_counts_%s` exists, not overwriting ' - 'without --force-recalc', pt) + if f"pct_counts_in_top_{pt}_genes" in obs_columns and not force_recalc: + logging.warning( + "`pct_counts_%s` exists, not overwriting " "without --force-recalc", pt + ) pct_top.remove(pt) - # Calculate mito stats if we can, even if we're not filtering by them + # Calculate mito stats if we can, even if we're not filtering by them - if 'mito' not in qc_vars and 'mito' in adata.var.keys(): - qc_vars.append('mito') + if "mito" not in qc_vars and "mito" in adata.var.keys(): + qc_vars.append("mito") sc.pp.calculate_qc_metrics( - adata, layer=layer, qc_vars=qc_vars, percent_top=pct_top, inplace=True) + adata, layer=layer, qc_vars=qc_vars, percent_top=pct_top, inplace=True + ) - adata.obs['n_counts'] = adata.obs['total_counts'] - adata.obs['n_genes'] = adata.obs['n_genes_by_counts'] - adata.var['n_counts'] = adata.var['total_counts'] - adata.var['n_cells'] = adata.var['n_cells_by_counts'] + adata.obs["n_counts"] = adata.obs["total_counts"] + adata.obs["n_genes"] = adata.obs["n_genes_by_counts"] + adata.var["n_counts"] = adata.var["total_counts"] + adata.var["n_cells"] = adata.var["n_cells_by_counts"] k_cell = np.ones(len(adata.obs)).astype(bool) - for cond in conditions['c']['numerical']: + for cond in conditions["c"]["numerical"]: name, vmin, vmax = cond attr = adata.obs[name] k_cell = k_cell & (attr >= vmin) & (attr <= vmax) - for cond in conditions['c']['categorical']: + for cond in conditions["c"]["categorical"]: name, values = cond attr = getattr(adata.obs, name).astype(str) - if values[0].startswith('!'): + if values[0].startswith("!"): values[0] = values[0][1:] k_cell = k_cell & (~attr.isin(values)) else: k_cell = k_cell & attr.isin(values) k_gene = np.ones(len(adata.var)).astype(bool) - for cond in conditions['g']['numerical']: + for cond in conditions["g"]["numerical"]: name, vmin, vmax = cond attr = adata.var[name] k_gene = k_gene & (attr >= vmin) & (attr <= vmax) - for cond in conditions['g']['categorical']: + for cond in conditions["g"]["categorical"]: name, values = cond attr = getattr(adata.var, name).astype(str) - if values[0].startswith('!'): + if values[0].startswith("!"): values[0] = values[0][1:] k_gene = k_gene & ~(attr.isin(values)) else: @@ -118,143 +126,150 @@ def filter_anndata( def _get_attributes(adata): attributes = { - 'c': { - 'numerical': [], - 'categorical': ['index'], - 'bool': [], + "c": { + "numerical": [], + "categorical": ["index"], + "bool": [], }, - 'g': { - 'numerical': [], - 'categorical': ['index'], - 'bool': [], + "g": { + "numerical": [], + "categorical": ["index"], + "bool": [], }, } for attr, dtype in adata.obs.dtypes.to_dict().items(): typ = dtype.kind - if typ == 'O': - if dtype.name == 'category' and dtype.categories.is_boolean(): - attributes['c']['bool'].append(attr) - attributes['c']['categorical'].append(attr) - elif typ in ('i', 'f', 'u'): - attributes['c']['numerical'].append(attr) - elif typ == 'b': - attributes['c']['bool'].append(attr) - attributes['c']['categorical'].append(attr) + if typ == "O": + if dtype.name == "category" and dtype.categories.is_boolean(): + attributes["c"]["bool"].append(attr) + attributes["c"]["categorical"].append(attr) + elif typ in ("i", "f", "u"): + attributes["c"]["numerical"].append(attr) + elif typ == "b": + attributes["c"]["bool"].append(attr) + attributes["c"]["categorical"].append(attr) for attr, dtype in adata.var.dtypes.to_dict().items(): typ = dtype.kind - if typ == 'O': - if dtype.name == 'category' and dtype.categories.is_boolean(): - attributes['g']['bool'].append(attr) - attributes['g']['categorical'].append(attr) - elif typ in ('i', 'f', 'u'): - attributes['g']['numerical'].append(attr) - elif typ == 'b': - attributes['g']['bool'].append(attr) - attributes['g']['categorical'].append(attr) - - attributes['c']['numerical'].extend([ - 'n_genes', - 'n_counts', - ]) - - for attr in attributes['g']['bool']: - attr2 = 'pct_counts_' + attr + if typ == "O": + if dtype.name == "category" and dtype.categories.is_boolean(): + attributes["g"]["bool"].append(attr) + attributes["g"]["categorical"].append(attr) + elif typ in ("i", "f", "u"): + attributes["g"]["numerical"].append(attr) + elif typ == "b": + attributes["g"]["bool"].append(attr) + attributes["g"]["categorical"].append(attr) + + attributes["c"]["numerical"].extend( + [ + "n_genes", + "n_counts", + ] + ) + + for attr in attributes["g"]["bool"]: + attr2 = "pct_counts_" + attr if attr2 not in adata.obs.columns: - attr2 += '*' - attributes['c']['numerical'].append(attr2) - - attributes['g']['numerical'].extend([ - 'n_cells', - 'n_counts', - 'mean_counts', - 'pct_dropout_by_counts', - ]) + attr2 += "*" + attributes["c"]["numerical"].append(attr2) + + attributes["g"]["numerical"].extend( + [ + "n_cells", + "n_counts", + "mean_counts", + "pct_dropout_by_counts", + ] + ) logging.debug(attributes) return attributes def _attributes_exists(name, attributes, dtype): - cond_cat = '' - if name.startswith('c:') or name.startswith('g:'): - cond_cat, _, cond_name = name.partition(':') + cond_cat = "" + if name.startswith("c:") or name.startswith("g:"): + cond_cat, _, cond_name = name.partition(":") found = int(cond_name in attributes[cond_cat][dtype]) else: cond_name = name - if cond_name in attributes['c'][dtype]: - cond_cat += 'c' - if cond_name in attributes['g'][dtype]: - cond_cat += 'g' + if cond_name in attributes["c"][dtype]: + cond_cat += "c" + if cond_name in attributes["g"][dtype]: + cond_cat += "g" found = len(cond_cat) return found, cond_cat, cond_name def _get_filter_conditions(attributes, param, category, subset): conditions = { - 'c': { - 'numerical': [], - 'categorical': [], - 'bool': [], + "c": { + "numerical": [], + "categorical": [], + "bool": [], }, - 'g': { - 'numerical': [], - 'categorical': [], - 'bool': [], + "g": { + "numerical": [], + "categorical": [], + "bool": [], }, } - percent_top_pattern = re.compile(r'^pct_counts_in_top_(?P\d+)_genes$') + percent_top_pattern = re.compile(r"^pct_counts_in_top_(?P\d+)_genes$") pct_top = [] - qc_vars_pattern = re.compile(r'^pct_counts_(?P\S+)$') + qc_vars_pattern = re.compile(r"^pct_counts_(?P\S+)$") qc_vars = [] for name, vmin, vmax in param: - found, cond_cat, cond_name = _attributes_exists( - name, attributes, 'numerical') + found, cond_cat, cond_name = _attributes_exists(name, attributes, "numerical") pt_match = percent_top_pattern.match(cond_name) qv_match = qc_vars_pattern.match(cond_name) if found > 1: - raise click.ClickException(f'Ambiguous parameter "{name}" found in ' - 'both cell and gene table') + raise click.ClickException( + f'Ambiguous parameter "{name}" found in ' "both cell and gene table" + ) if found < 1: if pt_match: - pct_top.append(int(pt_match['n'])) - cond_cat = 'c' - elif qv_match and qv_match['qc_var'] in attributes['g']['bool']: - qc_vars.append(qv_match['qc_var']) - cond_cat = 'c' + pct_top.append(int(pt_match["n"])) + cond_cat = "c" + elif qv_match and qv_match["qc_var"] in attributes["g"]["bool"]: + qc_vars.append(qv_match["qc_var"]) + cond_cat = "c" else: raise click.ClickException(f'Parameter "{name}" unavailable') if pt_match or qv_match: vmin *= 100 vmax *= 100 - conditions[cond_cat]['numerical'].append([cond_name, vmin, vmax]) + conditions[cond_cat]["numerical"].append([cond_name, vmin, vmax]) for name, values in category + subset: - found, cond_cat, cond_name = _attributes_exists( - name, attributes, 'categorical') + found, cond_cat, cond_name = _attributes_exists(name, attributes, "categorical") if found > 1: - raise click.ClickException(f'Ambiguous attribute "{name}" found in ' - 'both cell and gene table') + raise click.ClickException( + f'Ambiguous attribute "{name}" found in ' "both cell and gene table" + ) if found < 1: raise click.ClickException(f'Attribute "{name}" unavailable') if not isinstance(values, (list, tuple)): fh = values - values = fh.read().rstrip().split('\n') + values = fh.read().rstrip().split("\n") fh.close() - conditions[cond_cat]['categorical'].append((cond_name, values)) + conditions[cond_cat]["categorical"].append((cond_name, values)) logging.debug((conditions, qc_vars, pct_top)) return conditions, qc_vars, sorted(pct_top) -def _repr_obj(obj, padding=' ', level=0): +def _repr_obj(obj, padding=" ", level=0): if isinstance(obj, dict): - obj_str = '\n'.join(['\n'.join([ - padding * level + k + ':', _repr_obj(v, level=level+1) - ]) for k, v in obj.items()]) + obj_str = "\n".join( + [ + "\n".join([padding * level + k + ":", _repr_obj(v, level=level + 1)]) + for k, v in obj.items() + ] + ) elif isinstance(obj, (tuple, list, set)): - obj_str = '\n'.join([_repr_obj(elm, level=level) for elm in obj]) + obj_str = "\n".join([_repr_obj(elm, level=level) for elm in obj]) else: obj_str = padding * level + repr(obj) return obj_str diff --git a/scanpy_scripts/lib/_hvg.py b/scanpy_scripts/lib/_hvg.py index 3cd53b73..1c340c68 100644 --- a/scanpy_scripts/lib/_hvg.py +++ b/scanpy_scripts/lib/_hvg.py @@ -5,11 +5,12 @@ import numpy as np import scanpy as sc + def hvg( - adata, - mean_limits=(0.0125, 3), - disp_limits=(0.5, float('inf')), - **kwargs, + adata, + mean_limits=(0.0125, 3), + disp_limits=(0.5, float("inf")), + **kwargs, ): """ Wrapper function for sc.highly_variable_genes() @@ -17,9 +18,9 @@ def hvg( # Check for n_top_genes beeing greater than the total genes - if 'n_top_genes' in kwargs and kwargs['n_top_genes'] is not None: - kwargs['n_top_genes'] = min(adata.n_vars, kwargs['n_top_genes']) - + if "n_top_genes" in kwargs and kwargs["n_top_genes"] is not None: + kwargs["n_top_genes"] = min(adata.n_vars, kwargs["n_top_genes"]) + sc.pp.highly_variable_genes( adata, min_mean=mean_limits[0], @@ -28,5 +29,5 @@ def hvg( max_disp=disp_limits[1], **kwargs, ) - + return adata diff --git a/scanpy_scripts/lib/_leiden.py b/scanpy_scripts/lib/_leiden.py index 0b2b7b0c..df72aa65 100644 --- a/scanpy_scripts/lib/_leiden.py +++ b/scanpy_scripts/lib/_leiden.py @@ -7,56 +7,62 @@ def leiden( - adata, - resolution, - neighbors_key=None, - obsp=None, - key_added=None, - export_cluster=None, - **kwargs + adata, + resolution, + neighbors_key=None, + obsp=None, + key_added=None, + export_cluster=None, + **kwargs, ): """ Wrapper function for sc.tl.leiden, for supporting multiple resolutions. """ keys = [] - if kwargs.get('restrict_to', None) and not kwargs['restrict_to'][0]: - kwargs['restrict_to'] = None - + if kwargs.get("restrict_to", None) and not kwargs["restrict_to"][0]: + kwargs["restrict_to"] = None + if not isinstance(resolution, (list, tuple)): - if key_added is not None and not key_added.startswith('leiden_'): - key_added = f'leiden_{key_added}' + if key_added is not None and not key_added.startswith("leiden_"): + key_added = f"leiden_{key_added}" elif key_added is None: - key_added = 'leiden' + key_added = "leiden" sc.tl.leiden( adata, resolution=resolution, neighbors_key=neighbors_key, obsp=obsp, key_added=key_added, - **kwargs + **kwargs, ) keys.append(key_added) else: for i, res in enumerate(resolution): - res_key = str(res).replace('.', '_') + res_key = str(res).replace(".", "_") if key_added is None: - graph_key = ('_' + f'{neighbors_key or obsp}') if neighbors or obsp else '' - key = f'leiden{graph_key}_r{res_key}' + graph_key = ( + ("_" + f"{neighbors_key or obsp}") if neighbors or obsp else "" + ) + key = f"leiden{graph_key}_r{res_key}" elif not isinstance(key_added, (list, tuple)): - key = f'leiden_{key_added}_r{res_key}' + key = f"leiden_{key_added}_r{res_key}" elif len(key_added) == len(resolution): key = key_added[i] else: - raise ValueError('`key_added` can only be None, a scalar, or an ' - 'iterable of the same length as `resolution`.') - keys.extend(leiden( - adata, - resolution=res, - neighbors_key=neighbors_key, - obsp=obsp, - key_added=key, - **kwargs, - )) + raise ValueError( + "`key_added` can only be None, a scalar, or an " + "iterable of the same length as `resolution`." + ) + keys.extend( + leiden( + adata, + resolution=res, + neighbors_key=neighbors_key, + obsp=obsp, + key_added=key, + **kwargs, + ) + ) if export_cluster: write_obs(adata, keys, export_cluster) diff --git a/scanpy_scripts/lib/_louvain.py b/scanpy_scripts/lib/_louvain.py index dc380bad..329c313b 100644 --- a/scanpy_scripts/lib/_louvain.py +++ b/scanpy_scripts/lib/_louvain.py @@ -7,58 +7,64 @@ def louvain( - adata, - resolution, - neighbors_key=None, - obsp=None, - key_added=None, - export_cluster=None, - **kwargs + adata, + resolution, + neighbors_key=None, + obsp=None, + key_added=None, + export_cluster=None, + **kwargs, ): """ Wrapper function for sc.tl.louvain, for supporting multiple resolutions. """ keys = [] - if kwargs['restrict_to'] and not kwargs['restrict_to'][0]: - kwargs['restrict_to'] = None - + if kwargs["restrict_to"] and not kwargs["restrict_to"][0]: + kwargs["restrict_to"] = None + if not isinstance(resolution, (list, tuple)): - if key_added is not None and not key_added.startswith('louvain_'): - key_added = f'louvain_{key_added}' + if key_added is not None and not key_added.startswith("louvain_"): + key_added = f"louvain_{key_added}" elif key_added is None: - key_added = 'louvain' + key_added = "louvain" sc.tl.louvain( adata, resolution=resolution, key_added=key_added, neighbors_key=neighbors_key, obsp=obsp, - **kwargs + **kwargs, ) keys.append(key_added) else: for i, res in enumerate(resolution): - res_key = str(res).replace('.', '_') + res_key = str(res).replace(".", "_") if key_added is None: - graph_key = ('_' + f'{neighbors_key or obsp}') if neighbors or obsp else '' - key = f'louvain{graph_key}_r{res_key}' + graph_key = ( + ("_" + f"{neighbors_key or obsp}") if neighbors or obsp else "" + ) + key = f"louvain{graph_key}_r{res_key}" elif not isinstance(key_added, (list, tuple)): - key = f'louvain_{key_added}_r{res_key}' + key = f"louvain_{key_added}_r{res_key}" elif len(key_added) == len(resolution): key = key_added[i] else: - raise ValueError('`key_added` can only be None, a scalar, or an ' - 'iterable of the same length as `resolution`.') - keys.extend(louvain( - adata, - resolution=res, - neighbors_key=neighbors_key, - obsp=obsp, - key_added=key, - **kwargs, - )) + raise ValueError( + "`key_added` can only be None, a scalar, or an " + "iterable of the same length as `resolution`." + ) + keys.extend( + louvain( + adata, + resolution=res, + neighbors_key=neighbors_key, + obsp=obsp, + key_added=key, + **kwargs, + ) + ) if export_cluster: write_obs(adata, keys, export_cluster) diff --git a/scanpy_scripts/lib/_mnn.py b/scanpy_scripts/lib/_mnn.py index 0a559a64..3c45721d 100644 --- a/scanpy_scripts/lib/_mnn.py +++ b/scanpy_scripts/lib/_mnn.py @@ -3,11 +3,12 @@ """ import scanpy.external as sce -import numpy as np +import numpy as np import click # Wrapper for mnn allowing use of non-standard slot + def mnn_correct(adata, key=None, key_added=None, var_subset=None, layer=None, **kwargs): """ Wrapper function for sce.pp.mnn_correct(), for supporting non-standard neighbors slot @@ -16,7 +17,7 @@ def mnn_correct(adata, key=None, key_added=None, var_subset=None, layer=None, ** # mnn will use .X, so we need to put other layers there for processing if layer: - adata.layers['X_backup'] = adata.X + adata.layers["X_backup"] = adata.X adata.X = adata.layers[layer] # mnn_correct() wants batches in separate adatas @@ -24,62 +25,72 @@ def mnn_correct(adata, key=None, key_added=None, var_subset=None, layer=None, ** batches = np.unique(adata.obs[key]) alldata = [] for batch in batches: - alldata.append( adata[adata.obs[key] == batch,] ) + alldata.append( + adata[ + adata.obs[key] == batch, + ] + ) # Process var_subset into a list of strings that can be provided to # mnn_correct() if var_subset is not None and len(var_subset) > 0 and var_subset[0] is not None: - + subset = [] - - for name, values in var_subset : + + for name, values in var_subset: if name in adata.var: - if adata.var[name].dtype == 'bool': - values = [ True if x.lower() == "true" else x for x in values ] + if adata.var[name].dtype == "bool": + values = [True if x.lower() == "true" else x for x in values] else: raise click.ClickException(f'Var "{name}" unavailable') - ind = [ x in values for x in adata.var[name] ] - subset = subset + adata.var.index[ ind ].to_list() + ind = [x in values for x in adata.var[name]] + subset = subset + adata.var.index[ind].to_list() var_subset = set(subset) - print('Will use %d selected genes for MNN' % len(var_subset)) + print("Will use %d selected genes for MNN" % len(var_subset)) else: var_subset = None # Here's the main bit - cdata = sce.pp.mnn_correct(*alldata, var_subset = var_subset, do_concatenate = True, index_unique = None, **kwargs) - + cdata = sce.pp.mnn_correct( + *alldata, + var_subset=var_subset, + do_concatenate=True, + index_unique=None, + **kwargs, + ) + # If user has specified key_added = X then they want us to overwrite .X, # othwerwise copy the .X to a named layer of the original object. In either # case make sure obs and var are the same as the original. - if key_added is None or key_added != 'X': + if key_added is None or key_added != "X": - mnn_key = 'mnn' + mnn_key = "mnn" if layer: mnn_key = f"{mnn_key}_{layer}" - + # Layers is set (so we're not storing computed results in the .X, # and we had to overwrite .X to run mnn), and key_added shows we're # not storing in the .X, so we need to restore from the backup. - adata.X = adata.layers['X_backup'] + adata.X = adata.layers["X_backup"] if key_added: mnn_key = f"{mnn_key}_{key_added}" - + adata.layers[mnn_key] = cdata[0][adata.obs.index, adata.var.index].X else: adata.X = cdata[0][adata.obs.index, adata.var.index].X - # Delete the backup of .X if we needed one + # Delete the backup of .X if we needed one if layer: - del adata.layers['X_backup'] + del adata.layers["X_backup"] return adata diff --git a/scanpy_scripts/lib/_neighbors.py b/scanpy_scripts/lib/_neighbors.py index bb752a47..e791fec8 100644 --- a/scanpy_scripts/lib/_neighbors.py +++ b/scanpy_scripts/lib/_neighbors.py @@ -19,14 +19,16 @@ def neighbors(adata, n_neighbors=15, key_added=None, **kwargs): else: for i, n_nb in enumerate(n_neighbors): if key_added is None: - graph_key = f'k{n_nb}' + graph_key = f"k{n_nb}" elif not isinstance(key_added, (list, tuple)): - graph_key = f'{key_added}_k{n_nb}' + graph_key = f"{key_added}_k{n_nb}" elif len(key_added) == len(n_neighbors): graph_key = key_added[i] else: - raise ValueError('`key_added` can only be None, a scalar, or an ' - 'iterable of the same length as `n_neighbors`.') + raise ValueError( + "`key_added` can only be None, a scalar, or an " + "iterable of the same length as `n_neighbors`." + ) neighbors( adata, n_neighbors=n_nb, diff --git a/scanpy_scripts/lib/_paga.py b/scanpy_scripts/lib/_paga.py index 0aace0d3..033569a2 100644 --- a/scanpy_scripts/lib/_paga.py +++ b/scanpy_scripts/lib/_paga.py @@ -14,9 +14,9 @@ def paga( - adata, - key_added=None, - **kwargs, + adata, + key_added=None, + **kwargs, ): """ Wrapper function for sc.tl.paga, for supporting named slot @@ -24,31 +24,30 @@ def paga( sc.tl.paga(adata, **kwargs) if key_added: - paga_key = f'paga_{key_added}' - _rename_default_key(adata.uns, 'paga', paga_key) + paga_key = f"paga_{key_added}" + _rename_default_key(adata.uns, "paga", paga_key) else: - _delete_backup_key(adata.uns, 'paga') + _delete_backup_key(adata.uns, "paga") return adata def plot_paga( - adata, - use_key='paga', - basis=None, - layout=None, - init_pos=None, - legend_loc='on data', - color=None, - size=None, - title=None, - show=None, - save=None, - **kwargs, + adata, + use_key="paga", + basis=None, + layout=None, + init_pos=None, + legend_loc="on data", + color=None, + size=None, + title=None, + show=None, + save=None, + **kwargs, ): - """Make PAGA plot - """ - if basis is not None and f'X_{basis}' in adata.obsm.keys(): + """Make PAGA plot""" + if basis is not None and f"X_{basis}" in adata.obsm.keys(): ax = sc.pl.embedding( adata, basis=basis, @@ -60,21 +59,21 @@ def plot_paga( show=False, ) - grouping = adata.uns[use_key]['groups'] + grouping = adata.uns[use_key]["groups"] categories = list(adata.obs[grouping].cat.categories) - obsm = adata.obsm[f'X_{basis}'] + obsm = adata.obsm[f"X_{basis}"] group_pos = np.zeros((len(categories), 2)) for i, label in enumerate(categories): - offset = 1 if basis.startswith('diffmap') else 0 - _scatter = obsm[adata.obs[grouping] == label, (0+offset):(2+offset)] + offset = 1 if basis.startswith("diffmap") else 0 + _scatter = obsm[adata.obs[grouping] == label, (0 + offset) : (2 + offset)] x_pos, y_pos = np.median(_scatter, axis=0) group_pos[i] = [x_pos, y_pos] - _set_default_key(adata.uns, 'paga', use_key) - kwargs['node_size_scale'] = 0 - kwargs['fontsize'] = 1 - kwargs['pos'] = group_pos - kwargs['color'] = None + _set_default_key(adata.uns, "paga", use_key) + kwargs["node_size_scale"] = 0 + kwargs["fontsize"] = 1 + kwargs["pos"] = group_pos + kwargs["color"] = None try: sc.pl.paga( adata, @@ -85,9 +84,9 @@ def plot_paga( **kwargs, ) finally: - _restore_default_key(adata.uns, 'paga', use_key) + _restore_default_key(adata.uns, "paga", use_key) else: - _set_default_key(adata.uns, 'paga', use_key) + _set_default_key(adata.uns, "paga", use_key) try: sc.pl.paga( adata, @@ -97,9 +96,9 @@ def plot_paga( title=title, show=show, save=save, - **kwargs + **kwargs, ) finally: - _restore_default_key(adata.uns, 'paga', use_key) + _restore_default_key(adata.uns, "paga", use_key) return adata diff --git a/scanpy_scripts/lib/_pca.py b/scanpy_scripts/lib/_pca.py index 074fc250..28efc80f 100644 --- a/scanpy_scripts/lib/_pca.py +++ b/scanpy_scripts/lib/_pca.py @@ -6,28 +6,29 @@ import scanpy as sc from ..obj_utils import write_embedding + def pca(adata, key_added=None, export_embedding=None, **kwargs): """ Wrapper function for sc.pp.pca, for supporting named slot """ # omit "svd_solver" to let scanpy choose automatically - if 'svd_solver' in kwargs and kwargs['svd_solver'] == 'auto': - del kwargs['svd_solver'] + if "svd_solver" in kwargs and kwargs["svd_solver"] == "auto": + del kwargs["svd_solver"] if key_added: - if 'X_pca' in adata.obsm.keys(): - adata.obsm['X_pca_bkup'] = adata.obsm['X_pca'] + if "X_pca" in adata.obsm.keys(): + adata.obsm["X_pca_bkup"] = adata.obsm["X_pca"] sc.pp.pca(adata, **kwargs) - pca_key = f'X_pca_{key_added}' - adata.obsm[pca_key] = adata.obsm['X_pca'] - del adata.obsm['X_pca'] - if 'X_pca_bkup' in adata.obsm.keys(): - adata.obsm['X_pca'] = adata.obsm['X_pca_bkup'] - del adata.obsm['X_pca_bkup'] + pca_key = f"X_pca_{key_added}" + adata.obsm[pca_key] = adata.obsm["X_pca"] + del adata.obsm["X_pca"] + if "X_pca_bkup" in adata.obsm.keys(): + adata.obsm["X_pca"] = adata.obsm["X_pca_bkup"] + del adata.obsm["X_pca_bkup"] else: sc.pp.pca(adata, **kwargs) - pca_key = 'X_pca' + pca_key = "X_pca" if export_embedding is not None: write_embedding(adata, pca_key, export_embedding, key_added=key_added) diff --git a/scanpy_scripts/lib/_read.py b/scanpy_scripts/lib/_read.py index 926ffbee..20e3ee67 100644 --- a/scanpy_scripts/lib/_read.py +++ b/scanpy_scripts/lib/_read.py @@ -7,12 +7,12 @@ def read_10x( - input_10x_h5, - input_10x_mtx, - genome='hg19', - var_names='gene_symbols', - extra_obs=None, - extra_var=None + input_10x_h5, + input_10x_mtx, + genome="hg19", + var_names="gene_symbols", + extra_obs=None, + extra_var=None, ): """ Wrapper function for sc.read_10x_h5() and sc.read_10x_mtx(), mainly to @@ -24,20 +24,20 @@ def read_10x( adata = sc.read_10x_mtx(input_10x_mtx, var_names=var_names) if extra_obs: - obs_tbl = pd.read_csv(extra_obs, sep='\t', header=0, index_col=0) + obs_tbl = pd.read_csv(extra_obs, sep="\t", header=0, index_col=0) adata.obs = adata.obs.merge( obs_tbl, - how='left', + how="left", left_index=True, right_index=True, suffixes=(False, False), ) if extra_var: - var_tbl = pd.read_csv(extra_var, sep='\t', header=0, index_col=0) + var_tbl = pd.read_csv(extra_var, sep="\t", header=0, index_col=0) adata.var = adata.var.merge( var_tbl, - how='left', + how="left", left_index=True, right_index=True, suffixes=(False, False), diff --git a/scanpy_scripts/lib/_scrublet.py b/scanpy_scripts/lib/_scrublet.py index f6a4ac1f..d1462de9 100644 --- a/scanpy_scripts/lib/_scrublet.py +++ b/scanpy_scripts/lib/_scrublet.py @@ -11,6 +11,7 @@ # Wrapper for scrublet allowing text export and filtering + def scrublet(adata, adata_sim=None, filter=False, export_table=None, **kwargs): """ Wrapper function for sce.pp.scrublet(), to allow filtering of resulting object @@ -22,7 +23,7 @@ def scrublet(adata, adata_sim=None, filter=False, export_table=None, **kwargs): adata_sim = sc.read(adata_sim) sce.pp.scrublet(adata, adata_sim=adata_sim, **kwargs) - + # Do any export before optional filtering if export_table: @@ -35,14 +36,17 @@ def scrublet(adata, adata_sim=None, filter=False, export_table=None, **kwargs): return adata + # Run the doublet simulation. + def scrublet_simulate_doublets(adata, **kwargs): adata_sim = sce.pp.scrublet_simulate_doublets(adata, **kwargs) adata._init_as_actual( X=adata_sim.X, obs=adata_sim.obs, obsm=adata_sim.obsm, uns=adata.uns ) + # Just absorb the extra plotting args before passing to # scanpy.external.pl.scrublet_score_distribution diff --git a/scanpy_scripts/lib/_tsne.py b/scanpy_scripts/lib/_tsne.py index 5a4e3e7e..9a930524 100644 --- a/scanpy_scripts/lib/_tsne.py +++ b/scanpy_scripts/lib/_tsne.py @@ -12,41 +12,43 @@ def tsne( - adata, - key_added=None, - random_state=0, - export_embedding=None, - **kwargs, + adata, + key_added=None, + random_state=0, + export_embedding=None, + **kwargs, ): """ Wrapper function for sc.tl.tsne, for supporting named slot of tsne embeddings """ if not isinstance(random_state, (list, tuple)): - _backup_obsm_key(adata, 'X_tsne') + _backup_obsm_key(adata, "X_tsne") sc.tl.tsne(adata, random_state=random_state, **kwargs) - tsne_key = 'X_tsne' + tsne_key = "X_tsne" if key_added: - tsne_key = f'X_tsne_{key_added}' - _rename_obsm_key(adata, 'X_tsne', tsne_key) + tsne_key = f"X_tsne_{key_added}" + _rename_obsm_key(adata, "X_tsne", tsne_key) else: - _delete_obsm_backup_key(adata, 'X_tsne') + _delete_obsm_backup_key(adata, "X_tsne") if export_embedding is not None: write_embedding(adata, tsne_key, export_embedding, key_added=key_added) else: for i, rseed in enumerate(random_state): if key_added is None: - tsne_key = f'r{rseed}' + tsne_key = f"r{rseed}" elif not isinstance(key_added, (list, tuple)): - tsne_key = f'{key_added}_r{rseed}' + tsne_key = f"{key_added}_r{rseed}" elif len(key_added) == len(random_state): tsne_key = key_added[i] else: - raise ValueError('`key_added` can only be None, a scalar, or ' - 'an iterable of the same length as ' - '`random_state`.') + raise ValueError( + "`key_added` can only be None, a scalar, or " + "an iterable of the same length as " + "`random_state`." + ) tsne( adata, key_added=tsne_key, diff --git a/scanpy_scripts/lib/_umap.py b/scanpy_scripts/lib/_umap.py index 197edf83..ce8fc9b6 100644 --- a/scanpy_scripts/lib/_umap.py +++ b/scanpy_scripts/lib/_umap.py @@ -9,44 +9,47 @@ _backup_obsm_key, _rename_obsm_key, _delete_obsm_backup_key, - write_embedding + write_embedding, ) + def umap( - adata, - key_added=None, - random_state=0, - export_embedding=None, - **kwargs, + adata, + key_added=None, + random_state=0, + export_embedding=None, + **kwargs, ): """ Wrapper function for sc.tl.umap, for supporting named slot of umap embeddings """ if not isinstance(random_state, (list, tuple)): - _backup_obsm_key(adata, 'X_umap') + _backup_obsm_key(adata, "X_umap") sc.tl.umap(adata, random_state=random_state, **kwargs) - umap_key = 'X_umap' + umap_key = "X_umap" if key_added: - umap_key = f'X_umap_{key_added}' - _rename_obsm_key(adata, 'X_umap', umap_key) + umap_key = f"X_umap_{key_added}" + _rename_obsm_key(adata, "X_umap", umap_key) else: - _delete_obsm_backup_key(adata, 'X_umap') + _delete_obsm_backup_key(adata, "X_umap") if export_embedding is not None: write_embedding(adata, umap_key, export_embedding, key_added=key_added) else: for i, rseed in enumerate(random_state): if key_added is None: - umap_key = f'r{rseed}' + umap_key = f"r{rseed}" elif not isinstance(key_added, (list, tuple)): - umap_key = f'{key_added}_r{rseed}' + umap_key = f"{key_added}_r{rseed}" elif len(key_added) == len(random_state): umap_key = key_added[i] else: - raise ValueError('`key_added` can only be None, a scalar, or an ' - 'iterable of the same length as `random_state`.') + raise ValueError( + "`key_added` can only be None, a scalar, or an " + "iterable of the same length as `random_state`." + ) umap( adata, key_added=umap_key, diff --git a/scanpy_scripts/obj_utils.py b/scanpy_scripts/obj_utils.py index 13fc45a6..5e444090 100644 --- a/scanpy_scripts/obj_utils.py +++ b/scanpy_scripts/obj_utils.py @@ -5,32 +5,33 @@ import scanpy as sc import pandas as pd -def write_obs(adata, keys, obs_fn, sep='\t'): - """Export cell clustering as a text table - """ + +def write_obs(adata, keys, obs_fn, sep="\t"): + """Export cell clustering as a text table""" if not isinstance(keys, (list, tuple)): keys = [keys] for key in keys: if key not in adata.obs.keys(): - raise KeyError(f'{key} is not a valid `.uns` key') - adata.obs[keys].reset_index(level=0).rename(columns={'index': 'cells'}).to_csv( - obs_fn, sep=sep, header=True, index=False) + raise KeyError(f"{key} is not a valid `.uns` key") + adata.obs[keys].reset_index(level=0).rename(columns={"index": "cells"}).to_csv( + obs_fn, sep=sep, header=True, index=False + ) -def write_embedding(adata, key, embed_fn, n_comp=None, sep='\t', key_added=None): - """Export cell embeddings as a txt table - """ +def write_embedding(adata, key, embed_fn, n_comp=None, sep="\t", key_added=None): + """Export cell embeddings as a txt table""" if key_added: - if embed_fn.endswith('.tsv'): + if embed_fn.endswith(".tsv"): embed_fn = embed_fn[0:-4] - embed_fn = f'{embed_fn}_{key_added}.tsv' + embed_fn = f"{embed_fn}_{key_added}.tsv" if key not in adata.obsm.keys(): - raise KeyError(f'{key} is not a valid `.obsm` key') + raise KeyError(f"{key} is not a valid `.obsm` key") mat = adata.obsm[key].copy() if n_comp is not None and mat.shape[1] >= n_comp: mat = mat[:, 0:n_comp] pd.DataFrame(mat, index=adata.obs_names).to_csv( - embed_fn, sep=sep, header=False, index=True) + embed_fn, sep=sep, header=False, index=True + ) # The functions below handles slot key. @@ -56,24 +57,25 @@ def write_embedding(adata, key, embed_fn, n_comp=None, sep='\t', key_added=None) # Specical treatment for obsm_key is needed, as the underlying data type is not # a python dictionary but a numpy array. + def _backup_default_key(slot, default): if default in slot.keys(): - bkup_key = f'{default}_bkup' + bkup_key = f"{default}_bkup" if bkup_key in slot.keys(): - sc.logging.warn(f'overwrite existing {bkup_key}') + sc.logging.warn(f"overwrite existing {bkup_key}") slot[bkup_key] = slot[default] def _restore_default_key(slot, default, key=None): if key != default: - bkup_key = f'{default}_bkup' + bkup_key = f"{default}_bkup" if bkup_key in slot.keys(): slot[default] = slot[bkup_key] del slot[bkup_key] def _delete_backup_key(slot, default): - bkup_key = f'{default}_bkup' + bkup_key = f"{default}_bkup" if bkup_key in slot.keys(): del slot[bkup_key] @@ -81,14 +83,14 @@ def _delete_backup_key(slot, default): def _set_default_key(slot, default, key): if key != default: if key not in slot.keys(): - raise KeyError(f'{key} does not exist') + raise KeyError(f"{key} does not exist") _backup_default_key(slot, default) slot[default] = slot[key] def _rename_default_key(slot, default, key): if not default in slot.keys(): - raise KeyError(f'{default} does not exist') + raise KeyError(f"{default} does not exist") slot[key] = slot[default] del slot[default] _restore_default_key(slot, default) @@ -96,22 +98,22 @@ def _rename_default_key(slot, default, key): def _backup_obsm_key(adata, key): if key in adata.obsm_keys(): - bkup_key = f'{key}_bkup' + bkup_key = f"{key}_bkup" if bkup_key in adata.obsm_keys(): - sc.logging.warn(f'overwrite existing {bkup_key}') + sc.logging.warn(f"overwrite existing {bkup_key}") adata.obsm[bkup_key] = adata.obsm[key] def _restore_obsm_key(adata, key, new_key=None): if new_key != key: - bkup_key = f'{key}_bkup' + bkup_key = f"{key}_bkup" if bkup_key in adata.obsm_keys(): adata.obsm[key] = adata.obsm[bkup_key] del adata.obsm[bkup_key] def _delete_obsm_backup_key(adata, key): - bkup_key = f'{key}_bkup' + bkup_key = f"{key}_bkup" if bkup_key in adata.obsm_keys(): del adata.obsm[bkup_key] @@ -119,27 +121,29 @@ def _delete_obsm_backup_key(adata, key): def _set_obsm_key(adata, key, new_key): if new_key != key: if new_key not in adata.obsm_keys(): - raise KeyError(f'{new_key} does not exist') + raise KeyError(f"{new_key} does not exist") _backup_obsm_key(adata, key) adata.obsm[key] = adata.obsm[new_key] def _rename_obsm_key(adata, from_key, to_key): if not from_key in adata.obsm_keys(): - raise KeyError(f'{from_key} does not exist') + raise KeyError(f"{from_key} does not exist") adata.obsm[to_key] = adata.obsm[from_key] del adata.obsm[from_key] _restore_obsm_key(adata, from_key) + # Place the content of .X or specified layer in a specified backup location. -def _save_matrix(adata, save_raw = False, save_layer = None, layer = None): + +def _save_matrix(adata, save_raw=False, save_layer=None, layer=None): if save_raw: adata.raw = adata if save_layer is not None: if layer is not None: if layer not in adata.layers(): - raise KeyError(f'Layer {layer} does not exist') + raise KeyError(f"Layer {layer} does not exist") adata.layers[save_layer] = adata.layers[layer] else: adata.layers[save_layer] = adata.X diff --git a/setup.py b/setup.py index ce1276e2..79b4af7b 100644 --- a/setup.py +++ b/setup.py @@ -1,58 +1,58 @@ from setuptools import setup, find_packages -with open('README.md', 'r') as fh: +with open("README.md", "r") as fh: long_description = fh.read() setup( - name='scanpy-scripts', - version='1.1.3', - author='nh3', - author_email='nh3@users.noreply.github.com', - description='Scripts for using scanpy from the command line', + name="scanpy-scripts", + version="1.1.4", + author="nh3", + author_email="nh3@users.noreply.github.com", + description="Scripts for using scanpy from the command line", long_description=long_description, - long_description_content_type='text/markdown', - url='https://github.com/ebi-gene-expression-group/scanpy-scripts', + long_description_content_type="text/markdown", + url="https://github.com/ebi-gene-expression-group/scanpy-scripts", packages=find_packages(), scripts=[ - 'scanpy-scripts-tests.bats', + "scanpy-scripts-tests.bats", ], entry_points=dict( console_scripts=[ - 'scanpy-cli=scanpy_scripts.cli:cli', - 'scanpy-read-10x=scanpy_scripts.cmds:READ_CMD', - 'scanpy-filter-cells=scanpy_scripts.cmds:FILTER_CMD', - 'scanpy-filter-genes=scanpy_scripts.cmds:FILTER_CMD', - 'scanpy-normalise-data=scanpy_scripts.cmds:NORM_CMD', - 'scanpy-find-variable-genes=scanpy_scripts.cmds:HVG_CMD', - 'scanpy-scale-data=scanpy_scripts.cmds:SCALE_CMD', - 'scanpy-regress=scanpy_scripts.cmds:REGRESS_CMD', - 'scanpy-run-pca=scanpy_scripts.cmds:PCA_CMD', - 'scanpy-neighbors=scanpy_scripts.cmds:NEIGHBOR_CMD', - 'scanpy-run-tsne=scanpy_scripts.cmds:TSNE_CMD', - 'scanpy-run-umap=scanpy_scripts.cmds:UMAP_CMD', - 'scanpy-find-cluster=scanpy_scripts.cli:cluster', - 'scanpy-find-markers=scanpy_scripts.cmds:DIFFEXP_CMD', + "scanpy-cli=scanpy_scripts.cli:cli", + "scanpy-read-10x=scanpy_scripts.cmds:READ_CMD", + "scanpy-filter-cells=scanpy_scripts.cmds:FILTER_CMD", + "scanpy-filter-genes=scanpy_scripts.cmds:FILTER_CMD", + "scanpy-normalise-data=scanpy_scripts.cmds:NORM_CMD", + "scanpy-find-variable-genes=scanpy_scripts.cmds:HVG_CMD", + "scanpy-scale-data=scanpy_scripts.cmds:SCALE_CMD", + "scanpy-regress=scanpy_scripts.cmds:REGRESS_CMD", + "scanpy-run-pca=scanpy_scripts.cmds:PCA_CMD", + "scanpy-neighbors=scanpy_scripts.cmds:NEIGHBOR_CMD", + "scanpy-run-tsne=scanpy_scripts.cmds:TSNE_CMD", + "scanpy-run-umap=scanpy_scripts.cmds:UMAP_CMD", + "scanpy-find-cluster=scanpy_scripts.cli:cluster", + "scanpy-find-markers=scanpy_scripts.cmds:DIFFEXP_CMD", ] ), install_requires=[ - 'packaging', - 'anndata', - 'scipy', - 'matplotlib', - 'pandas', - 'h5py<3.0.0', - 'scanpy==1.8.1', - 'louvain', - 'leidenalg', - 'loompy', - 'MulticoreTSNE', - 'Click<8', - 'umap-learn', - 'harmonypy>=0.0.5', - 'bbknn>=1.5.0', - 'mnnpy>=0.1.9.5', - 'scrublet', - 'scikit-misc', - 'fa2' + "packaging", + "anndata", + "scipy", + "matplotlib", + "pandas", + "h5py<3.0.0", + "scanpy==1.8.1", + "louvain", + "leidenalg", + "loompy", + "MulticoreTSNE", + "Click<8", + "umap-learn", + "harmonypy>=0.0.5", + "bbknn>=1.5.0", + "mnnpy>=0.1.9.5", + "scrublet", + "scikit-misc", + "fa2", ], )