Skip to content

Commit

Permalink
Move scanpy tools to scanpy 1.9.3 (#313)
Browse files Browse the repository at this point in the history
* Initial changes

* Use delta_frac in most tests

* Update anndata_operations.xml changes validation of test output

* Update anndata_operations.xml fixes test

* Update anndata_operations.xml revert changes in test 1

* Update scanpy-filter-cells.xml adds assert to test output

* Update scanpy-filter-genes.xml adds assert to test output

* Update scanpy-find-cluster.xml adds assert method in test output

* Update scanpy-filter-genes.xml fixes lint

* Update scanpy-find-markers.xml adds assert in test output

* Update scanpy-find-variable-genes.xml adds assert in test output

* adds assert in test output

* adds assert in test output

* removes unknown character

* adds assert in test output

* fixes broken URL

* removes redundant output

* Move to released bioconda package

* Versions and remove old unused macros

* Remove test-data h5 files not needed anymore

---------

Co-authored-by: Anil Thanki <[email protected]>
  • Loading branch information
pcm32 and anilthanki authored Feb 27, 2024
1 parent 56273bc commit ee197a8
Show file tree
Hide file tree
Showing 40 changed files with 181 additions and 193 deletions.
16 changes: 10 additions & 6 deletions tools/tertiary-analysis/scanpy/anndata_operations.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<description>modifies metadata and flags genes</description>
<tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>is a Swiss army knife for AnnData files</description>
<macros>
<import>scanpy_macros2.xml</import>
</macros>
Expand Down Expand Up @@ -446,7 +446,11 @@ for field_value in adata.obs["${split_on_obs.key}"].unique():
<tests>
<test>
<param name="input_obj_file" value="find_cluster.h5"/>
<output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/>
<output name="output_h5ad" ftype="h5ad">
<assert_contents>
<has_h5_keys keys="var/gene_symbols"/>
</assert_contents>
</output>
</test>
<test>
<param name="input_obj_file" value="mnn.h5"/>
Expand Down Expand Up @@ -511,7 +515,7 @@ for field_value in adata.obs["${split_on_obs.key}"].unique():
<param name="default" value="true"/>
<param name="r_source" value="read_10x.h5"/>
</conditional>
<output name="output_h5ad" file="anndata_ops_raw.h5" ftype="h5ad" compare="sim_size">
<output name="output_h5ad" ftype="h5ad">
<assert_contents>
<has_h5_keys keys="raw/X" />
</assert_contents>
Expand All @@ -526,7 +530,7 @@ for field_value in adata.obs["${split_on_obs.key}"].unique():
<param name="dest" value='filtered'/>
</repeat>
</conditional>
<output name="output_h5ad" file="anndata_ops_xlayer.h5" ftype="h5ad" compare="sim_size">
<output name="output_h5ad" ftype="h5ad">
<assert_contents>
<has_h5_keys keys="layers/filtered" />
</assert_contents>
Expand All @@ -541,7 +545,7 @@ for field_value in adata.obs["${split_on_obs.key}"].unique():
</repeat>
<param name="layer_sources" value='anndata_ops_xlayer.h5'/>
</conditional>
<output name="output_h5ad" file="anndata_ops_layer.h5" ftype="h5ad" compare="sim_size">
<output name="output_h5ad" ftype="h5ad">
<assert_contents>
<has_h5_keys keys="layers/filtered" />
</assert_contents>
Expand Down
8 changes: 6 additions & 2 deletions tools/tertiary-analysis/scanpy/scanpy-filter-cells.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_filter_cells" name="Scanpy FilterCells" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@" >
<tool id="scanpy_filter_cells" name="Scanpy FilterCells" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@" >
<description>based on counts and numbers of genes expressed</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -88,7 +88,11 @@ PYTHONIOENCODING=utf-8 scanpy-filter-cells
<param name="min" value="0"/>
<param name="max" value="1e9"/>
</repeat>
<output name="output_h5" file="filter_cells.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="obs/n_genes_by_counts"/>
</assert_contents>
</output>
</test>
</tests>

Expand Down
8 changes: 6 additions & 2 deletions tools/tertiary-analysis/scanpy/scanpy-filter-genes.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_filter_genes" name="Scanpy FilterGenes" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_filter_genes" name="Scanpy FilterGenes" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>based on counts and numbers of cells expressed</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -82,7 +82,11 @@ $force_recalc
<param name="min" value="0"/>
<param name="max" value="1e9"/>
</repeat>
<output name="output_h5" file="filter_genes.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="obs/n_genes_by_counts" />
</assert_contents>
</output>
</test>
</tests>

Expand Down
20 changes: 16 additions & 4 deletions tools/tertiary-analysis/scanpy/scanpy-find-cluster.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_find_cluster" name="Scanpy FindCluster" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_find_cluster" name="Scanpy FindCluster" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>based on community detection on KNN graph</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -97,7 +97,11 @@ PYTHONIOENCODING=utf-8 scanpy-find-cluster
<param name="default" value="false"/>
<param name="resolution_file" value="resolution.txt"/>
<param name="random_seed" value="0"/>
<output name="output_h5" file="find_cluster.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="obs/louvain"/>
</assert_contents>
</output>
<output name="output_txt" file="find_cluster.tsv" ftype="tsv"/>
</test>
<test>
Expand All @@ -107,7 +111,11 @@ PYTHONIOENCODING=utf-8 scanpy-find-cluster
<param name="default" value="false"/>
<param name="resolution" value="1.0"/>
<param name="random_seed" value="0"/>
<output name="output_h5" file="find_cluster.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="obs/louvain"/>
</assert_contents>
</output>
<output name="output_txt" file="find_cluster.tsv" ftype="tsv"/>
</test>
<test>
Expand All @@ -119,7 +127,11 @@ PYTHONIOENCODING=utf-8 scanpy-find-cluster
<param name="resolution" value="1.0"/>
<param name="key_added" value="METHOD_RESOLUTION"/>
<param name="random_seed" value="0"/>
<output name="output_h5" file="louvain_1.0" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="obs/louvain_1.0"/>
</assert_contents>
</output>
<output name="output_txt" file="louvain_1.0.tsv" ftype="tsv"/>
</test>
</tests>
Expand Down
12 changes: 8 additions & 4 deletions tools/tertiary-analysis/scanpy/scanpy-find-markers.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_find_markers" name="Scanpy FindMarkers" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_find_markers" name="Scanpy FindMarkers" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>to find differentially expressed genes between groups</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -113,8 +113,12 @@ PYTHONIOENCODING=utf-8 scanpy-find-markers
<param name="key_added" value="GROUPBY_marker"/>
<param name="method" value="t-test_overestim_var"/>
<param name="rankby_abs" value="false"/>
<output name="output_h5" file="find_markers.h5" ftype="h5" compare="sim_size"/>
<output name="output_tsv" file="diffexp.tsv" ftype="tabular" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="uns/louvain_1.0_marker"/>
</assert_contents>
</output>
<output name="output_tsv" file="diffexp.tsv" ftype="tabular" compare="sim_size" delta_frac="0.1"/>
</test>
<test>
<param name="input_obj_file" value="louvain_1.0"/>
Expand All @@ -127,7 +131,7 @@ PYTHONIOENCODING=utf-8 scanpy-find-markers
<param name="key_added" value="GROUPBY_marker"/>
<param name="method" value="t-test_overestim_var"/>
<param name="rankby_abs" value="false"/>
<output name="output_tsv" file="diffexp.tsv" ftype="tabular" compare="sim_size"/>
<output name="output_tsv" file="diffexp.tsv" ftype="tabular" compare="sim_size" delta_frac="0.1"/>
</test>
<test>
<param name="input_obj_file" value="mnn.h5"/>
Expand Down
8 changes: 6 additions & 2 deletions tools/tertiary-analysis/scanpy/scanpy-find-variable-genes.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_find_variable_genes" name="Scanpy FindVariableGenes" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_find_variable_genes" name="Scanpy FindVariableGenes" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>based on normalised dispersion of expression</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -71,7 +71,11 @@ PYTHONIOENCODING=utf-8 scanpy-find-variable-genes
<param name="max_mean" value="3"/>
<param name="min_disp" value="0.5"/>
<param name="max_disp" value="1e9"/>
<output name="output_h5" file="find_variable_genes.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="uns/hvg"/>
</assert_contents>
</output>
</test>
</tests>

Expand Down
6 changes: 3 additions & 3 deletions tools/tertiary-analysis/scanpy/scanpy-integrate-bbknn.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_integrate_bbknn" name="Scanpy BBKNN" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_integrate_bbknn" name="Scanpy BBKNN" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>batch-balanced K-nearest neighbours</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -124,13 +124,13 @@ echo "No batch variables passed, simply passing original input as output unchang
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<param name="batch_key" value="louvain"/>
<output name="output_h5" file="bbknn.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="bbknn.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
<test>
<param name="input_obj_file" value="find_cluster.h5"/>
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<output name="output_h5" file="bbknn_copy.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="bbknn_copy.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
</tests>

Expand Down
12 changes: 8 additions & 4 deletions tools/tertiary-analysis/scanpy/scanpy-integrate-combat.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_integrate_combat" name="Scanpy ComBat" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_integrate_combat" name="Scanpy ComBat" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>adjust expression for variables that might introduce batch effect</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand All @@ -8,7 +8,7 @@
<command detect_errors="exit_code"><![CDATA[
#if $batch_key
ln -s '${input_obj_file}' input.h5 &&
PYTHONIOENCODING=utf-8 scanpy-integrate combat
PYTHONIOENCODING=utf-8 scanpy-cli integrate combat
--batch-key '${batch_key}'
#if $batch_layer
--batch-layer '${batch_layer}'
Expand Down Expand Up @@ -58,13 +58,17 @@
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<param name="batch_key" value="louvain"/>
<output name="output_h5" file="combat.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="layers/combat"/>
</assert_contents>
</output>
</test>
<test>
<param name="input_obj_file" value="find_cluster.h5"/>
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<output name="output_h5" file="combat_copy.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="combat_copy.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
</tests>

Expand Down
12 changes: 8 additions & 4 deletions tools/tertiary-analysis/scanpy/scanpy-integrate-harmony.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_integrate_harmony" name="Scanpy Harmony" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_integrate_harmony" name="Scanpy Harmony" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>adjust principal components for variables that might introduce batch effect</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand All @@ -8,7 +8,7 @@
<command detect_errors="exit_code"><![CDATA[
#if $batch_key
ln -s '${input_obj_file}' input.h5 &&
PYTHONIOENCODING=utf-8 scanpy-integrate harmony
PYTHONIOENCODING=utf-8 scanpy-cli integrate harmony
--batch-key '${batch_key}'
#if $basis
--basis '${basis}'
Expand Down Expand Up @@ -114,13 +114,17 @@
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<param name="batch_key" value="louvain"/>
<output name="output_h5" file="harmony.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="obsm/X_pca_harmony"/>
</assert_contents>
</output>
</test>
<test>
<param name="input_obj_file" value="find_cluster.h5"/>
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<output name="output_h5" file="harmony_copy.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="harmony_copy.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
</tests>

Expand Down
6 changes: 3 additions & 3 deletions tools/tertiary-analysis/scanpy/scanpy-integrate-mnn.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_integrate_mnn" name="Scanpy MNN" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_integrate_mnn" name="Scanpy MNN" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>correct batch effects by matching mutual nearest neighbors</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -102,13 +102,13 @@
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<param name="batch_key" value="louvain"/>
<output name="output_h5" file="mnn.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="mnn.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
<test>
<param name="input_obj_file" value="find_cluster.h5"/>
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<output name="output_h5" file="mnn_copy.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="mnn_copy.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
</tests>-->

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_plot_scrublet" name="Scanpy Plot Scrublet" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_plot_scrublet" name="Scanpy Plot Scrublet" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>visualise multiplet scoring distribution</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -54,7 +54,7 @@ PYTHONIOENCODING=utf-8 scanpy-cli plot scrublet
<param name="input_format" value="anndata"/>
<param name="scale_hist_obs" value="linear"/>
<param name="scale_hist_sim" value="linear"/>
<output name="output_png" file="plot_scrublet.png" ftype="png" compare="sim_size"/>
<output name="output_png" file="plot_scrublet.png" ftype="png" compare="sim_size" delta_frac="0.1"/>
</test>
</tests>

Expand Down
8 changes: 4 additions & 4 deletions tools/tertiary-analysis/scanpy/scanpy-multiplet-scrublet.xml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_multiplet_scrublet" name="Scanpy Scrublet" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_multiplet_scrublet" name="Scanpy Scrublet" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>remove multiplets from annData objects with Scrublet</description>
<macros>
<import>scanpy_macros2.xml</import>
</macros>
<expand macro="requirements"/>
<command detect_errors="exit_code"><![CDATA[
ln -s '${input_obj_file}' input.h5 &&
PYTHONIOENCODING=utf-8 scanpy-multiplet scrublet
PYTHONIOENCODING=utf-8 scanpy-cli multiplet scrublet
#if $threshold
--threshold '${threshold}'
#end if
Expand Down Expand Up @@ -103,7 +103,7 @@ $filter
<param name="input_obj_file" value="read_10x.h5"/>
<param name="input_format" value="anndata"/>
<param name="output_format" value="anndata"/>
<output name="output_h5" file="scrublet.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="scrublet.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
</tests>

Expand All @@ -114,7 +114,7 @@ $filter
Predict cell doublets using a nearest-neighbor classifier of observed transcriptomes and simulated doublets. Works best if the input is a raw (unnormalized) counts matrix from a single sample or a collection of similar samples from the same experiment. This function is a wrapper around functions that pre-process using Scanpy and directly call functions of Scrublet().
This is a wrapper around the Scanpy wrapper for Scrublet- see https://scanpy.readthedocs.io/en/docsearch/external/scanpy.external.pp.scrublet.html.
This is a wrapper around the Scanpy wrapper for Scrublet- see https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.scrublet.html.
**Note**
Expand Down
6 changes: 3 additions & 3 deletions tools/tertiary-analysis/scanpy/scanpy-neighbours.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_compute_graph" name="Scanpy ComputeGraph" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_compute_graph" name="Scanpy ComputeGraph" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>to derive kNN graph</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -109,7 +109,7 @@ PYTHONIOENCODING=utf-8 scanpy-neighbors
<param name="knn" value="true"/>
<param name="random_seed" value="0"/>
<param name="method" value="umap"/>
<output name="output_h5" file="compute_graph.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="compute_graph.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
<test>
<param name="input_obj_file" value="run_pca.h5"/>
Expand All @@ -121,7 +121,7 @@ PYTHONIOENCODING=utf-8 scanpy-neighbors
<param name="knn" value="true"/>
<param name="random_seed" value="0"/>
<param name="method" value="umap"/>
<output name="output_h5" file="compute_graph.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" file="compute_graph.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
</test>
</tests>

Expand Down
8 changes: 6 additions & 2 deletions tools/tertiary-analysis/scanpy/scanpy-normalise-data.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_normalise_data" name="Scanpy NormaliseData" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
<tool id="scanpy_normalise_data" name="Scanpy NormaliseData" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
<description>to make all cells having the same total expression</description>
<macros>
<import>scanpy_macros2.xml</import>
Expand Down Expand Up @@ -75,7 +75,11 @@ PYTHONIOENCODING=utf-8 scanpy-normalise-data
<param name="output_format" value="anndata"/>
<param name="scale_factor" value="1e4"/>
<param name="save_raw" value="false"/>
<output name="output_h5" file="normalise_data.h5" ftype="h5" compare="sim_size"/>
<output name="output_h5" ftype="h5">
<assert_contents>
<has_h5_keys keys="var/n_cells_by_counts"/>
</assert_contents>
</output>
</test>
</tests>

Expand Down
Loading

0 comments on commit ee197a8

Please sign in to comment.