Move scanpy tools to scanpy 1.9.3 (#313)

* Initial changes * Use delta_frac in most tests * Update anndata_operations.xml changes validation of test output * Update anndata_operations.xml fixes test * Update anndata_operations.xml revert changes in test 1 * Update scanpy-filter-cells.xml adds assert to test output * Update scanpy-filter-genes.xml adds assert to test output * Update scanpy-find-cluster.xml adds assert method in test output * Update scanpy-filter-genes.xml fixes lint * Update scanpy-find-markers.xml adds assert in test output * Update scanpy-find-variable-genes.xml adds assert in test output * adds assert in test output * adds assert in test output * removes unknown character * adds assert in test output * fixes broken URL * removes redundant output * Move to released bioconda package * Versions and remove old unused macros * Remove test-data h5 files not needed anymore --------- Co-authored-by: Anil Thanki <[email protected]>
ebi-gene-expression-group · Feb 27, 2024 · ee197a8 · ee197a8
1 parent 56273bc
commit ee197a8
Show file tree

Hide file tree

Showing 40 changed files with 181 additions and 193 deletions.
diff --git a/tools/tertiary-analysis/scanpy/anndata_operations.xml b/tools/tertiary-analysis/scanpy/anndata_operations.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
-  <description>modifies metadata and flags genes</description>
+<tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+  <description>is a Swiss army knife for AnnData files</description>
   <macros>
     <import>scanpy_macros2.xml</import>
   </macros>
@@ -446,7 +446,11 @@ for field_value in adata.obs["${split_on_obs.key}"].unique():
   <tests>
     <test>
       <param name="input_obj_file" value="find_cluster.h5"/>
-      <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/>
+      <output name="output_h5ad" ftype="h5ad">
+        <assert_contents>
+          <has_h5_keys keys="var/gene_symbols"/>
+        </assert_contents>
+      </output>
     </test>
     <test>
       <param name="input_obj_file" value="mnn.h5"/>
@@ -511,7 +515,7 @@ for field_value in adata.obs["${split_on_obs.key}"].unique():
         <param name="default" value="true"/>
         <param name="r_source" value="read_10x.h5"/>
       </conditional>
-      <output name="output_h5ad" file="anndata_ops_raw.h5" ftype="h5ad" compare="sim_size">
+      <output name="output_h5ad" ftype="h5ad">
         <assert_contents>
           <has_h5_keys keys="raw/X" />
         </assert_contents>
@@ -526,7 +530,7 @@ for field_value in adata.obs["${split_on_obs.key}"].unique():
           <param name="dest" value='filtered'/>
         </repeat>
       </conditional>
-      <output name="output_h5ad" file="anndata_ops_xlayer.h5" ftype="h5ad" compare="sim_size">
+      <output name="output_h5ad" ftype="h5ad">
         <assert_contents>
           <has_h5_keys keys="layers/filtered" />
         </assert_contents>
@@ -541,7 +545,7 @@ for field_value in adata.obs["${split_on_obs.key}"].unique():
         </repeat>
         <param name="layer_sources" value='anndata_ops_xlayer.h5'/>
       </conditional>
-      <output name="output_h5ad" file="anndata_ops_layer.h5" ftype="h5ad" compare="sim_size">
+      <output name="output_h5ad" ftype="h5ad">
         <assert_contents>
           <has_h5_keys keys="layers/filtered" />
         </assert_contents>

diff --git a/tools/tertiary-analysis/scanpy/scanpy-filter-cells.xml b/tools/tertiary-analysis/scanpy/scanpy-filter-cells.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_filter_cells" name="Scanpy FilterCells" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@" >
+<tool id="scanpy_filter_cells" name="Scanpy FilterCells" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@" >
   <description>based on counts and numbers of genes expressed</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -88,7 +88,11 @@ PYTHONIOENCODING=utf-8 scanpy-filter-cells
         <param name="min" value="0"/>
         <param name="max" value="1e9"/>
       </repeat>
-      <output name="output_h5" file="filter_cells.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="obs/n_genes_by_counts"/>
+        </assert_contents>
+      </output>
     </test>
   </tests>
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-filter-genes.xml b/tools/tertiary-analysis/scanpy/scanpy-filter-genes.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_filter_genes" name="Scanpy FilterGenes" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_filter_genes" name="Scanpy FilterGenes" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>based on counts and numbers of cells expressed</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -82,7 +82,11 @@ $force_recalc
         <param name="min" value="0"/>
         <param name="max" value="1e9"/>
       </repeat>
-      <output name="output_h5" file="filter_genes.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="obs/n_genes_by_counts" />
+        </assert_contents>
+      </output>
     </test>
   </tests>
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-find-cluster.xml b/tools/tertiary-analysis/scanpy/scanpy-find-cluster.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_find_cluster" name="Scanpy FindCluster" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_find_cluster" name="Scanpy FindCluster" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>based on community detection on KNN graph</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -97,7 +97,11 @@ PYTHONIOENCODING=utf-8 scanpy-find-cluster
       <param name="default" value="false"/>
       <param name="resolution_file" value="resolution.txt"/>
       <param name="random_seed" value="0"/>
-      <output name="output_h5" file="find_cluster.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="obs/louvain"/>
+        </assert_contents>
+      </output>
       <output name="output_txt" file="find_cluster.tsv" ftype="tsv"/>
     </test>
     <test>
@@ -107,7 +111,11 @@ PYTHONIOENCODING=utf-8 scanpy-find-cluster
       <param name="default" value="false"/>
       <param name="resolution" value="1.0"/>
       <param name="random_seed" value="0"/>
-      <output name="output_h5" file="find_cluster.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="obs/louvain"/>
+        </assert_contents>
+      </output>
       <output name="output_txt" file="find_cluster.tsv" ftype="tsv"/>
     </test>
     <test>
@@ -119,7 +127,11 @@ PYTHONIOENCODING=utf-8 scanpy-find-cluster
       <param name="resolution" value="1.0"/>
       <param name="key_added" value="METHOD_RESOLUTION"/>
       <param name="random_seed" value="0"/>
-      <output name="output_h5" file="louvain_1.0" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="obs/louvain_1.0"/>
+        </assert_contents>
+      </output>
       <output name="output_txt" file="louvain_1.0.tsv" ftype="tsv"/>
     </test>
   </tests>

diff --git a/tools/tertiary-analysis/scanpy/scanpy-find-markers.xml b/tools/tertiary-analysis/scanpy/scanpy-find-markers.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_find_markers" name="Scanpy FindMarkers" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_find_markers" name="Scanpy FindMarkers" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>to find differentially expressed genes between groups</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -113,8 +113,12 @@ PYTHONIOENCODING=utf-8 scanpy-find-markers
       <param name="key_added" value="GROUPBY_marker"/>
       <param name="method" value="t-test_overestim_var"/>
       <param name="rankby_abs" value="false"/>
-      <output name="output_h5" file="find_markers.h5" ftype="h5" compare="sim_size"/>
-      <output name="output_tsv" file="diffexp.tsv" ftype="tabular" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="uns/louvain_1.0_marker"/>
+        </assert_contents>
+      </output>
+      <output name="output_tsv" file="diffexp.tsv" ftype="tabular" compare="sim_size" delta_frac="0.1"/>
     </test>
     <test>
       <param name="input_obj_file" value="louvain_1.0"/>
@@ -127,7 +131,7 @@ PYTHONIOENCODING=utf-8 scanpy-find-markers
       <param name="key_added" value="GROUPBY_marker"/>
       <param name="method" value="t-test_overestim_var"/>
       <param name="rankby_abs" value="false"/>
-      <output name="output_tsv" file="diffexp.tsv" ftype="tabular" compare="sim_size"/>
+      <output name="output_tsv" file="diffexp.tsv" ftype="tabular" compare="sim_size" delta_frac="0.1"/>
     </test>
     <test>
       <param name="input_obj_file" value="mnn.h5"/>

diff --git a/tools/tertiary-analysis/scanpy/scanpy-find-variable-genes.xml b/tools/tertiary-analysis/scanpy/scanpy-find-variable-genes.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_find_variable_genes" name="Scanpy FindVariableGenes" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_find_variable_genes" name="Scanpy FindVariableGenes" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>based on normalised dispersion of expression</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -71,7 +71,11 @@ PYTHONIOENCODING=utf-8 scanpy-find-variable-genes
       <param name="max_mean" value="3"/>
       <param name="min_disp" value="0.5"/>
       <param name="max_disp" value="1e9"/>
-      <output name="output_h5" file="find_variable_genes.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+         <assert_contents>
+           <has_h5_keys keys="uns/hvg"/>
+         </assert_contents>
+      </output>
     </test>
   </tests>
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-integrate-bbknn.xml b/tools/tertiary-analysis/scanpy/scanpy-integrate-bbknn.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_integrate_bbknn" name="Scanpy BBKNN" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_integrate_bbknn" name="Scanpy BBKNN" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>batch-balanced K-nearest neighbours</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -124,13 +124,13 @@ echo "No batch variables passed, simply passing original input as output unchang
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
       <param name="batch_key" value="louvain"/>
-      <output name="output_h5" file="bbknn.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="bbknn.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
     <test>
       <param name="input_obj_file" value="find_cluster.h5"/>
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
-      <output name="output_h5" file="bbknn_copy.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="bbknn_copy.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
   </tests>
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-integrate-combat.xml b/tools/tertiary-analysis/scanpy/scanpy-integrate-combat.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_integrate_combat" name="Scanpy ComBat" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_integrate_combat" name="Scanpy ComBat" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>adjust expression for variables that might introduce batch effect</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -8,7 +8,7 @@
   <command detect_errors="exit_code"><![CDATA[
 #if $batch_key
   ln -s '${input_obj_file}' input.h5 &&
-  PYTHONIOENCODING=utf-8 scanpy-integrate combat 
+  PYTHONIOENCODING=utf-8 scanpy-cli integrate combat 
   --batch-key '${batch_key}'
   #if $batch_layer
     --batch-layer '${batch_layer}'
@@ -58,13 +58,17 @@
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
       <param name="batch_key" value="louvain"/>
-      <output name="output_h5" file="combat.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="layers/combat"/>
+        </assert_contents>
+      </output>
     </test>
     <test>
       <param name="input_obj_file" value="find_cluster.h5"/>
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
-      <output name="output_h5" file="combat_copy.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="combat_copy.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
   </tests>
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-integrate-harmony.xml b/tools/tertiary-analysis/scanpy/scanpy-integrate-harmony.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_integrate_harmony" name="Scanpy Harmony" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_integrate_harmony" name="Scanpy Harmony" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>adjust principal components for variables that might introduce batch effect</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -8,7 +8,7 @@
   <command detect_errors="exit_code"><![CDATA[
 #if $batch_key
   ln -s '${input_obj_file}' input.h5 &&
-  PYTHONIOENCODING=utf-8 scanpy-integrate harmony
+  PYTHONIOENCODING=utf-8 scanpy-cli integrate harmony
   --batch-key '${batch_key}'
   #if $basis
     --basis '${basis}'
@@ -114,13 +114,17 @@
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
       <param name="batch_key" value="louvain"/>
-      <output name="output_h5" file="harmony.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="obsm/X_pca_harmony"/>
+        </assert_contents>
+      </output>
     </test>
     <test>
       <param name="input_obj_file" value="find_cluster.h5"/>
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
-      <output name="output_h5" file="harmony_copy.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="harmony_copy.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
   </tests>
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-integrate-mnn.xml b/tools/tertiary-analysis/scanpy/scanpy-integrate-mnn.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_integrate_mnn" name="Scanpy MNN" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_integrate_mnn" name="Scanpy MNN" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>correct batch effects by matching mutual nearest neighbors</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -102,13 +102,13 @@
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
       <param name="batch_key" value="louvain"/>
-      <output name="output_h5" file="mnn.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="mnn.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
     <test>
       <param name="input_obj_file" value="find_cluster.h5"/>
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
-      <output name="output_h5" file="mnn_copy.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="mnn_copy.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
   </tests>-->
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-multiplet-scrublet-plot.xml b/tools/tertiary-analysis/scanpy/scanpy-multiplet-scrublet-plot.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_plot_scrublet" name="Scanpy Plot Scrublet" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_plot_scrublet" name="Scanpy Plot Scrublet" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>visualise multiplet scoring distribution</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -54,7 +54,7 @@ PYTHONIOENCODING=utf-8 scanpy-cli plot scrublet
       <param name="input_format" value="anndata"/>
       <param name="scale_hist_obs" value="linear"/>
       <param name="scale_hist_sim" value="linear"/>
-      <output name="output_png" file="plot_scrublet.png" ftype="png" compare="sim_size"/>
+      <output name="output_png" file="plot_scrublet.png" ftype="png" compare="sim_size" delta_frac="0.1"/>
     </test>
   </tests>
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-multiplet-scrublet.xml b/tools/tertiary-analysis/scanpy/scanpy-multiplet-scrublet.xml
@@ -1,13 +1,13 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_multiplet_scrublet" name="Scanpy Scrublet" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_multiplet_scrublet" name="Scanpy Scrublet" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>remove multiplets from annData objects with Scrublet</description>
   <macros>
     <import>scanpy_macros2.xml</import>
   </macros>
   <expand macro="requirements"/>
   <command detect_errors="exit_code"><![CDATA[
 ln -s '${input_obj_file}' input.h5 &&
-PYTHONIOENCODING=utf-8 scanpy-multiplet scrublet 
+PYTHONIOENCODING=utf-8 scanpy-cli multiplet scrublet 
 #if $threshold
     --threshold '${threshold}'
 #end if
@@ -103,7 +103,7 @@ $filter
       <param name="input_obj_file" value="read_10x.h5"/>
       <param name="input_format" value="anndata"/>
       <param name="output_format" value="anndata"/>
-      <output name="output_h5" file="scrublet.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="scrublet.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
   </tests>
 
@@ -114,7 +114,7 @@ $filter
 
     Predict cell doublets using a nearest-neighbor classifier of observed transcriptomes and simulated doublets. Works best if the input is a raw (unnormalized) counts matrix from a single sample or a collection of similar samples from the same experiment. This function is a wrapper around functions that pre-process using Scanpy and directly call functions of Scrublet(). 
 
-    This is a wrapper around the Scanpy wrapper for Scrublet- see https://scanpy.readthedocs.io/en/docsearch/external/scanpy.external.pp.scrublet.html.
+    This is a wrapper around the Scanpy wrapper for Scrublet; see https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.scrublet.html.
 
     **Note**
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-neighbours.xml b/tools/tertiary-analysis/scanpy/scanpy-neighbours.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_compute_graph" name="Scanpy ComputeGraph" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_compute_graph" name="Scanpy ComputeGraph" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>to derive kNN graph</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -109,7 +109,7 @@ PYTHONIOENCODING=utf-8 scanpy-neighbors
       <param name="knn" value="true"/>
       <param name="random_seed" value="0"/>
       <param name="method" value="umap"/>
-      <output name="output_h5" file="compute_graph.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="compute_graph.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
     <test>
       <param name="input_obj_file" value="run_pca.h5"/>
@@ -121,7 +121,7 @@ PYTHONIOENCODING=utf-8 scanpy-neighbors
       <param name="knn" value="true"/>
       <param name="random_seed" value="0"/>
       <param name="method" value="umap"/>
-      <output name="output_h5" file="compute_graph.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" file="compute_graph.h5" ftype="h5" compare="sim_size" delta_frac="0.1"/>
     </test>
   </tests>
 

diff --git a/tools/tertiary-analysis/scanpy/scanpy-normalise-data.xml b/tools/tertiary-analysis/scanpy/scanpy-normalise-data.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="scanpy_normalise_data" name="Scanpy NormaliseData" version="@TOOL_VERSION@+galaxy93" profile="@PROFILE@">
+<tool id="scanpy_normalise_data" name="Scanpy NormaliseData" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
   <description>to make all cells having the same total expression</description>
   <macros>
     <import>scanpy_macros2.xml</import>
@@ -75,7 +75,11 @@ PYTHONIOENCODING=utf-8 scanpy-normalise-data
       <param name="output_format" value="anndata"/>
       <param name="scale_factor" value="1e4"/>
       <param name="save_raw" value="false"/>
-      <output name="output_h5" file="normalise_data.h5" ftype="h5" compare="sim_size"/>
+      <output name="output_h5" ftype="h5">
+        <assert_contents>
+          <has_h5_keys keys="var/n_cells_by_counts"/>
+        </assert_contents>
+      </output>
     </test>
   </tests>