Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC contrasts csv -> yaml #382

Merged
merged 10 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
branches:
- dev
- master
- dev_tmp
release:
types: [published]
workflow_dispatch:
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#345](https://github.com/nf-core/differentialabundance/pull/345)] - Plot differentially expressed genes by gene biotype ([@atrigila](https://github.com/atrigila), review by [@grst](https://github.com/grst))
- [[#343](https://github.com/nf-core/differentialabundance/pull/343)] - Add pipeline-level nf-tests ([@atrigila](https://github.com/atrigila), review by [@pinin4fjords](https://github.com/pinin4fjords) and [@nschcolnicov](https://github.com/nschcolnicov))
- [[#286](https://github.com/nf-core/differentialabundance/pull/286)] - Integration of limma voom for rnaseq data ([@KamilMaliszArdigen](https://github.com/KamilMaliszArdigen), review by [@pinin4fjords](https://github.com/pinin4fjords))
- [[#380](https://github.com/nf-core/differentialabundance/pull/380)] - Replace local filter_diff_table module with nf-core one, and create nf-tests for tabular_to_gsea_chip. ([@nschcolnicov](https://github.com/nschcolnicov), review by [@pinin4fjords](https://github.com/pinin4fjords))
- [[#382](https://github.com/nf-core/differentialabundance/pull/382)] - Add YAML formatted contrasts file handling. ([@nschcolnicov](https://github.com/nschcolnicov), review by [@TODO](https://github.com/TODO))

### Fixed

Expand Down
568 changes: 568 additions & 0 deletions bin/validate_fom_components_yaml.R

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,8 @@ process {
].join(' ').trim() }
}

withName: FILTER_DIFFTABLE {
ext.prefix = { "${meta.id}" }
withName: CUSTOM_FILTERDIFFERENTIALTABLE {
ext.prefix = { "${input_file.toString().split("\\.").init().join(".")}" }
publishDir = [
[
path: { "${params.outdir}/tables/differential" },
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"custom/filterdifferentialtable": {
"branch": "master",
"git_sha": "833a65446ea117d092d4940f098236b6766e7aac",
"installed_by": ["modules"]
},
"custom/matrixfilter": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
Expand Down
52 changes: 0 additions & 52 deletions modules/local/filter_difftable.nf

This file was deleted.

5 changes: 5 additions & 0 deletions modules/local/shinyngs/validatefomcomponents/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::r-shinyngs=2.0.0
45 changes: 45 additions & 0 deletions modules/local/shinyngs/validatefomcomponents/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Local copy of the shinyngs "validate FOM components" step. It runs
// validate_fom_components_yaml.R over the sample sheet, the assay matrices, the
// (optional) feature metadata and the contrasts file, and emits the TSV files
// that the script writes into its output directory.
//
// TODO: delete this module (and its whole folder) once
// https://github.com/nf-core/differentialabundance/issues/362 is closed and the
// nf-core version of the module replaces it.
process SHINYNGS_VALIDATEFOMCOMPONENTS {
// The task tag is the staged sample sheet path, not meta.id.
tag "$sample"
label 'process_single'

conda "${moduleDir}/environment.yml"
// NOTE(review): the container name suggests it bundles both r-shinyngs and r-yaml,
// whereas environment.yml pins only r-shinyngs=2.0.0 — confirm the conda
// environment provides everything the script needs.
container "community.wave.seqera.io/library/r-shinyngs_r-yaml:aa63537f6db6190c"

input:
// Sample sheet plus the assay matrix file(s), sharing one meta map.
tuple val(meta), path(sample), path(assay_files)
// Feature metadata; only forwarded to the script when the value is truthy (see `feature` below).
tuple val(meta2), path(feature_meta)
// Contrasts definition file.
tuple val(meta3), path(contrasts)

output:
// Every glob starts with `*/` because the script is given "$prefix" as its
// --output_directory, so results land in a sub-directory of the task dir.
tuple val(meta), path("*/*.sample_metadata.tsv") , emit: sample_meta
tuple val(meta), path("*/*.feature_metadata.tsv") , emit: feature_meta, optional: true
tuple val(meta), path("*/*.assay.tsv") , emit: assays
tuple val(meta), path("*/*.contrasts_file.tsv") , emit: contrasts
path "versions.yml" , emit: versions

when:
// Run unless configuration sets task.ext.when to a falsy value.
task.ext.when == null || task.ext.when

script:
// For full list of available args see
// https://github.com/pinin4fjords/shinyngs/blob/develop/exec/validate_fom_components.R
// Extra command-line arguments injected from configuration (empty by default).
def args = task.ext.args ?: ''
// Output directory name; falls back to meta.id when ext.prefix is not configured.
def prefix = task.ext.prefix ?: meta.id
// Build the --feature_metadata option only when feature metadata was supplied.
def feature = feature_meta ? "--feature_metadata '$feature_meta'" : ''

// Multiple assay files are passed to the script as a single comma-separated value.
// The heredoc at the end records the R and shinyngs versions in versions.yml;
// `\$` defers those substitutions to the shell instead of Groovy interpolation.
"""
validate_fom_components_yaml.R \\
--sample_metadata "$sample" \\
$feature \\
--assay_files "${assay_files.join(',')}" \\
--contrasts_file "$contrasts" \\
--output_directory "$prefix" \\
$args

cat <<-END_VERSIONS > versions.yml
"${task.process}":
r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
r-shinyngs: \$(Rscript -e "library(shinyngs); cat(as.character(packageVersion('shinyngs')))")
END_VERSIONS
"""
}
100 changes: 100 additions & 0 deletions modules/local/shinyngs/validatefomcomponents/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
name: "shinyngs_validatefomcomponents"
description: validate consistency of feature and sample annotations with matrices
and contrasts
keywords:
- expression
- features
- observations
- validation
tools:
- "shinyngs":
description: "Provides Shiny applications for various array and NGS applications.
Currently very RNA-seq centric, with plans for expansion."
homepage: "https://github.com/pinin4fjords/shinyngs"
documentation: "https://rawgit.com/pinin4fjords/shinyngs/master/vignettes/shinyngs.html"
tool_dev_url: "https://github.com/pinin4fjords/shinyngs"
licence: ["AGPL v3"]
identifier: ""
input:
- - meta:
type: map
description: |
Groovy Map containing information on experiment, at a minimum an id.
e.g. [ id:'test' ]
- sample:
type: file
description: |
CSV-format sample sheet with sample metadata
- assay_files:
type: file
description: |
List of TSV-format matrix files representing different measures for the same samples (e.g. raw and normalised).
- - meta2:
type: map
description: |
Groovy Map containing information on features.
e.g. [ id:'test' ]
- feature_meta:
type: file
description: |
TSV-format feature (e.g. gene) metadata
- - meta3:
type: map
description: |
Groovy Map containing information on contrasts.
e.g. [ id:'test' ]
- contrasts:
type: file
description: |
Contrasts definition file. In CSV format it has four columns identifying the sample sheet variable, reference level, treatment level, and optionally a comma-separated list of covariates used as blocking factors; a YAML-formatted contrasts file is also handled.
output:
- sample_meta:
- meta:
type: map
description: |
Groovy Map containing information on experiment.
e.g. [ id:'test' ]
- "*/*.sample_metadata.tsv":
type: file
description: File containing validated sample metadata
pattern: "*/*.sample_metadata.tsv"
- feature_meta:
- meta:
type: map
description: |
Groovy Map containing information on experiment.
e.g. [ id:'test' ]
- "*/*.feature_metadata.tsv":
type: file
description: File containing validated feature metadata
pattern: "*/*.feature_metadata.tsv"
optional: true
- assays:
- meta:
type: map
description: |
Groovy Map containing information on experiment.
e.g. [ id:'test' ]
- "*/*.assay.tsv":
type: file
description: Files containing validated matrices
pattern: "*/*.assay.tsv"
- contrasts:
- meta:
type: map
description: |
Groovy Map containing information on experiment.
e.g. [ id:'test' ]
- "*/*.contrasts_file.tsv":
type: file
description: File containing validated contrasts
pattern: "*/*.contrasts_file.tsv"
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@pinin4fjords"
maintainers:
- "@pinin4fjords"
6 changes: 6 additions & 0 deletions modules/local/tabulartogseachip/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
channels:
- conda-forge
- bioconda

dependencies:
- conda-forge::gawk=5.1.0
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
process TABULAR_TO_GSEA_CHIP {

tag "$id"
label 'process_single'

conda "conda-forge::gawk=5.1.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gawk:5.1.0' :
'biocontainers/gawk:5.1.0' }"
conda "${moduleDir}/environment.yml"
container "community.wave.seqera.io/library/gawk:5.1.0--fa97c4ccf4cfbc4b"

input:
path tsv
Expand Down Expand Up @@ -41,4 +40,15 @@ process TABULAR_TO_GSEA_CHIP {
bash: \$(echo \$(bash --version | grep -Eo 'version [[:alnum:].]+' | sed 's/version //'))
END_VERSIONS
"""

stub:
"""
touch stub.chip

cat <<-END_VERSIONS > versions.yml
"${task.process}":
bash: \$(echo \$(bash --version | grep -Eo 'version [[:alnum:].]+' | sed 's/version //'))
END_VERSIONS
"""

}
41 changes: 41 additions & 0 deletions modules/local/tabulartogseachip/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: tabular_to_gsea_chip
description: Convert tabular data into GSEA-compatible CHIP files.
keywords:
- GSEA
- CHIP
- Bioinformatics
tools:
- gawk:
description: |
GNU Awk (gawk) is a powerful programming language designed for text processing and typically used as a data extraction and reporting tool.
It is used here to manipulate tabular data and create the CHIP file format.
homepage: https://www.gnu.org/software/gawk/
documentation: https://www.gnu.org/software/gawk/manual/
licence: ["GPL-3.0-or-later"]
input:
- tsv:
type: file
description: |
Tab-separated values (TSV) file containing data to be converted into a CHIP file.
- - id:
type: string
description: |
Feature ID attribute in the abundance table as well as in the GTF file (e.g. the gene_id field).
- symbol:
type: string
description: |
Feature name attribute in the abundance table as well as in the GTF file (e.g. the gene symbol field).
output:
- chip:
type: file
description: Generated GSEA-compatible CHIP file.
pattern: "*.chip"
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@pinin4fjords"
maintainers:
- "@nschcolnicov"
49 changes: 49 additions & 0 deletions modules/local/tabulartogseachip/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// nf-test specification for the local TABULAR_TO_GSEA_CHIP process.
// It contains two cases: a real run and a `-stub` run, both fed the same inputs
// and both checked against a stored snapshot of the process outputs.
nextflow_process {

name "Test Process TABULAR_TO_GSEA_CHIP"
// Path to the module under test, relative to this test file.
script "../main.nf"
process "TABULAR_TO_GSEA_CHIP"
// Tags allow this spec to be selected when running a subset of tests.
tag "modules"
tag "modules_nfcore"
tag "tabular_to_gsea_chip"

// Real execution: converts a feature-metadata TSV from nf-core/test-datasets.
test("test_tabular_to_gsea_chip") {

when {
process {
// input[0] is the tabular file; input[1] is a two-element list —
// presumably the id and symbol column names described in the module's
// meta.yml (TODO confirm against the process input declaration).
"""
input[0] = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/differentialabundance/modules_testdata/Mus_musculus.anno.feature_metadata.tsv")
input[1] = ["gene_id", "gene_name"]
"""
}
}

then {
// The process must succeed and its outputs must match the recorded snapshot.
assertAll (
{ assert process.success },
{ assert snapshot(process.out).match() },
)
}
}

// Stub execution: same inputs, but run with the `-stub` option.
test("test_tabular_to_gsea_chip - stub") {

options "-stub"

when {
process {
"""
input[0] = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/differentialabundance/modules_testdata/Mus_musculus.anno.feature_metadata.tsv")
input[1] = ["gene_id", "gene_name"]
"""
}
}

then {
// Same checks as the real run, compared against this test's own snapshot entry.
assertAll (
{ assert process.success },
{ assert snapshot(process.out).match() },
)
}
}
}
Loading
Loading