-
Notifications
You must be signed in to change notification settings - Fork 0
/
pipeline.nf
102 lines (78 loc) · 2.7 KB
/
pipeline.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
// Pipeline parameters. Each `params.*` value below is a default that can be
// overridden on the command line (e.g. `--sample_sheet <file>`).
params.sample_sheet = '.test_data/file_list.txt'
params.build = "hg38"
params.oc_modules = "/data/project/worthey_lab/projects/experimental_pipelines/tarun/opencravat/modules"

// Results default to $USER_SCRATCH when that environment variable is set,
// and to /tmp otherwise.
def scratchRoot = System.getenv("USER_SCRATCH") ?: "/tmp"
params.outdir = "${scratchRoot}"

// Script-level variable (declared without `def`) that the process scripts
// further down interpolate as the results directory.
output_dir = params.outdir

// Startup banner echoing the effective parameter values.
def banner = """\
D I T T O - N F P I P E L I N E
===================================
Parameters:
build : ${params.build}
sample_sheet : ${params.sample_sheet}
output_dir : ${output_dir}
oc_modules : ${params.oc_modules}
""".stripIndent()
log.info banner
// Annotate one variant file with OpenCRAVAT (`oc`) and emit the resulting
// `<input>.variant.csv` report.
//
// Inputs (positional):
//   var_ch       - variant file to annotate
//   var_build    - genome build, passed to `oc run -l`
//   oc_mod_path  - OpenCRAVAT modules directory, passed to `oc config md`
// Output:
//   `<input>.variant.csv`, which is also copied into params.outdir
process runOC {
    // Anchor the environment file on the project directory (the same root the
    // pipeline uses for its other configs/ files) so the run does not depend
    // on the directory Nextflow was launched from.
    conda "${projectDir}/configs/envs/open-cravat.yaml"

    // Publish the report through Nextflow rather than a manual `cp` inside the
    // task script.
    publishDir params.outdir, mode: 'copy'

    input:
    path var_ch
    val var_build
    val oc_mod_path

    output:
    path "${var_ch}.variant.csv"

    script:
    // Arguments are quoted so file names containing spaces survive the shell.
    // NOTE(review): `--mp 2` is hard-coded and no `cpus` directive is declared — confirm intent.
    """
    oc config md "${oc_mod_path}"
    oc run "${var_ch}" -l "${var_build}" -t csv --mp 2 --package mypackage -d .
    # Remove by-products that are not declared as outputs.
    rm -rf "${var_ch}.sqlite" "${var_ch}.err"
    """
}
// Run the project's single-sample annotation parser on one annotation CSV.
//
// Input:
//   var_ann_ch - annotation CSV (the output of runOC in this pipeline)
// Output:
//   `<input>_parsed.csv.gz`, the file name handed to the parser via `-o`
process parseAnnotation {
    // Bare Python environment created from a conda package spec.
    // NOTE(review): any third-party packages the parser imports would need to
    // be added here — confirm against parse_single_sample.py.
    conda 'python=3.10'

    input:
    path var_ann_ch

    output:
    path "${var_ann_ch}_parsed.csv.gz"

    script:
    // `projectDir` replaces the deprecated `baseDir` alias; paths are quoted so
    // names containing spaces survive the shell.
    """
    python "${projectDir}/src/annotation_parsing/parse_single_sample.py" \\
        -i "${var_ann_ch}" \\
        -e parse \\
        -o "${var_ann_ch}_parsed.csv.gz" \\
        -c "${projectDir}/configs/opencravat_test_config.json"
    """
}
// Run the project's prediction script on one parsed annotation file.
//
// Input:
//   var_parse_ch - parsed annotation file (the output of parseAnnotation in this pipeline)
// Output:
//   none declared; the script is given the results directory via `-o`.
process prediction {
    // Anchor the environment file on the project directory, matching the
    // `configs/` location the script below already relies on, so the run does
    // not depend on the directory Nextflow was launched from.
    conda "${projectDir}/configs/envs/ditto-nf.yaml"

    input:
    path var_parse_ch

    script:
    // `projectDir` replaces the deprecated `baseDir` alias; paths are quoted so
    // names containing spaces survive the shell.
    """
    python "${projectDir}/src/predict/predict.py" \\
        -i "${var_parse_ch}" \\
        -o "${output_dir}" \\
        -c "${projectDir}/configs/col_config.yaml" \\
        -d "${projectDir}/model/"
    """
}
// Entry workflow: every variant file listed in the sample sheet is annotated
// (runOC), parsed (parseAnnotation) and scored (prediction), one task per file
// at each step.
workflow {
    // The sample sheet is read as a header-less CSV; the first column of each
    // row is the path to a variant file. `checkIfExists` makes a missing sample
    // sheet or a missing input file an immediate error instead of an empty or
    // broken run, and `file()` turns the raw string into a path object.
    vcfFile = Channel
        .fromPath(params.sample_sheet, checkIfExists: true)
        .splitCsv(header: false, strip: true)
        .map { row -> file(row[0], checkIfExists: true) }

    vcfBuild = params.build
    oc_mod_path = params.oc_modules

    // Each process emits one file per task, so the outputs can be chained
    // directly without any reshaping.
    runOC(vcfFile, vcfBuild, oc_mod_path)
    parseAnnotation(runOC.out)
    prediction(parseAnnotation.out)
}