-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmain.nf
More file actions
160 lines (124 loc) · 4.23 KB
/
main.nf
File metadata and controls
160 lines (124 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
workflow {
    // Fail fast: the R and Python quantile normalizations are mutually
    // exclusive. Validate the configuration before wiring up any stage so
    // an invalid run aborts before convert/normalize are ever invoked.
    if ( params.normalize_quantile_py == true && params.normalize_quantile_r == true ) {
        error "error: only one quantile method (R or python) should be enabled"
    }

    // Helper: build a (dataset, file) channel from a glob under the input
    // directory. size: 1 / flat: true yields flat (id, file) tuples.
    load_channel = { pattern ->
        Channel.fromFilePairs("${params.input_dir}/${pattern}", size: 1, flat: true)
    }

    // Load each class of input expression matrix plus the sample labels.
    fpkm_txt_files = load_channel(params.fpkm_txt)
    raw_txt_files  = load_channel(params.raw_txt)
    tpm_txt_files  = load_channel(params.tpm_txt)
    emx_txt_files  = load_channel(params.emx_txt)
    labels_files   = load_channel(params.labels_txt)

    // Merge all expression-matrix channels into one (dataset, file) stream;
    // every downstream stage consumes this combined channel.
    data_txt_files = Channel.empty().mix(
        fpkm_txt_files,
        raw_txt_files,
        tpm_txt_files,
        emx_txt_files
    )

    // Each stage below runs only when its corresponding flag is enabled.

    // convert plaintext matrices to binary (.npy)
    if ( params.convert_txt_npy == true ) {
        convert_txt_npy(data_txt_files)
    }

    // normalize expression matrices
    if ( params.normalize == true ) {
        normalize(data_txt_files)
    }

    // visualize matrices; requires the labels file joined by dataset key
    if ( params.visualize == true ) {
        inputs = data_txt_files.join(labels_files)
        visualize(inputs)
    }

    // partition matrices into sub-matrices
    if ( params.partition == true ) {
        partition(data_txt_files)
    }
}
/**
 * The convert process takes an expression matrix and converts it from
 * plaintext to binary (numpy) format.
 *
 * Input:  tuple of (dataset name, plaintext matrix file)
 * Output: tuple of (dataset, *.npy matrix, *.rownames.txt, *.colnames.txt)
 *         NOTE(review): the rownames/colnames sidecar files are assumed to
 *         be emitted by convert.py — confirm against that script.
 */
process convert_txt_npy {
    tag "${dataset}"
    publishDir "${params.output_dir}/${dataset}"

    input:
    tuple val(dataset), path(input_file)

    output:
    tuple val(dataset), path("*.npy"), path("*.rownames.txt"), path("*.colnames.txt")

    script:
    // Use Nextflow's baseName (file name minus its final extension) instead
    // of shelling out to `basename ... .txt`; for the .txt inputs this
    // pipeline supplies, both produce the same <name>.npy target.
    """
    convert.py ${input_file} ${input_file.baseName}.npy
    """
}
/**
 * The normalize process takes an FPKM expression matrix and applies a series
 * of transformations (log2, k-s test outlier removal, quantile normalization)
 * which produces a normalized expression matrix.
 *
 * Input:  tuple of (dataset name, expression matrix file)
 * Output: tuple of (dataset, <dataset>.emx.txt)    -- normalized matrix
 *         tuple of (dataset, <dataset>.kstest.txt) -- K-S test log
 */
process normalize {
    tag "${dataset}"
    publishDir "${params.output_dir}/${dataset}"
    input:
    tuple val(dataset), path(input_file)
    output:
    tuple val(dataset), path("${dataset}.emx.txt")
    tuple val(dataset), path("${dataset}.kstest.txt")
    // normalize.py runs under MPI (normalize_np ranks); the --log2, --kstest
    // and --quantile flags are included only when their params are truthy.
    // If normalize_quantile_r is enabled, quantile normalization is instead
    // performed by normalize_R, which appears to read FPKM.txt and write
    // GEM.txt with fixed names — hence the mv shuffle around it
    // (NOTE(review): confirm against the normalize_R script).
    // The workflow rejects enabling both quantile methods at once.
    script:
    """
    mpirun -np ${params.normalize_np} normalize.py \
    ${input_file} \
    ${dataset}.emx.txt \
    ${params.normalize_log2 ? "--log2" : ""} \
    ${params.normalize_kstest ? "--kstest" : ""} \
    --ks-log ${dataset}.kstest.txt \
    ${params.normalize_quantile_py ? "--quantile" : ""}
    if [[ ${params.normalize_quantile_r} == true ]]; then
    mv ${dataset}.emx.txt FPKM.txt
    normalize_R --quantile
    mv GEM.txt ${dataset}.emx.txt
    fi
    """
}
/**
 * The visualize process takes an expression matrix and produces a set of
 * visualizations based on the input configuration.
 *
 * Input:  tuple of (dataset name, matrix file, labels file)
 * Output: tuple of (dataset, *.png) -- whichever plots were enabled
 */
process visualize {
    tag "${dataset}"
    publishDir "${params.output_dir}/${dataset}"
    input:
    tuple val(dataset), path(data_file), path(labels_file)
    output:
    tuple val(dataset), path("*.png")
    // --density and --tsne are emitted only when their params are truthy;
    // --tsne-na / --tsne-npca are always passed (NOTE(review): presumably
    // ignored by visualize.py when --tsne is absent — confirm).
    script:
    """
    visualize.py \
    ${data_file} \
    --labels ${labels_file} \
    ${params.visualize_density ? "--density density.png" : ""} \
    ${params.visualize_tsne ? "--tsne tsne.png" : ""} \
    --tsne-na ${params.visualize_tsne_na} \
    --tsne-npca ${params.visualize_tsne_npca}
    """
}
/**
 * The partition process takes an expression matrix and produces several
 * sub-matrices based on a partitioning scheme.
 *
 * Input:  tuple of (dataset name, expression matrix file)
 * Output: tuple of (dataset, *.txt) -- partitions.txt plus any sub-matrix
 *         files written by partition.py (NOTE(review): the *.txt glob also
 *         matches inputs staged with a .txt suffix — confirm intended).
 */
process partition {
    tag "${dataset}"
    publishDir "${params.output_dir}/${dataset}"
    input:
    tuple val(dataset), path(input_file)
    output:
    tuple val(dataset), path("*.txt")
    // Partition count and method are taken straight from pipeline params.
    script:
    """
    partition.py \
    ${input_file} \
    partitions.txt \
    --n-partitions ${params.partition_npartitions} \
    --method ${params.partition_method}
    """
}