Skip to content

Commit cccb999

Browse files
author
Felix Van der Jeugt
committed
Merge branch '14-refine-outputs'
2 parents 223d8f7 + 5fea437 commit cccb999

File tree

2 files changed

+53
-10
lines changed

2 files changed

+53
-10
lines changed

README.md

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,54 @@ where:
9696

9797
### Additional options
9898

99-
* `-m meta_file`, `-n nucleotide_file`, `-a aa_file` and `-g gff_file`
100-
can be used to write output to specific files, instead of having the
101-
program create filenames with predetermined extensions. These take
102-
precedence over the `-o` option.
103-
10499
* Leaving out the `-o` option or using the name `stdout` causes
105100
FragGeneScanRs to only write the predicted proteins to standard output.
106101
The other files can still be requested with the specific options above.
107102

103+
* `-m meta_file`, `-n nucleotide_file`, `-a aa_file` and `-g gff_file`
104+
can be used to write output to specific files, instead of having the
105+
program create filenames with predetermined extensions. These take
106+
precedence over the `-o` option. Passing `-` to any of these options
107+
will write the respective content to stdout. Passing `-` to multiple
108+
of these options is accepted but results in interspersed output.
109+
110+
- The `-m meta_file` writes, for each sequence, the header of the
111+
sequence followed by tab-separated value lines. Each line corresponds
112+
to a gene and has 7 columns:
113+
114+
1. the 1-based index of the start of the gene in the original sequence
115+
2. the 1-based index of the end of the gene in the original sequence
116+
3. whether the gene was predicted on the forward (`+`) or reverse (`-`) strand
117+
4. the frame where the gene started on (`1`, `2` or `3`)
118+
5. the score of the prediction
119+
6. the 1-based indices of predicted insertions, e.g. `I:14,15`
120+
7. the 1-based indices of predicted deletions, e.g. `D:14,15`
121+
122+
- The `-n nucleotide_file` writes, for each predicted gene, the
123+
corresponding sequence of nucleotides. For instance, for the first
124+
predicted gene in `example/NC_000913-454.fna`, it writes a header
125+
`>r1.1_2_79_-` and a sequence. The header refers to the original
126+
sequence header (`r1.1`), the start (`2`) and end (`79`) index of
127+
the predicted gene in the original sequence and the strand (`-`).
128+
129+
- The `-a aa_file` writes the same output as the `-n` nucleotide file,
130+
but uses the predicted amino acid sequence rather than the nucleotide
131+
sequence.
132+
133+
- The `-g gff_file` writes a version header and, for each predicted gene,
134+
a tab-separated line of metadata. The columns are:
135+
136+
1. the header of the original sequence
137+
2. the string `FGS`
138+
3. the string `CDS`
139+
4. the 1-based index of the start of the gene in the original sequence
140+
5. the 1-based index of the end of the gene in the original sequence
141+
6. the string `.`
142+
7. whether the gene was predicted on the forward (`+`) or reverse (`-`) strand
143+
8. the frame where the gene started on (`0`, `1` or `2`)
144+
9. an identifier for the predicted gene and the type of product,
145+
marked with `ID=` and `product=`. The latter is always `predicted protein`.
146+
108147
* Leaving out the `-s` option causes FragGeneScanRs to read sequences
109148
from standard input.
110149

src/bin/FragGeneScanRs.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ fn main() -> Result<()> {
4444
.long("output-prefix")
4545
.value_name("output_prefix")
4646
.takes_value(true)
47-
.help("Output metadata (.out and .gff), proteins (.faa) and genes (.ffn) to files with this prefix. Use 'stdout' to write the predicted proteins to standard output."))
47+
.help("Output metadata (.out and .gff), proteins (.faa) and genes (.ffn) to files with this prefix. Don't pass this argument or use 'stdout' to write the predicted proteins to standard output."))
4848
.arg(Arg::with_name("complete")
4949
.short("w")
5050
.long("complete")
@@ -90,25 +90,25 @@ fn main() -> Result<()> {
9090
.long("meta-file")
9191
.value_name("meta_file")
9292
.takes_value(true)
93-
.help("Output metadata to this file (supersedes -o)."))
93+
.help("Output metadata to this file (supersedes -o). Use - to write to standard output (use only once)."))
9494
.arg(Arg::with_name("gff-file")
9595
.short("g")
9696
.long("gff-file")
9797
.value_name("gff_file")
9898
.takes_value(true)
99-
.help("Output metadata to this gff formatted file (supersedes -o)."))
99+
.help("Output metadata to this gff formatted file (supersedes -o). Use - to write to standard output (use only once)."))
100100
.arg(Arg::with_name("aa-file")
101101
.short("a")
102102
.long("aa-file")
103103
.value_name("aa_file")
104104
.takes_value(true)
105-
.help("Output predicted proteins to this file (supersedes -o)."))
105+
.help("Output predicted proteins to this file (supersedes -o). Use - to write to standard output (use only once)."))
106106
.arg(Arg::with_name("nucleotide-file")
107107
.short("n")
108108
.long("nucleotide-file")
109109
.value_name("nucleotide_file")
110110
.takes_value(true)
111-
.help("Output predicted genes to this file (supersedes -o)."))
111+
.help("Output predicted genes to this file (supersedes -o). Use - to write to standard output (use only once)."))
112112
.arg(Arg::with_name("unordered")
113113
.short("u")
114114
.long("unordered")
@@ -129,6 +129,7 @@ fn main() -> Result<()> {
129129
matches.value_of("aa-file"),
130130
matches.value_of("output-prefix"),
131131
) {
132+
(Some("-"), _) => Some(Box::new(io::stdout())),
132133
(Some(filename), _) => Some(Box::new(File::create(filename)?)),
133134
(None, Some("stdout")) => Some(Box::new(io::stdout())),
134135
(None, Some(filename)) => Some(Box::new(File::create(filename.to_owned() + ".faa")?)),
@@ -139,6 +140,7 @@ fn main() -> Result<()> {
139140
matches.value_of("meta-file"),
140141
matches.value_of("output-prefix"),
141142
) {
143+
(Some("-"), _) => Some(Box::new(io::stdout())),
142144
(Some(filename), _) => Some(Box::new(File::create(filename)?)),
143145
(None, Some("stdout")) => None,
144146
(None, Some(filename)) => Some(Box::new(File::create(filename.to_owned() + ".out")?)),
@@ -149,6 +151,7 @@ fn main() -> Result<()> {
149151
matches.value_of("gff-file"),
150152
matches.value_of("output-prefix"),
151153
) {
154+
(Some("-"), _) => Some(Box::new(io::stdout())),
152155
(Some(filename), _) => Some(Box::new(File::create(filename)?)),
153156
(None, Some("stdout")) => None,
154157
(None, Some(filename)) => Some(Box::new(File::create(filename.to_owned() + ".gff")?)),
@@ -163,6 +166,7 @@ fn main() -> Result<()> {
163166
matches.value_of("nucleotide-file"),
164167
matches.value_of("output-prefix"),
165168
) {
169+
(Some("-"), _) => Some(Box::new(io::stdout())),
166170
(Some(filename), _) => Some(Box::new(File::create(filename)?)),
167171
(None, Some("stdout")) => None,
168172
(None, Some(filename)) => Some(Box::new(File::create(filename.to_owned() + ".ffn")?)),

0 commit comments

Comments
 (0)