Skip to content

Commit e9c89eb

Browse files
author
Lucile Soler
committed
add script to create ENA compliant file by adding info to CDS, specific to maker gff format coming from the functional annotation pipeline
1 parent ecef089 commit e9c89eb

File tree

1 file changed

+147
-0
lines changed

1 file changed

+147
-0
lines changed
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#!/usr/bin/env perl
2+
3+
use Carp;
4+
use strict;
5+
use Data::Dumper;
6+
use Getopt::Long;
7+
use File::Basename;
8+
use Statistics::R;
9+
use Pod::Usage;
10+
use Bio::Tools::GFF;
11+
use List::MoreUtils qw(uniq);
12+
13+
my $header = qq{
14+
########################################################
15+
# BILS 2023 - Sweden #
16+
# lucile.soler\@nbis.se #
17+
# Please cite NBIS (www.nbis.se) when using this tool. #
18+
########################################################
19+
};
20+
21+
my $output = undef;
22+
my $gff = undef;
23+
my $help= 0;
24+
my $output_gff;
25+
my $ID;
26+
my $description;
27+
my $product;
28+
29+
30+
if ( !GetOptions(
31+
"help|h" => \$help,
32+
"gff|g=s" => \$gff,
33+
"output|out|o=s" => \$output_gff)
34+
)
35+
36+
{
37+
pod2usage( { -message => "$header"."Failed to parse command line.",
38+
-verbose => 1,
39+
-exitval => 1} );
40+
}
41+
42+
# Print Help and exit
43+
if ($help) {
44+
pod2usage( { -message => "$header",
45+
-verbose => 2,
46+
-exitval => 2 } );
47+
}
48+
49+
if ( ! (defined($gff)) ){
50+
pod2usage( {
51+
-message => "$header\nAt least 1 parameters are mandatory.\n",
52+
-verbose => 0,
53+
-exitval => 1 } );
54+
}
55+
56+
my $ostream = IO::File->new();
57+
if(defined($output_gff)){
58+
$ostream->open($output_gff, 'w' ) or
59+
croak(
60+
sprintf( "Can not open '%s' for reading: %s", $output_gff, $! ) );
61+
}
62+
else{
63+
$ostream->fdopen( fileno(STDOUT), 'w' ) or
64+
croak( sprintf( "Can not open STDOUT for writing: %s", $! ) );
65+
}
66+
67+
my $input_fh1 = IO::File->new( $gff, '<' ) or croak "Can't read file '$gff' : $!";
68+
69+
while (my $line1 = $input_fh1->getline()){ #print {$STDERR} "$.\t$line";
70+
chomp $line1;
71+
72+
my @col_gffs1 = split(/\t/, $line1);
73+
my $chr2 = $col_gffs1[0];
74+
my $origin = $col_gffs1[1];
75+
my $feature2 = $col_gffs1[2];
76+
my $pos_start2 = $col_gffs1[3];
77+
my $pos_end2 = $col_gffs1[4];
78+
my $col5 = $col_gffs1[5];
79+
my $strand2 = $col_gffs1[6];
80+
my $col7 = $col_gffs1[7];
81+
my $desc2 = $col_gffs1[8];
82+
my $id2;
83+
84+
#print $feature2."\n";
85+
86+
if ($feature2 eq "mRNA"){
87+
$desc2 =~/ID=(\S+);Parent=\S+;(Dbxref=\S+;)_AED=\S+;makerName=\S+;(.*)/; #;Parent=*;(*)maker_name=*;(*)/;
88+
$ID = $1;
89+
$description = $2;
90+
$product = $3;
91+
92+
print $ostream $line1."\n";
93+
94+
} elsif ($feature2 eq "CDS"){
95+
$desc2 =~/ID=\S+;Parent=(\S+);makerName=\S+/; #;Parent=*;(*)maker_name=*;(*)/;
96+
97+
if ($ID eq $1) {
98+
99+
print $ostream $line1.";".$description.$product."\n";
100+
}else {
101+
# print "ID".$ID."\n";
102+
print $ostream $line1.";product=hypothetical protein"."\n";
103+
}
104+
}
105+
else {
106+
print $ostream $line1."\n";
107+
}
108+
109+
110+
111+
}
112+
113+
114+
115+
=head1 NAME
116+
117+
agat_create_ENA_compliant_gff_vmaker.pl
118+
119+
This script will create an ENA compliant GFF coming from a maker functional annotation gff (annotated by the pipeline)
120+
It copies the functional information from the mRNA into the CDS and keeping the same ID between the two gffs
121+
122+
=head1 SYNOPSIS
123+
124+
./agat_create_ENA_compliant_gff_vmaker.pl -gff annotated.gff -o outputname [Options]
125+
./agat_create_ENA_compliant_gff_vmaker.pl --help
126+
127+
=head1 OPTIONS
128+
129+
=over 8
130+
131+
=item B<--gff>, B<--gfffile> or B<-f>
132+
133+
Input GFF3 file corresponding to the file annotated by the functional annotation pipeline and coming from maker.
134+
135+
136+
=item B<--output> or B<-o>
137+
138+
Output name that will be used to create the output files.
139+
140+
=item B<--help> or B<-h>
141+
142+
Getting help.
143+
Display the full information.
144+
145+
=back
146+
147+
=cut

0 commit comments

Comments
 (0)