Skip to content

Commit

Permalink
add script to create ENA compliant file by adding info to CDS, specif…
Browse files Browse the repository at this point in the history
…ic to maker gff format coming from the functional annotation pipeline
  • Loading branch information
Lucile Soler committed Jan 9, 2024
1 parent ecef089 commit e9c89eb
Showing 1 changed file with 147 additions and 0 deletions.
147 changes: 147 additions & 0 deletions bin/agat_create_ENA_compliant_gff_vmaker.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#!/usr/bin/env perl

use Carp;
use strict;
use Data::Dumper;
use Getopt::Long;
use File::Basename;
use Statistics::R;

Check failure on line 8 in bin/agat_create_ENA_compliant_gff_vmaker.pl

View workflow job for this annotation

GitHub Actions / build

Can't locate Statistics/R.pm in @inc (you may need to install the Statistics::R module) (@inc contains: /home/runner/work/AGAT/AGAT/blib/lib /home/runner/work/AGAT/AGAT/blib/arch /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3 /home/runner/work/AGAT/AGAT/local/lib/perl5/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3 /home/runner/work/AGAT/AGAT/local/lib/perl5/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5 /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3 /home/runner/work/AGAT/AGAT/local/lib/perl5/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5 /home/runner/work/AGAT/AGAT/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.30.3/x64/lib/site_perl/5.30.3/x86_64-linux /opt/hostedtoolcache/perl/5.30.3/x64/lib/site_perl/5.30.3 /opt/hostedtoolcache/perl/5.30.3/x64/lib/5.30.3/x86_64-linux /opt/hostedtoolcache/perl/5.30.3/x64/lib/5.30.3 .)

Check failure on line 8 in bin/agat_create_ENA_compliant_gff_vmaker.pl

View workflow job for this annotation

GitHub Actions / build

BEGIN failed--compilation aborted

Check failure on line 8 in bin/agat_create_ENA_compliant_gff_vmaker.pl

View workflow job for this annotation

GitHub Actions / build

Can't locate Statistics/R.pm in @inc (you may need to install the Statistics::R module) (@inc contains: /home/runner/work/AGAT/AGAT/blib/lib /home/runner/work/AGAT/AGAT/blib/arch /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3 /home/runner/work/AGAT/AGAT/local/lib/perl5/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3 /home/runner/work/AGAT/AGAT/local/lib/perl5/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5 /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5/5.30.3 /home/runner/work/AGAT/AGAT/local/lib/perl5/x86_64-linux /home/runner/work/AGAT/AGAT/local/lib/perl5 /home/runner/work/AGAT/AGAT/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.30.3/x64/lib/site_perl/5.30.3/x86_64-linux /opt/hostedtoolcache/perl/5.30.3/x64/lib/site_perl/5.30.3 /opt/hostedtoolcache/perl/5.30.3/x64/lib/5.30.3/x86_64-linux /opt/hostedtoolcache/perl/5.30.3/x64/lib/5.30.3 .)

Check failure on line 8 in bin/agat_create_ENA_compliant_gff_vmaker.pl

View workflow job for this annotation

GitHub Actions / build

BEGIN failed--compilation aborted
use Pod::Usage;
use Bio::Tools::GFF;
use List::MoreUtils qw(uniq);

my $header = qq{
########################################################
# BILS 2023 - Sweden #
# lucile.soler\@nbis.se #
# Please cite NBIS (www.nbis.se) when using this tool. #
########################################################
};

my $output = undef;
my $gff = undef;
my $help= 0;
my $output_gff;
my $ID;
my $description;
my $product;


if ( !GetOptions(
"help|h" => \$help,
"gff|g=s" => \$gff,
"output|out|o=s" => \$output_gff)
)

{
pod2usage( { -message => "$header"."Failed to parse command line.",
-verbose => 1,
-exitval => 1} );
}

# Print Help and exit
if ($help) {
pod2usage( { -message => "$header",
-verbose => 2,
-exitval => 2 } );
}

if ( ! (defined($gff)) ){
pod2usage( {
-message => "$header\nAt least 1 parameters are mandatory.\n",
-verbose => 0,
-exitval => 1 } );
}

my $ostream = IO::File->new();
if(defined($output_gff)){
$ostream->open($output_gff, 'w' ) or
croak(
sprintf( "Can not open '%s' for reading: %s", $output_gff, $! ) );
}
else{
$ostream->fdopen( fileno(STDOUT), 'w' ) or
croak( sprintf( "Can not open STDOUT for writing: %s", $! ) );
}

my $input_fh1 = IO::File->new( $gff, '<' ) or croak "Can't read file '$gff' : $!";

while (my $line1 = $input_fh1->getline()){ #print {$STDERR} "$.\t$line";
chomp $line1;

my @col_gffs1 = split(/\t/, $line1);
my $chr2 = $col_gffs1[0];
my $origin = $col_gffs1[1];
my $feature2 = $col_gffs1[2];
my $pos_start2 = $col_gffs1[3];
my $pos_end2 = $col_gffs1[4];
my $col5 = $col_gffs1[5];
my $strand2 = $col_gffs1[6];
my $col7 = $col_gffs1[7];
my $desc2 = $col_gffs1[8];
my $id2;

#print $feature2."\n";

if ($feature2 eq "mRNA"){
$desc2 =~/ID=(\S+);Parent=\S+;(Dbxref=\S+;)_AED=\S+;makerName=\S+;(.*)/; #;Parent=*;(*)maker_name=*;(*)/;
$ID = $1;
$description = $2;
$product = $3;

print $ostream $line1."\n";

} elsif ($feature2 eq "CDS"){
$desc2 =~/ID=\S+;Parent=(\S+);makerName=\S+/; #;Parent=*;(*)maker_name=*;(*)/;

if ($ID eq $1) {

print $ostream $line1.";".$description.$product."\n";
}else {
# print "ID".$ID."\n";
print $ostream $line1.";product=hypothetical protein"."\n";
}
}
else {
print $ostream $line1."\n";
}



}



=head1 NAME
agat_create_ENA_compliant_gff_vmaker.pl
This script will create an ENA compliant GFF coming from a maker functional annotation gff (annotated by the pipeline)
It copies the functional information from the mRNA into the CDS and keeping the same ID between the two gffs
=head1 SYNOPSIS
./agat_create_ENA_compliant_gff_vmaker.pl -gff annotated.gff -o outputname [Options]
./agat_create_ENA_compliant_gff_vmaker.pl --help
=head1 OPTIONS
=over 8
=item B<--gff>, B<--gfffile> or B<-f>
Input GFF3 file corresponding to the file annotated by the functional annotation pipeline and coming from maker.
=item B<--output> or B<-o>
Output name that will be used to create the output files.
=item B<--help> or B<-h>
Getting help.
Display the full information.
=back
=cut

0 comments on commit e9c89eb

Please sign in to comment.