From 40da90ab803586a828ec3e11c4f643d3748a41a6 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Fri, 28 Jun 2024 14:42:37 +0200 Subject: [PATCH 1/2] fix #420. Now table 0 can be called (bioperl >= april 2024 must be present). Fix issue in get_proper_codon_table. Force agat_sp_extract_sequences.pl to pass by the get_proper_codon_table (the only one that was not yet using it). Add an explanation section Troubleshooting of the doc --- bin/agat_sp_add_start_and_stop.pl | 2 +- bin/agat_sp_extract_sequences.pl | 8 +-- ...sp_filter_incomplete_gene_coding_models.pl | 2 +- bin/agat_sp_fix_fusion.pl | 1 - bin/agat_sp_fix_longest_ORF.pl | 1 - ...agat_sp_fix_small_exon_from_extremities.pl | 2 +- bin/agat_sp_flag_premature_stop_codons.pl | 5 +- ..._prokka_fix_fragmented_gene_annotations.pl | 1 - docs/troubleshooting.md | 64 ++++++++++++++++++- lib/AGAT/Utilities.pm | 15 +++-- 10 files changed, 81 insertions(+), 20 deletions(-) diff --git a/bin/agat_sp_add_start_and_stop.pl b/bin/agat_sp_add_start_and_stop.pl index 20548571..8dd93004 100755 --- a/bin/agat_sp_add_start_and_stop.pl +++ b/bin/agat_sp_add_start_and_stop.pl @@ -66,7 +66,7 @@ my $gffout = prepare_gffout($config, $opt_output); $codon_table_id = get_proper_codon_table($codon_table_id); -print "Codon table ".$codon_table_id." in use. 
You can change it using --table option.\n"; + my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id, -no_iupac => 0); # ##################################### # # END Manage OPTION diff --git a/bin/agat_sp_extract_sequences.pl b/bin/agat_sp_extract_sequences.pl index dca93b8e..706818a4 100755 --- a/bin/agat_sp_extract_sequences.pl +++ b/bin/agat_sp_extract_sequences.pl @@ -96,6 +96,9 @@ # --- Manage config --- $config = get_agat_config({config_file_in => $config}); +# --- Check codon table +$codonTable = get_proper_codon_table($codonTable); + # activate warnings limit my %warnings; activate_warning_limit(\%warnings, 10); @@ -121,11 +124,6 @@ print "We will extract the $opt_type sequences.\n"; $opt_type=lc($opt_type); -# deal with codon table -if($codonTable<0 and $codonTable>25){ - print "$codonTable codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; -} - # deal with OFS my $OFS=" "; if($opt_OFS){ diff --git a/bin/agat_sp_filter_incomplete_gene_coding_models.pl b/bin/agat_sp_filter_incomplete_gene_coding_models.pl index 0234e370..d0c2a340 100755 --- a/bin/agat_sp_filter_incomplete_gene_coding_models.pl +++ b/bin/agat_sp_filter_incomplete_gene_coding_models.pl @@ -61,7 +61,7 @@ # --- Check codon table --- $codonTableId = get_proper_codon_table($codonTableId); -print "Codon table ".$codonTableId." in use. You can change it using --table option.\n"; + my $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); ###################### diff --git a/bin/agat_sp_fix_fusion.pl b/bin/agat_sp_fix_fusion.pl index 52277325..b716e1c8 100755 --- a/bin/agat_sp_fix_fusion.pl +++ b/bin/agat_sp_fix_fusion.pl @@ -87,7 +87,6 @@ my $logout = prepare_fileout($logout_file); $opt_codonTableID = get_proper_codon_table($opt_codonTableID); -print "Codon table ".$opt_codonTableID." in use. 
You can change it using --table option.\n"; if(!$threshold){ $threshold=100; diff --git a/bin/agat_sp_fix_longest_ORF.pl b/bin/agat_sp_fix_longest_ORF.pl index 6fddecbe..ea797d97 100755 --- a/bin/agat_sp_fix_longest_ORF.pl +++ b/bin/agat_sp_fix_longest_ORF.pl @@ -72,7 +72,6 @@ # --- Check codon table $codonTable = get_proper_codon_table($codonTable); -print "Codon table ".$codonTable." in use. You can change it using --table option.\n"; ###################### # Manage output file # diff --git a/bin/agat_sp_fix_small_exon_from_extremities.pl b/bin/agat_sp_fix_small_exon_from_extremities.pl index 13799c01..82cc983d 100755 --- a/bin/agat_sp_fix_small_exon_from_extremities.pl +++ b/bin/agat_sp_fix_small_exon_from_extremities.pl @@ -61,8 +61,8 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> EXTRA <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# --- Check codon table $codonTableId = get_proper_codon_table($codonTableId); -print "Codon table ".$codonTableId." in use. You can change it using --table option.\n"; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MAIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bin/agat_sp_flag_premature_stop_codons.pl b/bin/agat_sp_flag_premature_stop_codons.pl index d7e4f248..500a8bd7 100755 --- a/bin/agat_sp_flag_premature_stop_codons.pl +++ b/bin/agat_sp_flag_premature_stop_codons.pl @@ -26,7 +26,7 @@ 'o|out|output=s' => \$opt_output, "fasta|fa|f=s" => \$file_fasta, "table|codon|ct=i" => \$codonTable, - 'c|config=s' => \$config, + 'c|config=s' => \$config, 'h|help!' 
=> \$opt_help ) ) { pod2usage( { -message => 'Failed to parse command line', @@ -53,6 +53,9 @@ # --- Manage config --- $config = get_agat_config({config_file_in => $config}); +# --- Check codon table +$codonTable = get_proper_codon_table($codonTable); + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PARAMS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< my $ostreamReport_file; diff --git a/bin/agat_sp_prokka_fix_fragmented_gene_annotations.pl b/bin/agat_sp_prokka_fix_fragmented_gene_annotations.pl index ff8cd8bd..84d7cb67 100755 --- a/bin/agat_sp_prokka_fix_fragmented_gene_annotations.pl +++ b/bin/agat_sp_prokka_fix_fragmented_gene_annotations.pl @@ -114,7 +114,6 @@ BEGIN # Check codon table $codonTable = get_proper_codon_table($codonTable); -print "Codon table ".$codonTable." in use. You can change it using --table option.\n"; ###################### # Manage output file # diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 1e88cd05..954c12ec 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -75,4 +75,66 @@ Bio::DB::Fasta from Bioperl cannot handle line with more than 65,536 characters. # Fold to 80 characters by line. # Be careful if you have long headers that can be folded over several lines. You must first shorten them, or fold with higher value. fold input.fa > output.fa -``` \ No newline at end of file +``` + + +## How to use codon table 0 (codon table 1 is used instead)? + +Several scripts need to use a codon table: + +``` +agat_sp_add_start_and_stop.pl +agat_sp_extract_sequences.pl +agat_sp_filter_incomplete_gene_coding_models.pl +agat_sp_fix_fusion.pl +agat_sp_fix_longest_ORF.pl +agat_sp_fix_small_exon_from_extremities.pl +agat_sp_flag_premature_stop_codons.pl +agat_sp_prokka_fix_fragmented_gene_annotations.pl +``` + +By default AGAT uses codon table 1 wich is the standard table. + +* What is the difference between table 1 and table 0? 
+ The codon table 0 is strict and uses ATG as the only start codon, while codon table 1 accepts TTG, CTG and ATG as possible start codons. + +* What are the possible codon tables? + On top of table 0, which is specific to Bioperl, many other tables are available. Their description can be found [here](https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG1). + +## Why does AGAT keep using table 1 when I ask for table 0? + +There are two possible reasons for that problem. + + * AGAT: Originally the problem comes from a bug in Bioperl. AGAT tried to work around the problem, but the workaround was not effective until version 1.4.1. Please be sure to use a version >= 1.4.1 to avoid any problem from the AGAT side. + + * Bioperl: The problem has been present for a while and has been definitively fixed in the [commit fa9366f from the 24th of April 2024](https://github.com/bioperl/bioperl-live/tree/fa9366f3a2f48fd051343d488cfce26655f842b3). + So to fix the problem you need to use a bioperl version that includes this commit (or any later one). If that is not possible (e.g. not yet available for installation via conda) you can follow this procedure: + * run AGAT once + * note the location of bioperl printed in the output, e.g.: + ``` + ------------------------------------------------------------------------------ + | Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | + | https://github.com/NBISweden/AGAT | + | National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + ...
+ + => Machine information: + This script is being run by perl v5.32.1 + Bioperl location being used: /usr/local/lib/perl5/site_perl/Bio/ + Operating system being used: linux + ``` + Here the bioperl path is: `/usr/local/lib/perl5/site_perl/Bio/` + * Move into the directory found in the previous step minus `/Bio`: + `cd /usr/local/lib/perl5/site_perl/` + * Copy the files and folders from the `lib` directory of the bioperl-live repository ([here](https://github.com/bioperl/bioperl-live/tree/master/lib)) into the current directory: + ``` + git clone https://github.com/bioperl/bioperl-live + cp -r bioperl-live/lib/* . + ``` + + Now you should be able to use the codon table 0. If not, check your AGAT version (see above). + + + diff --git a/lib/AGAT/Utilities.pm b/lib/AGAT/Utilities.pm index 156d04e5..d1a91455 100644 --- a/lib/AGAT/Utilities.pm +++ b/lib/AGAT/Utilities.pm @@ -76,19 +76,20 @@ sub get_proper_codon_table { my ($codon_table_id_original) = @_; my $codonTable = Bio::Tools::CodonTable->new( -id => $codon_table_id_original); my $codon_table_id_bioperl = $codonTable->id; - if (!$codon_table_id_bioperl){ - $codon_table_id_bioperl = 1 ; - $codonTable = Bio::Tools::CodonTable->new( -id => $codon_table_id_original); - } + + # To deal with empty result in version of bioperl < april 2024 when asking with table 0 (it was returning an empty string) + if (! defined($codon_table_id_bioperl)){ + $codon_table_id_bioperl = 1 ; # default codon table + } if ($codon_table_id_original == 0 and $codon_table_id_original != $codon_table_id_bioperl){ $codonTable->warn("Your version of bioperl do not handle codon table 0\n". "see https://github.com/bioperl/bioperl-live/pull/315\n". "It uses codon table $codon_table_id_bioperl instead."); - return $codon_table_id_bioperl; } - - return $codon_table_id_original; + + print "Codon table ".$codon_table_id_bioperl." in use. 
You can change it using the appropriate parameter.\n"; + return $codon_table_id_bioperl; } # the warning message will be filtered to be printed only $nb_warnings times From c2ff6a4cea5faaf9bd92b7a85e80c19cb01f41a1 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Fri, 28 Jun 2024 16:39:10 +0200 Subject: [PATCH 2/2] increment AGAT version --- lib/AGAT/AGAT.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/AGAT/AGAT.pm b/lib/AGAT/AGAT.pm index 8d757a7b..e7b6b9e4 100644 --- a/lib/AGAT/AGAT.pm +++ b/lib/AGAT/AGAT.pm @@ -16,7 +16,7 @@ use AGAT::Utilities; use AGAT::PlotR; use Bio::Tools::GFF; -our $VERSION = "v1.4.0"; +our $VERSION = "v1.4.1"; our @ISA = qw(Exporter); our @EXPORT = qw(get_agat_header print_agat_version get_agat_config handle_levels); sub import {