diff --git a/bin/agat_sp_filter_feature_by_attribute_presence.pl b/bin/agat_sp_filter_feature_by_attribute_presence.pl index 431c8aa2..657adfc3 100755 --- a/bin/agat_sp_filter_feature_by_attribute_presence.pl +++ b/bin/agat_sp_filter_feature_by_attribute_presence.pl @@ -314,7 +314,8 @@ =head1 OPTIONS =item B<--attribute>, B<--att>, B<-a> -String - Attributes tag specified will be used to filter the feature type (feature type can also be specified by the option -p). List of attribute tags must be coma separated. +String - Attributes tag specified will be used to filter the feature type (feature type can also be specified by the option -p). +List of attribute tags must be coma separated. =item B<--flip> diff --git a/bin/agat_sp_filter_feature_by_attribute_value.pl b/bin/agat_sp_filter_feature_by_attribute_value.pl index c584a7d5..0f599fdd 100755 --- a/bin/agat_sp_filter_feature_by_attribute_value.pl +++ b/bin/agat_sp_filter_feature_by_attribute_value.pl @@ -15,6 +15,8 @@ my $primaryTag=undef; my $opt_output= undef; my $opt_value = undef; +my $opt_keep_parental = undef; +my $opt_na_aside = undef; my $opt_value_insensitive = undef; my $opt_attribute = undef; my $opt_test = "="; @@ -27,6 +29,8 @@ if ( !GetOptions( 'f|ref|reffile|gff=s' => \$opt_gff, 'value=s' => \$opt_value, 'value_insensitive!' => \$opt_value_insensitive, + 'keep_parental!' => \$opt_keep_parental, + 'na_aside!' => \$opt_na_aside, "p|type|l=s" => \$primaryTag, 'a|attribute=s' => \$opt_attribute, 't|test=s' => \$opt_test, @@ -59,7 +63,7 @@ ############### # Test options -if($opt_test ne "<" and $opt_test ne ">" and $opt_test ne "<=" and $opt_test ne ">=" and $opt_test ne "="){ +if($opt_test ne "<" and $opt_test ne ">" and $opt_test ne "<=" and $opt_test ne ">=" and $opt_test ne "=" and $opt_test ne "!"){ print "The test to apply is Wrong: $opt_test.\nWe want something among this list: <,>,<=,>=,! or =.";exit; } @@ -70,19 +74,22 @@ my $gffout_ok_file ; my $fhout_discarded_file ; my $ostreamReport_file; +my $fhout_semidDiscarded_file if $opt_na_aside; if ($opt_output) { my ($outfile,$path,$ext) = fileparse($opt_output,qr/\.[^.]*/); # set file names $gffout_ok_file = $path.$outfile.$ext; - $fhout_discarded_file = $path.$outfile."_discarded.txt"; + $fhout_discarded_file = $path.$outfile."_discarded.gff"; $ostreamReport_file = $path.$outfile."_report.txt"; + $fhout_semidDiscarded_file = $path.$outfile."_na.gff"; } my $gffout_ok = prepare_gffout($config, $gffout_ok_file); -my $fhout_discarded = prepare_fileout($fhout_discarded_file); +my $fhout_discarded = prepare_gffout($config, $fhout_discarded_file); my $ostreamReport = prepare_fileout($ostreamReport_file); +my $fhout_semidDiscarded = prepare_gffout($config, $fhout_semidDiscarded_file) if $opt_na_aside; # Manage $primaryTag my @ptagList; @@ -121,17 +128,14 @@ } } -use Data::Dumper; -print Dumper($value_hash); - # start with some interesting information my $stringPrint = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; $stringPrint .= "\nusage: $0 @copyARGV\n"; $stringPrint .= "We will discard $print_feature_string that have the attribute $opt_attribute with the value $opt_test $opt_value"; if ($opt_value_insensitive){ - $stringPrint .= "case insensitive.\n"; + $stringPrint .= " case insensitive.\n"; }else{ - $stringPrint .= "case sensitive.\n"; + $stringPrint .= " case sensitive.\n"; } if ($opt_output){ @@ -142,7 +146,9 @@ ####################### # >>>>>>>>>>>>>>>>>>>>>>>># MAIN #<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ####################### -my %all_cases = ('l1' => 0, 'l2' => 0, 'l3' => 0, 'all' => 0); +my %all_cases = ( 'left' => {'l1' => 0, 'l2' => 0, 'l3' => 0, 'all' => 0}, + 'discarded' => {'l1' => 0, 'l2' => 0, 'l3' => 0, 'all' => 0} ); + ###################### ### Parse GFF input # my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_gff, @@ -167,19 +173,28 @@ $removeit = check_feature($feature_l1, 'level1'); # we can remove feature L1 now because we are looping over $hash_sortBySeq not $hash_omniscient if ($removeit){ - my $cases = remove_l1_and_relatives($hash_omniscient, $feature_l1, $fhout_discarded); - $all_cases{'l1'} += $cases->{'l1'}; - $all_cases{'l2'} += $cases->{'l2'}; - $all_cases{'l3'} += $cases->{'l3'}; - $all_cases{'all'} += $cases->{'all'}; + my $cases; + if($removeit == 1){ + $cases = remove_l1_and_relatives($hash_omniscient, $feature_l1, $fhout_discarded); + $all_cases{'discarded'}{'l1'} += $cases->{'l1'}; + $all_cases{'discarded'}{'l2'} += $cases->{'l2'}; + $all_cases{'discarded'}{'l3'} += $cases->{'l3'}; + $all_cases{'discarded'}{'all'} += $cases->{'all'}; + } elsif ($removeit == 2 and $opt_na_aside){ + $cases = remove_l1_and_relatives($hash_omniscient, $feature_l1, $fhout_semidDiscarded); + $all_cases{'na'}{'l1'} += $cases->{'l1'}; + $all_cases{'na'}{'l2'} += $cases->{'l2'}; + $all_cases{'na'}{'l3'} += $cases->{'l3'}; + $all_cases{'na'}{'all'} += $cases->{'all'}; + } next; } ################# # == LEVEL 2 == # ################# - my @list_l2_to_remove; - my @list_l3_to_remove; + my %list_l2_to_remove; + my %list_l3_to_remove; foreach my $tag_l2 (sort keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... if ( exists_keys( $hash_omniscient, ('level2', $tag_l2, $id_l1) ) ){ @@ -188,7 +203,11 @@ $removeit = check_feature($feature_l2,'level2'); if ($removeit){ - push @list_l2_to_remove, [$feature_l2, $tag_l1, $id_l1, $fhout_discarded]; + if ($removeit == 1){ + push ( @{$list_l2_to_remove{'discarded'}}, [$feature_l2, $tag_l1, $id_l1, $fhout_discarded]); + } elsif ($removeit == 2 and $opt_na_aside){ + push ( @{$list_l2_to_remove{'na'}}, [$feature_l2, $tag_l1, $id_l1, $fhout_semidDiscarded]); + } next; } ################# @@ -202,8 +221,10 @@ foreach my $feature_l3 ( @list_fl3 ) { $removeit = check_feature($feature_l3, 'level3'); - if ($removeit){ - push @list_l3_to_remove, [$feature_l3, $tag_l1, $id_l1, $tag_l2, $id_l2, $fhout_discarded]; + if($removeit == 1){ + push ( @{$list_l3_to_remove{'discarded'}}, [$feature_l3, $tag_l1, $id_l1, $tag_l2, $id_l2, $fhout_discarded]); + } elsif ( $removeit == 2 and $opt_na_aside ){ + push ( @{$list_l3_to_remove{'na'}}, [$feature_l3, $tag_l1, $id_l1, $tag_l2, $id_l2, $fhout_semidDiscarded]); } } } @@ -212,22 +233,22 @@ } } # Should be removed after looping over them to avoid problems - if (@list_l2_to_remove) { - foreach my $infos (@list_l2_to_remove) { - my $cases = remove_l2_and_relatives( $hash_omniscient, @$infos); - $all_cases{'l1'} += $cases->{'l1'}; - $all_cases{'l2'} += $cases->{'l2'}; - $all_cases{'l3'} += $cases->{'l3'}; - $all_cases{'all'} += $cases->{'all'}; + foreach my $key ( keys %list_l2_to_remove ){ + foreach my $infos ( @{$list_l2_to_remove{$key}} ) { + my $cases = remove_l2_and_relatives( $hash_omniscient, @$infos, $opt_keep_parental); + $all_cases{$key}{'l1'} += $cases->{'l1'}; + $all_cases{$key}{'l2'} += $cases->{'l2'}; + $all_cases{$key}{'l3'} += $cases->{'l3'}; + $all_cases{$key}{'all'} += $cases->{'all'}; } } - if (@list_l3_to_remove) { - foreach my $infos (@list_l3_to_remove) { - my $cases = remove_l3_and_relatives( $hash_omniscient, @$infos); - $all_cases{'l1'} += $cases->{'l1'}; - $all_cases{'l2'} += $cases->{'l2'}; - $all_cases{'l3'} += $cases->{'l3'}; - $all_cases{'all'} += $cases->{'all'}; + foreach my $key ( sort keys %list_l3_to_remove ){ + foreach my $infos ( @{$list_l3_to_remove{$key}} ) { + my $cases = remove_l3_and_relatives( $hash_omniscient, @$infos, $opt_keep_parental); + $all_cases{$key}{'l1'} += $cases->{'l1'}; + $all_cases{$key}{'l2'} += $cases->{'l2'}; + $all_cases{$key}{'l3'} += $cases->{'l3'}; + $all_cases{$key}{'all'} += $cases->{'all'}; } } } @@ -236,10 +257,20 @@ print_omniscient( {omniscient => $hash_omniscient, output => $gffout_ok} ); -$stringPrint = $all_cases{'all'}." features removed:\n"; -$stringPrint .= $all_cases{'l1'}." features level1 (e.g. gene) removed\n"; -$stringPrint .= $all_cases{'l2'}." features level2 (e.g. mRNA) removed\n"; -$stringPrint .= $all_cases{'l3'}." features level3 (e.g. exon) removed\n"; +$stringPrint = "Feature discarded by applying the test (see $fhout_discarded_file file):\n"; +$stringPrint .= $all_cases{'discarded'}{'all'}." features removed:\n"; +$stringPrint .= $all_cases{'discarded'}{'l1'}." features level1 (e.g. gene) removed\n"; +$stringPrint .= $all_cases{'discarded'}{'l2'}." features level2 (e.g. mRNA) removed\n"; +$stringPrint .= $all_cases{'discarded'}{'l3'}." features level3 (e.g. exon) removed\n"; + +if($opt_na_aside){ + $stringPrint .= "Feature left out because the attribute is missing (see $fhout_semidDiscarded_file file):\n"; + $stringPrint .= $all_cases{'na'}{'all'}." features removed:\n"; + $stringPrint .= $all_cases{'na'}{'l1'}." features level1 (e.g. gene) removed\n"; + $stringPrint .= $all_cases{'na'}{'l2'}." features level2 (e.g. mRNA) removed\n"; + $stringPrint .= $all_cases{'na'}{'l3'}." features level3 (e.g. exon) removed\n"; +} + if ($opt_output){ print $ostreamReport $stringPrint; print $stringPrint; @@ -298,35 +329,44 @@ sub should_we_remove_feature{ } # for string values replace = by eq and ! by ne and avoid other type of test if ( ! looks_like_number ($given_value) or ! looks_like_number ($file_value)){ + print "String case\n" if $opt_verbose; if ($opt_test eq "="){ - if ($file_value eq $given_value){return 1; } + if ($file_value eq $given_value) { print "equal\n" if $opt_verbose; return 1; } + else { print "not equal\n" if $opt_verbose; } } elsif ($opt_test eq "!"){ - if ($file_value ne $given_value){return 1; } + if ($file_value ne $given_value){ print "different\n" if $opt_verbose; return 1; } + else { print "not different\n" if $opt_verbose; } + } + } + else{ + print "Number case\n" if $opt_verbose; + if ($opt_test eq "="){ + if ($file_value == $given_value){return 1; } + } + elsif ($opt_test eq "!"){ + if ($file_value != $given_value){return 1; } + } + elsif ($opt_test eq ">"){ + if ($file_value > $given_value){return 1; } + } + elsif ($opt_test eq "<"){ + if ($file_value < $given_value){return 1; } + } + elsif ($opt_test eq "<="){ + if ($file_value <= $given_value){return 1; } + } + elsif ($opt_test eq ">="){ + if ($file_value >= $given_value){return 1; } } - } - elsif ($opt_test eq "="){ - if ($file_value == $given_value){return 1; } - } - elsif ($opt_test eq "!"){ - if ($file_value != $given_value){return 1; } - } - elsif ($opt_test eq ">"){ - if ($file_value > $given_value){return 1; } - } - elsif ($opt_test eq "<"){ - if ($file_value < $given_value){return 1; } - } - elsif ($opt_test eq "<="){ - if ($file_value <= $given_value){return 1; } - } - elsif ($opt_test eq ">="){ - if ($file_value >= $given_value){return 1; } } } } + return 0; + } else { + print "Attribute not found case\n" if $opt_verbose; + return 2; } - return 0; } __END__ @@ -338,11 +378,16 @@ =head1 NAME =head1 DESCRIPTION The script aims to filter features according to attribute value (9th column). -If the attribute tag is missing the feature will not be discarded. -If the attribute exists and the value pass the test, the feature is discarded. +- If the attribute exists and the value do not pass the test, the feature is written into . +- If the attribute exists and the value pass the test, the feature is discarded and written into _discarded.gff. +- If the attribute tag is missing (test cannot be applyed), the feature will be written into by default. If --na_aside parameter +is activated then it will be written into _na.gff. + Attribute are stored in the 9th column and have this shape: tag=value -/!\ Removing a level1 or level2 feature will automatically remove all linked subfeatures, and -removing all children of a feature will automatically remove this feature too. +/!\ Removing a level1 or level2 feature will automatically remove all linked subfeatures. +/!\ Removing all children of a feature will automatically remove this feature too (excepted if --keep_parental is activated). +/!\ If --keep_parental is not activated and --na_aside is activated, and all level3 features of a record are split between both _na.gff and _discarded.gff, +then the parental level1 and level2 features are removed and will end up in the _na.gff file only. =head1 SYNOPSIS @@ -378,7 +423,17 @@ =head1 OPTIONS Bolean. Deactivated by default. When activated the values provided by the --value parameter are handled case insensitive. +=item B<--na_aside> + +Bolean. Deactivated by default. By default if the attribute tag on which the filter is based is missing, the feature will be written into . +When activated, such features will be written into a separate file called _na.gff. + +=item B<--keep_parental> + +Bolean. Deactivated by default. When activated even if all child features have been removed, the parental one will be kept. + =item B<-t> or B<--test> + Test to apply (> < = ! >= <=). default value "=". If you use one of these two character >, <, please don't forget to quote the parameter like that "<=" otherwise your terminal will complain. @@ -386,7 +441,7 @@ =head1 OPTIONS =item B<-o> or B<--output> -Output GFF file. If no output file is specified, the output will be +Output GFF file. If no output file is specified, the output will be written to STDOUT. =item B<-v> diff --git a/bin/agat_sp_statistics.pl b/bin/agat_sp_statistics.pl index 4f3bce47..6ede4d16 100755 --- a/bin/agat_sp_statistics.pl +++ b/bin/agat_sp_statistics.pl @@ -57,8 +57,6 @@ if(defined($opt_yaml)){ if( defined($opt_output)){ $opt_yaml = $opt_output.".yaml"; - }else{ - $out } } diff --git a/docs/tools/agat_sp_filter_feature_by_attribute_value.md b/docs/tools/agat_sp_filter_feature_by_attribute_value.md index e0d37896..b9fa6e50 100644 --- a/docs/tools/agat_sp_filter_feature_by_attribute_value.md +++ b/docs/tools/agat_sp_filter_feature_by_attribute_value.md @@ -3,11 +3,14 @@ ## DESCRIPTION The script aims to filter features according to attribute value (9th column). -If the attribute tag is missing the feature will not be discarded. -If the attribute exists and the value pass the test, the feature is discarded. -Attribute are stored in the 9th column and have this shape: tag=value -/!\\ Removing a level1 or level2 feature will automatically remove all linked subfeatures, and -removing all children of a feature will automatically remove this feature too. +- If the attribute exists and the value do not pass the test, the feature is written into . +- If the attribute exists and the value pass the test, the feature is discarded and written into _discarded.gff. +- If the attribute tag is missing (test cannot be applyed), the feature will be written into by default. If --na_aside parameter is activated then it will be written into _na.gff. + +Attribute are stored in the 9th column and have this shape: tag=value. +/!\\ Removing a level1 or level2 feature will automatically remove all linked subfeatures. +/!\\ Removing all children of a feature will automatically remove this feature too (excepted if --keep_parental is activated). +/!\\ If --keep_parental is not activated and --na_aside is activated, and all level3 features of a record are split between both _na.gff and _discarded.gff, then the parental level1 and level2 features are removed and will end up in the _na.gff file only. ## SYNOPSIS @@ -43,6 +46,14 @@ agat_sp_filter_feature_by_attribute_value.pl --help Bolean. Deactivated by default. When activated the values provided by the --value parameter are handled case insensitive. +- **<--na\_aside** + + Bolean. Deactivated by default. By default if the attribute tag on which the filter is based is missing, the feature will be written into . + When activated, such features will be written into a separate file called _na.gff. + +- **<--keep\_parental>** + + Bolean. Deactivated by default. When activated even if all child features have been removed, the parental one will be kept. - **-t** or **--test** @@ -53,7 +64,7 @@ agat_sp_filter_feature_by_attribute_value.pl --help - **-o** or **--output** - Output GFF file. If no output file is specified, the output will be + Output GFF file. If no output file is specified, the output will be written to STDOUT. - **-v** diff --git a/lib/AGAT/OmniscientTool.pm b/lib/AGAT/OmniscientTool.pm index 2203c795..9e383cb5 100644 --- a/lib/AGAT/OmniscientTool.pm +++ b/lib/AGAT/OmniscientTool.pm @@ -868,7 +868,7 @@ sub remove_element_from_omniscient { if(@listok){ @{$hash_omniscient->{$level}{$tag_key}{$id_concern}}=@listok; } - else{ # The list is empty we could remove the key (otherwise we would have saved a emplty list) + else{ # The list is empty we could remove the key (otherwise we would have saved a empty list) delete $hash_omniscient->{$level}{$tag_key}{$id_concern}; } } @@ -878,6 +878,31 @@ sub remove_element_from_omniscient { } } +# Print a feature safely. +# +# It ensures that the feature is printed correctly without causing any errors according the output type. +# +# Parameters: +# $feature : the feature to print +# $out : AGAT::BioperlGFF / Bio::Tools::GFF object, or IO::File object or empty value +# Returns: +# None +# +# Note: +# This function should be used when printing features to avoid any potential errors. +sub print_feature_safe{ + my ($feature, $out)=@_; + + if($out){ + if($out->isa('AGAT::BioperlGFF' ) or $out->isa('Bio::Tools::GFF')){ + $out->write_feature( $feature ); + } + else{ + print $out $feature->gff_string()."\n"; + } + } +} + # @Purpose: remove from omniscient l1 feature and all subfeatures # @input: 3 => hash(omniscient hash), feature L1, optional fh to write case removed # @output: 1 => hash (nb feature removed) @@ -899,19 +924,19 @@ sub remove_l1_and_relatives{ foreach my $ptag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... if ( exists_keys( $omniscient, ('level3', $ptag_l3, $level2_ID) ) ){ foreach my $feature_l3 ( @{$omniscient->{'level3'}{$ptag_l3}{$level2_ID}}) { - $cases_l3++; - print $fh_removed $feature_l3->_tag_value('ID')."\n" if ($fh_removed); + $cases_l3++; + print_feature_safe( $feature_l3, $fh_removed ); } delete $omniscient->{'level3'}{$ptag_l3}{$level2_ID} # delete level3 } } $cases_l2++; - print $fh_removed $feature_l2->_tag_value('ID')."\n" if($fh_removed); + print_feature_safe( $feature_l2, $fh_removed ); } delete $omniscient->{'level2'}{$ptag_l2}{$id_l1} # delete level2 } } - print $fh_removed $feature->gff_string()."\n" if $fh_removed; + print_feature_safe( $feature, $fh_removed ); delete $omniscient->{'level1'}{$tag_l1}{$id_l1}; # delete level1 $cases_l1++; @@ -927,11 +952,11 @@ sub remove_l1_and_relatives{ # @input: 5 => hash(omniscient hash), featureL2, primary tag l1, id l1, optional fh to write case removed # @output: 1 => hash (nb feature removed) sub remove_l2_and_relatives{ - my ($omniscient, $feature, $ptag_l1, $id_l1, $fh_removed)=@_; + my ($omniscient, $feature, $ptag_l1, $id_l1, $fh_removed, $keep_parental)=@_; my %cases; - my $cases_l1 = 0; my $cases_l2 = 0; my $cases_l3 = 0; - my $ptag_l2 = lc($feature->primary_tag); + my $cases_l1 = 0; my $cases_l2 = 0; my $cases_l3 = 0; + my $ptag_l2 = lc($feature->primary_tag); my $level2_Parent_ID = lc($feature->_tag_value('Parent')); my $level2_ID = lc($feature->_tag_value('ID')); @@ -943,40 +968,41 @@ sub remove_l2_and_relatives{ foreach my $ptag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... if ( exists_keys( $omniscient, ('level3', $ptag_l3, $level2_ID) ) ){ foreach my $feature_l3 ( @{$omniscient->{'level3'}{$ptag_l3}{$level2_ID}}) { - $cases_l3++; - print $fh_removed $feature_l3->_tag_value('ID')."\n" if ($fh_removed); + $cases_l3++; + print_feature_safe( $feature_l3, $fh_removed ); } delete $omniscient->{'level3'}{$ptag_l3}{$level2_ID} # delete level3 } } } } - + # delete level2 and the hash pointer if the list is empty (no isoform left) my @id_concern_list=($id_l1); my @id_list_to_remove=($level2_ID); my @list_tag_key=('all'); $cases_l2++; - print $fh_removed $feature->gff_string()."\n" if ($fh_removed); + print_feature_safe( $feature, $fh_removed ); remove_element_from_omniscient(\@id_concern_list, \@id_list_to_remove, $omniscient, 'level2','false', \@list_tag_key); - - if( ! exists_keys($omniscient, ('level2', $ptag_l2, $id_l1) ) ){ - my $anotherL2Linked=0; - foreach my $ptag_l2 (keys %{$omniscient->{'level2'}}){ - if( exists_keys($omniscient, ('level2', $ptag_l2, $id_l1) ) ){ - $anotherL2Linked=1; - } - } - #The list was empty so l2 has been removed, we can now remove l1 - if(! $anotherL2Linked){ - if( exists_keys($omniscient, ('level1', $ptag_l1, $id_l1) ) ){ - $cases_l1++; - print $fh_removed $id_l1."\n" if ($fh_removed); - delete $omniscient->{'level1'}{$ptag_l1}{$id_l1}; - } - } - } + if (! $keep_parental){ + if( ! exists_keys($omniscient, ('level2', $ptag_l2, $id_l1) ) ){ + my $anotherL2Linked=0; + foreach my $ptag_l2 (keys %{$omniscient->{'level2'}}){ + if( exists_keys($omniscient, ('level2', $ptag_l2, $id_l1) ) ){ + $anotherL2Linked=1; + } + } + #The list was empty so l2 has been removed, we can now remove l1 + if(! $anotherL2Linked){ + if( exists_keys($omniscient, ('level1', $ptag_l1, $id_l1) ) ){ + $cases_l1++; + print_feature_safe( $omniscient->{'level1'}{$ptag_l1}{$id_l1}, $fh_removed ); + delete $omniscient->{'level1'}{$ptag_l1}{$id_l1}; + } + } + } + } } $cases{'l1'} = $cases_l1; @@ -991,61 +1017,65 @@ sub remove_l2_and_relatives{ # @input: 7 => hash(omniscient hash), feature L3, primary tag l2, id l2, primary tag l2, id l2, optional fh to write case removed # @output: 1 => hash (nb feature removed) sub remove_l3_and_relatives{ - my ($omniscient, $feature, $ptag_l1, $id_l1, $ptag_l2, $id_l2, $fh_removed)=@_; - + my ($omniscient, $feature, $ptag_l1, $id_l1, $ptag_l2, $id_l2, $fh_removed, $keep_parental)=@_; + my %cases; my $cases_l1 = 0; my $cases_l2 = 0; my $cases_l3 = 0; - my $level3_Parent_ID = lc($feature->_tag_value('Parent')); - my $id_l3 = lc($feature->_tag_value('ID')); + my $level3_Parent_ID = lc($feature->_tag_value('Parent')); + my $id_l3 = lc($feature->_tag_value('ID')); my $ptag_l3 = lc($feature->primary_tag); - if ( exists_keys( $omniscient, ('level3', $ptag_l3, $id_l2) ) ){ # just extra security in case - foreach my $feature_l3 ( @{$omniscient->{'level3'}{$ptag_l3}{$id_l2}}) { - - if($id_l3 eq lc($feature_l3->_tag_value('ID')) ){ - #remove one feature and pointer if no more feature left in the list - my @id_concern_list=($level3_Parent_ID); - my @id_list_to_remove=($id_l3); - my @list_tag_key=('all'); - $cases_l3++; - print $fh_removed $feature->gff_string()."\n" if ($fh_removed); - remove_element_from_omniscient(\@id_concern_list, \@id_list_to_remove, $omniscient, 'level3','false', \@list_tag_key); - } - } - } - - # List empty check if we remove l2 or other l3 linked to it - if( ! exists_keys($omniscient, ('level3', $ptag_l3, $id_l2)) ){ - foreach my $tag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if ( exists_keys( $omniscient, ('level3', $tag_l3, $id_l2) ) ){ - $cases{'l1'} = $cases_l1; - $cases{'l2'} = $cases_l2; - $cases{'l3'} = $cases_l3; - $cases{'all'} = $cases_l1+$cases_l2+$cases_l3; - - return \%cases; - } - } + if ( exists_keys( $omniscient, ('level3', $ptag_l3, $id_l2) ) ) { # just extra security in case + foreach my $feature_l3 ( @{$omniscient->{'level3'}{$ptag_l3}{$id_l2}}) { + + if($id_l3 eq lc($feature_l3->_tag_value('ID')) ){ + #remove one feature and pointer if no more feature left in the list + my @id_concern_list=($level3_Parent_ID); + my @id_list_to_remove=($id_l3); + my @list_tag_key=('all'); + $cases_l3++; + print_feature_safe( $feature, $fh_removed ); + remove_element_from_omniscient(\@id_concern_list, \@id_list_to_remove, $omniscient, 'level3','false', \@list_tag_key); + } + } + } - # if we arrive here it means no more L3 feature is attached to L2 - # we remove the L2 parent properly (if isoforms they are kept) - my @id_concern_list=($id_l1); - my @id_list_to_remove=($id_l2); - my @list_tag_key=('all'); - $cases_l2++; - print $fh_removed $id_l2."\n" if ($fh_removed); - remove_element_from_omniscient(\@id_concern_list, \@id_list_to_remove, $omniscient, 'level2','false', \@list_tag_key); + if (! $keep_parental){ + # List empty check if we remove l2 or other l3 linked to it + if( ! exists_keys($omniscient, ('level3', $ptag_l3, $id_l2)) ){ + foreach my $tag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... + if ( exists_keys( $omniscient, ('level3', $tag_l3, $id_l2) ) ){ + $cases{'l1'} = $cases_l1; + $cases{'l2'} = $cases_l2; + $cases{'l3'} = $cases_l3; + $cases{'all'} = $cases_l1+$cases_l2+$cases_l3; + return \%cases; + } + } - # Should we remove l1 too? - if( ! exists_keys($omniscient, ('level2', $ptag_l2, $id_l1) ) ){ - #The list was empty so l2 has been removed, we can now remove l1 - if( exists_keys($omniscient, ('level1', $ptag_l1, $id_l1) ) ){ - $cases_l1++; - print $fh_removed $id_l1."\n" if($fh_removed); - delete $omniscient->{'level1'}{$ptag_l1}{$id_l1} - } - } - } + # if we arrive here it means no more L3 feature is attached to L2 + # we remove the L2 parent properly (if isoforms they are kept) + if( exists_keys($omniscient, ('level2', $ptag_l2, $id_l1)) ){ + my @id_concern_list=($id_l1); + my @id_list_to_remove=($id_l2); + my @list_tag_key=('all'); + $cases_l2++; + my $feature_l2 = get_feature_l2_from_id_l2_l1($omniscient, $id_l2, $id_l1); + print_feature_safe( $feature_l2, $fh_removed ); + remove_element_from_omniscient(\@id_concern_list, \@id_list_to_remove, $omniscient, 'level2','false', \@list_tag_key); + + # Should we remove l1 too? + if( ! exists_keys($omniscient, ('level2', $ptag_l2, $id_l1) ) ){ + #The list was empty so l2 has been removed, we can now remove l1 + if( exists_keys($omniscient, ('level1', $ptag_l1, $id_l1) ) ){ + $cases_l1++; + print_feature_safe( $omniscient->{'level1'}{$ptag_l1}{$id_l1}, $fh_removed ); + delete $omniscient->{'level1'}{$ptag_l1}{$id_l1} + } + } + } + } + } $cases{'l1'} = $cases_l1; $cases{'l2'} = $cases_l2; @@ -1623,6 +1653,13 @@ sub group_l1IDs_from_omniscient { return \%group; } +# Retrieves the feature level 2 from the given level 2 and level 1 IDs. +# +# Parameters: +# - $id_l2: The ID of the level 2 feature +# - $id_l1: The ID of the parent level 1 feature +# Returns: +# - The feature at level 2 corresponding to the given ID. sub get_feature_l2_from_id_l2_l1 { my ($hash_omniscient, $id_l2, $id_l1) = @_ ; foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){