Skip to content

Commit f7983b9

Browse files
fixed bug in compare_clusters.pl when .cluster_list file is not parsed, due to previous changes in find_taxa_FASTA_array_headers
1 parent 6b7cff9 commit f7983b9

File tree

2 files changed

+20
-14
lines changed

2 files changed

+20
-14
lines changed

CHANGES.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -216,4 +216,4 @@
216216
17082017: updated manuals and plot_matrix_heatmap.sh with options -r (remove column names and cell contents) and -k (set name for color key X-axis)
217217
19082017: added options -d (max no. decimals) and -x (filter matrix with regex) to plot_matrix_heatmap.sh
218218
28082017: added parse_pangenome_matrix.pl -x to compute cluster intersection between taxa in a pangenome matrix (thanks Sean and John!)
219-
219+
28082017: fixed bug in compare_clusters.pl when .cluster_list file is not parsed, due to previous changes in find_taxa_FASTA_array_headers (thanks Audrey Bioteau)

compare_clusters.pl

+19-13
Original file line numberDiff line numberDiff line change
@@ -232,26 +232,36 @@
232232
next if(-d "$dir/$file"); #print "$dir/$file\n";
233233

234234
# read sequences in each cluster
235-
my ($clusterkey,$cluster_data,$n_of_cluster_seqs) = ('','',0);
235+
my ($clusterkey,$cluster_data,$n_of_cluster_seqs,$taxon_name) = ('','',0);
236236
my (@choppedseqs,@clusterseqs,%cluster_taxa,@gis,@neighbors,@sorted_taxa);
237237
my $cluster_ref = read_FASTA_file_array("$dir/$file");
238238

239-
if($taxa{$file}) # previosly read from cluster list file
239+
if($taxa{$file}) # previously read from .cluster_list file
240240
{
241241
%cluster_taxa = %{$taxa{$file}};
242-
delete($cluster_taxa{'sorted_taxa'}); # otherwise it would count as one extra taxa; it is conserved in %taxa
242+
delete($cluster_taxa{'sorted_taxa'}); # otherwise it would count as one extra taxon; the key is conserved in %taxa
243+
# example entry (presumably taxon name followed by its sequence count): Uncultured_bacterium_plasmid_pRSB205.gb 1
243244
}
244245
else # automatically extracted from headers, error prone
245246
{
246-
%cluster_taxa = find_taxa_FASTA_array_headers($cluster_ref,1);
247+
my %cluster_taxa_in_headers = find_taxa_FASTA_array_headers($cluster_ref,1);
248+
249+
foreach $taxon (keys(%cluster_taxa_in_headers))
250+
{
251+
$taxon_name = $taxon;
252+
$taxon_name =~ s/\[|\]//g;
253+
$cluster_taxa{$taxon_name} = $cluster_taxa_in_headers{$taxon}{'SIZE'};
254+
}
255+
247256
foreach $seq (0 .. $#{$cluster_ref})
248257
{
249-
foreach $taxon (keys(%cluster_taxa))
258+
foreach $taxon (keys(%cluster_taxa_in_headers))
250259
{
251-
if(grep(/^$seq$/,@{$cluster_taxa{$taxon}{'MEMBERS'}}))
260+
if(grep(/^$seq$/,@{$cluster_taxa_in_headers{$taxon}{'MEMBERS'}}))
252261
{
253-
$taxon =~ s/\[|\]//g;
254-
push(@{$taxa{$file}{'sorted_taxa'}},$taxon);
262+
$taxon_name = $taxon;
263+
$taxon_name =~ s/\[|\]//g;
264+
push(@{$taxa{$file}{'sorted_taxa'}},$taxon_name);
255265
}
256266
}
257267
}
@@ -470,7 +480,6 @@
470480
my @intersection_keys;
471481
foreach my $key (keys(%stats))
472482
{
473-
474483
# intersection steps
475484
next if($stats{$key}{'total'} != $n_of_dirs);
476485

@@ -596,10 +605,7 @@
596605
my $pangenome_fasta_file = $INP_output_dir . "/pangenome_matrix$params\.fasta";
597606
my $pangenome_matrix_file = $INP_output_dir . "/pangenome_matrix$params\.tab";
598607

599-
# 1) ordena taxa por clustering jerarquico ,de la matriz pangenomica
600-
# codigo en python en collective intelligence para hacer clusters y pintar dendrograma
601-
# 2) ordena clusters (en horizontal) de mas frecuentes a menos, de core a pan
602-
608+
# 1) sort clusters
603609
my @taxon_names = keys(%pangemat);
604610
my (%cluster_names,$cluster_name,$file_number,%file_name);
605611
for($taxon=0;$taxon<scalar(@taxon_names);$taxon++)

0 commit comments

Comments
 (0)