-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathpbcluster_haplo.pl
executable file
·92 lines (77 loc) · 1.72 KB
/
pbcluster_haplo.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/perl -w
#
# AUthor: Jue Ruan
#
use strict;
my $core = shift or die("Usage: $0 <pb_name> <msa>\n");
my @seqs = ();
my $ref = undef;
while(<>){
my @ts = split;
$ts[1] = uc $ts[1];
push(@seqs, [$ts[0], $ts[1], 0]);
if($ts[0] eq $core){
$ref = $ts[1];
}
}
die("No sequences") if(@seqs < 1);
die("Cannot find $core") if(not defined $ref);
my $N = @seqs;
my $M = length $ref;
my @weights = ();
for(my $i=0;$i<$M;$i++){ push(@weights, 1); }
for(my $iter=0;$iter<4;$iter++){
for(my $i=0;$i<$N;$i++){
my $mat = 0;
for(my $j=0;$j<$M;$j++){
my $a = substr($ref, $j, 1);
my $b = substr($seqs[$i][1], $j, 1);
$mat += $weights[$j] if($a eq $b and $a ne '-');
}
$seqs[$i][2] = $mat;
}
@seqs = sort {$b->[2] <=> $a->[2]} @seqs;
my $par = 1;
my $lst = -1;
while(1){
my $cnt = int($N / $par);
$par ++;
last if($cnt < 3);
next if($cnt == $lst);
$lst = $cnt;
call_cns($cnt);
#last if($par > 8);
}
print join("\n", map {join("\t", @$_)} @seqs), "\n";
}
1;
sub call_cns {
my $CNT = shift;
my @cns = ();
for(my $i=0;$i<$M;$i++){
push(@cns, '-'),next if(substr($ref, $i, 1) eq '-');
my @bases = ();
for(my $j=0;$j<$N;$j++){
my $c = substr($seqs[$j][1], $i, 1);
push(@bases, [$c, $N - $j]) if($c ne '-');
}
my %hash = ();
my $cnt = $CNT;
$cnt = @bases if($cnt > @bases);
for(my $j=0;$j<$cnt;$j++){
my $c = $bases[$j];
next if($c->[0] eq '-');
$hash{$c->[0]} += $c->[1];
}
my $max = [1, '-'];
foreach my $c (sort keys %hash){
#print "$c:$hash{$c}\t";
$max = [$hash{$c}, $c] if($hash{$c} > $max->[0]);
}
#print "\n";
#print "$i\t$max->[1]\t", join('', map{$_->[0]} @bases), "\n";
push(@cns, $max->[1]);
}
$ref = join('', @cns);
print "REF[$CNT]\t$ref\n";
}