-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculateNx.py
66 lines (56 loc) · 1.88 KB
/
calculateNx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import sys
import re
import numpy as np
from Bio import SeqIO
input_fa = sys.argv[1]
seq_len = []
for rec in SeqIO.parse(input_fa,'fasta'):
seqs = re.split("N{10,}",str(rec.seq))
for seq in seqs:
seq_len.append(len(seq))
seq_len.sort(reverse=True)
seq_len_cumsum = list(np.cumsum(seq_len))
total_length = np.sum(seq_len)
Nx = {"N10":[],
"N20":[],
"N30":[],
"N40":[],
"N50":[],
"N60":[],
"N70":[],
"N80":[],
"N90":[],
"N100":[]}
for i,j in enumerate(seq_len_cumsum):
if j >= 0.1*total_length and j < 0.2*total_length:
#print("N10: ",)
Nx["N10"].append(seq_len[i])
elif j >= 0.2*total_length and j < 0.3*total_length:
#print("N20: ",seq_len[i])
Nx["N20"].append(seq_len[i])
elif j >= 0.3*total_length and j < 0.4*total_length:
#print("N30: ",seq_len[i])
Nx["N30"].append(seq_len[i])
elif j >= 0.4*total_length and j < 0.5*total_length:
#print("N40: ",seq_len[i])
Nx["N40"].append(seq_len[i])
elif j >= 0.5*total_length and j < 0.6*total_length:
#print("N50: ",seq_len[i])
Nx["N50"].append(seq_len[i])
elif j >= 0.6*total_length and j < 0.7*total_length:
#print("N60: ",seq_len[i])
Nx["N60"].append(seq_len[i])
elif j >= 0.7*total_length and j < 0.8*total_length:
#print("N70: ",seq_len[i])
Nx["N70"].append(seq_len[i])
elif j >= 0.8*total_length and j < 0.9*total_length:
#print("N80: ",seq_len[i])
Nx["N80"].append(seq_len[i])
elif j >= 0.9*total_length and j < 1*total_length:
#print("N90: ",seq_len[i])
Nx["N90"].append(seq_len[i])
elif j >= 1*total_length:
#print("N100: ",seq_len[i])
Nx["N100"].append(seq_len[i])
for key,value in Nx.items():
print(key,value[0])