-
Notifications
You must be signed in to change notification settings - Fork 0
/
alignment_problem.py
187 lines (142 loc) · 4.43 KB
/
alignment_problem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import random
random.seed(10)
# Recursive Function
def generateSequence(chars):
random.shuffle(chars)
return chars
# Helper Function
def generateChars(charset):
chars = [x for pair in zip(charset,charset) for x in pair]
chars = [x for pair in zip(chars,chars) for x in pair]
return chars
# 1) Randomize S1, S2
charset = ['A', 'C', 'G', 'T']
chars = generateChars(charset)
s1 = "".join(generateSequence(chars))
chars = generateChars(charset)
s2 = "".join(generateSequence(chars))
# TEST STUB
s1 = "ACACACTA"
s2 = "AGCACACA"
print(f"s1: {s1} \ns2: {s2}")
#%%
# ALIGNMENT MATRIX
l1 = len(s1)
l2 = len(s2)
# Match & Mismatch Scores
match_score = 2
mismatch_score = -1
# Initialise Alignment Matrix
alignment_matrix = [[0 for i in range(l1+1)] for i in range(l2+1)]
# Print Alignment Matrix
def printMatrix(matrix):
for i in matrix:
for j in i:
print(j, end='\t')
print()
print()
# printMatrix(alignment_matrix)
#%%
# The characters of 's1' are on the Top of the Matrix
# The characters of 's2' are to the Left of the Matrix
# 'i' iterates through every row
# 'j' iterates through every column
def dp(s1, s2, alignment_matrix, i, j):
# If last row is completed, exit operations
if i > len(s2):
return alignment_matrix
else:
# If last element in row is reached,
if j > len(s1):
# Go to the next row
dp(s1, s2, alignment_matrix, i+1, 1)
return alignment_matrix
else:
# MATCH
if s1[j-1] == s2[i-1]:
alignment_matrix[i][j] = alignment_matrix[i-1][j-1] + match_score
# MISMATCH
else:
# DO NOT look at diagonal element if Mismatch
alignment_matrix[i][j] = max(alignment_matrix[i-1][j] + mismatch_score,
alignment_matrix[i][j-1] + mismatch_score,
0)
# CALL the function recursive on next cell (to the right)
dp(s1, s2, alignment_matrix, i, j+1)
print()
# Calculate values of Alignment Matrix
dp(s1, s2, alignment_matrix, 1, 1)
print("*** Alignment Matrix ***\n")
printMatrix(alignment_matrix)
def tracepath(s1, s2, alignment_matrix, alignment1, alignment2, i, j):
# If you reached a point of 'no match' = '0'
if alignment_matrix[i][j] == 0:
alignment1 = "".join(alignment1)[::-1]
alignment2 = "".join(alignment2)[::-1]
# Print the aligned sequences
print(f"*** Alignment Sequence ***")
print(f"s1: {alignment1}")
print(f"s2: {alignment2}")
return
# Temporary variable
directions = [alignment_matrix[i-1][j-1], # DIAGONAL
alignment_matrix[i-1][j], # UP
alignment_matrix[i][j-1]] # LEFT
direction = directions.index(max(directions))
if direction == 0: # DIAGONAL
alignment1.append(s1[j-1])
alignment2.append(s2[i-1])
tracepath(s1, s2, alignment_matrix, alignment1, alignment2, i-1, j-1)
elif direction == 1: # UP
# Gap in sequence 's1'
alignment1.append('_')
alignment2.append(s2[i-1])
tracepath(s1, s2, alignment_matrix, alignment1, alignment2, i-1, j)
else: # LEFT
alignment1.append(s1[j-1])
# Gap in sequence 's2'
alignment2.append('_')
tracepath(s1, s2, alignment_matrix, alignment1, alignment2, i, j-1)
# SCORES
# Score 12
i = 8
j = 8
print(f"\nAlignment Score = {alignment_matrix[i][j]}")
print(f"Start position: ({i}, {j})")
tracepath(s1, s2, alignment_matrix, [], [], i, j)
# Score 11
i = 7
j = 6
print(f"\nAlignment Score = {alignment_matrix[i][j]}")
print(f"Start position: ({i}, {j})")
tracepath(s1, s2, alignment_matrix, [], [], i, j)
# Score 10
i = 7
j = 7
print(f"\nAlignment Score = {alignment_matrix[i][j]}")
print(f"Start position: ({i}, {j})")
tracepath(s1, s2, alignment_matrix, [], [], i, j)
# Score 10
i = 8
j = 6
print(f"\nAlignment Score = {alignment_matrix[i][j]}")
print(f"Start position: ({i}, {j})")
tracepath(s1, s2, alignment_matrix, [], [], i, j)
#%%
'''
Note: Explore Wunsh-Needleman Algorithm
Difference between this and Smith-Waterman Algorithm - Local & Global Score
A C A C A C T A
0 0 0 0 0 0 0 0 0
A 0
G 0
C 0
A 0
C 0
A 0
C 0
A 0
Assumptions:
(i) End at index [0,0].
(ii) Do not look at diagonal element in case of Mismatch.
'''