-
Notifications
You must be signed in to change notification settings - Fork 0
/
187_RepeatedDNASequences.py
49 lines (46 loc) · 6.49 KB
/
187_RepeatedDNASequences.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
class Solution:
# @param {string} s
# @return {string[]}
def findRepeatedDnaSequences(self, s):
if len(s) <= 10:
return []
seqs = set()
result = set()
for i in range(len(s) - 9):
key = s[i:i+10]
if key not in seqs:
seqs.add(key)
else:
result.add(key)
return list(result)
def afindRepeatedDnaSequences(self, s):
if len(s) <= 10:
return []
result = []
for startIdx in range(len(s) - 9):
# window = s[startIdx:startIdx+10]
nextStart = startIdx + 1
while True:
while s[startIdx] != s[nextStart]:
nextStart += 1
if nextStart + 10 >= len(s):
break
if s[startIdx] != s[nextStart] or nextStart + 10 >= len(s):
break
match = True
for i in range(10):
if s[startIdx+i] != s[nextStart+i]:
match = False
break
if match:
result.append(s[startIdx:startIdx+10])
break
nextStart += 1
return result
if __name__ == "__main__":
sol = Solution()
print sol.findRepeatedDnaSequences("AAAAAAAAAAA")
print sol.findRepeatedDnaSequences("CAAAAAAAAAC")
# print sol.findRepeatedDnaSequences("AAAAAGGGGGAAAAAGGGGGGAAAAACCCCTTT")
print sol.findRepeatedDnaSequences("GAGAGAGAGAG")
# print sol.findRepeatedDnaSequences("CCAAGTCTAAAAGAATGGGCAGAGTTGCTCTGCACGTTCCGGATACGATAAGAAGACTCCGCCGGGCACCGCCAGTCCCTCGCAGGGGGACACATTGGAATCGGATAATCGATCACTCCGCGCCACCCCGGGTTACGCACCTCACGTTGAGTGAGGAGATGGTGACATTTCGGCACCTATAGGCACGCTCTACCGGAACAGCATCTGTATCAGATGGCAATTCGATGGTATCTTGCGTGAATCAGGAAGTTTTATGTGGCTTCCGGATGACGCGAGATTTTGCGTGAGGTGAGCGGTTGCGTTACCGGTTAGTAGCCCTTAGTAGTTGAAGACAGGCGAGCAGGTGCAGCTGTCCACACGGTATAGGGCAATCAACTTCCATAAAGCATATGTTGTGTAACACCTGCCACCCCTTGTCACAGGTTTCACCAAGACCACATCGTGCTAAAGTTAAACAACATTCGTAAAAGTGCAGAAATTGAAGTATTTGATCTGAGATTACGTTTCAATCTGGAATCCGAAGCTATAGGAGAATGTTGGACCATACGGTGAGTAGAATCGCCACTCACGTTTCGGTACTAGCACATTAGTACGGATCCCGATCGGGGTGGTGGGGCGTTCTTCATGTATACCATTGCGACGGAGTCTGGCTAAACCTTTAGGCAGGTCGCAAGGCAGAATCATGAGCACAATCAGGGTGGCCTGGATCCGTCATGGAGGAATCGGGTCTTCCCGTTTGAGGGGGCGGACTTCGCGGCGCACGTCAGGACACCAAAGGCTCTAGTACCCTCCTATCGAAACTCTTTAGCGAGTTTTGTCCAGCTAGCTTCATCTAACAGGCTCAAAAGTCATAAGAAGGCTCACTTTAAGTAAAAAGTCAGGCGCAAAGTCTAGTGGCGCATTTGATACGTTCCCAGGGCACACATGCACCCCTCTGCAACATTTCAGGCCGCAATTACACTGTAGCTCACCGGGGTCGAAATAGTTAACTCAAGTTTCCGGTCCCTTCCCCACAAAGATCTGTATATGAACCATCAAGACGTGGCTAATAATTATATCTCCGGCCCCTGCCACAGGAGAGAGGGAGCCGGAATGACCGTAATTAGCTCATAAAACTTACCAGCTTATAGGAAGTCGGCTTCTTTGATGACTACGAAATGGTAGGGCGTTCGTTATGTGCTCTCGCGTTCAACATCCGGCGCGACCAATATGCCAACATTCGGAAGCGTGTGATAAATTGCTGAATCCATAGTCCCATAGTGGTTGATAGTTACTCCGGTTCTGAACTTGTGTAATAAATGCCAACATTCTCGTTACTCAGAACGACAGTCAAAGTGCGTGTCACGTTGGGCCTGGGGCACGTCGCTGGTATTCGGGTACTTTACTAATGAATAAGGATTGTGTTGTAGGCCCCTTAAAAACAGGGAATGGGCCGAATACATTTAGCTTGATTCCAGCGGTAGTACTTGGCGGAGTCATGTACCTTACCCGCGCATATAACTCCAGTCGGACATACGTTAGAATTTTGAAAAGGCTGGCGGGGACCGCGAGATGTCCGCCACCCATGTCGCGGCGTGAGGTGACATGACAGCCAGGCAGTTGGCTCGGTGTCCCAGGTGCACGGCCTCACTTAAACTAGCACCCGACGGATTCTTCATACCTGACGTGTGTATTTTTTGACTCTGAGGTTCTGAATAACATCCGTAGGCACATGTGAGAGCAGGGACGGTTCATCACCTATGGTCTCCAGGTACAGCCTACGTGGGGTAGGACCACAACACTCTGTGTGCCATACGGTCGGTCAAGACCTAATGGCGGGAAAAGTGGCTTTAGTCCCGTTGTACGAGGGAAATCAGTTTGGTTACGTGAAAAGTTAATGCTGCATCCAGTGACTACGATATTACCCGGCGAAACGAGATCGTAGTAGTTCTATGTCGGGCTGTCACCTAAGACACCATAAAAGGCGATTAAATATGGATGTGCGGAAGGGTGACTTCTACCCTCAAGACAACTGACTACCACCTTATACGCGCTTCATCGGGAGCTAGCGAGGCGCATCGCACAGTTACAAAGGGGTGTGGTAGGCACCCTGAACACAGAGAGTCACGCCGTGGGAAGACGACGGCGCCCGAACTGCTATCAGCTTCAAACTTCCAACGCCCATCGCGAGCGTACTGAGTTTTCACAGCGGGCTTCCACAAAGAAGTGGCCAGGCGACACCACATTACTGGGTCTGAGCCTCAGGCCAGGCGAAGTAATTGGTTTTGTGGGAATAGTGCACAAATGACCCATATGTGTTGTCAAGTCCCCTGCGACCGTTCGTCGGCGTACCCTTCGCTAATTTCCTAAGCACAATAATTGCAACCCCAAATGAGTCTCGTATTAGAGTAACGCAGAGTTAGGCTCCCCGAAAACGCTAGTCCGGAGCTTGGGATAAAAATAATGATGTAGGGCGGGACCCCGTACTTTCGCATAACAGGGTTTTTGTCGGTTGGCTTGTAAATGCAACTTGGGTTCCACAATCCCACTGGACAAGACAGGAAGGTGAAGAGGGAGCCGATTAAACCCAACTCAACTAGAACTTAGATCTTTCATTCACATCGTGTCAGTACAAATTTGAAAGAGAAGTAGGTACATGGGAGGAACGGGTTACGCCGAGTCTGATATTCTGTGGGAACTCCGTCTGGTCGCAGAGTTACGCCATCACAGCATTGGCTGAGTATCCAATTTGCCTATCACGCAACTACCATTTGCCGATAGCGGACCGACCCTATTTGAGCTATGGTATGTTCACGAATACAACTACGTCTGACAAGACGAGATCCTAAGCACAACTCCTACGATTCCGGACGTCTGGCCCTTGGAGCACTAATCCCTGGGGAATGTCACCAGAGATGTTTACGGCATGAAGAATGGAGGTCACAATTATTGAGAGACGGCGGTATGCACCATATCCAGGGTGCAGTGAGACGATAGACGTAGGGGAGCGGAGCGTCGAGGTGTCTCTGCCAAAGGGCCCCAATGATCCTGAATGGTGTAATCCGGAGACTCGATGGTATCCGCCGCAACGGTTCACTCCGCGTATGGCAGTTGGCTACGTGGTCGCGAGGACAGCTGTACGTTAAGCTAAACGATCCCTTAGCCTCTCGCAGATCGAAGTGCTAATAGTCCTGTCGCAGCCAGGATTGCGAAACTACGACTCAGGGGTAATCGCGGATAGCCTGATTTCATCCGAAAGACCCACACTATAGTCTGTGGTCGGTCCCAGGCGGTTCACGCCACCGGCGACATCGGCAAGTCTACAGGGGTTGGGCCAATTTCCGATAGGATCTCGGAAATGGTATCTCCGACCAGCGCAAACGCGCCACCCTGCGAGTCCGGAGTTTGCTACTCTTCAAATGAACCGATCGCGTCGTCGTTGAAACCGTGAGAGCAACCAGAATTACATTAAGCTCATCGGGATATTGATCCAATTGGGTAAATGTCAACGCTTCAATTTTTGCCGTGTCGCAACCTCCGGATAAACTTAGTACAGATCTTTTGTCGTAGAACGTTGTGGAAGCAGCAGGGAGACCCTGTACTTTCGTCACCTATCAACGTTTCCACCTCTGGTGATGTAAAAGCACGCAACCTACTCCGATACCAATCTGAGTTATGCTGTAAATTATTGGACGCCAATGGGATAAATGGGTAACGTGACCGATACGAAGCTGGCCCTTTCTCCCTCCAGTACCGATGGAATCTAGTACTTTTATAATGATTGGCACGGACGCACCGGTGTGGTTGACCGATTTGGCGGTAACATTGCGGGTTAGGGGGTTAATATATAACGCCTTCACCGATCGCACTACACGCAGATCGGAGGCTCCACTCAGACGTAGAATTTAAACAGAGATGGCTAGCGAAACTCCAAGCAGACGGCAAGGCTGTTCACCAGGAAGCCCCCTTCAAGATTCGTATTCAGGTATTGAGCAAACATCACTGACATTATTTTTGGATTAGCATAGGAAATATATACCGACTCGCGGTGGAGGTCAATGTCAACAGTGCGTGTTTTTAATGTCTGAACAATTTGCCTCCAAAATCTTGTTCGTTATCTCCCGATCAATTACCGCTAGCATTCGGGCATCTTGCGACCCAAGATTGAGATGAGGTGTGAACACTATCTGCACTGACAGCGGTCATAGCGGCGTAAAAATCGTGCGGAGGGAACCTTCACTCATGAGCAGCGGTGTAAAACCCTATGAGACAATTCGCTCCGATGGATAGTGTTGCGCTGACGTCCCACCGGACCAGCAGATATTCAACTCGAGAACATCTTAAAGTTTCCATCTAGGGCGGACCCTCAGGCTTTGCACGTCGGGGCTTAATGGTAATATGAGGTACGATTTAGTCGAATGGTTAAAGACCGTGACCGTGATATGCGCAACAATTCCCTCTGATACCCACTCATCGGGGTGGATTCTGAACACAGTCTGCATAAGTCTCAACCCATGGACGGGGAACTGTTACGAAATATGCAGGGGTCCGATTAGTCCCAGGGTGAGTCTTCCCTTACCGATCTCCACCGTGTTCCATGAGTCGCCGGTGTTTGATTACTGTCTATAGGCCTTACCGTCGTTACTGAGATTTATGGGCGGGGACGTTCGACTGCACTTTCAATTGGCAGAATTCCGTTAAATAAGGACGCAGTTGTCCGCGCACTTCATACCGTTGAGAAAGCAATACATTTTCTTGACACTCCCGGCATGTCGTCATATAGTACCGTCTAGCATTCTGCTAGCTCAAAAGCTCTCTGGCACCGGCATGTGTGACGTATCGAAAGAACAGCACTACAGCAGGCGAGTAGCTCCGGGCTACTTTGTTGCACAGCTACATGCAGTGGTGGTAACTTCGATTGATCCCGGCTCTAGGACTGAGCTTTAGCGAGTCCCTACCCGGCAAGTTCGCCTGATCCTGTCTTGGATGAACTTGGGCGGGCTCTGGCGGGTTGTTAGATC")