-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtarea4.py
121 lines (100 loc) · 4.09 KB
/
tarea4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import numpy as np
from trie import Trie
def dp_levenshtein_trie(x, trie, th):
"""
Encuentra la distancia de edición de Levenshtein entre dos cadenas x e y
Los resultados devueltos están limitados a una distancia de edición th
Hace uso de la estructura de datos trie (árbol de prefijos)
"""
if th == None: th = float("inf")
results = {}
states = trie.get_num_states()
tam_x = len(x)
current = np.zeros(states)
pre = np.zeros(states)
#Recorremos todos los nodos del trie y asignamos un coste a cada prefijo
for i in range(1, states):
current[i]= current[trie.get_parent(i)] + 1
#Vamos recorriendo las letras de la palabra x
for i in range(1, tam_x + 1):
pre[0] = i
#Para cada letra cogemos la operación de coste mínimo
for j in range(1,states) :
pre[j] = min(current[j] + 1, #insercion
pre[trie.get_parent(j)] + 1, #borrado
current[trie.get_parent(j)] if x[i-1] == trie.get_label(j) else current[trie.get_parent(j)] + 1 #sustitucion
)
if min(pre) > th: return {} #Si supera el threshold salimos
current, pre = pre, current
#Recorremos todos los estados, si son finales y menores que el threshold añadimos a result
for i in range(states):
if trie.is_final(i):
if current[i] <= th: results[trie.get_output(i)] = current[i]
return results
def dp_restricted_damerau_trie(x, trie, th):
d = np.zeros((trie.get_num_states() + 1, len(x) + 1))
results = {}
for i in range(1, trie.get_num_states() + 1):
d[i, 0] = d[trie.get_parent(i), 0] + 1
for j in range(1, len(x) + 1):
d[0, j] = d[0, j - 1] + 1
for i in range(1, trie.get_num_states() + 1):
d[i, j] = min(
d[trie.get_parent(i), j] + 1,
d[i, j - 1] + 1,
d[trie.get_parent(i), j - 1] + (trie.get_label(i) != x[j - 1])
)
if i > 1 and j > 1 and x[j - 2] == trie.get_label(i) and x[j - 1] == trie.get_label(
trie.get_parent(i)):
d[i, j] = min(
d[i, j],
d[trie.get_parent(trie.get_parent(i)), j - 2] + 1
)
if (min(d[:, j]) > th):
return th + 1
for i in range(trie.get_num_states()):
if trie.is_final(i):
if d[i, len(x)] <= th: results[trie.get_output(i)] = d[i, len(x)]
return results
def dp_intermediate_damerau_trie(x, trie, th):
# TODO
return []
words = ["algortimo", "algortximo","lagortimo", "agaloritom", "algormio", "ba"]
words.sort()
trie = Trie(words)
test = ["algoritmo", "acb"]
thrs = range(1, 4)
for threshold in thrs:
print(f"threshols: {threshold:3}")
for x in test:
for dist,name in (
(dp_levenshtein_trie,"levenshtein"),
(dp_restricted_damerau_trie,"restricted"),
(dp_intermediate_damerau_trie,"intermediate"),
):
print(f"\t{x:12} \t{name}\t", end="")
print(dist(x, trie, threshold))
"""
Salida del programa:
threshols: 1
algoritmo levenshtein []
algoritmo restricted [('algortimo', 1)]
algoritmo intermediate [('algortimo', 1)]
acb levenshtein []
acb restricted []
acb intermediate []
threshols: 2
algoritmo levenshtein [('algortimo', 2)]
algoritmo restricted [('algortimo', 1), ('lagortimo', 2)]
algoritmo intermediate [('algormio', 2), ('algortimo', 1), ('lagortimo', 2), ('algortximo', 2)]
acb levenshtein []
acb restricted []
acb intermediate [('ba', 2)]
threshols: 3
algoritmo levenshtein [('algormio', 3), ('algortimo', 2), ('algortximo', 3)]
algoritmo restricted [('algormio', 3), ('algortimo', 1), ('lagortimo', 2), ('algortximo', 3)]
algoritmo intermediate [('algormio', 2), ('algortimo', 1), ('lagortimo', 2), ('agaloritom', 3), ('algortximo', 2)]
acb levenshtein [('ba', 3)]
acb restricted [('ba', 3)]
acb intermediate [('ba', 2)]
"""