You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
""" Manber-Myers suffix array construction - O(n*log^2(n)) - inspired by GeeksForGeeks """
5
+
6
+
defbuild_suffix_arr(string):
7
+
""" Constructs and returns the suffix array and LCP array """
8
+
length=len(string)
9
+
10
+
# During computation, every suffix is represented by three numbers: the rank of its first half, the rank of its second half, and the index it corresponds to.
11
+
# The index plays no role in the ordering, so it is stored last in each list; that way Python's default (lexicographic) list sort compares the two ranks first.
""" Manber-Myers with radix sort - O(n log n) - turns out to be consistently slower in empirical tests """
185
+
186
+
# def counting_sort_ranks(arr, sort_ind):
187
+
# largest = max(arr, key=lambda e: e[sort_ind])
188
+
# counts = [0] * (largest[sort_ind] + 1)
189
+
# out = [None] * len(arr)
190
+
# for elem in arr:
191
+
# counts[elem[sort_ind]] += 1
192
+
# # Make cumulative
193
+
# for i in range(1, len(counts)):
194
+
# counts[i] += counts[i-1]
195
+
# # Construct output
196
+
# for elem in reversed(arr):
197
+
# counts[elem[sort_ind]] -= 1
198
+
# out[counts[elem[sort_ind]]] = elem
199
+
200
+
# return out
201
+
202
+
# def radix_sort_ranks(arr):
203
+
# arr = counting_sort_ranks(arr, 1)
204
+
# # print(arr)
205
+
# arr = counting_sort_ranks(arr, 0)
206
+
# # print(arr)
207
+
# return arr
208
+
209
+
# def build_suffix_arr_radix(string):
210
+
# length = len(string)
211
+
212
+
# # During computation, every suffix is represented by three numbers: the rank of its first half, the rank of its second half, and the index it corresponds to.
213
+
# # The index plays no role in the ordering, so it is stored last in each list; that way Python's default (lexicographic) list sort compares the two ranks first.
214
+
# suffs = [[0, 0, 0] for _ in range(length)]
215
+
216
+
# for i in range(length):
217
+
# suffs[i][2]= i
218
+
# # Shift numbers up by 1 so that indicator for end of suffix can be 0
219
+
# suffs[i][0] = string[i]+1
220
+
# suffs[i][1] = string[i+1]+1 if i < length-1 else 0
221
+
222
+
# suffs = radix_sort_ranks(suffs)
223
+
224
+
# k = 2
225
+
# inds = [0] * length
226
+
# while k < length:
227
+
# curr_rank = 0
228
+
# prev_rank = suffs[0][0]
229
+
# suffs[0][0] = curr_rank
230
+
# inds[suffs[0][2]] = 0
231
+
# no_change = True
232
+
233
+
# for i in range(1, length):
234
+
# if suffs[i][0] == prev_rank and suffs[i][1] == suffs[i-1][1]:
235
+
# suffs[i][0] = curr_rank
236
+
# no_change = False
237
+
# else:
238
+
# prev_rank = suffs[i][0]
239
+
# curr_rank += 1
240
+
# suffs[i][0] = curr_rank
241
+
# inds[suffs[i][2]] = i
242
+
243
+
# for i in range(length):
244
+
# next_ind = suffs[i][2] + k
245
+
# suffs[i][1] = suffs[inds[next_ind]][0] if next_ind < length else 0
0 commit comments