|
| 1 | +import sys |
| 2 | +import os |
| 3 | + |
| 4 | +# Reference DP Solution |
| 5 | + |
# At least two input files are needed so that there is a pair to compare.
if len(sys.argv) <= 2:
    print("Usage: python filelcs.py <file> <file> ... <file>")
    # sys.exit is the supported way to leave a program; the bare exit()
    # helper is injected by the site module for interactive sessions.
    # A non-zero status lets the calling shell detect the usage error.
    sys.exit(1)
| 9 | + |
| 10 | + |
| 11 | +# def get_lcs_offsets(f1_data, f2_data): |
| 12 | +# """ f2 is the smaller file """ |
| 13 | +# f1_len, f1_name, f1 = f1_data |
| 14 | +# f2_len, f2_name, f2 = f2_data |
| 15 | +# matrix = [[0] * (f2_len+1), [0] * (f2_len+2)] |
| 16 | +# f1.seek(0) |
| 17 | +# maxlen = 0 |
| 18 | +# of1 = 0 |
| 19 | +# of2 = 0 |
| 20 | +# for i in range(1, f1_len+1): |
| 21 | +# b1 = f1.read(1) |
| 22 | +# f2.seek(0) |
| 23 | +# # print(i) |
| 24 | +# for j in range(1, f2_len+1): |
| 25 | +# b2 = f2.read(1) |
| 26 | +# if b1 == b2: |
| 27 | +# num = matrix[(i%2) - 1][j-1] + 1 |
| 28 | +# matrix[i%2][j] = num |
| 29 | +# if num > maxlen: |
| 30 | +# maxlen = num |
| 31 | +# of1 = i |
| 32 | +# of2 = j |
| 33 | +# offset_1 = (f1_name, of1) |
| 34 | +# offset_2 = (f2_name, of2) |
| 35 | +# return maxlen, [min(offset_1, offset_2), max(offset_1, offset_2)] |
| 36 | + |
def get_lcs_offsets(f1_data, f2_data):
    """Find the longest run of consecutive bytes shared by two open files.

    Args:
        f1_data: ``(length, name, file)`` tuple for the first file.
        f2_data: ``(length, name, file)`` tuple for the second file. Callers
            are expected to pass the smaller file here, because the working
            rows are sized by this file's length.

        The ``length`` element is not used for iteration; the number of
        bytes actually read from each file object is used instead, so a
        stale length cannot cause an out-of-range index.

    Returns:
        ``(maxlen, [entry_a, entry_b])``. ``maxlen`` is the length of the
        longest common strand (0 if none). Each entry is ``(name, end)``,
        where ``end`` is the offset just past the last byte of the first
        longest strand found in that file, so ``end - maxlen`` is the offset
        at which the strand begins. The two entries are ordered by comparing
        the tuples, i.e. by file name first.
    """
    _, f1_name, f1 = f1_data
    _, f2_name, f2 = f2_data

    # Rewind and read each file in turn so that the result does not depend
    # on where a previous comparison left the file position.
    f1.seek(0)
    f1_bytes = f1.read()
    f2.seek(0)
    f2_bytes = f2.read()

    width = len(f2_bytes) + 1
    # prev_row[j] holds the length of the common suffix ending at the
    # previous byte of f1 and byte j (1-based) of f2.
    prev_row = [0] * width
    maxlen = 0
    of1 = 0
    of2 = 0
    for i, b1 in enumerate(f1_bytes, 1):
        # A fresh zeroed row per outer step: cells for mismatching bytes must
        # be 0, otherwise counts from two rows back leak into later matches.
        curr_row = [0] * width
        for j, b2 in enumerate(f2_bytes, 1):
            if b1 == b2:
                num = prev_row[j - 1] + 1
                curr_row[j] = num
                if num > maxlen:
                    maxlen = num
                    # Record exclusive end offsets (1-based i/j equal the
                    # 0-based position just past the match).
                    of1 = i
                    of2 = j
        prev_row = curr_row

    offset_1 = (f1_name, of1)
    offset_2 = (f2_name, of2)
    return maxlen, [min(offset_1, offset_2), max(offset_1, offset_2)]
| 64 | + |
| 65 | + |
filenames = sys.argv[1:]

# Longest strand length seen so far over all file pairs, and the
# (file name, offset) entries recorded for it.
maxlen = 0
offsets = []

# Compare every unordered pair of input files exactly once.
for i, name1 in enumerate(filenames[:-1]):
    try:
        f1 = open(name1, "rb")
    except FileNotFoundError:
        print("ERROR: FILE '{}' DOES NOT EXIST.".format(name1))
        sys.exit(1)
    # The with-blocks guarantee the handles are closed on every path,
    # including the early exit below.
    with f1:
        f1_len = os.path.getsize(name1)
        f1_data = (f1_len, name1, f1)
        for name2 in filenames[i + 1:]:
            try:
                f2 = open(name2, "rb")
            except FileNotFoundError:
                print("ERROR: FILE '{}' DOES NOT EXIST.".format(name2))
                # Stop here: continuing would use an undefined or stale f2.
                sys.exit(1)
            with f2:
                f2_len = os.path.getsize(name2)
                f2_data = (f2_len, name2, f2)
                # Pass the larger file first. Order on (size, name) only, so
                # a tie never falls through to comparing the file objects,
                # which are not orderable.
                if (f2_len, name2) > (f1_len, name1):
                    larger, smaller = f2_data, f1_data
                else:
                    larger, smaller = f1_data, f2_data
                lcs_len, lcs_offset = get_lcs_offsets(larger, smaller)
                if lcs_len > 0:
                    if lcs_len > maxlen:
                        # New best: replace the recorded entries.
                        maxlen = lcs_len
                        offsets = lcs_offset
                    elif lcs_len == maxlen and lcs_offset[0][0] == offsets[0][0]:
                        # Same length and same first file as the current best:
                        # also record the other file of this pair.
                        # NOTE(review): this does not check that the bytes are
                        # the same strand as the recorded one - confirm intent.
                        offsets.append(lcs_offset[1])

print("Length of longest shared strand of bytes: {}".format(maxlen))
for off in offsets:
    # The start is computed as the recorded offset minus the strand length.
    print("File name: {}, Offset where sequence begins: {}".format(off[0], off[1] - maxlen))
0 commit comments