Skip to content

Commit d60a641

Browse files
committed
Implement Yaz0 compression (but don't use it because it's extremely slow)
1 parent 98668cb commit d60a641

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed

wwlib/yaz0.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
from fs_helpers import *
66

77
class Yaz0:
8+
MAX_RUN_LENGTH = 0xFF + 0x12
9+
10+
num_bytes_1 = 0
11+
match_pos = 0
12+
prev_flag = False
13+
814
@staticmethod
915
def decompress(comp_data):
1016
if try_read_str(comp_data, 0, 4) != "Yaz0":
@@ -56,3 +62,126 @@ def decompress(comp_data):
5662
uncomp_data = struct.pack("B"*output_len, *output)
5763

5864
return BytesIO(uncomp_data)
65+
66+
@staticmethod
def compress(uncomp_data):
    """Compress a data stream into Yaz0 format and return it as a new BytesIO.

    The output begins with a 0x10 byte header: the "Yaz0" magic string, the
    uncompressed size as a u32, and two u32 fields written as zero. The
    payload that follows is emitted in groups of up to eight codes. Each
    group is one code byte followed by the data for its codes; a set bit in
    the code byte (most significant bit first) marks a byte copied verbatim,
    a clear bit marks a back-reference to earlier data.

    Back-references are two bytes when the run is shorter than 0x12 bytes
    (length - 2 in the high nibble, distance in the remaining 12 bits) and
    three bytes otherwise (distance in the first two bytes, then
    length - 0x12, with the length capped at Yaz0.MAX_RUN_LENGTH).
    """
    def flush_group(stream, offset, group_code, payload):
        # Write the code byte followed by the bytes it describes, one at a
        # time, and hand back the offset just past the last byte written.
        for value in [group_code] + payload:
            write_u8(stream, offset, value)
            offset += 1
        return offset

    packed = BytesIO()
    write_str(packed, 0, "Yaz0", 4)

    total_size = data_len(uncomp_data)
    write_u32(packed, 4, total_size)

    # The rest of the header is left zeroed.
    write_u32(packed, 8, 0)
    write_u32(packed, 0xC, 0)

    # The match finder keeps its lookahead state on the class, so clear
    # whatever a previous compression run left behind.
    Yaz0.num_bytes_1 = 0
    Yaz0.match_pos = 0
    Yaz0.prev_flag = False

    src = read_and_unpack_bytes(uncomp_data, 0, total_size, "B"*total_size)
    src_pos = 0
    out_pos = 0x10
    pending = []
    codes_in_group = 0
    group_code = 0

    while src_pos < total_size:
        run_len, run_start = Yaz0.get_num_bytes_and_match_pos(src, src_pos)

        if run_len >= 3:
            # Back-reference: the code bit for this entry stays clear.
            back = src_pos - run_start - 1

            if run_len < 0x12:
                # Short form: length and distance packed into two bytes.
                pending.append(((run_len - 2) << 4) | ((back >> 8) & 0xFF))
                pending.append(back & 0xFF)
            else:
                # Long form: two distance bytes plus a separate length byte.
                pending.append((back & 0xFF00) >> 8)
                pending.append(back & 0x00FF)
                run_len = min(run_len, Yaz0.MAX_RUN_LENGTH)
                pending.append(run_len - 0x12)

            src_pos += run_len
        else:
            # Literal byte: copy it through and set its bit in the code byte.
            pending.append(src[src_pos])
            src_pos += 1
            group_code |= (0x80 >> codes_in_group)

        codes_in_group += 1

        if codes_in_group == 8:
            # A full group of eight codes is ready to be written out.
            out_pos = flush_group(packed, out_pos, group_code, pending)
            group_code = 0
            codes_in_group = 0
            pending = []

    if codes_in_group > 0:
        # Write the final, partially filled group.
        out_pos = flush_group(packed, out_pos, group_code, pending)

    return packed
138+
139+
@staticmethod
def get_num_bytes_and_match_pos(uncomp, uncomp_offset):
    """Choose how many bytes to encode at uncomp_offset, with one byte of lookahead.

    Returns a (num_bytes, match_pos) tuple. When the match starting one byte
    later is at least two bytes longer than the match starting here, this
    call reports a single byte and stores the later match in
    Yaz0.num_bytes_1 / Yaz0.match_pos with Yaz0.prev_flag set; the following
    call then returns that stored match without searching again.
    """
    if Yaz0.prev_flag:
        # The previous call already found the match for this position.
        Yaz0.prev_flag = False
        return (Yaz0.num_bytes_1, Yaz0.match_pos)

    Yaz0.prev_flag = False
    here_len, here_pos = Yaz0.simple_rle_encode(uncomp, uncomp_offset)
    Yaz0.match_pos = here_pos

    if here_len < 3:
        # Nothing worth a back-reference here, so no lookahead is needed.
        return (here_len, here_pos)

    # Check whether starting the match one byte later would pay off.
    Yaz0.num_bytes_1, Yaz0.match_pos = Yaz0.simple_rle_encode(uncomp, uncomp_offset+1)

    if Yaz0.num_bytes_1 >= here_len + 2:
        Yaz0.prev_flag = True
        return (1, here_pos)

    return (here_len, here_pos)
159+
160+
@staticmethod
def simple_rle_encode(uncomp, uncomp_offset):
    """Find the longest earlier run that matches the data at uncomp_offset.

    Args:
        uncomp: Indexable sequence of the uncompressed byte values.
        uncomp_offset: Position in uncomp to find a match for.

    Returns:
        A (num_bytes, match_pos) tuple. num_bytes is the length of the
        longest match found and match_pos is the index where that match
        starts. When no match of at least two bytes exists the result is
        (1, 0). Among equally long matches the earliest one is kept.
    """
    # How far back to search. Can search as far back as 0x1000 bytes, but the
    # farther back we search the slower it is.
    start_offset = max(uncomp_offset - 0x400, 0)

    # A match can never run past the end of the data.
    max_len = len(uncomp) - uncomp_offset

    num_bytes = 1
    match_pos = 0

    for i in range(start_offset, uncomp_offset):
        # Count matching bytes with an explicit counter. Reading the
        # variable of a `for ... in range()` loop after the loop is one
        # short when the loop runs to completion, and unbound when the
        # range is empty.
        j = 0
        while j < max_len and uncomp[i + j] == uncomp[uncomp_offset + j]:
            j += 1

        if j > num_bytes:
            num_bytes = j
            match_pos = i

    return (num_bytes, match_pos)

0 commit comments

Comments
 (0)