Skip to content

Commit 96d6b64

Browse files
authored
Merge pull request #17 from skelsec/speedup
Speedup
2 parents 9d50d3b + 714d122 commit 96d6b64

File tree

6 files changed

+224
-73
lines changed

6 files changed

+224
-73
lines changed

Diff for: minidump/_version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
__version__ = "0.0.15"
2+
__version__ = "0.0.16"
33
__banner__ = \
44
"""
55
# minidump %s

Diff for: minidump/aminidumpreader.py

+63-22
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,26 @@
88
from .common_structs import *
99
from .streams.SystemInfoStream import PROCESSOR_ARCHITECTURE
1010

11-
class AMinidumpBufferedMemorySegment:
12-
def __init__(self):
13-
self.start_address = None
14-
self.end_address = None
11+
12+
class VirtualSegment:
13+
def __init__(self, start, end, start_file_address):
14+
self.start = start
15+
self.end = end
16+
self.start_file_address = start_file_address
17+
1518
self.data = None
19+
20+
def inrange(self, start, end):
21+
return self.start <= start and end<= self.end
1622

17-
async def load(self, memory_segment, file_handle):
23+
class AMinidumpBufferedMemorySegment:
24+
def __init__(self, memory_segment, chunksize = 10*1024):
1825
self.start_address = memory_segment.start_virtual_address
1926
self.end_address = memory_segment.end_virtual_address
20-
await file_handle.seek(memory_segment.start_file_address)
21-
self.data = await file_handle.read(memory_segment.size)
27+
self.total_size = memory_segment.end_virtual_address - memory_segment.start_virtual_address
28+
self.start_file_address = memory_segment.start_file_address
29+
self.chunksize = chunksize
30+
self.chunks = []
2231

2332
def inrange(self, position):
2433
return self.start_address <= position <= self.end_address
@@ -28,10 +37,43 @@ def remaining_len(self, position):
2837
return None
2938
return self.end_address - position
3039

40+
async def find(self, file_handle, pattern, startpos):
41+
data = await self.read(file_handle, 0, -1)
42+
return data.find(pattern, startpos)
43+
44+
async def read(self, file_handle, start, end):
45+
if end is None:
46+
await file_handle.seek(self.start_file_address + start)
47+
return await file_handle.read(self.end_address - (self.start_file_address + start))
48+
49+
for chunk in self.chunks:
50+
if chunk.inrange(start, end):
51+
return chunk.data[start - chunk.start: end - chunk.start]
52+
53+
if self.total_size <= 2*self.chunksize:
54+
chunksize = self.total_size
55+
vs = VirtualSegment(0, chunksize, self.start_file_address)
56+
await file_handle.seek(self.start_file_address)
57+
vs.data = await file_handle.read(chunksize)
58+
self.chunks.append(vs)
59+
return vs.data[start - vs.start: end - vs.start]
60+
61+
chunksize = max((end-start), self.chunksize)
62+
if start + chunksize > self.end_address:
63+
chunksize = self.end_address - start
64+
65+
vs = VirtualSegment(start, start+chunksize, self.start_file_address + start)
66+
await file_handle.seek(vs.start_file_address)
67+
vs.data = await file_handle.read(chunksize)
68+
self.chunks.append(vs)
69+
70+
return vs.data[start - vs.start: end - vs.start]
71+
3172
class AMinidumpBufferedReader:
32-
def __init__(self, reader):
73+
def __init__(self, reader, segment_chunk_size = 10*1024):
3374
self.reader = reader
3475
self.memory_segments = []
76+
self.segment_chunk_size = segment_chunk_size
3577

3678
self.current_segment = None
3779
self.current_position = None
@@ -50,8 +92,7 @@ async def _select_segment(self, requested_position):
5092
# not in cache, check if it's present in memory space. if yes then create a new buffered memory object, and copy data
5193
for memory_segment in self.reader.memory_segments:
5294
if memory_segment.inrange(requested_position):
53-
newsegment = AMinidumpBufferedMemorySegment()
54-
await newsegment.load(memory_segment, self.reader.file_handle)
95+
newsegment = AMinidumpBufferedMemorySegment(memory_segment, chunksize=self.segment_chunk_size)
5596
self.memory_segments.append(newsegment)
5697
self.current_segment = newsegment
5798
self.current_position = requested_position
@@ -118,7 +159,7 @@ async def peek(self, length):
118159
t = self.current_position + length
119160
if not self.current_segment.inrange(t):
120161
raise Exception('Would read over segment boundaries!')
121-
return self.current_segment.data[self.current_position - self.current_segment.start_address :t - self.current_segment.start_address]
162+
return await self.current_segment.read(self.reader.file_handle, self.current_position - self.current_segment.start_address , t - self.current_segment.start_address)
122163

123164
async def read(self, size = -1):
124165
"""
@@ -133,15 +174,15 @@ async def read(self, size = -1):
133174

134175
old_new_pos = self.current_position
135176
self.current_position = self.current_segment.end_address
136-
return self.current_segment.data[old_new_pos - self.current_segment.start_address:]
177+
return await self.current_segment.read(self.reader.file_handle, old_new_pos - self.current_segment.start_address, None)
137178

138179
t = self.current_position + size
139180
if not self.current_segment.inrange(t):
140181
raise Exception('Would read over segment boundaries!')
141182

142183
old_new_pos = self.current_position
143184
self.current_position = t
144-
return self.current_segment.data[old_new_pos - self.current_segment.start_address :t - self.current_segment.start_address]
185+
return await self.current_segment.read(self.reader.file_handle, old_new_pos - self.current_segment.start_address, t - self.current_segment.start_address)
145186

146187
async def read_int(self):
147188
"""
@@ -173,7 +214,7 @@ async def find(self, pattern):
173214
"""
174215
Searches for a pattern in the current memory segment
175216
"""
176-
pos = self.current_segment.data.find(pattern)
217+
pos = await self.current_segment.find(self.reader.file_handle, pattern)
177218
if pos == -1:
178219
return -1
179220
return pos + self.current_position
@@ -185,7 +226,7 @@ async def find_all(self, pattern):
185226
pos = []
186227
last_found = -1
187228
while True:
188-
last_found = self.current_segment.data.find(pattern, last_found + 1)
229+
last_found = await self.current_segment.find(self.reader.file_handle, pattern, last_found + 1)
189230
if last_found == -1:
190231
break
191232
pos.append(last_found + self.current_segment.start_address)
@@ -227,7 +268,7 @@ async def get_ptr_with_offset(self, pos):
227268
return await self.read_uint()
228269

229270
async def find_in_module(self, module_name, pattern, find_first = False, reverse_order = False):
230-
t = await self.reader.search_module(module_name, pattern, find_first = find_first, reverse_order = reverse_order)
271+
t = await self.reader.search_module(module_name, pattern, find_first = find_first, reverse_order = reverse_order,chunksize = self.segment_chunk_size)
231272
return t
232273

233274

@@ -262,32 +303,32 @@ def __init__(self, minidumpfile):
262303
else:
263304
raise Exception('Unknown processor architecture %s! Please fix and submit PR!' % self.sysinfo.ProcessorArchitecture)
264305

265-
def get_buffered_reader(self):
266-
return AMinidumpBufferedReader(self)
306+
def get_buffered_reader(self, segment_chunk_size = 10*1024):
307+
return AMinidumpBufferedReader(self, segment_chunk_size = segment_chunk_size)
267308

268309
def get_module_by_name(self, module_name):
269310
for mod in self.modules:
270311
if ntpath.basename(mod.name).find(module_name) != -1:
271312
return mod
272313
return None
273314

274-
async def search_module(self, module_name, pattern, find_first = False, reverse_order = False):
315+
async def search_module(self, module_name, pattern, find_first = False, reverse_order = False, chunksize = 10*1024):
275316
mod = self.get_module_by_name(module_name)
276317
if mod is None:
277318
raise Exception('Could not find module! %s' % module_name)
278319
needles = []
279320
for ms in self.memory_segments:
280321
if mod.baseaddress <= ms.start_virtual_address < mod.endaddress:
281-
needles += await ms.asearch(pattern, self.file_handle, find_first = find_first)
322+
needles += await ms.asearch(pattern, self.file_handle, find_first = find_first, chunksize = chunksize)
282323
if len(needles) > 0 and find_first is True:
283324
return needles
284325

285326
return needles
286327

287-
async def search(self, pattern, find_first = False):
328+
async def search(self, pattern, find_first = False, chunksize = 10*1024):
288329
t = []
289330
for ms in self.memory_segments:
290-
t += await ms.asearch(pattern, self.file_handle)
331+
t += await ms.asearch(pattern, self.file_handle, find_first = find_first, chunksize = chunksize)
291332

292333
return t
293334

Diff for: minidump/common_structs.py

+70-28
Original file line numberDiff line numberDiff line change
@@ -155,46 +155,88 @@ async def aread(self, virtual_address, size, file_handler):
155155
await file_handler.seek(pos, 0)
156156
return data
157157

158-
def search(self, pattern, file_handler, find_first = False):
158+
def search(self, pattern, file_handler, find_first = False, chunksize = 50*1024):
159159
if len(pattern) > self.size:
160160
return []
161161
pos = file_handler.tell()
162162
file_handler.seek(self.start_file_address, 0)
163-
data = file_handler.read(self.size)
164-
file_handler.seek(pos, 0)
165163
fl = []
166-
offset = 0
167-
while len(data) > len(pattern):
168-
marker = data.find(pattern)
169-
if marker == -1:
170-
return fl
171-
fl.append(marker + offset + self.start_virtual_address)
172-
data = data[marker+1:]
173-
offset = marker + 1
174-
if find_first is True:
175-
return fl
176-
164+
if find_first is True:
165+
chunksize = min(chunksize, self.size)
166+
data = b''
167+
i = 0
168+
while len(data) < self.size:
169+
i += 1
170+
if chunksize > (self.size - len(data)):
171+
chunksize = (self.size - len(data))
172+
data += file_handler.read(chunksize)
173+
marker = data.find(pattern)
174+
if marker != -1:
175+
#print('FOUND! size: %s i: %s read: %s perc: %s' % (self.size, i, i*chunksize, 100*((i*chunksize)/self.size)))
176+
file_handler.seek(pos, 0)
177+
return [self.start_virtual_address + marker]
178+
179+
180+
#print('NOTFOUND! size: %s i: %s read: %s perc %s' % (self.size, i, len(data), 100*(len(data)/self.size) ))
181+
182+
else:
183+
data = file_handler.read(self.size)
184+
file_handler.seek(pos, 0)
185+
186+
offset = 0
187+
while len(data) > len(pattern):
188+
marker = data.find(pattern)
189+
if marker == -1:
190+
return fl
191+
fl.append(marker + offset + self.start_virtual_address)
192+
data = data[marker+1:]
193+
offset = marker + 1
194+
if find_first is True:
195+
return fl
196+
197+
file_handler.seek(pos, 0)
177198
return fl
178199

179-
async def asearch(self, pattern, file_handler, find_first = False):
200+
async def asearch(self, pattern, file_handler, find_first = False, chunksize = 50*1024):
180201
if len(pattern) > self.size:
181202
return []
182203
pos = file_handler.tell()
183204
await file_handler.seek(self.start_file_address, 0)
184-
data = await file_handler.read(self.size)
185-
await file_handler.seek(pos, 0)
186205
fl = []
187-
offset = 0
188-
while len(data) > len(pattern):
189-
marker = data.find(pattern)
190-
if marker == -1:
191-
return fl
192-
fl.append(marker + offset + self.start_virtual_address)
193-
data = data[marker+1:]
194-
offset = marker + 1
195-
if find_first is True:
196-
return fl
197-
206+
207+
if find_first is True:
208+
chunksize = min(chunksize, self.size)
209+
data = b''
210+
i = 0
211+
while len(data) < self.size:
212+
i += 1
213+
if chunksize > (self.size - len(data)):
214+
chunksize = (self.size - len(data))
215+
data += await file_handler.read(chunksize)
216+
marker = data.find(pattern)
217+
if marker != -1:
218+
#print('FOUND! size: %s i: %s read: %s perc: %s' % (self.size, i, i*chunksize, 100*((i*chunksize)/self.size)))
219+
await file_handler.seek(pos, 0)
220+
return [self.start_virtual_address + marker]
221+
222+
223+
#print('NOTFOUND! size: %s i: %s read: %s perc %s' % (self.size, i, len(data), 100*(len(data)/self.size) ))
224+
225+
else:
226+
offset = 0
227+
data = await file_handler.read(self.size)
228+
await file_handler.seek(pos, 0)
229+
while len(data) > len(pattern):
230+
marker = data.find(pattern)
231+
if marker == -1:
232+
return fl
233+
fl.append(marker + offset + self.start_virtual_address)
234+
data = data[marker+1:]
235+
offset = marker + 1
236+
if find_first is True:
237+
return fl
238+
239+
await file_handler.seek(pos, 0)
198240
return fl
199241

200242

0 commit comments

Comments
 (0)