8
8
from .common_structs import *
9
9
from .streams .SystemInfoStream import PROCESSOR_ARCHITECTURE
10
10
11
- class AMinidumpBufferedMemorySegment :
12
- def __init__ (self ):
13
- self .start_address = None
14
- self .end_address = None
11
+
12
class VirtualSegment:
    """A cached slice of a memory segment.

    ``start`` and ``end`` delimit the slice, ``start_file_address`` is the
    file position associated with it, and ``data`` holds the slice's bytes
    (``None`` until something assigns them).
    """

    def __init__(self, start, end, start_file_address):
        self.data = None
        self.start_file_address = start_file_address
        self.start, self.end = start, end

    def inrange(self, start, end):
        """Return True when the span ``start``..``end`` lies entirely inside this slice."""
        return not (start < self.start or end > self.end)
16
22
17
- async def load (self , memory_segment , file_handle ):
23
+ class AMinidumpBufferedMemorySegment :
24
+ def __init__ (self , memory_segment , chunksize = 10 * 1024 ):
18
25
self .start_address = memory_segment .start_virtual_address
19
26
self .end_address = memory_segment .end_virtual_address
20
- await file_handle .seek (memory_segment .start_file_address )
21
- self .data = await file_handle .read (memory_segment .size )
27
+ self .total_size = memory_segment .end_virtual_address - memory_segment .start_virtual_address
28
+ self .start_file_address = memory_segment .start_file_address
29
+ self .chunksize = chunksize
30
+ self .chunks = []
22
31
23
32
def inrange(self, position):
    """Return True when ``position`` falls within start_address..end_address (both inclusive)."""
    below = position < self.start_address
    above = position > self.end_address
    return not (below or above)
@@ -28,10 +37,43 @@ def remaining_len(self, position):
28
37
return None
29
38
return self .end_address - position
30
39
40
async def find(self, file_handle, pattern, startpos=0):
    """Search the whole segment for ``pattern``.

    file_handle: async file object passed through to ``self.read``.
    pattern: byte pattern to look for.
    startpos: segment-relative offset to start searching from. Defaults to 0
        so callers that only pass a pattern keep working.

    Returns the segment-relative offset of the first match at or after
    ``startpos``, or -1 when there is none.
    """
    # Ask for the full span explicitly: an end of -1 would be treated as a
    # slice index and silently exclude the final byte of the segment.
    data = await self.read(file_handle, 0, self.total_size)
    return data.find(pattern, startpos)
43
+
44
async def read(self, file_handle, start, end):
    """Return the bytes between the segment-relative offsets ``start`` and ``end``.

    file_handle: async file object providing ``seek`` and ``read``.
    start: offset of the first byte, relative to the start of the segment.
    end: offset one past the last byte, or ``None`` to read up to the end
        of the segment.

    With ``end=None`` the data is read straight from the file and is not
    cached. Otherwise the request is served from a cached chunk when one
    covers it; on a miss a new chunk is loaded and cached. Segments no
    larger than two chunks are loaded whole.
    """
    if end is None:
        # The length is the rest of the segment, expressed in segment-relative
        # terms. Clamp at zero so an out-of-range start cannot turn into a
        # negative size.
        await file_handle.seek(self.start_file_address + start)
        return await file_handle.read(max(0, self.total_size - start))

    for chunk in self.chunks:
        if chunk.inrange(start, end):
            return chunk.data[start - chunk.start:end - chunk.start]

    if self.total_size <= 2 * self.chunksize:
        # Small segment: cache it in one piece.
        vs = VirtualSegment(0, self.total_size, self.start_file_address)
    else:
        # Load at least one chunk's worth, but never past the end of the
        # segment. Both sides of the comparison are segment-relative offsets.
        wanted = max(end - start, self.chunksize)
        chunksize = max(0, min(wanted, self.total_size - start))
        vs = VirtualSegment(start, start + chunksize, self.start_file_address + start)

    await file_handle.seek(vs.start_file_address)
    vs.data = await file_handle.read(vs.end - vs.start)
    self.chunks.append(vs)

    return vs.data[start - vs.start:end - vs.start]
71
+
31
72
class AMinidumpBufferedReader :
32
- def __init__ (self , reader ):
73
+ def __init__ (self , reader , segment_chunk_size = 10 * 1024 ):
33
74
self .reader = reader
34
75
self .memory_segments = []
76
+ self .segment_chunk_size = segment_chunk_size
35
77
36
78
self .current_segment = None
37
79
self .current_position = None
@@ -50,8 +92,7 @@ async def _select_segment(self, requested_position):
50
92
# Not in cache: check whether it is present in the memory space. If yes, create a new buffered memory object and copy data.
51
93
for memory_segment in self .reader .memory_segments :
52
94
if memory_segment .inrange (requested_position ):
53
- newsegment = AMinidumpBufferedMemorySegment ()
54
- await newsegment .load (memory_segment , self .reader .file_handle )
95
+ newsegment = AMinidumpBufferedMemorySegment (memory_segment , chunksize = self .segment_chunk_size )
55
96
self .memory_segments .append (newsegment )
56
97
self .current_segment = newsegment
57
98
self .current_position = requested_position
@@ -118,7 +159,7 @@ async def peek(self, length):
118
159
t = self .current_position + length
119
160
if not self .current_segment .inrange (t ):
120
161
raise Exception ('Would read over segment boundaries!' )
121
- return self .current_segment .data [ self .current_position - self .current_segment .start_address : t - self .current_segment .start_address ]
162
+ return await self .current_segment .read ( self .reader . file_handle , self . current_position - self .current_segment .start_address , t - self .current_segment .start_address )
122
163
123
164
async def read (self , size = - 1 ):
124
165
"""
@@ -133,15 +174,15 @@ async def read(self, size = -1):
133
174
134
175
old_new_pos = self .current_position
135
176
self .current_position = self .current_segment .end_address
136
- return self .current_segment .data [ old_new_pos - self .current_segment .start_address :]
177
+ return await self .current_segment .read ( self . reader . file_handle , old_new_pos - self .current_segment .start_address , None )
137
178
138
179
t = self .current_position + size
139
180
if not self .current_segment .inrange (t ):
140
181
raise Exception ('Would read over segment boundaries!' )
141
182
142
183
old_new_pos = self .current_position
143
184
self .current_position = t
144
- return self .current_segment .data [ old_new_pos - self .current_segment .start_address : t - self .current_segment .start_address ]
185
+ return await self .current_segment .read ( self . reader . file_handle , old_new_pos - self .current_segment .start_address , t - self .current_segment .start_address )
145
186
146
187
async def read_int (self ):
147
188
"""
@@ -173,7 +214,7 @@ async def find(self, pattern):
173
214
"""
174
215
Searches for a pattern in the current memory segment
175
216
"""
176
- pos = self .current_segment .data . find (pattern )
217
+ pos = await self .current_segment .find (self . reader . file_handle , pattern )
177
218
if pos == - 1 :
178
219
return - 1
179
220
return pos + self .current_position
@@ -185,7 +226,7 @@ async def find_all(self, pattern):
185
226
pos = []
186
227
last_found = - 1
187
228
while True :
188
- last_found = self .current_segment .data . find (pattern , last_found + 1 )
229
+ last_found = await self .current_segment .find (self . reader . file_handle , pattern , last_found + 1 )
189
230
if last_found == - 1 :
190
231
break
191
232
pos .append (last_found + self .current_segment .start_address )
@@ -227,7 +268,7 @@ async def get_ptr_with_offset(self, pos):
227
268
return await self .read_uint ()
228
269
229
270
async def find_in_module(self, module_name, pattern, find_first=False, reverse_order=False):
    """Delegate to the reader's ``search_module``, passing this reader's ``segment_chunk_size`` as the chunk size."""
    return await self.reader.search_module(
        module_name,
        pattern,
        find_first=find_first,
        reverse_order=reverse_order,
        chunksize=self.segment_chunk_size,
    )
232
273
233
274
@@ -262,32 +303,32 @@ def __init__(self, minidumpfile):
262
303
else :
263
304
raise Exception ('Unknown processor architecture %s! Please fix and submit PR!' % self .sysinfo .ProcessorArchitecture )
264
305
265
def get_buffered_reader(self, segment_chunk_size=10 * 1024):
    """Create an AMinidumpBufferedReader over this reader with the given segment chunk size."""
    buffered = AMinidumpBufferedReader(self, segment_chunk_size=segment_chunk_size)
    return buffered
267
308
268
309
def get_module_by_name(self, module_name):
    """Return the first module whose file name (path stripped) contains ``module_name``, or None."""
    matching = (
        mod for mod in self.modules
        if module_name in ntpath.basename(mod.name)
    )
    return next(matching, None)
273
314
274
async def search_module(self, module_name, pattern, find_first=False, reverse_order=False, chunksize=10 * 1024):
    """Search for ``pattern`` in the memory segments that start inside the named module.

    A segment is searched when its ``start_virtual_address`` lies in
    ``[mod.baseaddress, mod.endaddress)``. With ``find_first=True`` the
    search stops at the first segment that yields any result.
    ``reverse_order`` is accepted but not used here.

    Returns the accumulated list of results from each segment's ``asearch``.
    Raises Exception when no module matches ``module_name``.
    """
    mod = self.get_module_by_name(module_name)
    if mod is None:
        raise Exception('Could not find module! %s' % module_name)

    needles = []
    for segment in self.memory_segments:
        if not (mod.baseaddress <= segment.start_virtual_address < mod.endaddress):
            continue
        needles += await segment.asearch(
            pattern, self.file_handle, find_first=find_first, chunksize=chunksize
        )
        if find_first is True and needles:
            break

    return needles
286
327
287
async def search(self, pattern, find_first=False, chunksize=10 * 1024):
    """Run ``asearch`` for ``pattern`` on every memory segment and return the combined results.

    ``find_first`` and ``chunksize`` are forwarded to each segment's
    ``asearch``; all segments are visited regardless of ``find_first``.
    """
    hits = []
    for segment in self.memory_segments:
        found = await segment.asearch(
            pattern, self.file_handle, find_first=find_first, chunksize=chunksize
        )
        hits += found

    return hits
293
334
0 commit comments