66from typing import Dict , Set , List , Optional , Any
77from .constants import *
88from .mft_record import MftRecord
9+ from .file_writers import FileWriters
910
1011class MftAnalyzer :
1112
12- def __init__ (self , mft_file : str , output_file : str , debug : bool = False , compute_hashes : bool = False , export_format : str = "csv" ) -> None :
13+ def __init__ (self , mft_file : str , output_file : str , debug : bool = False , very_debug : bool = False ,
14+ verbosity : int = 0 , compute_hashes : bool = False , export_format : str = "csv" ) -> None :
1315 self .mft_file = mft_file
1416 self .output_file = output_file
1517 self .debug = debug
18+ self .very_debug = very_debug
19+ self .verbosity = verbosity
1620 self .compute_hashes = compute_hashes
1721 self .export_format = export_format
18- self .mft_records = []
22+ self .compute_hashes = compute_hashes
23+ self .export_format = export_format
24+ self .mft_records = {}
1925 self .interrupt_flag = asyncio .Event ()
20- self . csv_writer = None
26+
2127 self .csvfile = None
28+ self .csv_writer = None
29+
2230 self .stats = {
2331 'total_records' : 0 ,
2432 'active_records' : 0 ,
@@ -33,20 +41,27 @@ def __init__(self, mft_file: str, output_file: str, debug: bool = False, compute
3341 'unique_crc32' : set (),
3442 })
3543
def log(self, message: str, level: int = 0):
    """Print *message* if the configured verbosity admits *level*.

    The admitted level is the highest of three settings:

    * ``self.debug`` -- a truthy flag admits level 1 (an integer value is
      used as-is),
    * ``self.very_debug`` -- admits level 2,
    * ``self.verbosity`` -- admits every level up to its own value.

    Level 0 messages are therefore always printed.

    Args:
        message: Text to print.
        level: Minimum verbosity required for the message to appear.
    """
    # int() makes the bool-vs-int comparison explicit; very_debug is folded
    # in so that level-2 messages gated on that flag are actually emitted.
    threshold = max(int(self.debug), 2 if self.very_debug else 0, self.verbosity)
    if level <= threshold:
        print(message)
3647
async def analyze(self) -> None:
    """Run the full analysis: parse the MFT, write the output, print stats.

    Any exception raised while processing or writing is reported on stdout
    (with a traceback when ``self.debug`` is set) rather than propagated.
    Statistics are printed in all cases.
    """
    try:
        # Only the CSV export streams through self.csvfile. Opening it for
        # other formats would truncate the output path and later flush the
        # buffered header over whatever the real writer produced.
        # NOTE(review): process_mft still calls write_csv_block every 1000
        # records, and that method opens the CSV file lazily, so non-CSV
        # exports larger than one block need a matching guard there.
        if self.export_format == "csv":
            self.initialize_csv_writer()
        await self.process_mft()
        await self.write_output()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        if self.debug:
            traceback.print_exc()
    finally:
        try:
            if self.csvfile:
                self.csvfile.close()
        finally:
            # Statistics are reported even if closing the file fails.
            self.print_statistics()
4761
4862
4963 async def process_mft (self ) -> None :
64+ self .log (f"Processing MFT file: { self .mft_file } " , 1 )
5065 try :
5166 with open (self .mft_file , 'rb' ) as f :
5267 while not self .interrupt_flag .is_set ():
@@ -56,41 +71,40 @@ async def process_mft(self) -> None:
5671
5772 try :
5873 record = MftRecord (raw_record , self .compute_hashes )
59-
6074 self .stats ['total_records' ] += 1
75+
6176 if record .flags & FILE_RECORD_IN_USE :
6277 self .stats ['active_records' ] += 1
6378 if record .flags & FILE_RECORD_IS_DIRECTORY :
6479 self .stats ['directories' ] += 1
6580 else :
6681 self .stats ['files' ] += 1
6782
68- if self .compute_hashes :
69- self .stats ['unique_md5' ].add (record .md5 )
70- self .stats ['unique_sha256' ].add (record .sha256 )
71- self .stats ['unique_sha512' ].add (record .sha512 )
72- self .stats ['unique_crc32' ].add (record .crc32 )
73-
74- if self .debug :
75- print (f"Processing record { self .stats ['total_records' ]} : { record .filename } " )
76-
7783 self .mft_records [record .recordnum ] = record
7884
79- # Write to CSV in blocks of 1000 records
85+ if self .debug >= 2 :
86+ self .log (f"Processed record { self .stats ['total_records' ]} : { record .filename } " , 2 )
87+ elif self .stats ['total_records' ] % 10000 == 0 :
88+ self .log (f"Processed { self .stats ['total_records' ]} records..." , 1 )
89+
8090 if self .stats ['total_records' ] % 1000 == 0 :
8191 await self .write_csv_block ()
82- self .mft_records .clear () # Clear processed records to save memory
92+ self .mft_records .clear ()
8393
8494 except Exception as e :
85- if self .debug :
86- print (f"Error processing record { self .stats ['total_records' ]} : { str (e )} " )
95+ self .log (f"Error processing record { self .stats ['total_records' ]} : { str (e )} " , 1 )
96+ if self .debug >= 2 :
97+ traceback .print_exc ()
8798 continue
8899
89100 except Exception as e :
90- print (f"Error reading MFT file: { str (e )} " )
91- if self .debug :
101+ self . log (f"Error reading MFT file: { str (e )} " , 0 )
102+ if self .debug >= 1 :
92103 traceback .print_exc ()
93104
105+ self .log (f"MFT processing complete. Total records processed: { self .stats ['total_records' ]} " , 0 )
106+
107+
94108 def handle_interrupt (self ) -> None :
95109 if sys .platform == "win32" :
96110 # Windows-specific interrupt handling
@@ -111,26 +125,40 @@ def unix_handler():
111125 getattr (signal , signame ),
112126 unix_handler )
113127
def initialize_csv_writer(self):
    """Open the CSV output file and write the header row, at most once.

    Does nothing when ``self.csvfile`` is already set. Otherwise opens
    ``self.output_file`` for writing as UTF-8, stores the handle and a
    ``csv.writer`` bound to it on the instance, and writes ``CSV_HEADER``.
    """
    if self.csvfile is not None:
        return

    # newline='' leaves line-ending handling to the csv module.
    self.csvfile = open(self.output_file, 'w', newline='', encoding='utf-8')
    self.csv_writer = csv.writer(self.csvfile)
    self.csv_writer.writerow(CSV_HEADER)
133+
async def write_csv_block(self) -> None:
    """Write every record currently held in ``self.mft_records`` to the CSV.

    The CSV writer is created on demand. Each record is written inside its
    own ``try`` so a failing record is logged and skipped without aborting
    the rest of the block. After the loop the file is flushed and its
    current position is logged. Errors outside the per-record handling are
    logged at level 0 and not propagated.
    """
    self.log(f"Writing CSV block. Records in block: {len(self.mft_records)}", 2)
    try:
        if self.csv_writer is None:
            self.initialize_csv_writer()

        for entry in self.mft_records.values():
            try:
                resolved_path = self.build_filepath(entry)
                fields = entry.to_csv()
                # The last column is overwritten with the resolved path.
                fields[-1] = resolved_path

                self.csv_writer.writerow([str(value) for value in fields])
                if self.very_debug:
                    self.log(f"Wrote record {entry.recordnum} to CSV", 2)
            except Exception as row_error:
                self.log(f"Error writing record {entry.recordnum}: {str(row_error)}", 1)
                if self.very_debug:
                    traceback.print_exc()

        if self.csvfile:
            self.csvfile.flush()
        current_size = self.csvfile.tell() if self.csvfile else 0
        self.log(f"CSV block written. Current file size: {current_size} bytes", 2)
    except Exception as block_error:
        self.log(f"Error in write_csv_block: {str(block_error)}", 0)
        if self.debug or self.very_debug:
            traceback.print_exc()
135163
136164
@@ -184,13 +212,14 @@ def print_statistics(self) -> None:
184212
185213
async def write_output(self) -> None:
    """Write the final output in the format named by ``self.export_format``.

    ``"csv"`` delegates to ``self.write_remaining_records()``. ``"json"``,
    ``"xml"`` and ``"excel"`` pass the values of ``self.mft_records`` as a
    list, together with ``self.output_file``, to the matching
    ``FileWriters.write_*`` coroutine. Any other value is reported on stdout
    as unsupported.
    """
    print(f"Writing output in {self.export_format} format to {self.output_file}")

    if self.export_format == "csv":
        await self.write_remaining_records()
        return

    if self.export_format not in ("json", "xml", "excel"):
        print(f"Unsupported export format: {self.export_format}")
        return

    # Resolve FileWriters.write_json / write_xml / write_excel by name.
    writer = getattr(FileWriters, f"write_{self.export_format}")
    await writer(list(self.mft_records.values()), self.output_file)
0 commit comments