Skip to content

Commit 41023b4

Browse files
authored
Merge pull request #129 from rowingdude/quick_fixes
Fix MFT record storage and CSV writer initialization
2 parents e6591dc + 04de4ed commit 41023b4

File tree

6 files changed, with 97 additions and 47 deletions.

analyzeMFT.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,9 @@
22
import asyncio
33
from src.analyzeMFT.cli import main
44

5+
# Adds the 'src' directory located next to this script to sys.path so that package imports resolve consistently.
6+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
7+
58
if __name__ == "__main__":
69
if sys.platform == "win32":
710
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

setup.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
11
from setuptools import setup, find_packages
2+
from src.analyzeMFT.constants import VERSION
23

34
with open("README.md", "r", encoding="utf-8") as fh:
45
long_description = fh.read()
56

67
setup(
78
name='analyzeMFT',
8-
version='3.0',
9+
version=VERSION,
910
author='Benjamin Cance',
1011
author_email='[email protected]',
11-
packages=find_packages(),
12+
package_dir={'': 'src'},
13+
packages=find_packages(where='src'),
1214
url='http://github.com/rowingdude/analyzeMFT',
1315
license='LICENSE.txt',
1416
description='Analyze the $MFT from a NTFS filesystem.',

src/analyzeMFT/cli.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,13 @@ async def main():
2929
help="Export as log2timeline CSV")
3030
parser.add_option_group(export_group)
3131

32-
parser.add_option("-d", "--debug", action="store_true", dest="debug",
33-
help="Enable debug output", default=False)
32+
verbosity_group = OptionGroup(parser, "Verbosity Options")
33+
verbosity_group.add_option("-v", action="count", dest="verbosity",
34+
help="Increase output verbosity (can be used multiple times)", default=0)
35+
verbosity_group.add_option("-d", action="count", dest="debug",
36+
help="Increase debug output (can be used multiple times)", default=0)
37+
parser.add_option_group(verbosity_group)
38+
3439
parser.add_option("-H", "--hash", action="store_true", dest="compute_hashes",
3540
help="Compute hashes (MD5, SHA256, SHA512, CRC32)", default=False)
3641

@@ -46,8 +51,15 @@ async def main():
4651
print("\nError: No output file specified. Use -o or --output to specify an output file.")
4752
sys.exit(1)
4853

54+
# Default to CSV if no format specified
4955
if not options.export_format:
50-
options.export_format = "csv" # Default to CSV if no format specified
56+
options.export_format = "csv"
57+
58+
59+
analyzer = MftAnalyzer(options.filename, options.output_file, options.debug, options.very_debug,
60+
options.verbosity, options.compute_hashes, options.export_format)
61+
await analyzer.analyze()
62+
print(f"Analysis complete. Results written to {options.output_file}")
5163

5264
try:
5365
analyzer = MftAnalyzer(options.filename, options.output_file, options.debug, options.compute_hashes, options.export_format)
@@ -65,6 +77,7 @@ async def main():
6577
import traceback
6678
traceback.print_exc()
6779
sys.exit(1)
80+
master
6881

6982
if __name__ == "__main__":
7083
asyncio.run(main())

src/analyzeMFT/file_writers.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import asyncio
55
from typing import List, Dict, Any
66
from .mft_record import MftRecord
7+
from .constants import *
78

89
class FileWriters:
910
@staticmethod

src/analyzeMFT/mft_analyzer.py

Lines changed: 66 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -6,19 +6,27 @@
66
from typing import Dict, Set, List, Optional, Any
77
from .constants import *
88
from .mft_record import MftRecord
9+
from .file_writers import FileWriters
910

1011
class MftAnalyzer:
1112

12-
def __init__(self, mft_file: str, output_file: str, debug: bool = False, compute_hashes: bool = False, export_format: str = "csv") -> None:
13+
def __init__(self, mft_file: str, output_file: str, debug: bool = False, very_debug: bool = False,
14+
verbosity: int = 0, compute_hashes: bool = False, export_format: str = "csv") -> None:
1315
self.mft_file = mft_file
1416
self.output_file = output_file
1517
self.debug = debug
18+
self.very_debug = very_debug
19+
self.verbosity = verbosity
1620
self.compute_hashes = compute_hashes
1721
self.export_format = export_format
18-
self.mft_records = []
22+
self.compute_hashes = compute_hashes
23+
self.export_format = export_format
24+
self.mft_records = {}
1925
self.interrupt_flag = asyncio.Event()
20-
self.csv_writer = None
26+
2127
self.csvfile = None
28+
self.csv_writer = None
29+
2230
self.stats = {
2331
'total_records': 0,
2432
'active_records': 0,
@@ -33,20 +41,27 @@ def __init__(self, mft_file: str, output_file: str, debug: bool = False, compute
3341
'unique_crc32': set(),
3442
})
3543

44+
def log(self, message: str, level: int = 0):
45+
if level <= self.debug or level <= self.verbosity:
46+
print(message)
3647

3748
async def analyze(self) -> None:
3849
try:
50+
self.initialize_csv_writer()
3951
await self.process_mft()
4052
await self.write_output()
4153
except Exception as e:
4254
print(f"An unexpected error occurred: {e}")
4355
if self.debug:
4456
traceback.print_exc()
4557
finally:
58+
if self.csvfile:
59+
self.csvfile.close()
4660
self.print_statistics()
4761

4862

4963
async def process_mft(self) -> None:
64+
self.log(f"Processing MFT file: {self.mft_file}", 1)
5065
try:
5166
with open(self.mft_file, 'rb') as f:
5267
while not self.interrupt_flag.is_set():
@@ -56,41 +71,40 @@ async def process_mft(self) -> None:
5671

5772
try:
5873
record = MftRecord(raw_record, self.compute_hashes)
59-
6074
self.stats['total_records'] += 1
75+
6176
if record.flags & FILE_RECORD_IN_USE:
6277
self.stats['active_records'] += 1
6378
if record.flags & FILE_RECORD_IS_DIRECTORY:
6479
self.stats['directories'] += 1
6580
else:
6681
self.stats['files'] += 1
6782

68-
if self.compute_hashes:
69-
self.stats['unique_md5'].add(record.md5)
70-
self.stats['unique_sha256'].add(record.sha256)
71-
self.stats['unique_sha512'].add(record.sha512)
72-
self.stats['unique_crc32'].add(record.crc32)
73-
74-
if self.debug:
75-
print(f"Processing record {self.stats['total_records']}: {record.filename}")
76-
7783
self.mft_records[record.recordnum] = record
7884

79-
# Write to CSV in blocks of 1000 records
85+
if self.debug >= 2:
86+
self.log(f"Processed record {self.stats['total_records']}: {record.filename}", 2)
87+
elif self.stats['total_records'] % 10000 == 0:
88+
self.log(f"Processed {self.stats['total_records']} records...", 1)
89+
8090
if self.stats['total_records'] % 1000 == 0:
8191
await self.write_csv_block()
82-
self.mft_records.clear() # Clear processed records to save memory
92+
self.mft_records.clear()
8393

8494
except Exception as e:
85-
if self.debug:
86-
print(f"Error processing record {self.stats['total_records']}: {str(e)}")
95+
self.log(f"Error processing record {self.stats['total_records']}: {str(e)}", 1)
96+
if self.debug >= 2:
97+
traceback.print_exc()
8798
continue
8899

89100
except Exception as e:
90-
print(f"Error reading MFT file: {str(e)}")
91-
if self.debug:
101+
self.log(f"Error reading MFT file: {str(e)}", 0)
102+
if self.debug >= 1:
92103
traceback.print_exc()
93104

105+
self.log(f"MFT processing complete. Total records processed: {self.stats['total_records']}", 0)
106+
107+
94108
def handle_interrupt(self) -> None:
95109
if sys.platform == "win32":
96110
# Windows-specific interrupt handling
@@ -111,26 +125,40 @@ def unix_handler():
111125
getattr(signal, signame),
112126
unix_handler)
113127

128+
def initialize_csv_writer(self):
129+
if self.csvfile is None:
130+
self.csvfile = open(self.output_file, 'w', newline='', encoding='utf-8')
131+
self.csv_writer = csv.writer(self.csvfile)
132+
self.csv_writer.writerow(CSV_HEADER)
133+
114134
async def write_csv_block(self) -> None:
135+
self.log(f"Writing CSV block. Records in block: {len(self.mft_records)}", 2)
115136
try:
137+
if self.csv_writer is None:
138+
self.initialize_csv_writer()
139+
116140
for record in self.mft_records.values():
117-
filepath = self.build_filepath(record)
118-
csv_row = record.to_csv()
119-
csv_row[-1] = filepath # Replace the filepath placeholder
120-
121-
csv_row = [str(item) for item in csv_row]
122-
123141
try:
124-
self.csv_writer.writerow(csv_row)
125-
except UnicodeEncodeError as e:
126-
print(f"Error writing record {record.recordnum}: {str(e)}")
127-
self.csv_writer.writerow([item.encode('utf-8', errors='replace').decode('utf-8') for item in csv_row])
128-
129-
await asyncio.sleep(0) # Yield control to allow other tasks to run
142+
filepath = self.build_filepath(record)
143+
csv_row = record.to_csv()
144+
csv_row[-1] = filepath
130145

146+
csv_row = [str(item) for item in csv_row]
147+
148+
self.csv_writer.writerow(csv_row)
149+
if self.very_debug:
150+
self.log(f"Wrote record {record.recordnum} to CSV", 2)
151+
except Exception as e:
152+
self.log(f"Error writing record {record.recordnum}: {str(e)}", 1)
153+
if self.very_debug:
154+
traceback.print_exc()
155+
156+
if self.csvfile:
157+
self.csvfile.flush()
158+
self.log(f"CSV block written. Current file size: {self.csvfile.tell() if self.csvfile else 0} bytes", 2)
131159
except Exception as e:
132-
print(f"Error writing CSV block: {str(e)}")
133-
if self.debug:
160+
self.log(f"Error in write_csv_block: {str(e)}", 0)
161+
if self.debug or self.very_debug:
134162
traceback.print_exc()
135163

136164

@@ -184,13 +212,14 @@ def print_statistics(self) -> None:
184212

185213

186214
async def write_output(self) -> None:
215+
print(f"Writing output in {self.export_format} format to {self.output_file}")
187216
if self.export_format == "csv":
188-
await FileWriters.write_csv(self.mft_records, self.output_file)
217+
await self.write_remaining_records()
189218
elif self.export_format == "json":
190-
await FileWriters.write_json(self.mft_records, self.output_file)
219+
await FileWriters.write_json(list(self.mft_records.values()), self.output_file)
191220
elif self.export_format == "xml":
192-
await FileWriters.write_xml(self.mft_records, self.output_file)
221+
await FileWriters.write_xml(list(self.mft_records.values()), self.output_file)
193222
elif self.export_format == "excel":
194-
await FileWriters.write_excel(self.mft_records, self.output_file)
223+
await FileWriters.write_excel(list(self.mft_records.values()), self.output_file)
195224
else:
196225
print(f"Unsupported export format: {self.export_format}")

src/analyzeMFT/mft_record.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,9 @@
44
import zlib
55
from .constants import *
66
from .windows_time import WindowsTime
7-
from typing import Dict, Set, List, Optional, Any,Union
7+
8+
from typing import Dict, Set, List, Optional, Any, Union
9+
810

911

1012
class MftRecord:
@@ -79,11 +81,11 @@ def parse_record(self) -> None:
7981
self.base_ref = struct.unpack("<Q", self.raw_record[MFT_RECORD_FILE_REFERENCE_OFFSET:MFT_RECORD_FILE_REFERENCE_OFFSET+MFT_RECORD_FILE_REFERENCE_SIZE])[0]
8082
self.next_attrid = struct.unpack("<H", self.raw_record[MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET:MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET+MFT_RECORD_NEXT_ATTRIBUTE_ID_SIZE])[0]
8183
self.recordnum = struct.unpack("<I", self.raw_record[MFT_RECORD_RECORD_NUMBER_OFFSET:MFT_RECORD_RECORD_NUMBER_OFFSET+MFT_RECORD_RECORD_NUMBER_SIZE])[0]
84+
self.parse_attributes()
85+
8286
except struct.error:
83-
if self.debug:
87+
if hasattr(self, 'debug') and self.debug:
8488
print(f"Error parsing MFT record header for record {self.recordnum}")
85-
86-
self.parse_attributes()
8789

8890
def parse_attributes(self) -> None:
8991
offset = self.attr_off
@@ -104,7 +106,7 @@ def parse_attributes(self) -> None:
104106
elif attr_type == ATTRIBUTE_LIST_ATTRIBUTE:
105107
self.parse_attribute_list(offset)
106108
elif attr_type == OBJECT_ID_ATTRIBUTE:
107-
self.parse_object_id(offset)
109+
self.parse_object_id_attribute(offset)
108110
elif attr_type == SECURITY_DESCRIPTOR_ATTRIBUTE:
109111
self.parse_security_descriptor(offset)
110112
elif attr_type == VOLUME_NAME_ATTRIBUTE:

0 commit comments

Comments
 (0)