-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathfile_splitter.py
executable file
·47 lines (38 loc) · 1.83 KB
/
file_splitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python3
""" File Splitter
Command line tool that splits a CSV file generated by network-stats (either
classic or extended-mode output) into separate files, each containing a fixed
duration of capture data (5 minutes by default; set with -d/--duration).
"""
import argparse
import re
from pathlib import Path
def _chunk_prefix(input_name):
    """Return the base name shared by every split file.

    Only the final path component of *input_name* is kept, so the split files
    always land directly inside the chosen output directory even when the
    input was given with a relative or absolute directory part.  A trailing
    ``.csv`` extension is removed when present; any other name is used as is.
    """
    base_name = Path(input_name).name
    if base_name.lower().endswith(".csv"):
        return base_name[:-len(".csv")]
    return base_name


def _split_rows(rows, header, prefix, output_dir, duration):
    """Write *rows* into numbered files covering at most *duration* each.

    Args:
        rows: Iterable of data lines (the header line already consumed). Each
            line must start with an integer timestamp; it is compared against
            *duration*, so both are assumed to use the same unit (seconds).
        header: Header line copied to the top of every file that is created.
        prefix: Base name of the created files (``<prefix>-<n>.csv``).
        output_dir: ``Path`` of the existing directory to write into.
        duration: Maximum time span covered by one file.

    Returns:
        The number of files written.

    Raises:
        ValueError: If a non-blank line does not start with a timestamp.
    """
    timestamp_pattern = re.compile(r'\d+')
    chunk_start = 0
    chunk_count = 0
    chunk_file = None
    try:
        # The header is line 1 of the input, so the data rows start at line 2.
        for line_number, row in enumerate(rows, start=2):
            found = timestamp_pattern.match(row)
            if found is None:
                if not row.strip():
                    # Blank lines (e.g. a trailing newline) carry no data.
                    continue
                raise ValueError(
                    "line {} of the input does not start with a timestamp"
                    .format(line_number))
            timestamp = int(found.group())
            # The first data row always opens a file, whatever its timestamp.
            # Afterwards a new file starts once a row is more than `duration`
            # past the first row of the current file (a row exactly `duration`
            # later still belongs to the current file).
            if chunk_file is None or timestamp > chunk_start + duration:
                if chunk_file is not None:
                    chunk_file.close()
                chunk_name = "{}-{}.csv".format(prefix, chunk_count)
                chunk_file = (output_dir / chunk_name).open('w')
                chunk_file.write(header)
                chunk_start = timestamp
                chunk_count += 1
            chunk_file.write(row)
    finally:
        # Make sure the last file is flushed and closed, even on error.
        if chunk_file is not None:
            chunk_file.close()
    return chunk_count


def main(argv=None):
    """Parse the command line and split the input CSV file.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.

    Raises:
        SystemExit: With a non-zero status when the output path is not a
            directory or when the input contains a malformed line (argparse
            itself also exits on invalid arguments).
    """
    cmd_parser = argparse.ArgumentParser(description="Tool that splits csv files created by network-stats into separate files of shorter specified durations.")
    cmd_parser.add_argument("-i", "--input", help="csv file to be split; will not be modified", required=True, type=argparse.FileType('r'))
    cmd_parser.add_argument("-o", "--output", help="Directory to put split files", default=".")
    cmd_parser.add_argument("-d", "--duration", help="Duration in minutes to split the input file by", default=5, type=int)
    args = cmd_parser.parse_args(argv)

    # argparse has already opened the input; the with-block guarantees it is
    # closed again on every exit path.
    with args.input as input_file:
        output_path = Path(args.output)
        if not output_path.is_dir():
            print("Error: Output \"" + args.output + "\" is not a directory.")
            cmd_parser.print_usage()
            # Report the failure to the caller instead of exiting with 0.
            raise SystemExit(1)

        prefix = _chunk_prefix(input_file.name)
        header = input_file.readline()
        # The option is given in minutes; timestamps are compared in seconds.
        duration = args.duration * 60
        try:
            _split_rows(input_file, header, prefix, output_path, duration)
        except ValueError as err:
            raise SystemExit("Error: " + str(err))


if __name__ == "__main__":
    main()