11import argparse
2- from service .commands import EnrollSpeakerCommand , RecognizeSpeakerCommand , ListSpeakersCommand , DeleteSpeakerCommand , CommandHandler
3- from bst import BinarySearchTree
4-
5- def main ():
2+ import os
3+
4+ from service .commands import (
5+ EnrollSpeakerCommand ,
6+ RecognizeSpeakerCommand ,
7+ ListSpeakersCommand ,
8+ DeleteSpeakerCommand ,
9+ CommandHandler
10+ )
11+ from file_management .bst import BinarySearchTree
12+ from file_management .file_management import FileManagementInterface
13+
def setup_environment(base_directory):
    """Create the working directory layout under *base_directory*.

    Ensures that the ``models``, ``audio_files`` and ``metadata``
    subdirectories exist (creating *base_directory* itself when needed) and
    then prints a confirmation message.

    Args:
        base_directory: Path of the root directory that holds the three
            subdirectories.

    Raises:
        OSError: If a directory cannot be created, e.g. because a regular
            file already occupies one of the target paths.
    """
    for subdirectory in ("models", "audio_files", "metadata"):
        # exist_ok=True keeps the call idempotent and avoids the
        # check-then-create race of os.path.exists() followed by makedirs().
        os.makedirs(os.path.join(base_directory, subdirectory), exist_ok=True)
    print(f"Environment set up at {base_directory} ")
23+
24+ def main (command_line_args = None ):
625 """CLI entry point."""
726 # Initialize Argument Parser
827 parser = argparse .ArgumentParser (description = "Speaker Recognition CLI Tool" )
@@ -27,6 +46,12 @@ def main():
2746 # Recognize Command
2847 recognize_parser = subparsers .add_parser ('recognize' , help = 'Recognize a speaker from an audio file' )
2948 recognize_parser .add_argument ('audio_file' , type = str , help = 'Path to the audio file' )
49+ recognize_parser .add_argument ('--sample_rate' , type = int , default = 16000 , help = 'Sample rate of the audio file' )
50+ recognize_parser .add_argument ('--frame_size' , type = float , default = 0.025 , help = 'Frame size in seconds' )
51+ recognize_parser .add_argument ('--frame_step' , type = float , default = 0.01 , help = 'Frame step (overlap) in seconds' )
52+ recognize_parser .add_argument ('--fft_size' , type = int , default = 512 , help = 'FFT size for audio processing' )
53+ recognize_parser .add_argument ('--num_filters' , type = int , default = 26 , help = 'Number of Mel filters' )
54+ recognize_parser .add_argument ('--num_ceps' , type = int , default = 13 , help = 'Number of MFCC coefficients' )
3055
3156 # List Speakers Command
3257 subparsers .add_parser ('list_speakers' , help = 'List all enrolled speakers' )
@@ -36,14 +61,19 @@ def main():
3661 delete_parser .add_argument ('speaker_name' , type = str , help = 'Name of the speaker to delete' )
3762
3863 # Parse the arguments
39- args = parser .parse_args ()
64+ args = parser .parse_args (command_line_args )
4065
4166 # Initialize the command handler
4267 handler = CommandHandler ()
4368
44- # Binary Search Tree and base directory
45- bst = BinarySearchTree () # Placeholder for actual binary search tree
46- base_directory = "models/" # Placeholder for actual base directory
69+ # Base directory setup
70+ base_directory = "test_environment" # Placeholder for the base directory
71+
72+ # Ensure environment setup
73+ setup_environment (base_directory )
74+
75+ # Initialize Binary Search Tree
76+ bst = BinarySearchTree () # Placeholder for actual binary search tree implementation
4777
4878 # Process the command based on the parsed arguments
4979 if args .command == 'enroll' :
@@ -61,21 +91,60 @@ def main():
6191 n_mixtures = args .n_mixtures
6292 )
6393 handler .run (command )
94+
95+ # Serialize the BST before exiting the program
96+ bst .serialize_bst ()
6497
6598 elif args .command == 'recognize' :
66- command = RecognizeSpeakerCommand (args .audio_file )
99+ command = RecognizeSpeakerCommand (
100+ bst = bst ,
101+ audio_file = args .audio_file ,
102+ base_directory = base_directory ,
103+ sample_rate = args .sample_rate ,
104+ frame_size = args .frame_size ,
105+ frame_step = args .frame_step ,
106+ fft_size = args .fft_size ,
107+ num_filters = args .num_filters ,
108+ num_ceps = args .num_ceps
109+ )
67110 handler .run (command )
68111
69112 elif args .command == 'list_speakers' :
70- command = ListSpeakersCommand ()
113+ file_management = FileManagementInterface (bst = bst , base_directory = base_directory )
114+ command = ListSpeakersCommand (file_management )
71115 handler .run (command )
72116
73117 elif args .command == 'delete_speaker' :
74- command = DeleteSpeakerCommand (args .speaker_name )
118+ file_management = FileManagementInterface (bst = bst , base_directory = base_directory )
119+ command = DeleteSpeakerCommand (args .speaker_name , file_management )
75120 handler .run (command )
76121
77122 else :
78123 parser .print_help ()
79124
if __name__ == "__main__":
    # Call main() without arguments so the argument parser reads the real
    # command line (sys.argv). Pass an explicit argument list to main() only
    # from tests or a debugger session, never from the script entry point.
    main()
0 commit comments