Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions examples/custom_model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,20 @@ piper_sample_generator_path: "./piper-sample-generator"
# Sub-directories will be automatically created for train and test clips for both positive and negative examples
output_dir: "./my_custom_model"

# The path to the model used by piper_sample_generator
generator_model: "./piper-sample-generator/models/en_US-libritts_r-medium.pt"

# Min phoneme count for piper_sample_generator
min_phoneme_count: null

# Noise scales used when generating the training samples
noise_scales_train:
- 0.98

# Noise scales used when generating the test samples
noise_scales_test:
- 1.0

# The directories containing Room Impulse Response recordings
rir_paths:
- "./mit_rirs"
Expand Down
20 changes: 16 additions & 4 deletions openwakeword/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,9 @@ def convert_onnx_to_tflite(onnx_model_path, output_path):
generate_samples(
text=config["target_phrase"], max_samples=config["n_samples"]-n_current_samples,
batch_size=config["tts_batch_size"],
noise_scales=[0.98], noise_scale_ws=[0.98], length_scales=[0.75, 1.0, 1.25],
model = config["generator_model"],
min_phoneme_count=config["min_phoneme_count"],
noise_scales=config["noise_scales_train"], noise_scale_ws=config["noise_scales_train"], length_scales=[0.75, 1.0, 1.25],
output_dir=positive_train_output_dir, auto_reduce_batch_size=True,
file_names=[uuid.uuid4().hex + ".wav" for i in range(config["n_samples"])]
)
Expand All @@ -685,7 +687,9 @@ def convert_onnx_to_tflite(onnx_model_path, output_path):
if n_current_samples <= 0.95*config["n_samples_val"]:
generate_samples(text=config["target_phrase"], max_samples=config["n_samples_val"]-n_current_samples,
batch_size=config["tts_batch_size"],
noise_scales=[1.0], noise_scale_ws=[1.0], length_scales=[0.75, 1.0, 1.25],
model = config["generator_model"],
min_phoneme_count=config["min_phoneme_count"],
noise_scales=config["noise_scales_test"], noise_scale_ws=config["noise_scales_test"], length_scales=[0.75, 1.0, 1.25],
output_dir=positive_test_output_dir, auto_reduce_batch_size=True)
torch.cuda.empty_cache()
else:
Expand All @@ -706,7 +710,11 @@ def convert_onnx_to_tflite(onnx_model_path, output_path):
include_input_words=0.2))
generate_samples(text=adversarial_texts, max_samples=config["n_samples"]-n_current_samples,
batch_size=config["tts_batch_size"]//7,
noise_scales=[0.98], noise_scale_ws=[0.98], length_scales=[0.75, 1.0, 1.25],
model = config["generator_model"],
min_phoneme_count=config["min_phoneme_count"],
noise_scales=config["noise_scales_train"],
noise_scale_ws=config["noise_scales_train"],
length_scales=[0.75, 1.0, 1.25],
output_dir=negative_train_output_dir, auto_reduce_batch_size=True,
file_names=[uuid.uuid4().hex + ".wav" for i in range(config["n_samples"])]
)
Expand All @@ -729,7 +737,11 @@ def convert_onnx_to_tflite(onnx_model_path, output_path):
include_input_words=0.2))
generate_samples(text=adversarial_texts, max_samples=config["n_samples_val"]-n_current_samples,
batch_size=config["tts_batch_size"]//7,
noise_scales=[1.0], noise_scale_ws=[1.0], length_scales=[0.75, 1.0, 1.25],
model = config["generator_model"],
min_phoneme_count=config["min_phoneme_count"],
noise_scales=config["noise_scales_test"],
noise_scale_ws=config["noise_scales_test"],
length_scales=[0.75, 1.0, 1.25],
output_dir=negative_test_output_dir, auto_reduce_batch_size=True)
torch.cuda.empty_cache()
else:
Expand Down