diff --git a/README.md b/README.md index 3c99150..b0927d4 100644 --- a/README.md +++ b/README.md @@ -124,4 +124,5 @@ docker run -it --name uetasr --gpus all -v :/workspace uetasr:v1. 2. [TensorFlowASR: Almost State-of-the-art Automatic Speech Recognition in Tensorflow 2](https://github.com/TensorSpeech/TensorFlowASR) 3. [ESPNet: End-to-End Speech Processing Toolkit](https://github.com/espnet/espnet) 4. [SpeechBrain: A PyTorch-based Speech Toolkit](https://github.com/speechbrain/speechbrain) -5. [Python module for evaluting ASR hypotheses](https://github.com/belambert/asr-evaluation) \ No newline at end of file +5. [Python module for evaluating ASR hypotheses](https://github.com/belambert/asr-evaluation) +6. [Accumulated Gradients for TensorFlow 2](https://github.com/andreped/GradientAccumulator) \ No newline at end of file diff --git a/egs/test.py b/egs/test.py index 43141aa..d3d7597 100644 --- a/egs/test.py +++ b/egs/test.py @@ -52,12 +52,6 @@ help="Enable to evaluate loss in test data.", ) -parser.add_argument( - "--summary", - action="store_true", - help="Enable to print summary.", -) - parser.add_argument( "--verbose", action="store_true", @@ -74,11 +68,6 @@ def test(config_file): with open(config_file) as fin: modules = load_hyperpyyaml(fin) - - if args.summary: - model = modules['model'] - model.summary() - test_loader = modules['test_loader'] trainer = modules['trainer'] diff --git a/egs/train.py b/egs/train.py index 2f09aed..f32aa18 100644 --- a/egs/train.py +++ b/egs/train.py @@ -48,7 +48,6 @@ def train(config_file): with open(config_file) as fin: modules = load_hyperpyyaml(fin) model = modules['model'] - model.summary() train_loader = modules['train_loader'] dev_loader = modules['dev_loader'] cmvn_loader = None diff --git a/egs/vlsp2022/conformer/v3/config.yaml b/egs/vlsp2022/conformer/v3/config.yaml index 29d39b4..428cb62 100644 --- a/egs/vlsp2022/conformer/v3/config.yaml +++ b/egs/vlsp2022/conformer/v3/config.yaml @@ -324,7 +324,6 @@ callbacks: ## 
=================== TRAINER =================== ## trainer: !new:uetasr.trainers.trainer.ASRTrainer model: !ref - learning_rate: !ref beam_decoder: !ref optimizer: !ref losses: [!ref ] diff --git a/uetasr/trainers/trainer.py b/uetasr/trainers/trainer.py index 6e66127..b3e0393 100644 --- a/uetasr/trainers/trainer.py +++ b/uetasr/trainers/trainer.py @@ -18,7 +18,6 @@ class ASRTrainer(BaseTrainer): def __init__( self, model: tf.keras.Model, - learning_rate: Union[float, LearningRateSchedule], beam_decoder: tf.keras.layers.Layer, optimizer: tf.keras.optimizers.Optimizer, log_append: bool = False, @@ -27,6 +26,7 @@ def __init__( loss_weights: List[float] = [], metrics: List[tf.keras.metrics.Metric] = [], num_epochs: int = 1, + finetune: bool = False, jit_compile: bool = False, steps_per_execution: int = 1, callbacks: List[tf.keras.callbacks.Callback] = [], @@ -38,12 +38,13 @@ def __init__( if accum_steps > 1 and has_devices("GPU"): if get_num_devices("GPU") > 1: optimizer = GradientAccumulator(optimizer, accum_steps) - # elif get_num_devices("GPU") == 1: # GA model is not stable multi-gpus - # model = GradientAccumulateModel(accum_steps=accum_steps, - # mixed_precision=False, - # use_agc=True, - # inputs=model.input, - # outputs=model.output) + elif get_num_devices("GPU") == 1: # GA model is not stable multi-gpus + model.summary() # this is necessary to build model + model = GradientAccumulateModel(accum_steps=accum_steps, + mixed_precision=False, + use_agc=True, + inputs=model.input, + outputs=model.output) self.optimizer = optimizer self.model = model @@ -51,6 +52,15 @@ def __init__( if pretrained_model: self.load_model(pretrained_model) + if finetune: + # freeze model except last layers + for layer in self.model.layers: + layer.trainable = False + if layer.name == "rnnt_jointer": + layer.trainable = True + break + + self.model.summary() self.model.compile(loss=losses, loss_weights=loss_weights, optimizer=optimizer,