diff --git a/README.md b/README.md index 3c99150..b0927d4 100644 --- a/README.md +++ b/README.md @@ -124,4 +124,5 @@ docker run -it --name uetasr --gpus all -v :/workspace uetasr:v1. 2. [TensorFlowASR: Almost State-of-the-art Automatic Speech Recognition in Tensorflow 2](https://github.com/TensorSpeech/TensorFlowASR) 3. [ESPNet: End-to-End Speech Processing Toolkit](https://github.com/espnet/espnet) 4. [SpeechBrain: A PyTorch-based Speech Toolkit](https://github.com/speechbrain/speechbrain) -5. [Python module for evaluting ASR hypotheses](https://github.com/belambert/asr-evaluation) \ No newline at end of file +5. [Python module for evaluating ASR hypotheses](https://github.com/belambert/asr-evaluation) +6. [Accumulated Gradients for TensorFlow 2](https://github.com/andreped/GradientAccumulator) \ No newline at end of file diff --git a/egs/test.py b/egs/test.py index 43141aa..d3d7597 100644 --- a/egs/test.py +++ b/egs/test.py @@ -52,12 +52,6 @@ help="Enable to evaluate loss in test data.", ) -parser.add_argument( - "--summary", - action="store_true", - help="Enable to print summary.", -) - parser.add_argument( "--verbose", action="store_true", @@ -74,11 +68,6 @@ def test(config_file): with open(config_file) as fin: modules = load_hyperpyyaml(fin) - - if args.summary: - model = modules['model'] - model.summary() - test_loader = modules['test_loader'] trainer = modules['trainer'] diff --git a/egs/train.py b/egs/train.py index 2f09aed..f32aa18 100644 --- a/egs/train.py +++ b/egs/train.py @@ -48,7 +48,6 @@ def train(config_file): with open(config_file) as fin: modules = load_hyperpyyaml(fin) model = modules['model'] - model.summary() train_loader = modules['train_loader'] dev_loader = modules['dev_loader'] cmvn_loader = None diff --git a/egs/vlsp2022/conformer/v3/config.yaml b/egs/vlsp2022/conformer/v3/config.yaml index 29d39b4..428cb62 100644 --- a/egs/vlsp2022/conformer/v3/config.yaml +++ b/egs/vlsp2022/conformer/v3/config.yaml @@ -324,7 +324,6 @@ callbacks: ## 
=================== TRAINER =================== ## trainer: !new:uetasr.trainers.trainer.ASRTrainer model: !ref - learning_rate: !ref beam_decoder: !ref optimizer: !ref losses: [!ref ] diff --git a/uetasr/trainers/trainer.py b/uetasr/trainers/trainer.py index 6e66127..b3e0393 100644 --- a/uetasr/trainers/trainer.py +++ b/uetasr/trainers/trainer.py @@ -18,7 +18,6 @@ class ASRTrainer(BaseTrainer): def __init__( self, model: tf.keras.Model, - learning_rate: Union[float, LearningRateSchedule], beam_decoder: tf.keras.layers.Layer, optimizer: tf.keras.optimizers.Optimizer, log_append: bool = False, @@ -27,6 +26,7 @@ def __init__( loss_weights: List[float] = [], metrics: List[tf.keras.metrics.Metric] = [], num_epochs: int = 1, + finetune: bool = False, jit_compile: bool = False, steps_per_execution: int = 1, callbacks: List[tf.keras.callbacks.Callback] = [], @@ -38,12 +38,13 @@ def __init__( if accum_steps > 1 and has_devices("GPU"): if get_num_devices("GPU") > 1: optimizer = GradientAccumulator(optimizer, accum_steps) - # elif get_num_devices("GPU") == 1: # GA model is not stable multi-gpus - # model = GradientAccumulateModel(accum_steps=accum_steps, - # mixed_precision=False, - # use_agc=True, - # inputs=model.input, - # outputs=model.output) + elif get_num_devices("GPU") == 1: # GA model is not stable multi-gpus + model.summary() # this is necessary to build model + model = GradientAccumulateModel(accum_steps=accum_steps, + mixed_precision=False, + use_agc=True, + inputs=model.input, + outputs=model.output) self.optimizer = optimizer self.model = model @@ -51,6 +52,15 @@ def __init__( if pretrained_model: self.load_model(pretrained_model) + if finetune: + # freeze model except last layers + for layer in self.model.layers: + layer.trainable = False + if layer.name == "rnnt_jointer": + layer.trainable = True + break + + self.model.summary() self.model.compile(loss=losses, loss_weights=loss_weights, optimizer=optimizer,