Add configurable tqdm refresh rate, remove LeakyReLU, fix predictor

jacons · jacons · commit d2c7ed24c740 · 2023-01-30T10:16:40.000+01:00
diff --git a/Configuration.py b/Configuration.py
@@ -28,6 +28,8 @@ def __init__(self, p):
         self.cuda = True if torch.cuda.is_available() else False
         self.gpu = "cuda:0"
 
+        self.refresh_rate: int = 60  # minimum seconds between tqdm progress-bar updates (passed as mininterval)
+
     def update_params(self, param: str, value: float):
         self.param[param] = value
 
diff --git a/Evaluation/metrics.py b/Evaluation/metrics.py
@@ -1,6 +1,6 @@
 import torch
 from pandas import DataFrame
-from torch import Tensor, zeros, IntTensor, BoolTensor, LongTensor, masked_select
+from torch import Tensor, zeros, IntTensor, BoolTensor, LongTensor, masked_select, nn
 from tqdm import tqdm
 from transformers import BertTokenizerFast
 
@@ -11,15 +11,15 @@
 
 def scores(confusion: Tensor, all_metrics=False):
     """
-    Given a Confusion matrix, returns an F1-score, if all_metrics is false, then returns only F1-score
+    Given a confusion matrix, computes the per-label F1-score; if all_metrics is false, returns only the mean F1-score
     """
     length = confusion.shape[0]
     iter_label = range(length)
 
-    accuracy: Tensor = torch.zeros(length)
-    precision: Tensor = torch.zeros(length)
-    recall: Tensor = torch.zeros(length)
-    f1: Tensor = torch.zeros(length)
+    accuracy: Tensor = zeros(length)
+    precision: Tensor = zeros(length)
+    recall: Tensor = zeros(length)
+    f1: Tensor = zeros(length)
 
     for i in iter_label:
         fn = torch.sum(confusion[i, :i]) + torch.sum(confusion[i, i + 1:])  # false negative
@@ -46,15 +46,15 @@ def scores(confusion: Tensor, all_metrics=False):
         return f1.mean()
 
 
-def eval_model(model, dataset: DataFrame, conf: Configuration,
+def eval_model(model: nn.Module, dataset: DataFrame, conf: Configuration,
                handler: EntityHandler, result="conlleval"):
     model.eval()
     true_label, pred_label = [], []  # using for conlleval
     max_labels = len(handler.set_entities)
     confusion = zeros(size=(max_labels, max_labels))  # Confusion matrix
     tokenizer = BertTokenizerFast.from_pretrained(conf.bert)
 
-    for row in tqdm(dataset.itertuples(), total=dataset.shape[0]):
+    for row in tqdm(dataset.itertuples(), total=dataset.shape[0], desc="Evaluating", mininterval=conf.refresh_rate):
 
         # tokens = ["Hi","How","are","you"], labels = ["O","I-TREAT" ...]
         tokens, labels = row[1].split(), row[2].split()
@@ -77,10 +77,11 @@ def eval_model(model, dataset: DataFrame, conf: Configuration,
             labels_ids = labels_ids.to(conf.gpu)
 
         # Perform the prediction
-        logits = model(input_ids, att_mask, None)
+        path, _ = model(input_ids, att_mask, None)[0][0]  # path is a list of int
+        path = LongTensor(path)
 
-        path, _ = logits[0][0]
-        path = torch.LongTensor(path).to("cuda:0")
+        if conf.cuda:
+            path = path.to(conf.gpu)
 
         logits = masked_select(path, tag_mask)
         labels = masked_select(labels_ids, tag_mask)
diff --git a/Parsing/CustomDataset.py b/Parsing/CustomDataset.py
@@ -16,7 +16,8 @@ def __init__(self, dataset: DataFrame, conf: Configuration, e_handler: EntityHan
 
         tokenizer = BertTokenizerFast.from_pretrained(conf.bert)
 
-        for row in tqdm(dataset.itertuples(), total=dataset.shape[0], mininterval=60):
+        for row in tqdm(dataset.itertuples(), total=dataset.shape[0], desc="Building dataset",
+                        mininterval=conf.refresh_rate):
 
             # tokens = ["Hi","How","are","you"]
             tokens, labels = row[1].split(), row[2].split()
diff --git a/Parsing/parser_utils.py b/Parsing/parser_utils.py
@@ -159,7 +159,7 @@ def buildDataset(path_file: str, verbose=True) -> EntityHandler:
     return EntityHandler(DataFrame(t).drop_duplicates(), set_entities)
 
 
-def ensembleParser(path_file_a, path_file_b, verbose=True):
+def ensembleParser(path_file_a, path_file_b, verbose=True) -> tuple[tuple[EntityHandler, EntityHandler], DataFrame]:
     """
     ensembleParser is used to group in one single dataframe the both to dataset A and B.
 
@@ -225,10 +225,10 @@ def parse_args():
     p.add_argument('--bert', type=str,
                    help='Bert model provided by Huggingface', default="dbmdz/bert-base-italian-xxl-cased")
 
-    p.add_argument('--save_model', type=int,
+    p.add_argument('--save', type=int,
                    help='set 1 if you want save the model otherwise set 0', default=1)
 
-    p.add_argument('--type_eval', type=str,
+    p.add_argument('--eval', type=str,
                    help='define the type of evaluation: conlleval or df', default="conlleval")
 
     p.add_argument('--lr', type=float, help='Learning rate', default=0.001)
@@ -241,6 +241,6 @@ def parse_args():
 
     p.add_argument('--max_epoch', type=int, help='Max number of epochs', default=20)
 
-    p.add_argument('--early_stopping', type=float, help='Patience in early stopping', default=3)
+    p.add_argument('--patience', type=float, help='Patience in early stopping', default=3)
 
     return p.parse_known_args()
diff --git a/Prediction/Predictor.py b/Prediction/Predictor.py
@@ -1,6 +1,6 @@
 from typing import Tuple
 
-from torch import IntTensor, BoolTensor, masked_select
+from torch import IntTensor, BoolTensor, masked_select, LongTensor, nn
 from transformers import BertTokenizerFast
 
 import Configuration
@@ -58,7 +58,7 @@ def unify_labels(labelsA: list, labelsB: list) -> list:
                 unified.append(a + "/" + b)
         return unified
 
-    def add_model(self, group: str, model, dictionary: dict):
+    def add_model(self, group: str, model: nn.Module, dictionary: dict):
         model.eval()
         self.models[group] = (model, dictionary)
 
@@ -77,9 +77,13 @@ def predict(self, string: str) -> Tuple[list, list]:
 
         results = []
         for (model, dictionary) in self.models.values():
-            logits = model(input_ids, att_mask, None)
-            logits = logits[0].squeeze(0).argmax(1)
-            logits = masked_select(logits, tag_mask).tolist()
+            path, _ = model(input_ids, att_mask, None)[0][0]
+            path = LongTensor(path)
+
+            if self.conf.cuda:
+                path = path.to(self.conf.gpu)
+
+            logits = masked_select(path, tag_mask).tolist()
 
             results.append(
                 [lbl[2:] if lbl != "O" else "O" for lbl in self.map_id2lab(dictionary, logits)])
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
 
 As final project for Human Language Technologies (HLT) I developed a project that extracts knowledge from Italian medical records written by physician and provides a simple web interface to make prediction on sentences. I also compared the quality of project’s result with the  result of MultiCoNER competition. Both models uses BERT + CRF .
 
-#### *Report outdated further improvment have been applyed*
+#### *Report outdated: further improvements have been applied*
 
 report : [NER_for_Medical_Records.pdf](https://github.com/jacons/NERMedicalRecords/files/10427990/NER_for_Medical_Records.pdf)
 
@@ -33,10 +33,10 @@ p.add_argument('--path_model', type=str,
 p.add_argument('--bert', type=str,
     help='Bert model provided by Huggingface', default="dbmdz/bert-base-italian-xxl-cased")
 
-p.add_argument('--save_model', type=int,
+p.add_argument('--save', type=int,
     help='set 1 if you want save the model otherwise set 0', default=1)
 
-p.add_argument('--type_eval', type=str,
+p.add_argument('--eval', type=str,
     help='define the type of evaluation: conlleval or df', default="conlleval")
     
 p.add_argument('--lr', type=float, help='Learning rate', default=0.004)
@@ -49,7 +49,7 @@ p.add_argument('--batch_size', type=int, help='Batch size', default=16)
     
 p.add_argument('--max_epoch', type=int, help='Max number of epochs', default=15)
     
-p.add_argument('--early_stopping', type=float, help='Patience in early stopping', default=3)
+p.add_argument('--patience', type=float, help='Patience in early stopping', default=3)
 ``` 
 
 #### Running 
diff --git a/Training/NERCRFClassifier.py b/Training/NERCRFClassifier.py
@@ -7,14 +7,12 @@
 from torch.nn import Module
 from transformers import BertPreTrainedModel, BertModel
 
-from Parsing.parser_utils import EntityHandler
-
 
 class NERBertCRFClassification(BertPreTrainedModel):  # noqa
 
     _keys_to_ignore_on_load_unexpected = [r"pooler"]
 
-    def __init__(self, config, handler: EntityHandler):
+    def __init__(self, config, id2label: dict):
         super().__init__(config)
         self.num_labels = config.num_labels
 
@@ -24,15 +22,14 @@ def __init__(self, config, handler: EntityHandler):
         self.bert = BertModel(config, add_pooling_layer=False)
 
         self.linear_layer = nn.Sequential(
-            nn.LeakyReLU(),
             nn.Dropout(classifier_dropout),
             nn.Linear(config.hidden_size, config.num_labels),
             nn.LogSoftmax(-1),
         )
 
         self.crf_layer = ConditionalRandomField(num_tags=config.num_labels,
                                                 constraints=allowed_transitions(constraint_type="BIO",
-                                                                                labels=handler.id2label))
+                                                                                labels=id2label))
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -80,16 +77,16 @@ def forward(
 
 
 class NERCRFClassifier(Module):
-    def __init__(self, bert: str, handler: EntityHandler):
+    def __init__(self, bert: str, id2label: dict):
         """
         Bert model
         :param bert: Name of bert used
         :param frozen: True to freeze the deep parameters
         """
         super(NERCRFClassifier, self).__init__()
 
-        num_labels = len(handler.set_entities)
-        self.bert = NERBertCRFClassification.from_pretrained(bert, num_labels=num_labels, handler=handler)
+        num_labels = len(id2label)
+        self.bert = NERBertCRFClassification.from_pretrained(bert, num_labels=num_labels, id2label=id2label)
 
         return
 
diff --git a/Training/Trainer.py b/Training/Trainer.py
@@ -1,6 +1,6 @@
 import torch
 from pandas import DataFrame
-from torch import no_grad, zeros, masked_select
+from torch import no_grad, zeros, masked_select, nn
 from torch.nn.utils import clip_grad_norm_
 from torch.optim.lr_scheduler import ReduceLROnPlateau
 from torch.optim.sgd import SGD
@@ -14,7 +14,7 @@
 from Training.trainer_utils import padding_batch, EarlyStopping, ModelVersion
 
 
-def train(model, e_handler: EntityHandler, df_train: DataFrame, df_val: DataFrame, conf: Configuration):
+def train(model: nn.Module, e_handler: EntityHandler, df_train: DataFrame, df_val: DataFrame, conf: Configuration):
     # --------- DATASETS ---------
     print("--INFO--\tCreating Dataloader for Training set")
     tr = DataLoader(NerDataset(df_train, conf, e_handler), collate_fn=padding_batch,
@@ -41,7 +41,7 @@ def train(model, e_handler: EntityHandler, df_train: DataFrame, df_val: DataFram
     model_version = ModelVersion(folder=conf.folder, name=conf.model_name) if conf.save_model else None
 
     # --------- Scheduling the learning rate to improve the convergence ---------
-    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2)
+    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3)
 
     print("\n--INFO--\tThe Training is started")
     model.train()
@@ -52,7 +52,7 @@ def train(model, e_handler: EntityHandler, df_train: DataFrame, df_val: DataFram
         # ========== Training Phase ==========
 
         #  There inputs are created in "NerDataset" class
-        for inputs_ids, att_mask, _, labels in tqdm(tr, mininterval=60):
+        for inputs_ids, att_mask, _, labels in tqdm(tr, desc="Training", mininterval=conf.refresh_rate):
             optimizer.zero_grad(set_to_none=True)
 
             loss, _ = model(inputs_ids, att_mask, labels)
@@ -65,13 +65,16 @@ def train(model, e_handler: EntityHandler, df_train: DataFrame, df_val: DataFram
         # ========== Validation Phase ==========
         confusion = zeros(size=(max_labels, max_labels))
         with no_grad():  # Validation phase
-            for inputs_ids, att_mask, tag_maks, labels in tqdm(vl, mininterval=60):
+            for inputs_ids, att_mask, tag_maks, labels in tqdm(vl, desc="Evaluation", mininterval=conf.refresh_rate):
 
                 loss, logits = model(inputs_ids, att_mask, labels)
                 loss_val += loss.item()
 
                 path, _ = logits[0]
-                path = torch.LongTensor(path).to("cuda:0")
+                path = torch.LongTensor(path)
+
+                if conf.cuda:
+                    path = path.to(conf.gpu)
 
                 logits = masked_select(path, tag_maks)
                 labels = masked_select(labels, tag_maks)
diff --git a/eval_models.py b/eval_models.py
@@ -21,10 +21,10 @@
     (handler_a, handler_b), unified_dt = ensembleParser(paths[0], paths[1])
     _, _, df_test = holdout(unified_dt)
 
-    modelA = NERCRFClassifier(conf.bert, handler_a)
+    modelA = NERCRFClassifier(conf.bert, handler_a.id2label)
     modelA.load_state_dict(torch.load(models[0]))
 
-    modelB = NERCRFClassifier(conf.bert, handler_b)
+    modelB = NERCRFClassifier(conf.bert, handler_b.id2label)
     modelB.load_state_dict(torch.load(models[1]))
 
     if conf.cuda:
diff --git a/prediction.py b/prediction.py
@@ -3,7 +3,7 @@
 from Configuration import Configuration
 from Parsing.parser_utils import parse_args
 from Prediction.Predictor import Predictor
-from Training.NERClassifier import NERClassifier
+from Training.NERCRFClassifier import NERCRFClassifier
 
 if __name__ == '__main__':
 
@@ -17,10 +17,15 @@
 
     models = args.models
 
-    modelA = NERClassifier(conf.bert, 9, frozen=False)
+    id2lab_group_a = {0: 'B-ACTI', 1: 'B-DISO', 2: 'B-DRUG', 3: 'B-SIGN', 4: 'I-ACTI', 5: 'I-DISO', 6: 'I-DRUG',
+                      7: 'I-SIGN', 8: 'O'}
+
+    id2lab_group_b = {0: 'B-BODY', 1: 'B-TREA', 2: 'I-BODY', 3: 'I-TREA', 4: 'O'}
+
+    modelA = NERCRFClassifier(conf.bert, id2lab_group_a)
     modelA.load_state_dict(torch.load(models[0]))
 
-    modelB = NERClassifier(conf.bert, 5, frozen=False)
+    modelB = NERCRFClassifier(conf.bert, id2lab_group_b)
     modelB.load_state_dict(torch.load(models[1]))
 
     if conf.cuda:
@@ -29,14 +34,5 @@
 
     predictor = Predictor(conf)
 
-    id2lab_group_a = {0: 'B-ACTI', 1: 'B-DISO', 2: 'B-DRUG', 3: 'B-SIGN', 4: 'I-ACTI', 5: 'I-DISO', 6: 'I-DRUG',
-                      7: 'I-SIGN', 8: 'O'}
-
-    id2lab_group_b = {0: 'B-BODY', 1: 'B-TREA', 2: 'I-BODY', 3: 'I-TREA', 4: 'O'}
-
     predictor.add_model("a", modelA, id2lab_group_a)
     predictor.add_model("b", modelB, id2lab_group_b)
-
-    print(predictor.predict("Hello!!"))
-
-    #  C:\ProgramData\Anaconda3\envs\deeplearning\python.exe train_model.py --models K:/NoSyncCache/Models/A/modelE1.pt K:/NoSyncCache/Models/B/modelH2.pt
diff --git a/server.py b/server.py
@@ -4,7 +4,7 @@
 from Configuration import Configuration
 from Parsing.parser_utils import parse_args
 from Prediction.Predictor import Predictor
-from Training.NERClassifier import NERClassifier
+from Training.NERCRFClassifier import NERCRFClassifier
 
 app = Flask(__name__)
 
@@ -15,10 +15,15 @@
 
 models = ["saved_models/model.a.pt", "saved_models/model.b.pt"]
 
-modelA = NERClassifier(conf.bert, 9, frozen=False)
+id2lab_group_a = {0: 'B-ACTI', 1: 'B-DISO', 2: 'B-DRUG', 3: 'B-SIGN', 4: 'I-ACTI', 5: 'I-DISO', 6: 'I-DRUG',
+                  7: 'I-SIGN', 8: 'O'}
+
+id2lab_group_b = {0: 'B-BODY', 1: 'B-TREA', 2: 'I-BODY', 3: 'I-TREA', 4: 'O'}
+
+modelA = NERCRFClassifier(conf.bert, id2lab_group_a)
 modelA.load_state_dict(torch.load(models[0], map_location=torch.device('cpu')))
 
-modelB = NERClassifier(conf.bert, 5, frozen=False)
+modelB = NERCRFClassifier(conf.bert, id2lab_group_b)
 modelB.load_state_dict(torch.load(models[1], map_location=torch.device('cpu')))
 
 if conf.cuda:
@@ -27,11 +32,6 @@
 
 predictor = Predictor(conf)
 
-id2lab_group_a = {0: 'B-ACTI', 1: 'B-DISO', 2: 'B-DRUG', 3: 'B-SIGN', 4: 'I-ACTI', 5: 'I-DISO', 6: 'I-DRUG',
-                  7: 'I-SIGN', 8: 'O'}
-
-id2lab_group_b = {0: 'B-BODY', 1: 'B-TREA', 2: 'I-BODY', 3: 'I-TREA', 4: 'O'}
-
 predictor.add_model("a", modelA, id2lab_group_a)
 predictor.add_model("b", modelB, id2lab_group_b)
 
@@ -52,4 +52,4 @@ def create():
         elif "clear" in request.form:
             list_of_result.clear()
 
-    return render_template('main.html', list_of_result=list_of_result)
+    return render_template('main.html', list_of_result=list_of_result)
diff --git a/train_model.py b/train_model.py
@@ -17,9 +17,9 @@
         raise Exception("Define a model name!")
 
     handler = buildDataset(args.datasets[0], verbose=True)
-    df_train, df_val, df_test = holdout(handler.dt)
+    df_train, df_val, _ = holdout(handler.dt)
 
-    model = NERCRFClassifier(conf.bert, handler)
+    model = NERCRFClassifier(conf.bert, handler.id2label)
 
     if conf.cuda:
         model = model.to(conf.gpu)