
Commit 7e2b7a0

Merge pull request #65 from cvejoski/change_pad_dtype

Set the dtype to float32 during tokenization when no padding is needed.

2 parents: a7f0de0 + 51d22e6

File tree

1 file changed: +3 −3 lines


easy_tpp/preprocess/event_tokenizer.py (3 additions, 3 deletions)

@@ -414,9 +414,9 @@ def _pad(
                                 max_len=max_length,
                                 dtype=np.int64)
         else:
-            batch_output[self.model_input_names[0]] = np.array(encoded_inputs[self.model_input_names[0]])
-            batch_output[self.model_input_names[1]] = np.array(encoded_inputs[self.model_input_names[1]])
-            batch_output[self.model_input_names[2]] = np.array(encoded_inputs[self.model_input_names[2]])
+            batch_output[self.model_input_names[0]] = np.array(encoded_inputs[self.model_input_names[0]], dtype=np.float32)
+            batch_output[self.model_input_names[1]] = np.array(encoded_inputs[self.model_input_names[1]], dtype=np.float32)
+            batch_output[self.model_input_names[2]] = np.array(encoded_inputs[self.model_input_names[2]], dtype=np.int64)

         # non_pad_mask; replaced the use of event types by using the original sequence length
         seq_pad_mask = np.full_like(batch_output[self.model_input_names[2]], fill_value=True, dtype=bool)
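The motivation for pinning the dtype can be illustrated in isolation: without an explicit `dtype`, `np.array` infers the element type from the Python values, so integer-valued event times become `int64` (on most platforms) while float-valued ones become `float64`. The no-padding branch could therefore produce a different dtype than the padded branch, which already forces one. A minimal sketch, independent of EasyTPP's `EventTokenizer` (the sequence values below are made up for illustration):

```python
import numpy as np

# Inferred dtypes depend on the Python values, so batches can disagree:
time_seqs_int = [[0, 2, 5], [0, 1, 7]]      # integer-valued event times
time_seqs_flt = [[0.0, 2.5], [0.0, 1.2]]    # float-valued event times

print(np.array(time_seqs_int).dtype)  # inferred integer dtype (int64 on most platforms)
print(np.array(time_seqs_flt).dtype)  # float64

# Forcing the dtype, as the commit does, makes every batch uniform:
a = np.array(time_seqs_int, dtype=np.float32)
b = np.array(time_seqs_flt, dtype=np.float32)
assert a.dtype == b.dtype == np.float32
```

A uniform `float32` for the time inputs and `int64` for the event-type input matches what the padded branch emits, so downstream tensor conversion sees consistent dtypes regardless of whether padding was applied.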

0 commit comments

Comments
 (0)