Skip to content

Commit

Permalink
pin down the version of transformers for deid
Browse files (browse the repository at this point in the history)
  • Loading branch information
baixiac committed Sep 27, 2024
1 parent 0dbe9de commit e6f9e33
Show file tree
Hide file tree
Showing 14 changed files with 25 additions and 26 deletions.
6 changes: 3 additions & 3 deletions app/api/auth/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ class CmsUserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
verification_token_secret = get_settings().AUTH_JWT_SECRET

async def on_after_register(self, user: User, request: Optional[Request] = None) -> None:
logger.info(f"User {user.id} has registered.")
logger.info("User %s has registered.", user.id)

async def on_after_forgot_password(self, user: User, token: str, request: Optional[Request] = None) -> None:
logger.info(f"User {user.id} has forgot their password. Reset token: {token}")
logger.info("User %s has forgot their password. Reset token: %s", user.id, token)

async def on_after_request_verify(self, user: User, token: str, request: Optional[Request] = None) -> None:
logger.info(f"Verification requested for user {user.id}. Verification token: {token}")
logger.info("Verification requested for user %s. Verification token: %s", user.id, token)


async def get_user_manager(user_db: SQLAlchemyUserDatabase = Depends(get_user_db)) -> AsyncGenerator:
Expand Down
2 changes: 1 addition & 1 deletion app/api/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __call__(self) -> AbstractModelService:
if self._model_type in model_service_registry.keys():
self._model_sevice = model_service_registry[self._model_type](self._config)
else:
logger.error(f"Unknown model type: {self._model_type}")
logger.error("Unknown model type: %s", self._model_type)
exit(1) # throw an exception?
return self._model_sevice

Expand Down
8 changes: 4 additions & 4 deletions app/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def log_record_factory(*args: Tuple, **kwargs: Dict[str, Any]) -> LogRecord:
logger.addHandler(gelf_tcp_handler)
logging.getLogger("uvicorn").addHandler(gelf_tcp_handler)
except Exception as e:
logger.error(f"$GELF_INPUT_URI is set to \"{os.environ['GELF_INPUT_URI']}\" but it's not ready to receive logs")
logger.error("$GELF_INPUT_URI is set to \"%s\" but it's not ready to receive logs", os.environ['GELF_INPUT_URI'])
logger.exception(e)

config = get_settings()
Expand Down Expand Up @@ -99,7 +99,7 @@ def log_record_factory(*args: Tuple, **kwargs: Dict[str, Any]) -> LogRecord:
logger.error("Neither the model path or the mlflow model uri was passed in")
sys.exit(1)

logger.info(f'Start serving model "{model_type}" on {host}:{port}')
logger.info('Start serving model "%s" on %s:%s', model_type, host, port)
# interrupted = False
# while not interrupted:
uvicorn.run(model_server_app if not streamable else get_stream_server(), host=host, port=int(port), log_config=None)
Expand Down Expand Up @@ -161,7 +161,7 @@ def log_record_factory(*args: Tuple, **kwargs: Dict[str, Any]) -> LogRecord:
elif training_type == TrainingType.META_SUPERVISED and model_service._metacat_trainer is not None:
model_service.train_metacat(*training_args, **json.loads(hyperparameters))
else:
logger.error(f"Training type {training_type} is not supported or the corresponding trainer has not been enabled in the .env file.")
logger.error("Training type %s is not supported or the corresponding trainer has not been enabled in the .env file.", training_type)
sys.exit(1)


Expand All @@ -179,7 +179,7 @@ def register_model(model_type: ModelType = typer.Option(..., help="The type of t
if model_type in model_service_registry.keys():
model_service_type = model_service_registry[model_type]
else:
logger.error(f"Unknown model type: {model_type}")
logger.error("Unknown model type: %s", model_type)
sys.exit(1)

m_config = json.loads(model_config) if model_config is not None else None
Expand Down
2 changes: 1 addition & 1 deletion app/model_services/medcat_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def from_model(cls, model: CAT) -> "MedCATModel":
@staticmethod
def load_model(model_file_path: str, *args: Tuple, **kwargs: Dict[str, Any]) -> CAT:
cat = CAT.load_model_pack(model_file_path, *args, **kwargs)
logger.info(f"Model pack loaded from {os.path.normpath(model_file_path)}")
logger.info("Model pack loaded from %s", os.path.normpath(model_file_path))
return cat

@staticmethod
Expand Down
3 changes: 1 addition & 2 deletions app/model_services/medcat_model_deid.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ def init_model(self) -> None:
device=device)
else:
if self._config.DEVICE != "default":
logger.warning(
f"DEVICE is set to '{self._config.DEVICE}' but it is not available. Using 'default' instead.")
logger.warning("DEVICE is set to '%s' but it is not available. Using 'default' instead.", self._config.DEVICE)
_save_pretrained = self._model._addl_ner[0].model.save_pretrained
if ("safe_serialization" in inspect.signature(_save_pretrained).parameters):
self._model._addl_ner[0].model.save_pretrained = partial(_save_pretrained, safe_serialization=(self._config.TRAINING_SAFE_MODEL_SERIALISATION == "true"))
Expand Down
4 changes: 2 additions & 2 deletions app/model_services/medcat_model_icd10.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def get_records_from_doc(self, doc: Dict) -> Dict:
new_rows = []
for _, row in df.iterrows():
if self.ICD10_KEY not in row or not row[self.ICD10_KEY]:
logger.debug(f"No mapped ICD-10 code associated with the entity: {row}")
logger.debug("No mapped ICD-10 code associated with the entity: %s", row)
else:
for icd10 in row[self.ICD10_KEY]:
output_row = row.copy()
Expand All @@ -53,7 +53,7 @@ def get_records_from_doc(self, doc: Dict) -> Dict:
elif isinstance(icd10, list) and icd10:
output_row[self.ICD10_KEY] = icd10[-1]
else:
logger.error(f"Unknown format for the ICD-10 code(s): {icd10}")
logger.error("Unknown format for the ICD-10 code(s): %s", icd10)
if "athena_ids" in output_row and output_row["athena_ids"]:
output_row["athena_ids"] = [athena_id["code"] for athena_id in output_row["athena_ids"]]
new_rows.append(output_row)
Expand Down
4 changes: 2 additions & 2 deletions app/model_services/trf_model_deid.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ def load_model(model_file_path: str) -> Tuple[TransformersTokenizerNER, PreTrain
shutil.unpack_archive(model_file_path, extract_dir=unpacked_model_dir)
tokenizer_path = os.path.join(unpacked_model_dir, "tokenizer.dat")
tokenizer = TransformersTokenizerNER.load(tokenizer_path)
logger.info(f"Tokenizer loaded from {tokenizer_path}")
logger.info("Tokenizer loaded from %s", tokenizer_path)
model = AutoModelForTokenClassification.from_pretrained(unpacked_model_dir)
logger.info(f"Model loaded from {unpacked_model_dir}")
logger.info("Model loaded from %s", unpacked_model_dir)
return tokenizer, model

def init_model(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion app/trainers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def start_training(self,
else:
raise ValueError(f"Unknown training type: {training_type}")

logger.info(f"Starting training job: {training_id} with experiment ID: {experiment_id}")
logger.info("Starting training job: %s with experiment ID: %s", training_id, experiment_id)
self._training_in_progress = True
training_task = asyncio.ensure_future(loop.run_in_executor(self._executor,
partial(run, self, training_params, data_file, log_frequency, run_id, description)))
Expand Down
4 changes: 2 additions & 2 deletions app/trainers/medcat_deid_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def run(trainer: "MedcatDeIdentificationSupervisedTrainer",
cdb_config_path = model_pack_path.replace(".zip", "_config.json")
model.cdb.config.save(cdb_config_path)
artifacts_info = trainer._tracker_client.save_model(model_pack_path, trainer._model_name, trainer._model_manager)
logger.info(f"Retrained model saved: {artifacts_info}")
logger.info("Retrained model saved: %s", artifacts_info)
trainer._tracker_client.save_model_artifact(cdb_config_path, trainer._model_name)
else:
logger.info("Skipped saving on the retrained model")
Expand Down Expand Up @@ -294,5 +294,5 @@ def _customise_training_device(ner: TransformersNER, device_name: str) -> Transf
device=device)
else:
if device_name != "default":
logger.warning(f"DEVICE is set to '{device_name}' but it is not available. Using 'default' instead.")
logger.warning("DEVICE is set to '%s' but it is not available. Using 'default' instead.", device_name)
return ner
6 changes: 3 additions & 3 deletions app/trainers/medcat_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ def deploy_model(model_service: AbstractModelService,

@staticmethod
def save_model_pack(model: CAT, model_dir: str, description: Optional[str] = None) -> str:
logger.info(f"Saving retrained model to {model_dir}...")
logger.info("Saving retrained model to %s...", model_dir)
model.config.version.description = description or model.config.version.description
model_pack_name = model.create_model_pack(model_dir, "model")
model_pack_path = f"{os.path.join(model_dir, model_pack_name)}.zip"
logger.debug(f"Retrained model saved to {model_pack_path}")
logger.debug("Retrained model saved to %s", model_pack_path)
return model_pack_path

@staticmethod
Expand Down Expand Up @@ -167,7 +167,7 @@ def run(trainer: "MedcatSupervisedTrainer",
cdb_config_path = model_pack_path.replace(".zip", "_config.json")
model.cdb.config.save(cdb_config_path)
artifacts_info = trainer._tracker_client.save_model(model_pack_path, trainer._model_name, trainer._model_manager)
logger.info(f"Retrained model saved: {artifacts_info}")
logger.info("Retrained model saved: %s", artifacts_info)
trainer._tracker_client.save_model_artifact(cdb_config_path, trainer._model_name)
else:
logger.info("Skipped saving on the retrained model")
Expand Down
6 changes: 3 additions & 3 deletions app/trainers/metacat_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def run(trainer: "MetacatTrainer",
meta_cat.config.train.nepochs = training_params["nepochs"]
trainer._tracker_client.log_model_config(trainer.get_flattened_config(meta_cat, category_name))
trainer._tracker_client.log_trainer_version(medcat_version)
logger.info(f'Performing supervised training on category "{category_name}"...')
logger.info('Performing supervised training on category "%s"...', category_name)

try:
winner_report = meta_cat.train(data_file.name, os.path.join(copied_model_pack_path.replace(".zip", ""), f"meta_{category_name}"))
Expand All @@ -85,7 +85,7 @@ def run(trainer: "MetacatTrainer",
}
trainer._tracker_client.send_model_stats(report_stats, winner_report["epoch"])
except Exception as e:
logger.error(f"Failed on training meta model: {category_name}. This could be benign if training data has no annotations belonging to this category.")
logger.error("Failed on training meta model: %s. This could be benign if training data has no annotations belonging to this category.", category_name)
logger.exception(e)
trainer._tracker_client.log_exceptions(e)

Expand All @@ -98,7 +98,7 @@ def run(trainer: "MetacatTrainer",
cdb_config_path = model_pack_path.replace(".zip", "_config.json")
model.cdb.config.save(cdb_config_path)
artifacts_info = trainer._tracker_client.save_model(model_pack_path, trainer._model_name, trainer._model_manager)
logger.info(f"Retrained model saved: {artifacts_info}")
logger.info("Retrained model saved: %s", artifacts_info)
trainer._tracker_client.save_model_artifact(cdb_config_path, trainer._model_name)
else:
logger.info("Skipped saving on the retrained model")
Expand Down
2 changes: 0 additions & 2 deletions docker-compose-mon.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ services:
- grafana-data:/var/lib/grafana
expose:
- 3000
# ports:
# - 8202:3000
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:3000/healthz" ]
interval: 60s
Expand Down
1 change: 1 addition & 0 deletions docker/medcat-deid/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
medcat~=1.12.1
transformers<4.42.0
blis<1.0.0
fastapi~=0.102.0
uvicorn~=0.29.0
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ requires-python = ">=3.8"
dynamic = ["version"]
dependencies = [
"medcat~=1.12.1",
"transformers<4.42.0",
"blis<1.0.0",
"fastapi~=0.110.3",
"uvicorn~=0.29.0",
Expand Down

0 comments on commit e6f9e33

Please sign in to comment.