Skip to content

Commit dc66a38

Browse files
committed
Merge branch 'workflow_test'
2 parents 676f16d + ef8e5f5 commit dc66a38

File tree

6 files changed

+0
-33
lines changed

6 files changed

+0
-33
lines changed

environment.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ dependencies:
3232
- opencv-python
3333
- matplotlib
3434
- scikit-learn
35-
- pynvml
3635
- ruff
3736
- segmentation-models-pytorch
3837
- ipywidgets

requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@ scikit-learn
2626
timm
2727
onnx
2828
onnxruntime-gpu
29-
pynvml
3029
ruff
3130
ipywidgets
3231
ipykernel

src/eval.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
instantiate_callbacks,
1515
log_hyperparameters,
1616
task_wrapper,
17-
log_gpu_memory_metadata,
1817
)
1918

2019
log = RankedLogger(__name__, rank_zero_only=True)
@@ -34,8 +33,6 @@ def evaluate(cfg: DictConfig) -> tuple[dict[str, Any], dict[str, Any]]:
3433
"""
3534
assert cfg.model.ckpt_path, 'The checkpoint path (cfg.model.ckpt_path) is not set!'
3635

37-
log_gpu_memory_metadata()
38-
3936
log.info(f'Instantiating datamodule <{cfg.data._target_}>')
4037
datamodule: LightningDataModule = hydra.utils.instantiate(cfg.data)
4138

src/train.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
instantiate_loggers,
1919
log_hyperparameters,
2020
task_wrapper,
21-
log_gpu_memory_metadata,
2221
save_model_metadata,
2322
is_running_in_docker,
2423
)
@@ -40,7 +39,6 @@ def train(cfg: DictConfig) -> tuple[dict[str, Any], dict[str, Any]]:
4039
Returns:
4140
Tuple[Dict[str, Any], Dict[str, Any]]: metrics and dict with all instantiated objects.
4241
"""
43-
log_gpu_memory_metadata()
4442

4543
# set seed for random number generators in pytorch, numpy and python.random
4644
if cfg.get('seed'):

src/utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
instantiate_callbacks,
88
instantiate_loggers,
99
log_hyperparameters,
10-
log_gpu_memory_metadata,
1110
run_sh_command,
1211
save_model_metadata,
1312
is_running_in_docker,

src/utils/utils.py

Lines changed: 0 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,6 @@
1313
from omegaconf import DictConfig, OmegaConf
1414
from lightning_utilities.core.rank_zero import rank_zero_only
1515
import torch
16-
from pynvml import (
17-
nvmlDeviceGetHandleByIndex,
18-
nvmlDeviceGetMemoryInfo,
19-
nvmlInit,
20-
)
2116

2217
from src.utils import pylogger, rich_utils
2318

@@ -311,26 +306,6 @@ def log_hyperparameters(object_dict: dict[str, Any]) -> None:
311306
logger.log_hyperparams(hparams)
312307

313308

314-
def log_gpu_memory_metadata() -> None:
315-
"""_Logging GPUs memory metadata (total, free and used) if it's available by
316-
PYNVML.
317-
"""
318-
gpus_num = torch.cuda.device_count()
319-
if gpus_num == 0:
320-
return
321-
nvmlInit()
322-
cards = (nvmlDeviceGetHandleByIndex(num) for num in range(gpus_num))
323-
for i, card in enumerate(cards):
324-
info = nvmlDeviceGetMemoryInfo(card)
325-
div = 1023**3
326-
total_gb = info.total / div
327-
free_gb = info.free / div
328-
used_gb = info.used / div
329-
log.info(f'GPU memory info: card {i} : total : {total_gb:.2f} GB')
330-
log.info(f'GPU memory info: card {i} : free : {free_gb:.2f} GB')
331-
log.info(f'GPU memory info: card {i} : used : {used_gb:.2f} GB')
332-
333-
334309
def save_model_metadata(
335310
model_path: str,
336311
host_model_path: str,

0 commit comments

Comments (0)