From 317f18c9c2ec8b2e610528640c3aa6b59914f9ea Mon Sep 17 00:00:00 2001
From: sallyjunjun
Date: Mon, 2 Dec 2024 17:28:14 +0800
Subject: [PATCH] fix check CUDA_DEVICE_MAX_CONNECTIONS

---
 internlm/utils/common.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/internlm/utils/common.py b/internlm/utils/common.py
index 56ebcfbe6..de885b72a 100644
--- a/internlm/utils/common.py
+++ b/internlm/utils/common.py
@@ -250,8 +250,9 @@ def enable_pytorch_expandable_segments():
 
 
 def check_cuda_env():
-    if os.getenv("CUDA_DEVICE_MAX_CONNECTIONS") is None:
-        logger.warning("Env var CUDA_DEVICE_MAX_CONNECTIONS has not be set, please note this!")
+    max_connections = os.getenv("CUDA_DEVICE_MAX_CONNECTIONS")
+    assert max_connections is not None, "Env var CUDA_DEVICE_MAX_CONNECTIONS has not been set, please set it to 1!"
+    assert max_connections == '1', "Env var CUDA_DEVICE_MAX_CONNECTIONS is set to {}, but it should be set to 1!".format(max_connections)
 
 
 class DummyProfile: