Skip to content

Commit

Permalink
Merge pull request #35 from keisuke-yanagisawa/mps
Browse files Browse the repository at this point in the history
Enable the use of MPS (Multi-Process Service) for better calculation efficiency
  • Loading branch information
keisuke-yanagisawa authored May 17, 2024
2 parents 4f5f248 + d42b03b commit a104fb2
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 7 deletions.
26 changes: 19 additions & 7 deletions exprorer_msmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ from joblib import Parallel, delayed

from script.utilities import util
from script.utilities.GPUtil import get_gpuids
from script.utilities.GPUtil import is_mps_control_running
from script.genpmap import gen_pmap
from script.utilities.pmd import convert as pmd_convert
from script.generate_msmd_system import generate_msmd_system
Expand Down Expand Up @@ -171,19 +172,30 @@ if __name__ == "__main__":

# Count num. of GPUs and allocate CPU cores to each GPU
# Raise EnvironmentError if GPU is not available
gpuids = get_gpuids()

num_process_per_gpu=setting["general"]["num_process_per_gpu"]
gpuids = get_gpuids() * num_process_per_gpu
print(gpuids)
if num_process_per_gpu > 1:
if not is_mps_control_running():
raise RuntimeError("nvidia-cuda-mps-server is not running. Please start it before using MPS.")
else:
logger.info("nvidia-cuda-mps-server is running.")

ngpus = len(gpuids)
if not setting["general"]["multiprocessing"]:
ngpus = 1
ncpus = len(os.sched_getaffinity(0))
ncpus = 1 if ncpus is None else ncpus
ncpus_per_gpu = ncpus // len(get_gpuids(ignore_cuda_visible_devices=True))
if ncpus_per_gpu == 0:
raise EnvironmentError("The number of CPU threads must be "
"equal to or greater than the number of available GPUs")

ratio_available_gpus = len(get_gpuids(ignore_cuda_visible_devices=True)) / len(get_gpuids(ignore_cuda_visible_devices=False))
ncpus_per_run = int(ncpus * ratio_available_gpus / len(gpuids))
if ncpus_per_run == 0:
raise EnvironmentError("The number of CPU threads must be equal to "
"or greater than the number of runs executed simultaneously")

logger.info(f"{ncpus} threads are detected")
logger.info(f"{ngpus} parallel execution with {ncpus_per_gpu} CPU threads per process")
logger.info(f"{ngpus} parallel execution with {ncpus_per_run} CPU threads per process")

# prepare systems
# n_jobs = num of CPU cores, not num of GPUs
Expand All @@ -205,7 +217,7 @@ if __name__ == "__main__":
if not args.skip_simulation:
gpuids = (gpuids * len(indices))[:len(indices)]
trajectories = Parallel(n_jobs=ngpus, backend='threading')(
delayed(execute_single_simulation)(idx, setting, gpuid, ncpus_per_gpu, top=top, gro=gro, pdb=pdb, debug=args.debug)
delayed(execute_single_simulation)(idx, setting, gpuid, ncpus_per_run, top=top, gro=gro, pdb=pdb, debug=args.debug)
for idx, gpuid, top, gro, pdb in zip(indices, gpuids, tops, gros, pdbs)
)
else:
Expand Down
13 changes: 13 additions & 0 deletions script/utilities/GPUtil.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import math
import os
import sys
import subprocess

import GPUtil

Expand Down Expand Up @@ -29,3 +30,15 @@ def get_gpuids(ignore_cuda_visible_devices=False):
ngpus = 1

return list(gpuids)

def is_mps_control_running() -> bool:
    """Return True if an ``nvidia-cuda-mps-control`` process is running.

    The process table is read by running ``ps`` directly (no shell and no
    ``grep`` pipeline). A process is counted only when the basename of the
    first word of its command line is exactly ``nvidia-cuda-mps-control``,
    so helper processes of the check itself, or unrelated commands that
    merely mention the name in an argument, are never counted.

    Returns:
        True when at least one matching process is found. False when none is
        found, or when ``ps`` cannot be executed or exits with an error.
    """
    target = "nvidia-cuda-mps-control"
    try:
        # -e: list every process, not only those owned by the caller.
        # NOTE(review): this assumes a control daemon started by another user
        # (e.g. root) should also count as "running" -- confirm.
        # -o args=: print only the full command line, without a header row.
        result = subprocess.run(
            ["ps", "-e", "-o", "args="],
            capture_output=True,
            text=True,
            errors="replace",  # odd bytes in a command line must not raise
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # ps failed or is not installed: report "not running" rather than crash.
        return False

    return any(
        bool(fields) and os.path.basename(fields[0]) == target
        for fields in map(str.split, result.stdout.splitlines())
    )
3 changes: 3 additions & 0 deletions script/utilities/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ def set_default(setting: dict) -> None:
setting["exprorer_msmd"]["general"]["temperature"] = 300
if "pressure" not in setting["exprorer_msmd"]["general"]:
setting["exprorer_msmd"]["general"]["pressure"] = 1.0
if "num_process_per_gpu" not in setting["general"]:
setting["general"]["num_process_per_gpu"] = 1



def ensure_compatibility_v1_1(setting: dict):
Expand Down

0 comments on commit a104fb2

Please sign in to comment.