# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# mypy: allow-untyped-defs

# Unlike the rest of PyTorch, this file must be Python 2 compliant.
# This script outputs relevant system environment info.
# Run it with `python util/collect_env.py` or `python -m util.collect_env`.

import datetime
import json
import locale
import os
import re
import subprocess
import sys
from collections import namedtuple


try:
    import torch

    TORCH_AVAILABLE = True
except (ImportError, NameError, AttributeError, OSError):
    TORCH_AVAILABLE = False

# System Environment Information
SystemEnv = namedtuple(
    "SystemEnv",
    [
        "torch_version",
        "is_debug_build",
        "cuda_compiled_version",
        "gcc_version",
        "clang_version",
        "cmake_version",
        "os",
        "libc_version",
        "python_version",
        "python_platform",
        "is_cuda_available",
        "cuda_runtime_version",
        "cuda_module_loading",
        "nvidia_driver_version",
        "nvidia_gpu_models",
        "cudnn_version",
        "pip_version",  # 'pip' or 'pip3'
        "pip_packages",
        "conda_packages",
        "hip_compiled_version",
        "hip_runtime_version",
        "miopen_runtime_version",
        "caching_allocator_config",
        "is_xnnpack_available",
        "cpu_info",
    ],
)

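# Usage sketch (illustrative only): get_env_info() below fills every field,
# substituting "N/A" or None when a value cannot be collected, e.g.
#   get_env_info().torch_version  # "N/A" when torch is not importable
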
COMMON_PATTERNS = [
    "torch",
    "numpy",
    "triton",
    "optree",
]

NVIDIA_PATTERNS = [
    "cuda-cudart",
    "cuda-cupti",
    "cuda-libraries",
    "cuda-opencl",
    "cuda-nvrtc",
    "cuda-runtime",
    "cublas",
    "cudnn",
    "cufft",
    "curand",
    "cusolver",
    "cusparse",
    "nccl",
    "nvjitlink",
    "nvtx",
]

CONDA_PATTERNS = [
    "cudatoolkit",
    "soumith",
    "mkl",
    "magma",
]

PIP_PATTERNS = [
    "mypy",
    "flake8",
    "onnx",
]


def run(command):
    """Return (return-code, stdout, stderr)."""
    shell = isinstance(command, str)
    p = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell
    )
    raw_output, raw_err = p.communicate()
    rc = p.returncode
    if get_platform() == "win32":
        enc = "oem"
    else:
        enc = locale.getpreferredencoding()
    output = raw_output.decode(enc)
    err = raw_err.decode(enc)
    return rc, output.strip(), err.strip()


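# Usage sketch for run() (illustrative only; not executed at import time):
#   rc, out, err = run("uname -r")       # a string command runs with shell=True
#   rc, out, err = run(["uname", "-r"])  # a list command runs with shell=False
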
def run_and_read_all(run_lambda, command):
    """Run command using run_lambda; reads and returns entire output if rc is 0."""
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    return out


def run_and_parse_first_match(run_lambda, command, regex):
    """Run command using run_lambda, returns the first regex match if it exists."""
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    match = re.search(regex, out)
    if match is None:
        return None
    return match.group(1)


def run_and_return_first_line(run_lambda, command):
    """Run command using run_lambda and returns first line if output is not empty."""
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    return out.split("\n")[0]


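# Usage sketches for the helpers above (illustrative only):
#   run_and_read_all(run, "cat /etc/os-release")                  # whole output, or None on failure
#   run_and_parse_first_match(run, "gcc --version", r"gcc (.*)")  # first capture group, or None
#   run_and_return_first_line(run, "uptime")                      # first line of output, or None
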
def get_conda_packages(run_lambda, patterns=None):
    if patterns is None:
        patterns = CONDA_PATTERNS + COMMON_PATTERNS + NVIDIA_PATTERNS
    conda = os.environ.get("CONDA_EXE", "conda")
    out = run_and_read_all(run_lambda, "{} list".format(conda))
    if out is None:
        return out

    return "\n".join(
        line
        for line in out.splitlines()
        if not line.startswith("#") and any(name in line for name in patterns)
    )


def get_gcc_version(run_lambda):
    return run_and_parse_first_match(run_lambda, "gcc --version", r"gcc (.*)")


def get_clang_version(run_lambda):
    return run_and_parse_first_match(
        run_lambda, "clang --version", r"clang version (.*)"
    )


def get_cmake_version(run_lambda):
    return run_and_parse_first_match(run_lambda, "cmake --version", r"cmake (.*)")


def get_nvidia_driver_version(run_lambda):
    if get_platform() == "darwin":
        cmd = "kextstat | grep -i cuda"
        return run_and_parse_first_match(
            run_lambda, cmd, r"com[.]nvidia[.]CUDA [(](.*?)[)]"
        )
    smi = get_nvidia_smi()
    return run_and_parse_first_match(run_lambda, smi, r"Driver Version: (.*?) ")


def get_gpu_info(run_lambda):
    if get_platform() == "darwin" or (
        TORCH_AVAILABLE
        and hasattr(torch.version, "hip")
        and torch.version.hip is not None
    ):
        if TORCH_AVAILABLE and torch.cuda.is_available():
            if torch.version.hip is not None:
                prop = torch.cuda.get_device_properties(0)
                if hasattr(prop, "gcnArchName"):
                    gcnArch = " ({})".format(prop.gcnArchName)
                else:
                    gcnArch = "NoGCNArchNameOnOldPyTorch"
            else:
                gcnArch = ""
            return torch.cuda.get_device_name(None) + gcnArch
        return None
    smi = get_nvidia_smi()
    uuid_regex = re.compile(r" \(UUID: .+?\)")
    rc, out, _ = run_lambda(smi + " -L")
    if rc != 0:
        return None
    # Anonymize GPUs by removing their UUID
    return re.sub(uuid_regex, "", out)


def get_running_cuda_version(run_lambda):
    return run_and_parse_first_match(run_lambda, "nvcc --version", r"release .+ V(.*)")


def get_cudnn_version(run_lambda):
    """Return a list of libcudnn.so; it's hard to tell which one is being used."""
    if get_platform() == "win32":
        system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
        cuda_path = os.environ.get("CUDA_PATH", "%CUDA_PATH%")
        where_cmd = os.path.join(system_root, "System32", "where")
        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
    elif get_platform() == "darwin":
        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
        cudnn_cmd = "ls /usr/local/cuda/lib/libcudnn*"
    else:
        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
    rc, out, _ = run_lambda(cudnn_cmd)
    # The lookup command may return 1 on permission errors or when nothing is found
    if len(out) == 0 or (rc != 1 and rc != 0):
        cudnn_lib = os.environ.get("CUDNN_LIBRARY")
        if cudnn_lib is not None and os.path.isfile(cudnn_lib):
            return os.path.realpath(cudnn_lib)
        return None
    files_set = set()
    for fn in out.split("\n"):
        fn = os.path.realpath(fn)  # eliminate symbolic links
        if os.path.isfile(fn):
            files_set.add(fn)
    if not files_set:
        return None
    # Alphabetize the result because the order is non-deterministic otherwise
    files = sorted(files_set)
    if len(files) == 1:
        return files[0]
    result = "\n".join(files)
    return "Probably one of the following:\n{}".format(result)


def get_nvidia_smi():
    # Note: nvidia-smi is currently available only on Windows and Linux
    smi = "nvidia-smi"
    if get_platform() == "win32":
        system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
        program_files_root = os.environ.get("PROGRAMFILES", "C:\\Program Files")
        legacy_path = os.path.join(
            program_files_root, "NVIDIA Corporation", "NVSMI", smi
        )
        new_path = os.path.join(system_root, "System32", smi)
        smis = [new_path, legacy_path]
        for candidate_smi in smis:
            if os.path.exists(candidate_smi):
                smi = '"{}"'.format(candidate_smi)
                break
    return smi


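# Illustrative results of get_nvidia_smi() (assumed typical installs): plain
# "nvidia-smi" on Linux (resolved via PATH), or a quoted absolute path such as
# '"C:\\Windows\\System32\\nvidia-smi"' on Windows.
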
# example outputs of CPU infos
# * linux
#   Architecture:            x86_64
#   CPU op-mode(s):          32-bit, 64-bit
#   Address sizes:           46 bits physical, 48 bits virtual
#   Byte Order:              Little Endian
#   CPU(s):                  128
#   On-line CPU(s) list:     0-127
#   Vendor ID:               GenuineIntel
#   Model name:              Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
#   CPU family:              6
#   Model:                   106
#   Thread(s) per core:      2
#   Core(s) per socket:      32
#   Socket(s):               2
#   Stepping:                6
#   BogoMIPS:                5799.78
#   Flags:                   fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
#                            sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
#                            xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
#                            pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
#                            hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
#                            fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
#                            avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
#                            xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
#                            avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
#   Virtualization features:
#     Hypervisor vendor:     KVM
#     Virtualization type:   full
#   Caches (sum of all):
#     L1d:                   3 MiB (64 instances)
#     L1i:                   2 MiB (64 instances)
#     L2:                    80 MiB (64 instances)
#     L3:                    108 MiB (2 instances)
#   NUMA:
#     NUMA node(s):          2
#     NUMA node0 CPU(s):     0-31,64-95
#     NUMA node1 CPU(s):     32-63,96-127
#   Vulnerabilities:
#     Itlb multihit:         Not affected
#     L1tf:                  Not affected
#     Mds:                   Not affected
#     Meltdown:              Not affected
#     Mmio stale data:       Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
#     Retbleed:              Not affected
#     Spec store bypass:     Mitigation; Speculative Store Bypass disabled via prctl and seccomp
#     Spectre v1:            Mitigation; usercopy/swapgs barriers and __user pointer sanitization
#     Spectre v2:            Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
#     Srbds:                 Not affected
#     Tsx async abort:       Not affected
# * win32
#   Architecture=9
#   CurrentClockSpeed=2900
#   DeviceID=CPU0
#   Family=179
#   L2CacheSize=40960
#   L2CacheSpeed=
#   Manufacturer=GenuineIntel
#   MaxClockSpeed=2900
#   Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
#   ProcessorType=3
#   Revision=27142
#
#   Architecture=9
#   CurrentClockSpeed=2900
#   DeviceID=CPU1
#   Family=179
#   L2CacheSize=40960
#   L2CacheSpeed=
#   Manufacturer=GenuineIntel
#   MaxClockSpeed=2900
#   Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
#   ProcessorType=3
#   Revision=27142


def get_cpu_info(run_lambda):
    rc, out, err = 0, "", ""
    if get_platform() == "linux":
        rc, out, err = run_lambda("lscpu")
    elif get_platform() == "win32":
        rc, out, err = run_lambda(
            'powershell.exe "gwmi -Class Win32_Processor | Select-Object -Property Name,Manufacturer,Family,\
            Architecture,ProcessorType,DeviceID,CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision\
            | ConvertTo-Json"'
        )
        if rc == 0:
            lst = []
            try:
                obj = json.loads(out)
                if type(obj) is list:
                    for o in obj:
                        lst.append("----------------------")
                        lst.extend([f"{k}: {v}" for (k, v) in o.items()])
                else:
                    lst.extend([f"{k}: {v}" for (k, v) in obj.items()])
            except ValueError as e:
                lst.append(out)
                lst.append(str(e))
            out = "\n".join(lst)
    elif get_platform() == "darwin":
        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
    cpu_info = "None"
    if rc == 0:
        cpu_info = out
    else:
        cpu_info = err
    return cpu_info


def get_platform():
    if sys.platform.startswith("linux"):
        return "linux"
    elif sys.platform.startswith("win32"):
        return "win32"
    elif sys.platform.startswith("cygwin"):
        return "cygwin"
    elif sys.platform.startswith("darwin"):
        return "darwin"
    else:
        return sys.platform


def get_mac_version(run_lambda):
    return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)")


def get_windows_version(run_lambda):
    ret = run_and_read_all(
        run_lambda,
        'powershell.exe "gwmi -Class Win32_OperatingSystem | Select-Object -Property Caption,\
        OSArchitecture,Version | ConvertTo-Json"',
    )
    if ret is None:
        # run_and_read_all returns None when the command fails
        return None
    try:
        obj = json.loads(ret)
        ret = f'{obj["Caption"]} ({obj["Version"]} {obj["OSArchitecture"]})'
    except ValueError as e:
        ret += f"\n{str(e)}"
    return ret


def get_lsb_version(run_lambda):
    return run_and_parse_first_match(
        run_lambda, "lsb_release -a", r"Description:\t(.*)"
    )


def check_release_file(run_lambda):
    return run_and_parse_first_match(
        run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"'
    )


def get_os(run_lambda):
    from platform import machine

    platform = get_platform()

    if platform == "win32" or platform == "cygwin":
        return get_windows_version(run_lambda)

    if platform == "darwin":
        version = get_mac_version(run_lambda)
        if version is None:
            return None
        return "macOS {} ({})".format(version, machine())

    if platform == "linux":
        # Ubuntu/Debian based
        desc = get_lsb_version(run_lambda)
        if desc is not None:
            return "{} ({})".format(desc, machine())

        # Try reading /etc/*-release
        desc = check_release_file(run_lambda)
        if desc is not None:
            return "{} ({})".format(desc, machine())

        return "{} ({})".format(platform, machine())

    # Unknown platform
    return platform


def get_python_platform():
    import platform

    return platform.platform()


def get_libc_version():
    import platform

    if get_platform() != "linux":
        return "N/A"
    return "-".join(platform.libc_ver())


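# Illustrative output (assuming a glibc-based Linux): platform.libc_ver()
# returns a pair like ("glibc", "2.31"), so this reports "glibc-2.31";
# non-Linux platforms report "N/A".
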
def get_pip_packages(run_lambda, patterns=None):
    """Return `pip list` output. Note: will also find conda-installed pytorch and numpy packages."""
    if patterns is None:
        patterns = PIP_PATTERNS + COMMON_PATTERNS + NVIDIA_PATTERNS

    pip_version = "pip3" if sys.version[0] == "3" else "pip"

    os.environ["PIP_DISABLE_PIP_VERSION_CHECK"] = "1"
    # People generally have pip as `pip` or `pip3`
    # But here it is invoked as `python -mpip`
    out = run_and_read_all(
        run_lambda, [sys.executable, "-mpip", "list", "--format=freeze"]
    )
    if out is None:
        # run_and_read_all returns None when the pip invocation fails
        return pip_version, out
    filtered_out = "\n".join(
        line for line in out.splitlines() if any(name in line for name in patterns)
    )

    return pip_version, filtered_out


def get_cachingallocator_config():
    ca_config = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
    return ca_config


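# Illustrative value (hedged; the accepted options depend on the PyTorch
# build): PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:128" would be reported
# verbatim here, and an unset variable is reported as "".
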
def get_cuda_module_loading_config():
    if TORCH_AVAILABLE and torch.cuda.is_available():
        torch.cuda.init()
        config = os.environ.get("CUDA_MODULE_LOADING", "")
        return config
    else:
        return "N/A"


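# Note (hedged): recent PyTorch builds typically set CUDA_MODULE_LOADING
# (commonly to "LAZY") during CUDA initialization, which is why the variable
# is read only after torch.cuda.init(); an unset variable is reported as "".
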
def is_xnnpack_available():
    if TORCH_AVAILABLE:
        import torch.backends.xnnpack

        return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
    else:
        return "N/A"


def get_env_info():
    """
    Collects environment information to aid in debugging.

    The returned environment information contains details on the torch version, whether
    it is a debug build, the CUDA compiled version, gcc version, clang version, cmake
    version, operating system, libc version, python version, python platform, CUDA
    availability, CUDA runtime version, CUDA module loading config, GPU model and
    configuration, Nvidia driver version, cuDNN version, pip version and versions of
    relevant pip and conda packages, HIP runtime version, MIOpen runtime version,
    caching allocator config, XNNPACK availability and CPU information.

    Returns:
        SystemEnv (namedtuple): A tuple containing various environment details
        and system information.
    """
    run_lambda = run
    pip_version, pip_list_output = get_pip_packages(run_lambda)

    if TORCH_AVAILABLE:
        version_str = torch.__version__
        debug_mode_str = str(torch.version.debug)
        cuda_available_str = str(torch.cuda.is_available())
        cuda_version_str = torch.version.cuda
        if (
            not hasattr(torch.version, "hip") or torch.version.hip is None
        ):  # cuda version
            hip_compiled_version = hip_runtime_version = miopen_runtime_version = "N/A"
        else:  # HIP version

            def get_version_or_na(cfg, prefix):
                _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s]
                return _lst[0] if _lst else "N/A"

            cfg = torch._C._show_config().split("\n")
            hip_runtime_version = get_version_or_na(cfg, "HIP Runtime")
            miopen_runtime_version = get_version_or_na(cfg, "MIOpen")
            cuda_version_str = "N/A"
            hip_compiled_version = torch.version.hip
    else:
        version_str = debug_mode_str = cuda_available_str = cuda_version_str = "N/A"
        hip_compiled_version = hip_runtime_version = miopen_runtime_version = "N/A"

    sys_version = sys.version.replace("\n", " ")

    conda_packages = get_conda_packages(run_lambda)

    return SystemEnv(
        torch_version=version_str,
        is_debug_build=debug_mode_str,
        python_version="{} ({}-bit runtime)".format(
            sys_version, sys.maxsize.bit_length() + 1
        ),
        python_platform=get_python_platform(),
        is_cuda_available=cuda_available_str,
        cuda_compiled_version=cuda_version_str,
        cuda_runtime_version=get_running_cuda_version(run_lambda),
        cuda_module_loading=get_cuda_module_loading_config(),
        nvidia_gpu_models=get_gpu_info(run_lambda),
        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
        cudnn_version=get_cudnn_version(run_lambda),
        hip_compiled_version=hip_compiled_version,
        hip_runtime_version=hip_runtime_version,
        miopen_runtime_version=miopen_runtime_version,
        pip_version=pip_version,
        pip_packages=pip_list_output,
        conda_packages=conda_packages,
        os=get_os(run_lambda),
        libc_version=get_libc_version(),
        gcc_version=get_gcc_version(run_lambda),
        clang_version=get_clang_version(run_lambda),
        cmake_version=get_cmake_version(run_lambda),
        caching_allocator_config=get_cachingallocator_config(),
        is_xnnpack_available=is_xnnpack_available(),
        cpu_info=get_cpu_info(run_lambda),
    )


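# Usage sketch (illustrative only):
#   info = get_env_info()               # SystemEnv namedtuple
#   print(info.os, info.python_version)
#   print(pretty_str(info))             # same rendering as get_pretty_env_info()
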
env_info_fmt = """
PyTorch version: {torch_version}
Is debug build: {is_debug_build}
CUDA used to build PyTorch: {cuda_compiled_version}
ROCM used to build PyTorch: {hip_compiled_version}

OS: {os}
GCC version: {gcc_version}
Clang version: {clang_version}
CMake version: {cmake_version}
Libc version: {libc_version}

Python version: {python_version}
Python platform: {python_platform}
Is CUDA available: {is_cuda_available}
CUDA runtime version: {cuda_runtime_version}
CUDA_MODULE_LOADING set to: {cuda_module_loading}
GPU models and configuration: {nvidia_gpu_models}
Nvidia driver version: {nvidia_driver_version}
cuDNN version: {cudnn_version}
HIP runtime version: {hip_runtime_version}
MIOpen runtime version: {miopen_runtime_version}
Is XNNPACK available: {is_xnnpack_available}

CPU:
{cpu_info}

Versions of relevant libraries:
{pip_packages}
{conda_packages}
""".strip()


def pretty_str(envinfo):  # noqa: C901
    def replace_nones(dct, replacement="Could not collect"):
        for key in dct.keys():
            if dct[key] is not None:
                continue
            dct[key] = replacement
        return dct

    def replace_bools(dct, true="Yes", false="No"):
        for key in dct.keys():
            if dct[key] is True:
                dct[key] = true
            elif dct[key] is False:
                dct[key] = false
        return dct

    def prepend(text, tag="[prepend]"):
        lines = text.split("\n")
        updated_lines = [tag + line for line in lines]
        return "\n".join(updated_lines)

    def replace_if_empty(text, replacement="No relevant packages"):
        if text is not None and len(text) == 0:
            return replacement
        return text

    def maybe_start_on_next_line(string):
        # If `string` is multiline, prepend a \n to it.
        if string is not None and len(string.split("\n")) > 1:
            return "\n{}\n".format(string)
        return string

    mutable_dict = envinfo._asdict()

    # If nvidia_gpu_models is multiline, start on the next line
    mutable_dict["nvidia_gpu_models"] = maybe_start_on_next_line(
        envinfo.nvidia_gpu_models
    )

    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
    dynamic_cuda_fields = [
        "cuda_runtime_version",
        "nvidia_gpu_models",
        "nvidia_driver_version",
    ]
    all_cuda_fields = dynamic_cuda_fields + ["cudnn_version"]
    all_dynamic_cuda_fields_missing = all(
        mutable_dict[field] is None for field in dynamic_cuda_fields
    )
    if (
        TORCH_AVAILABLE
        and not torch.cuda.is_available()
        and all_dynamic_cuda_fields_missing
    ):
        for field in all_cuda_fields:
            mutable_dict[field] = "No CUDA"
        if envinfo.cuda_compiled_version is None:
            mutable_dict["cuda_compiled_version"] = "None"

    # Replace True with Yes, False with No
    mutable_dict = replace_bools(mutable_dict)

    # Replace all None objects with 'Could not collect'
    mutable_dict = replace_nones(mutable_dict)

    # If either of these are '', replace with 'No relevant packages'
    mutable_dict["pip_packages"] = replace_if_empty(mutable_dict["pip_packages"])
    mutable_dict["conda_packages"] = replace_if_empty(mutable_dict["conda_packages"])

    # Tag conda and pip packages with a prefix
    # If they were previously None, they'll show up as e.g. '[conda] Could not collect'
    if mutable_dict["pip_packages"]:
        mutable_dict["pip_packages"] = prepend(
            mutable_dict["pip_packages"], "[{}] ".format(envinfo.pip_version)
        )
    if mutable_dict["conda_packages"]:
        mutable_dict["conda_packages"] = prepend(
            mutable_dict["conda_packages"], "[conda] "
        )
    mutable_dict["cpu_info"] = envinfo.cpu_info
    return env_info_fmt.format(**mutable_dict)


def get_pretty_env_info():
    """
    Returns a pretty string of environment information.

    This function retrieves environment information by calling the `get_env_info` function
    and then formats the information into a human-readable string. The retrieved environment
    information is listed in the documentation of `get_env_info`.
    This function is used by `python collect_env.py`, which should be executed when reporting a bug.

    Returns:
        str: A pretty string of the environment information.
    """
    return pretty_str(get_env_info())


def main():
    print("Collecting environment information...")
    output = get_pretty_env_info()
    print(output)

    if (
        TORCH_AVAILABLE
        and hasattr(torch, "utils")
        and hasattr(torch.utils, "_crash_handler")
    ):
        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
        if sys.platform == "linux" and os.path.exists(minidump_dir):
            dumps = [
                os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)
            ]
            if dumps:
                # Guard against an empty directory; max() on an empty sequence raises
                latest = max(dumps, key=os.path.getctime)
                ctime = os.path.getctime(latest)
                creation_time = datetime.datetime.fromtimestamp(ctime).strftime(
                    "%Y-%m-%d %H:%M:%S"
                )
                msg = (
                    "\n*** Detected a minidump at {} created on {}, ".format(
                        latest, creation_time
                    )
                    + "if this is related to your bug please include it when you file a report ***"
                )
                print(msg, file=sys.stderr)


if __name__ == "__main__":
    main()