Skip to content

Commit e7eb16e

Browse files
authored
[r3.0] release v3.0.3 (#4759)
<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Added a new sigmoid activation function compatible with both JAX and other array types.
  - Introduced a JAX configuration to set default matrix multiplication precision for improved performance.
- **Bug Fixes**
  - Enhanced detection of CUDA out-of-memory errors for more robust error handling.
  - Improved validation and error messages for mismatched or incomplete type maps in data handling.
  - Updated logic for stress calculation to better handle cases where cell information is missing.
  - Improved handling of multi-dimensional atom type arrays and model type checks in TensorFlow evaluation.
- **Chores**
  - Upgraded PyTorch and LibTorch versions to 2.7.0 and updated related download and installation scripts.
  - Updated ROCm repository version and CMake minimum version requirements.
  - Improved device placement for tensors in PyTorch to ensure compatibility.
  - Added or updated teardown methods in multiple test classes for better resource management and cleanup.
  - Expanded and refined test coverage for data type mapping and error scenarios.
  - Updated expected values and test logic for TensorFlow-related tests.
  - Updated macOS deployment target to version 11.0 for build environment.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2 parents 70bc6d8 + 8e9484a commit e7eb16e

File tree

28 files changed

+52753
-49
lines changed

28 files changed

+52753
-49
lines changed

.devcontainer/download_libtorch.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@ set -ev
44
SCRIPT_PATH=$(dirname $(realpath -s $0))
55
cd ${SCRIPT_PATH}/..
66

7-
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcpu.zip -O ~/libtorch.zip
7+
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcpu.zip -O ~/libtorch.zip
88
unzip ~/libtorch.zip

.github/workflows/build_cc.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
env:
5353
DEBIAN_FRONTEND: noninteractive
5454
- run: |
55-
echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | sudo tee /etc/apt/sources.list.d/rocm.list \
55+
echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/6.3/ jammy main' | sudo tee /etc/apt/sources.list.d/rocm.list \
5656
&& printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 \
5757
&& curl -s https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - \
5858
&& sudo apt-get update \

.github/workflows/test_cc.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
run: source/tests/infer/convert-models.sh
3737
- name: Download libtorch
3838
run: |
39-
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip
39+
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcpu.zip -O libtorch.zip
4040
unzip libtorch.zip
4141
# https://github.com/actions/runner-images/issues/9491
4242
- name: Fix kernel mmap rnd bits

.github/workflows/test_cuda.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
&& sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
4848
if: false # skip as we use nvidia image
4949
- run: python -m pip install -U uv
50-
- run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.6.0" "jax[cuda12]==0.5.0"
50+
- run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.7.0" "jax[cuda12]==0.5.0"
5151
- run: |
5252
export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
5353
export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
@@ -67,7 +67,7 @@ jobs:
6767
run: source/tests/infer/convert-models.sh
6868
- name: Download libtorch
6969
run: |
70-
wget https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu124.zip -O libtorch.zip
70+
wget https://download.pytorch.org/libtorch/cu126/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcu126.zip -O libtorch.zip
7171
unzip libtorch.zip
7272
- run: |
7373
export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch

.github/workflows/test_python.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ jobs:
3030
export TENSORFLOW_ROOT=$(python -c 'import tensorflow;print(tensorflow.__path__[0])')
3131
export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
3232
source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py "jax==0.5.0;python_version>='3.10'"
33+
source/install/uv_with_retry.sh pip install --system -U setuptools
3334
source/install/uv_with_retry.sh pip install --system horovod --no-build-isolation
3435
env:
3536
# Please note that uv has some issues with finding
@@ -42,6 +43,8 @@ jobs:
4243
HOROVOD_WITH_TENSORFLOW: 1
4344
HOROVOD_WITHOUT_PYTORCH: 1
4445
HOROVOD_WITH_MPI: 1
46+
# https://cmake.org/cmake/help/latest/variable/CMAKE_POLICY_VERSION_MINIMUM.html
47+
CMAKE_POLICY_VERSION_MINIMUM: 3.5
4548
- run: dp --version
4649
- name: Get durations from cache
4750
uses: actions/cache@v4

backend/find_pytorch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def get_pt_requirement(pt_version: str = "") -> dict:
116116
cuda_version = os.environ.get("CUDA_VERSION", "12.2")
117117
if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
118118
# CUDA 12.2, cudnn 9
119-
pt_version = "2.6.0"
119+
pt_version = "2.7.0"
120120
elif cuda_version in SpecifierSet(">=11,<12"):
121121
# CUDA 11.8, cudnn 8
122122
pt_version = "2.3.1"

deepmd/calculator.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,13 @@ def calculate(
138138
self.results["virial"] = v[0].reshape(3, 3)
139139

140140
# convert virial into stress for lattice relaxation
141-
if "stress" in properties:
142-
if sum(atoms.get_pbc()) > 0:
143-
# the usual convention (tensile stress is positive)
144-
# stress = -virial / volume
145-
stress = -0.5 * (v[0].copy() + v[0].copy().T) / atoms.get_volume()
146-
# Voigt notation
147-
self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]]
148-
else:
149-
raise PropertyNotImplementedError
141+
if cell is not None:
142+
# the usual convention (tensile stress is positive)
143+
# stress = -virial / volume
144+
stress = -0.5 * (v[0].copy() + v[0].copy().T) / atoms.get_volume()
145+
# Voigt notation
146+
self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]]
147+
elif "stress" in properties:
148+
raise PropertyNotImplementedError
149+
else:
150+
pass

deepmd/dpmodel/utils/network.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,19 @@
3434
)
3535

3636

37+
def sigmoid_t(x: np.ndarray) -> np.ndarray:
38+
"""Sigmoid."""
39+
if array_api_compat.is_jax_array(x):
40+
from deepmd.jax.env import (
41+
jax,
42+
)
43+
44+
# see https://github.com/jax-ml/jax/discussions/15617
45+
return jax.nn.sigmoid(x)
46+
xp = array_api_compat.array_namespace(x)
47+
return 1 / (1 + xp.exp(-x))
48+
49+
3750
class Identity(NativeOP):
3851
def __init__(self) -> None:
3952
super().__init__()
@@ -313,9 +326,8 @@ def fn(x):
313326
elif activation_function == "sigmoid":
314327

315328
def fn(x):
316-
xp = array_api_compat.array_namespace(x)
317329
# generated by GitHub Copilot
318-
return 1 / (1 + xp.exp(-x))
330+
return sigmoid_t(x)
319331

320332
return fn
321333
elif activation_function.lower() in ("none", "linear"):

deepmd/jax/env.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
jax.config.update("jax_enable_x64", True)
1414
# jax.config.update("jax_debug_nans", True)
15+
# https://github.com/jax-ml/jax/issues/24909
16+
jax.config.update("jax_default_matmul_precision", "tensorfloat32")
1517

1618
if os.environ.get("DP_DTYPE_PROMOTION_STRICT") == "1":
1719
jax.config.update("jax_numpy_dtype_promotion", "strict")

deepmd/jax/model/base_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def forward_common_atomic(
4747
kk_redu = get_reduce_name(kk)
4848
model_predict[kk_redu] = jnp.sum(vv, axis=atom_axis)
4949
kk_derv_r, kk_derv_c = get_deriv_name(kk)
50-
if vdef.c_differentiable:
50+
if vdef.r_differentiable:
5151

5252
def eval_output(
5353
cc_ext,

0 commit comments

Comments
 (0)