Skip to content

Commit 5b00a32

Browse files
jerome-habana, pre-commit-ci[bot], and Borda
authored
Upgrade to Synapse AI Release 1.12.1 (#106)
* Upgrade to Synapse AI Release 1.12.1. Update images and modify hooks to suit lightning 2.1. Signed-off-by: Jerome <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <[email protected]>
1 parent 73149db commit 5b00a32

File tree

8 files changed

+54
-22
lines changed

8 files changed

+54
-22
lines changed

.azure/hpu-tests.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,15 @@ jobs:
3030
strategy:
3131
matrix:
3232
'w. pytorch-lightning | pypi':
33-
image: "1.12.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest"
33+
image: "1.12.1/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest"
3434
dependency: "pytorch-lightning"
3535
pkg_source: "pypi"
3636
'w. lightning | pypi':
37-
image: "1.12.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest"
37+
image: "1.12.1/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest"
3838
dependency: "lightning"
3939
pkg_source: "pypi"
4040
'w. lightning | source':
41-
image: "1.12.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest"
41+
image: "1.12.1/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest"
4242
dependency: "lightning"
4343
pkg_source: "source"
4444
pool: "intel-hpus"
@@ -52,7 +52,7 @@ jobs:
5252
--shm-size=4g \
5353
-v /usr/bin/docker:/tmp/docker:ro"
5454
variables:
55-
DEEPSPEED_VERSION: "1.12.0"
55+
DEEPSPEED_VERSION: "1.12.1"
5656
workspace:
5757
clean: all
5858

CHANGELOG.md

+24
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
-
13+
14+
### Changed
15+
16+
-
17+
18+
### Fixed
19+
20+
-
21+
22+
### Removed
23+
24+
-
25+
26+
### Deprecated
27+
28+
-
29+
30+
31+
## [1.2.0] - 2023-10-26
32+
33+
### Added
34+
1235
- Added tests, examples and documentation for HPUPrecisionPlugin with autocast ([#94](https://github.com/Lightning-AI/lightning-Habana/pull/94))
1336
- Added test to validate checkpoint resuming with HPUDeepSpeedStrategy ([#95](https://github.com/Lightning-AI/lightning-Habana/pull/95))
1437
- Added support for lightning 2.1 ([#100](https://github.com/Lightning-AI/lightning-Habana/pull/100), [#105](https://github.com/Lightning-AI/lightning-Habana/pull/105))
@@ -17,6 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1740

1841
- Changed HPU docker image based on synapse AI release 1.12.0 ([#90](https://github.com/Lightning-AI/lightning-Habana/pull/90))
1942
- Use standard API's and Remove env variable to get HPU distributed backend ([#91](https://github.com/Lightning-AI/lightning-Habana/pull/91))
43+
- Changed HPU docker image based on synapse AI release 1.12.1, updated hooks ([#106](https://github.com/Lightning-AI/lightning-Habana/pull/106))
2044

2145

2246
### Fixed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,11 @@ The `devices>1` parameter with HPUs enables the Habana accelerator for distribut
6363

6464
# Support Matrix
6565

66-
| **SynapseAI** | **1.12.0** |
66+
| **SynapseAI** | **1.12.1** |
6767
| --------------------- | -------------------------------------------------- |
6868
| PyTorch | 2.0.1 |
6969
| (PyTorch) Lightning\* | 2.1.x |
70-
| **Lightning Habana** | **1.1.0** |
70+
| **Lightning Habana** | **1.2.0** |
7171
| DeepSpeed\*\* | Forked from v0.9.4 of the official DeepSpeed repo. |
7272

7373
\* covers both packages [`lightning`](https://pypi.org/project/lightning/) and [`pytorch-lightning`](https://pypi.org/project/pytorch-lightning/)

src/lightning_habana/__about__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.1.1.dev"
1+
__version__ = "1.2.0"
22
__author__ = "Lightning-AI et al."
33
__author_email__ = "[email protected]"
44
__license__ = "Apache-2.0"

src/lightning_habana/pytorch/accelerator.py

+4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import os
1516
from typing import Any, Dict, List, Optional, Union
1617

1718
import torch
@@ -54,6 +55,9 @@ def get_device_stats(self, device: _DEVICE) -> Dict[str, Any]:
5455
return get_device_stats(device)
5556

5657
def teardown(self) -> None:
58+
os.environ.pop("HABANA_PROFILE", None)
59+
os.environ.pop("HLS_MODULE_ID", None)
60+
os.environ.pop("ID", None)
5761
pass
5862

5963
@staticmethod

src/lightning_habana/pytorch/strategies/deepspeed.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@
8282
warning_cache = WarningCache()
8383

8484
_HPU_DEEPSPEED_AVAILABLE = (
85-
# HPU deep speed is supported only through this pip install git+https://github.com/HabanaAI/[email protected].0
86-
RequirementCache("deepspeed==0.9.4+hpu.synapse.v1.12.0")
85+
# HPU deep speed is supported only through this pip install git+https://github.com/HabanaAI/[email protected].1
86+
RequirementCache("deepspeed==0.9.4+hpu.synapse.v1.12.1")
8787
)
8888
if TYPE_CHECKING and _HPU_DEEPSPEED_AVAILABLE:
8989
import deepspeed
@@ -295,7 +295,7 @@ def __init__(
295295
if not _HPU_DEEPSPEED_AVAILABLE:
296296
raise MisconfigurationException(
297297
"To use the `HPUDeepSpeedStrategy`, you must have hpu DeepSpeed installed."
298-
" Install it by running `pip install git+https://github.com/HabanaAI/[email protected].0`."
298+
" Install it by running `pip install git+https://github.com/HabanaAI/[email protected].1`."
299299
)
300300

301301
super().__init__(

src/lightning_habana/pytorch/strategies/parallel.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from lightning.pytorch.plugins.io.wrapper import _WrappingCheckpointIO
2828
from lightning.pytorch.plugins.precision import PrecisionPlugin
2929
from lightning.pytorch.strategies.ddp import DDPStrategy
30+
from lightning.pytorch.utilities.types import STEP_OUTPUT
3031
elif module_available("pytorch_lightning"):
3132
from lightning_fabric.plugins import CheckpointIO, ClusterEnvironment
3233
from lightning_fabric.utilities.distributed import group as _group
@@ -36,6 +37,7 @@
3637
from pytorch_lightning.plugins.io.wrapper import _WrappingCheckpointIO
3738
from pytorch_lightning.plugins.precision import PrecisionPlugin
3839
from pytorch_lightning.strategies.ddp import DDPStrategy
40+
from pytorch_lightning.utilities.types import STEP_OUTPUT
3941
else:
4042
raise ModuleNotFoundError("You are missing `lightning` or `pytorch-lightning` package, please install it.")
4143
from torch import Tensor
@@ -138,20 +140,20 @@ def optimizer_step(
138140
htcore.mark_step()
139141
return optimizer_output
140142

141-
def validation_step(self, batch: Any, batch_idx: int) -> Any:
143+
def validation_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT:
142144
# Break lazy accumulation of graph after every step
143145
htcore.mark_step()
144-
return super().validation_step(batch, batch_idx)
146+
return super().validation_step(*args, **kwargs)
145147

146-
def test_step(self, batch: Any, batch_idx: int) -> Any:
148+
def test_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT:
147149
# Break lazy accumulation of graph after every step
148150
htcore.mark_step()
149-
return super().test_step(batch, batch_idx)
151+
return super().test_step(*args, **kwargs)
150152

151-
def predict_step(self, batch: Any, batch_idx: int) -> Any:
153+
def predict_step(self, *args: Any, **kwargs: Any) -> Any:
152154
# Break lazy accumulation of graph after every step
153155
htcore.mark_step()
154-
return super().predict_step(batch, batch_idx)
156+
return super().predict_step(*args, **kwargs)
155157

156158
def reduce(
157159
self, tensor: Tensor, group: Optional[Any] = None, reduce_op: Optional[Union[ReduceOp, str]] = "mean"

src/lightning_habana/pytorch/strategies/single.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from lightning.pytorch.plugins.io.wrapper import _WrappingCheckpointIO
2525
from lightning.pytorch.plugins.precision import PrecisionPlugin
2626
from lightning.pytorch.strategies.single_device import SingleDeviceStrategy
27+
from lightning.pytorch.utilities.types import STEP_OUTPUT
2728
elif module_available("pytorch_lightning"):
2829
from lightning_fabric.plugins import CheckpointIO
2930
from lightning_fabric.utilities.types import _DEVICE
@@ -32,6 +33,7 @@
3233
from pytorch_lightning.plugins.io.wrapper import _WrappingCheckpointIO
3334
from pytorch_lightning.plugins.precision import PrecisionPlugin
3435
from pytorch_lightning.strategies.single_device import SingleDeviceStrategy
36+
from pytorch_lightning.utilities.types import STEP_OUTPUT
3537
else:
3638
raise ModuleNotFoundError("You are missing `lightning` or `pytorch-lightning` package, please install it.")
3739

@@ -107,20 +109,20 @@ def optimizer_step(
107109
htcore.mark_step()
108110
return optimizer_output
109111

110-
def validation_step(self, batch: Any, batch_idx: int) -> Any:
112+
def validation_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT:
111113
# Break lazy accumulation of graph after every step
112114
htcore.mark_step()
113-
return super().validation_step(batch, batch_idx)
115+
return super().validation_step(*args, **kwargs)
114116

115-
def test_step(self, batch: Any, batch_idx: int) -> Any:
117+
def test_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT:
116118
# Break lazy accumulation of graph after every step
117119
htcore.mark_step()
118-
return super().test_step(batch, batch_idx)
120+
return super().test_step(*args, **kwargs)
119121

120-
def predict_step(self, batch: Any, batch_idx: int) -> Any:
122+
def predict_step(self, *args: Any, **kwargs: Any) -> Any:
121123
# Break lazy accumulation of graph after every step
122124
htcore.mark_step()
123-
return super().predict_step(batch, batch_idx)
125+
return super().predict_step(*args, **kwargs)
124126

125127
@classmethod
126128
def register_strategies(cls, strategy_registry: Dict) -> None:

0 commit comments

Comments
 (0)