Commit

- Updated transformer_engine requirements
- Import megablocks only if num_experts > 1
aurelion-source committed Feb 1, 2025
1 parent aa7c4bc commit 2118a14
Showing 2 changed files with 5 additions and 3 deletions.
megatron/model/transformer.py (5 changes: 3 additions & 2 deletions)
@@ -26,8 +26,6 @@
 from pkg_resources import packaging
 from importlib.metadata import version

-from megatron.model.moe import ParallelDroplessMoE
-
 from .norms import get_norm
 from megatron import mpu
 from megatron.model.fused_softmax import FusedScaleMaskSoftmax
@@ -947,6 +945,9 @@ def __init__(
             else 1
         )

+        if self.num_experts > 1:
+            from megatron.model.moe import ParallelDroplessMoE
+
         if self.gpt_j_residual:
             # GPT-J style layers allow us to defer the reduction of results across TP ranks until the end of the two sublayers.
             # the reduction we use is a simple allreduce for pure Tensor Parallel,
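Note: the change above defers the ParallelDroplessMoE import (which pulls in megablocks) until a layer is actually configured with more than one expert, so dense single-expert models no longer need megablocks installed. A minimal standalone sketch of the pattern, using a simplified stand-in class and a hypothetical num_experts argument rather than the real GPT-NeoX layer:

    class ExampleLayer:
        """Simplified stand-in illustrating the deferred-import pattern from this commit."""

        def __init__(self, num_experts: int = 1):
            self.num_experts = num_experts
            self.moe_cls = None
            if self.num_experts > 1:
                # Only pull in the MoE stack (and its megablocks dependency)
                # when mixture-of-experts is actually enabled.
                from megatron.model.moe import ParallelDroplessMoE
                self.moe_cls = ParallelDroplessMoE

    # A dense (single-expert) layer never triggers the import:
    ExampleLayer(num_experts=1)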
requirements/requirements-transformerengine.txt (3 changes: 2 additions & 1 deletion)
@@ -1 +1,2 @@
-transformer-engine[pytorch]
+transformer-engine[pytorch]==1.12
+flash-attn==2.6.3
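As a quick sanity check of the new pins, a hedged sketch that uses importlib.metadata to print what is installed in the active environment (distribution names taken from the requirement specifiers above):

    from importlib.metadata import version, PackageNotFoundError

    # Print installed versions to compare against the pins above.
    for dist in ("transformer-engine", "flash-attn"):
        try:
            print(f"{dist}=={version(dist)}")
        except PackageNotFoundError:
            print(f"{dist} is not installed")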
