Merge branch 'cherry-pick-df324ee9' into 'core_r0.7.0'
Merge branch 'gaod/moe/add_geglu_to_expertlayer' into 'main'

See merge request ADLR/megatron-lm!1462
jaredcasper committed May 30, 2024
2 parents d5d2a44 + e2fbaf7 commit e2627c7
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions megatron/core/transformer/moe/experts.py
@@ -39,13 +39,13 @@ def __init__(self, num_local_experts: int, config: TransformerConfig):
 
         self.expert_parallel = config.expert_model_parallel_size > 1
         if self.config.gated_linear_unit:
-            if self.config.activation_func != F.silu:
-                raise ValueError("Activation function must be silu when using GroupedMLP.")
+            if self.config.activation_func not in (F.silu, F.gelu):
+                raise ValueError("Activation function must be silu or gelu when using GroupedMLP.")
 
             @jit_fuser
             def glu(x):
                 x = torch.chunk(x, 2, dim=-1)
-                return F.silu(x[0]) * x[1]
+                return self.config.activation_func(x[0]) * x[1]
 
             self.activation_func = glu
         else:
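For context, the sketch below is a standalone illustration of what the `glu` closure in this diff computes; it is not Megatron-LM code, and the helper name `gated_activation` is made up for this example. The projection output is split in two along the last dimension, the configured activation is applied to the first half, and the result multiplicatively gates the second half. With `F.silu` this is SwiGLU (the only option before this commit); with `F.gelu`, newly permitted here, it is GEGLU.

```python
# Minimal sketch of a gated linear unit, assuming a plain PyTorch setting.
# Not Megatron-LM code: `gated_activation` is a hypothetical helper for illustration.
import torch
import torch.nn.functional as F


def gated_activation(x: torch.Tensor, activation_func=F.silu) -> torch.Tensor:
    """Split x in half along the last dim and return activation(x1) * x2."""
    x1, x2 = torch.chunk(x, 2, dim=-1)
    return activation_func(x1) * x2


if __name__ == "__main__":
    x = torch.randn(4, 8)                       # even last dimension so the split is clean
    swiglu_out = gated_activation(x, F.silu)    # allowed both before and after this commit
    geglu_out = gated_activation(x, F.gelu)     # newly permitted by this commit (GEGLU)
    print(swiglu_out.shape, geglu_out.shape)    # both torch.Size([4, 4])
```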
