fix(pp): fix pp get tensor shape err and layernorm input dtype err (#378)
huangting4201 authored Dec 10, 2024
1 parent ae2243c commit 5ad2eb0
Showing 6 changed files with 13 additions and 9 deletions.
6 changes: 5 additions & 1 deletion internlm/core/scheduler/pipeline_scheduler_1f1b.py
@@ -35,7 +35,11 @@ def get_tensor_shape():
     if not gpc.is_initialized(ParallelMode.PIPELINE):
         return None
 
-    if hasattr(gpc.config, "SEQ_LEN") and hasattr(gpc.config.data, "micro_bsz") and hasattr(gpc.config, "HIDDEN_SIZE"):
+    if (
+        hasattr(gpc.config.data, "seq_len")
+        and hasattr(gpc.config.data, "micro_bsz")
+        and hasattr(gpc.config.model, "hidden_size")
+    ):
         if gpc.config.data.use_packed_dataset and gpc.is_evaluating is False:
             if gpc.config.parallel.sequence_parallel:
                 sequence_world_size = gpc.get_world_size(ParallelMode.TENSOR)
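The old guard looked for top-level gpc.config.SEQ_LEN and gpc.config.HIDDEN_SIZE attributes, while current configs keep these fields as gpc.config.data.seq_len and gpc.config.model.hidden_size, so the check never passed and the scheduler could not derive a static tensor shape. Below is a minimal standalone sketch of how the communication shape follows from the corrected keys, assuming a packed-data layout of (micro_bsz * seq_len / tensor_world_size, hidden_size); DummyConfig and this formula are illustrative stand-ins, not the repository's exact implementation.

# Standalone sketch (assumed, not the repository's exact code) of deriving the
# pipeline communication shape from the corrected config keys.
from dataclasses import dataclass


@dataclass
class DummyConfig:  # hypothetical stand-in for gpc.config
    seq_len: int = 4096          # gpc.config.data.seq_len
    micro_bsz: int = 1           # gpc.config.data.micro_bsz
    hidden_size: int = 4096      # gpc.config.model.hidden_size
    use_packed_dataset: bool = True
    sequence_parallel: bool = True
    tensor_world_size: int = 2   # gpc.get_world_size(ParallelMode.TENSOR)


def get_tensor_shape(cfg: DummyConfig):
    """Shape of the activation tensor exchanged between pipeline stages."""
    seq_div = cfg.tensor_world_size if cfg.sequence_parallel else 1
    if cfg.use_packed_dataset:
        # Packed samples are flattened into a single token dimension.
        return (cfg.micro_bsz * cfg.seq_len // seq_div, cfg.hidden_size)
    return (cfg.micro_bsz, cfg.seq_len // seq_div, cfg.hidden_size)


print(get_tensor_shape(DummyConfig()))  # -> (2048, 4096) with the defaults above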
4 changes: 2 additions & 2 deletions internlm/model/modeling_internlm.py
@@ -195,7 +195,7 @@ def _forward(self, hidden_states, *args, **kwargs):
         def _dropout_and_norm_attn(_hidden_states):
             _dropped = self.dropout1(_hidden_states)
             _residual = _dropped
-            _hidden_states = self.norm1(_residual.float())
+            _hidden_states = self.norm1(_residual.to(self.norm1.weight.dtype))
             return _residual, _hidden_states
 
         if self.dropout_selective_checkpoint:
@@ -212,7 +212,7 @@ def _dropout_and_norm_attn(_hidden_states):
         def _dropout_and_norm_ffn(_residual, _hidden_states):
             _dropped = self.dropout2(_hidden_states)
             _residual = (_dropped + _residual) if _residual is not None else _dropped
-            _hidden_states = self.norm2(_residual.float())
+            _hidden_states = self.norm2(_residual.to(self.norm2.weight.dtype))
             return _residual, _hidden_states
 
         if self.dropout_selective_checkpoint:
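The dtype fix replaces the hard-coded float32 cast on the residual with a cast to the norm layer's own weight dtype; under bf16/fp16 mixed precision the old upcast leaves the input dtype out of step with the norm parameters, which fused or custom norm kernels may reject. A small PyTorch sketch of the pattern, with nn.LayerNorm standing in for the model's norm layer and the bf16 setup assumed for illustration:

import torch
import torch.nn as nn

# Assume mixed-precision training has put the norm parameters in bf16.
norm = nn.LayerNorm(8).to(torch.bfloat16)
residual = torch.randn(4, 8, dtype=torch.bfloat16)

# Old pattern: residual.float() forces an fp32 input against bf16 weights,
# which fused/custom norm kernels may reject with a dtype error.
# hidden_states = norm(residual.float())

# Fixed pattern: follow whatever dtype the norm weights actually use.
hidden_states = norm(residual.to(norm.weight.dtype))
print(hidden_states.dtype)  # torch.bfloat16

The same cast is applied to norm1/norm2 and ffn_norm in each of the model definitions touched by this commit.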
2 changes: 1 addition & 1 deletion internlm/model/modeling_internlm2.py
@@ -257,7 +257,7 @@ def _dropout_and_norm_attn(_residual, _hidden_states):
         def _dropout_and_norm_ffn(_residual, _hidden_states):
             _dropped = self.dropout2(_hidden_states)
             _residual = (_dropped + _residual) if _residual is not None else _dropped
-            _hidden_states = self.ffn_norm(_residual.to(torch.float32))
+            _hidden_states = self.ffn_norm(_residual.to(self.ffn_norm.weight.dtype))
 
             return _residual, _hidden_states
 
2 changes: 1 addition & 1 deletion internlm/model/modeling_llama.py
@@ -246,7 +246,7 @@ def _dropout_and_norm_attn(_residual, _hidden_states):
         def _dropout_and_norm_ffn(_residual, _hidden_states):
             _dropped = self.dropout2(_hidden_states)
             _residual = (_dropped + _residual) if _residual is not None else _dropped
-            _hidden_states = self.ffn_norm(_residual.to(torch.float32))
+            _hidden_states = self.ffn_norm(_residual.to(self.ffn_norm.weight.dtype))
 
             return _residual, _hidden_states
 
4 changes: 2 additions & 2 deletions internlm/model/modeling_mixtral.py
@@ -214,7 +214,7 @@ def _forward(self, hidden_states, *args, **kwargs):
         def _dropout_and_norm_attn(_hidden_states):
             _dropped = self.dropout1(_hidden_states)
             _residual = _dropped
-            _hidden_states = self.norm1(_residual.float())
+            _hidden_states = self.norm1(_residual.to(self.norm1.weight.dtype))
             return _residual, _hidden_states
 
         if self.dropout_selective_checkpoint:
@@ -231,7 +231,7 @@ def _dropout_and_norm_attn(_hidden_states):
         def _dropout_and_norm_ffn(_residual, _hidden_states):
             _dropped = self.dropout2(_hidden_states)
             _residual = (_dropped + _residual) if _residual is not None else _dropped
-            _hidden_states = self.norm2(_residual.float())
+            _hidden_states = self.norm2(_residual.to(self.norm2.weight.dtype))
             return _residual, _hidden_states
 
         if self.dropout_selective_checkpoint:
4 changes: 2 additions & 2 deletions internlm/model/modeling_moe.py
@@ -205,7 +205,7 @@ def _forward(self, hidden_states, *args, **kwargs):
         def _dropout_and_norm_attn(_hidden_states):
             _dropped = self.dropout1(_hidden_states)
             _residual = _dropped
-            _hidden_states = self.norm1(_residual.float())
+            _hidden_states = self.norm1(_residual.to(self.norm1.weight.dtype))
             return _residual, _hidden_states
 
         if self.dropout_selective_checkpoint:
@@ -222,7 +222,7 @@ def _dropout_and_norm_attn(_hidden_states):
         def _dropout_and_norm_ffn(_residual, _hidden_states):
             _dropped = self.dropout2(_hidden_states)
             _residual = (_dropped + _residual) if _residual is not None else _dropped
-            _hidden_states = self.norm2(_residual.float())
+            _hidden_states = self.norm2(_residual.to(self.norm2.weight.dtype))
             return _residual, _hidden_states
 
         if self.dropout_selective_checkpoint:
