From df75cff09f5b77d9e6ad2b6ae72a5ee435b40a69 Mon Sep 17 00:00:00 2001
From: laurent
Date: Wed, 8 Jan 2025 11:35:26 +0100
Subject: [PATCH] Fix more rebase issues.

---
 src/transformers/models/helium/modeling_helium.py | 8 ++------
 src/transformers/models/helium/modular_helium.py  | 8 ++------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/transformers/models/helium/modeling_helium.py b/src/transformers/models/helium/modeling_helium.py
index 0750e5b6b579ce..3dc2d579972e9f 100644
--- a/src/transformers/models/helium/modeling_helium.py
+++ b/src/transformers/models/helium/modeling_helium.py
@@ -254,7 +254,7 @@ def __init__(self, config: HeliumConfig, layer_idx: Optional[int] = None):
         self.v_proj = nn.Linear(
             config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
         )
-        self.o_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
+        self.o_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False)

     def forward(
         self,
@@ -502,11 +502,7 @@ def __init__(self, config: HeliumConfig):
             [HeliumDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
         )
         self.norm = HeliumRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
-        self.rotary_emb = HeliumRotaryEmbedding(
-            dim=config.head_dim,
-            max_position_embeddings=config.max_position_embeddings,
-            base=config.rope_theta,
-        )
+        self.rotary_emb = HeliumRotaryEmbedding(config)
         self.gradient_checkpointing = False

         # Initialize weights and apply final processing
diff --git a/src/transformers/models/helium/modular_helium.py b/src/transformers/models/helium/modular_helium.py
index c0deb902017108..e84ff9ebe431a5 100644
--- a/src/transformers/models/helium/modular_helium.py
+++ b/src/transformers/models/helium/modular_helium.py
@@ -113,7 +113,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
 class HeliumAttention(GraniteAttention):
     def __init__(self, config: HeliumConfig, layer_idx: Optional[int] = None):
         super().__init__(config, layer_idx)
-        self.o_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
+        self.o_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False)
         self.scaling = 1 / math.sqrt(self.head_dim)


@@ -137,11 +137,7 @@ def __init__(self, config: HeliumConfig):
             [HeliumDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
         )
         self.norm = HeliumRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
-        self.rotary_emb = HeliumRotaryEmbedding(
-            dim=config.head_dim,
-            max_position_embeddings=config.max_position_embeddings,
-            base=config.rope_theta,
-        )
+        self.rotary_emb = HeliumRotaryEmbedding(config)
         self.gradient_checkpointing = False

         # Initialize weights and apply final processing
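
For context, a minimal sketch of the two corrected call sites is below. It is
illustrative only: it assumes a default-constructed HeliumConfig and the
post-rebase HeliumRotaryEmbedding signature that takes the whole config
object, as the hunks above suggest.

    import torch.nn as nn

    from transformers.models.helium.configuration_helium import HeliumConfig
    from transformers.models.helium.modeling_helium import HeliumRotaryEmbedding

    config = HeliumConfig()  # default config, for illustration only

    # The attention module no longer stores hidden_size on self after the
    # rebase, so the output projection must read it from the config:
    o_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False)

    # The rotary embedding now derives dim, base, and max_position_embeddings
    # from the config internally rather than taking them as keyword arguments:
    rotary_emb = HeliumRotaryEmbedding(config)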