Commit db4ad58
Merge pull request #225 from RahulSChand/rope_changes
Fixing max_seq_len passed to RoPE implementation. Minor comment changes
karpathy authored Aug 5, 2023
2 parents 9d001c6 + 02cf3c7 commit db4ad58
Showing 2 changed files with 3 additions and 3 deletions.
model.py: 1 addition, 1 deletion

@@ -216,7 +216,7 @@ def __init__(self, params: ModelArgs):
         self.tok_embeddings.weight = self.output.weight # https://paperswithcode.com/method/weight-tying

         # some useful precompute for the RoPE relative positional embeddings. TODO why * 2 here? confuse
-        freqs_cos, freqs_sin = precompute_freqs_cis(self.params.dim // self.params.n_heads, self.params.max_seq_len * 2)
+        freqs_cos, freqs_sin = precompute_freqs_cis(self.params.dim // self.params.n_heads, self.params.max_seq_len)
         self.register_buffer("freqs_cos", freqs_cos, persistent=False)
         self.register_buffer("freqs_sin", freqs_sin, persistent=False)
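The fix is consistent with what precompute_freqs_cis returns: one row of cos/sin values per position, and positions never exceed max_seq_len, so the extra rows from the old `* 2` (apparently carried over from Meta's Llama reference code, hence the TODO in the comment above) were never indexed. Below is a minimal sketch of the real-valued variant this repo uses; the function body is not part of this diff, so treat it as illustrative rather than the exact implementation:

    import torch

    def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
        # one frequency per channel pair: a head of size `dim` gets dim/2 of them
        freqs = 1.0 / (theta ** (torch.arange(0, dim, 2).float() / dim))
        # one table row per position: `end` rows, so end = max_seq_len already
        # covers every position the model can attend to
        t = torch.arange(end, device=freqs.device)
        angles = torch.outer(t, freqs)  # (end, dim/2) rotation angles
        return torch.cos(angles), torch.sin(angles)

Called as in the hunk above with dim // n_heads and max_seq_len, this yields two tables of shape (max_seq_len, head_size/2), which is exactly the shape the run.c comments below now document.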
run.c: 2 additions, 2 deletions

@@ -51,8 +51,8 @@ typedef struct {
     // final rmsnorm
     float* rms_final_weight; // (dim,)
     // freq_cis for RoPE relatively positional embeddings
-    float* freq_cis_real; // (seq_len, dim/2)
-    float* freq_cis_imag; // (seq_len, dim/2)
+    float* freq_cis_real; // (seq_len, head_size/2)
+    float* freq_cis_imag; // (seq_len, head_size/2)
     // (optional) classifier weights for the logits, on the last layer
     float* wcls;
 } TransformerWeights;
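The comment fix matters because dim/2 and head_size/2 differ by a factor of n_heads: RoPE rotates channel pairs within each head, and every head reuses the same table row for a given position, so each table holds seq_len * head_size/2 floats rather than seq_len * dim/2. A hedged Python sketch of how tables of shape (seq_len, head_size/2) get applied, loosely mirroring model.py's apply_rotary_emb (names and exact broadcasting here are illustrative assumptions):

    import torch

    def apply_rope(q, freqs_cos, freqs_sin):
        # q: (batch, seq_len, n_heads, head_size)
        # freqs_cos/freqs_sin: (seq_len, head_size/2), shared by batch and heads
        q_r, q_i = q.float().reshape(*q.shape[:-1], -1, 2).unbind(-1)
        cos = freqs_cos.view(1, q.shape[1], 1, -1)  # broadcast over batch, heads
        sin = freqs_sin.view(1, q.shape[1], 1, -1)
        out_r = q_r * cos - q_i * sin               # rotate each (real, imag) pair
        out_i = q_r * sin + q_i * cos
        return torch.stack([out_r, out_i], dim=-1).flatten(-2).type_as(q)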
