@@ -381,7 +381,7 @@ void free_tokenizer(Tokenizer* t) {
     free(t->vocab_scores);
 }
 
-char* get_piece(Tokenizer* t, int prev_token, int token) {
+char* decode(Tokenizer* t, int prev_token, int token) {
     char *piece = t->vocab[token];
     // following BOS (1) token, sentencepiece decoder strips any leading whitespace (see PR #89)
     if (prev_token == 1 && piece[0] == ' ') { piece++; }
@@ -414,7 +414,7 @@ int str_lookup(char *str, TokenIndex *sorted_vocab, int vocab_size) {
     return res != NULL ? res->id : -1;
 }
 
-void bpe_encode(Tokenizer* t, char *text, int *tokens, int *n_tokens) {
+void encode(Tokenizer* t, char *text, int *tokens, int *n_tokens) {
     // encode the string text (input) into an upper-bound preallocated tokens[] array
 
     // sort vocabulary
@@ -694,7 +694,7 @@ int main(int argc, char *argv[]) {
     int num_prompt_tokens = 0;
     if (prompt != NULL) {
         prompt_tokens = (int*)malloc((strlen(prompt)+1) * sizeof(int));
-        bpe_encode(&tokenizer, prompt, prompt_tokens, &num_prompt_tokens);
+        encode(&tokenizer, prompt, prompt_tokens, &num_prompt_tokens);
     }
 
     // start the main loop
@@ -737,7 +737,7 @@ int main(int argc, char *argv[]) {
         if (next == 1) { break; }
 
         // print the token as string, decode it with the Tokenizer object
-        char *piece = get_piece(&tokenizer, token, next);
+        char *piece = decode(&tokenizer, token, next);
         printf("%s", piece);
         fflush(stdout);
         token = next;