@@ -163,7 +163,7 @@ struct mpt_hparams {
163
163
int32_t n_embd = 0 ; // max_seq_len
164
164
int32_t n_head = 0 ; // n_heads
165
165
int32_t n_layer = 0 ; // n_layers
166
- int32_t ftype = 0 ;
166
+ int32_t ftype = 0 ;
167
167
};
168
168
169
169
struct replit_layer {
@@ -220,7 +220,7 @@ static bool kv_cache_init(
220
220
params.mem_size = cache.buf .size ;
221
221
params.mem_buffer = cache.buf .addr ;
222
222
params.no_alloc = false ;
223
-
223
+
224
224
cache.ctx = ggml_init (params);
225
225
if (!cache.ctx ) {
226
226
fprintf (stderr, " %s: failed to allocate memory for kv cache\n " , __func__);
@@ -503,7 +503,7 @@ bool replit_model_load(const std::string & fname, std::istream &fin, replit_mode
503
503
}
504
504
505
505
GGML_CHECK_BUF (ggml_metal_add_buffer (model.ctx_metal , " data" , data_ptr, data_size, max_size));
506
- GGML_CHECK_BUF (ggml_metal_add_buffer (model.ctx_metal , " kv" , ggml_get_mem_buffer (model.kv_self .ctx ),
506
+ GGML_CHECK_BUF (ggml_metal_add_buffer (model.ctx_metal , " kv" , ggml_get_mem_buffer (model.kv_self .ctx ),
507
507
ggml_get_mem_size (model.kv_self .ctx ), 0 ));
508
508
GGML_CHECK_BUF (ggml_metal_add_buffer (model.ctx_metal , " eval" , model.eval_buf .addr , model.eval_buf .size , 0 ));
509
509
GGML_CHECK_BUF (ggml_metal_add_buffer (model.ctx_metal , " scr0" , model.scr0_buf .addr , model.scr0_buf .size , 0 ));
@@ -975,6 +975,14 @@ const std::vector<LLModel::Token> &Replit::endTokens() const
975
975
return fres;
976
976
}
977
977
978
+ bool Replit::usingGPUDevice ()
979
+ {
980
+ #if defined(GGML_USE_METAL)
981
+ return true ;
982
+ #endif
983
+ return false ;
984
+ }
985
+
978
986
#if defined(_WIN32)
979
987
#define DLL_EXPORT __declspec (dllexport)
980
988
#else
0 commit comments