
Commit 53ff6b9

GGUF: C++ refactor, backend support, misc fixes (ggml-org#11030)
* GGUF: C++ refactor, backend support, misc fixes
* remove ggml_tensor.backend
* update CODEOWNERS [no ci]
* remove gguf_get_data from API
* revise GGUF API data types
1 parent 017cc5f commit 53ff6b9

21 files changed (+1801 -1633 lines)

CODEOWNERS

+6
@@ -3,3 +3,9 @@
 /ci/ @ggerganov
 /.devops/*.Dockerfile @ngxson
 /examples/server/ @ngxson
+/ggml/src/ggml-cuda/fattn* @JohannesGaessler
+/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
+/ggml/src/ggml-opt.cpp @JohannesGaessler
+/ggml/src/gguf.cpp @JohannesGaessler

common/common.cpp

+3
@@ -2,6 +2,9 @@
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif
 
+#include "ggml.h"
+#include "gguf.h"
+
 #include "common.h"
 #include "log.h"
 // Change JSON_ASSERT from assert() to GGML_ASSERT:
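
Note: this include pattern recurs throughout the diff. The GGUF API now lives in its own public header, gguf.h, instead of being reachable through ggml.h, so every GGUF consumer gains an explicit include. A minimal sketch of reading a file through the relocated header, based only on the declarations removed from ggml.h further down (the commit also revises some GGUF integer widths, so exact signatures may differ):

#include "ggml.h"
#include "gguf.h" // no longer pulled in via ggml.h after this commit

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    // metadata-only load: no_alloc = true, no ggml_context requested
    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (ctx == nullptr) {
        fprintf(stderr, "failed to read %s\n", argv[1]);
        return 1;
    }

    printf("gguf version: %d\n", gguf_get_version(ctx));
    gguf_free(ctx);
    return 0;
}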

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

+2
@@ -1,4 +1,6 @@
 #include "ggml.h"
+#include "gguf.h"
+
 #include "llama.h"
 #include "common.h"
 #include "log.h"

examples/cvector-generator/cvector-generator.cpp

+3 -1

@@ -1,7 +1,9 @@
+#include "ggml.h"
+#include "gguf.h"
+
 #include "arg.h"
 #include "common.h"
 #include "llama.h"
-#include "ggml.h"
 #include "pca.hpp"
 #include "mean.hpp"
 
examples/export-lora/export-lora.cpp

+4 -2

@@ -1,7 +1,9 @@
-#include "arg.h"
-#include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
+#include "gguf.h"
+
+#include "arg.h"
+#include "common.h"
 
 #include <map>
 #include <vector>

examples/gguf-hash/gguf-hash.cpp

+1
@@ -1,4 +1,5 @@
 #include "ggml.h"
+#include "gguf.h"
 
 #include <cstdlib> /* abort() */
 #include <cstddef>

examples/gguf-split/gguf-split.cpp

+8 -6

@@ -1,16 +1,18 @@
+#include "ggml.h"
+#include "gguf.h"
 #include "llama.h"
 #include "common.h"
 
 #include <algorithm>
+#include <cinttypes>
+#include <climits>
+#include <cstdio>
 #include <cstdlib>
+#include <stdexcept>
+#include <cstring>
 #include <fstream>
 #include <string>
 #include <vector>
-#include <climits>
-
-#include <cstdio>
-#include <cstring>
-#include <stdexcept>
 
 #if defined(_WIN32)
 #include <windows.h>
@@ -296,7 +298,7 @@ struct split_strategy {
             total_size += ggml_nbytes(t);
         }
         total_size = total_size / 1000 / 1000; // convert to megabytes
-        printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
+        printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
         i_split++;
     }
 }
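
The printf fix above follows from the commit's GGUF data-type revision: gguf_get_n_tensors now returns a 64-bit count, and "%d" is wrong wherever int is 32 bits, hence the new <cinttypes> include. A self-contained sketch of the portable idiom, with a hypothetical count standing in for the real call:

#include <cinttypes> // PRIi64
#include <cstdio>

int main() {
    // hypothetical stand-in for gguf_get_n_tensors(ctx_out) from the diff above
    const int64_t n_tensors = 291;

    // "%" PRIi64 expands to the correct conversion specifier for int64_t on any platform
    printf("n_tensors = %" PRIi64 "\n", n_tensors);
    return 0;
}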

examples/gguf/gguf.cpp

+10 -6

@@ -1,10 +1,9 @@
 #include "ggml.h"
+#include "gguf.h"
 
 #include <cstdio>
-#include <cinttypes>
 #include <string>
 #include <sstream>
-#include <fstream>
 #include <vector>
 
 #undef MIN
@@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) {
 
         for (int i = 0; i < n_tensors; ++i) {
            const char * name   = gguf_get_tensor_name  (ctx, i);
+            const size_t size   = gguf_get_tensor_size  (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);
 
-            printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
         }
     }
 
@@ -182,9 +182,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
 
         for (int i = 0; i < n_tensors; ++i) {
            const char * name   = gguf_get_tensor_name  (ctx, i);
+            const size_t size   = gguf_get_tensor_size  (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);
 
-            printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
         }
     }
 
@@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
 
            struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
 
-            printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
+            printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
+                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
 
            // print first 10 elements
            const float * data = (const float *) cur->data;
@@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
                const float * data = (const float *) cur->data;
                for (int j = 0; j < ggml_nelements(cur); ++j) {
                    if (data[j] != 100 + i) {
-                        fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
+                        fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
                        gguf_free(ctx);
                        return false;
                    }
@@ -245,6 +247,8 @@ int main(int argc, char ** argv) {
        check_data = false;
    }
 
+    srand(123456);
+
    const std::string fname(argv[1]);
    const std::string mode (argv[2]);
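The gguf_get_tensor_size getter introduced above rounds out the per-tensor metadata accessors. A minimal sketch of dumping a file's tensor layout with it, assuming a gguf_context already loaded via gguf_init_from_file (the 64-bit index follows the commit's revised data types; the pre-refactor API used int):

#include "gguf.h"

#include <cstdio>

static void dump_tensor_layout(const struct gguf_context * ctx) {
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
    for (int64_t i = 0; i < n_tensors; ++i) {
        // name, payload size in bytes, and byte offset within the data section
        printf("%-40s size = %10zu, offset = %10zu\n",
            gguf_get_tensor_name  (ctx, i),
            gguf_get_tensor_size  (ctx, i),
            gguf_get_tensor_offset(ctx, i));
    }
}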

examples/llava/clip.cpp

+4 -2

@@ -7,6 +7,7 @@
 #include "ggml-cpu.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"
 
 //#ifdef GGML_USE_CUDA
 //#include "ggml-cuda.h"
@@ -262,7 +263,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
         {
             const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
             int arr_n = gguf_get_arr_n(ctx_gguf, i);
-            const void * data = gguf_get_arr_data(ctx_gguf, i);
+            const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
             std::stringstream ss;
             ss << "[";
             for (int j = 0; j < arr_n; j++) {
@@ -2734,7 +2735,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) {
         total_size_org += orig_size;
         total_size_new += new_size;
         gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
-        gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
+        GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
+        gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
         fout.write((const char *)new_data, new_size);
         size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
         for (size_t j = 0; j < pad; ++j) {
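
Two API revisions surface in this file. In gguf_kv_to_str, gguf_get_arr_data is no longer valid for string arrays, so the code passes nullptr there (string elements are read per-index with gguf_get_arr_str in the GGUF API). In clip_model_quantize, gguf_set_tensor_data lost its size parameter: the context now derives the byte count from the tensor's recorded type and shape, so the caller asserts the expected size rather than passing it. A minimal sketch of the new write pattern, with hypothetical names:

#include "ggml.h"
#include "gguf.h"

// hypothetical helper: stage a quantized tensor's payload in a gguf_context
// that is being prepared for writing
static void stage_tensor(struct gguf_context * ctx_out, const char * name,
                         enum ggml_type new_type, const void * buf, size_t buf_size) {
    gguf_set_tensor_type(ctx_out, name, new_type);

    // the size is implied by the recorded type + shape, so verify instead of passing it
    GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name)) == buf_size);

    gguf_set_tensor_data(ctx_out, name, buf);
}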

ggml/CMakeLists.txt

+2 -1

@@ -243,7 +243,8 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-metal.h
     include/ggml-rpc.h
     include/ggml-sycl.h
-    include/ggml-vulkan.h)
+    include/ggml-vulkan.h
+    include/gguf.h)
 
 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
 #if (GGML_METAL)

ggml/include/ggml-cpp.h

+1
@@ -7,6 +7,7 @@
 #include "ggml.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"
 #include <memory>
 
 // Smart pointers for ggml types

ggml/include/ggml.h

-140
@@ -241,12 +241,6 @@
 #define GGML_ROPE_TYPE_MROPE  8
 #define GGML_ROPE_TYPE_VISION 24
 
-#define GGUF_MAGIC "GGUF"
-
-#define GGUF_VERSION 3
-
-#define GGUF_DEFAULT_ALIGNMENT 32
-
 #define GGML_UNUSED(x) (void)(x)
 
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -403,12 +397,6 @@ extern "C" {
         GGML_PREC_F32,
     };
 
-    enum ggml_backend_type {
-        GGML_BACKEND_TYPE_CPU = 0,
-        GGML_BACKEND_TYPE_GPU = 10,
-        GGML_BACKEND_TYPE_GPU_SPLIT = 20,
-    };
-
     // model file types
     enum ggml_ftype {
         GGML_FTYPE_UNKNOWN = -1,
@@ -587,8 +575,6 @@ extern "C" {
     struct ggml_tensor {
         enum ggml_type type;
 
-        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
-
         struct ggml_backend_buffer * buffer;
 
         int64_t ne[GGML_MAX_DIMS]; // number of elements
@@ -2111,132 +2097,6 @@ extern "C" {
             int64_t   n_per_row,
             const float * imatrix);
 
-    //
-    // gguf
-    //
-
-    enum gguf_type {
-        GGUF_TYPE_UINT8   = 0,
-        GGUF_TYPE_INT8    = 1,
-        GGUF_TYPE_UINT16  = 2,
-        GGUF_TYPE_INT16   = 3,
-        GGUF_TYPE_UINT32  = 4,
-        GGUF_TYPE_INT32   = 5,
-        GGUF_TYPE_FLOAT32 = 6,
-        GGUF_TYPE_BOOL    = 7,
-        GGUF_TYPE_STRING  = 8,
-        GGUF_TYPE_ARRAY   = 9,
-        GGUF_TYPE_UINT64  = 10,
-        GGUF_TYPE_INT64   = 11,
-        GGUF_TYPE_FLOAT64 = 12,
-        GGUF_TYPE_COUNT, // marks the end of the enum
-    };
-
-    struct gguf_context;
-
-    struct gguf_init_params {
-        bool no_alloc;
-
-        // if not NULL, create a ggml_context and allocate the tensor data in it
-        struct ggml_context ** ctx;
-    };
-
-    GGML_API struct gguf_context * gguf_init_empty(void);
-    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
-    //GGML_API struct gguf_context * gguf_init_from_buffer(..);
-
-    GGML_API void gguf_free(struct gguf_context * ctx);
-
-    GGML_API const char * gguf_type_name(enum gguf_type type);
-
-    GGML_API int    gguf_get_version    (const struct gguf_context * ctx);
-    GGML_API size_t gguf_get_alignment  (const struct gguf_context * ctx);
-    GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
-    GGML_API void * gguf_get_data       (const struct gguf_context * ctx);
-
-    GGML_API int          gguf_get_n_kv(const struct gguf_context * ctx);
-    GGML_API int          gguf_find_key(const struct gguf_context * ctx, const char * key);
-    GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
-
-    GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
-    GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
-
-    // will abort if the wrong type is used for the key
-    GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int key_id);
-    GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int key_id);
-    GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
-    GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
-    GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
-    GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
-    GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
-    GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
-    GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
-    GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
-
-    GGML_API int            gguf_get_n_tensors    (const struct gguf_context * ctx);
-    GGML_API int            gguf_find_tensor      (const struct gguf_context * ctx, const char * name);
-    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
-    GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
-    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
-
-    // removes key if it exists
-    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
-
-    // overrides existing values or adds a new one
-    GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
-    GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
-    GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
-    GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t  val);
-    GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
-    GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t  val);
-    GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float    val);
-    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
-    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t  val);
-    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double   val);
-    GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool     val);
-    GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
-    GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
-    GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
-
-    // set or add KV pairs from another context
-    GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
-
-    // manage tensor info
-    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
-    GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
-    GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
-
-    // writing gguf files can be done in 2 ways:
-    //
-    // - write the entire gguf_context to a binary file in a single pass:
-    //
-    //   gguf_write_to_file(ctx, fname);
-    //
-    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
-    //
-    //   FILE * f = fopen(fname, "wb");
-    //   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
-    //   fwrite(f, ...);
-    //   void * data = gguf_meta_get_meta_data(ctx);
-    //   fseek(f, 0, SEEK_SET);
-    //   fwrite(f, data, gguf_get_meta_size(ctx));
-    //   free(data);
-    //   fclose(f);
-    //
-
-    // write the entire context to a binary file
-    GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
-
-    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
-    GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
-    GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
-
 #ifdef __cplusplus
     // restrict not standard in C++
 #  if defined(__GNUC__)
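
All of the above moved to the new standalone gguf.h (minus gguf_get_data, which the commit message says was dropped from the API). For orientation, here is the two-pass write flow from the removed comment, restated consistently with the declared signatures; the comment's fwrite(f, ...) argument order and the gguf_meta_get_meta_data name appear to be pre-existing typos for fwrite(data, 1, size, f) and gguf_get_meta_data:

#include "ggml.h"
#include "gguf.h"

#include <cstdio>
#include <cstdlib>

// minimal sketch of the two-pass write, assuming the declarations removed above;
// tensor_data/tensor_data_size stand in for the real payload stream
static bool write_gguf_two_pass(const struct gguf_context * ctx, const char * fname,
                                const void * tensor_data, size_t tensor_data_size) {
    FILE * f = fopen(fname, "wb");
    if (f == NULL) {
        return false;
    }

    // pass 1: skip the metadata region (size includes padding) and stream the tensor data
    const size_t meta_size = gguf_get_meta_size(ctx);
    fseek(f, (long) meta_size, SEEK_SET);
    fwrite(tensor_data, 1, tensor_data_size, f);

    // pass 2: rewind and fill in the metadata (header, kv pairs, tensor info)
    void * meta = malloc(meta_size);
    gguf_get_meta_data(ctx, meta);
    fseek(f, 0, SEEK_SET);
    fwrite(meta, 1, meta_size, f);

    free(meta);
    fclose(f);
    return true;
}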
