From 9d5f18bf479b726615e440b3d6d88fe896398b02 Mon Sep 17 00:00:00 2001 From: Ilham Syahid S Date: Sun, 2 Feb 2025 21:46:33 +0300 Subject: [PATCH 1/2] ggml-cpu: Add bounds checking in `make_block_q4_0x4` function Prevent potential buffer overflows by adding explicit bounds checking when copying memory in the `make_block_q4_0x4` function Signed-off-by: Ilham Syahid S --- src/ggml-cpu/ggml-cpu-aarch64.cpp | 37 ++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/src/ggml-cpu/ggml-cpu-aarch64.cpp b/src/ggml-cpu/ggml-cpu-aarch64.cpp index b311a5b1c..4df1523cc 100644 --- a/src/ggml-cpu/ggml-cpu-aarch64.cpp +++ b/src/ggml-cpu/ggml-cpu-aarch64.cpp @@ -3592,13 +3592,16 @@ static void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, } static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) { - block_q4_0x4 out; + // Zero initialize the output structure + block_q4_0x4 out = {}; + // Copy d values for (int i = 0; i < 4; i++) { out.d[i] = in[i].d; } const int end = QK4_0 * 2 / blck_size_interleave; + const size_t qs_size = sizeof(out.qs); if (blck_size_interleave == 8) { const uint64_t xor_mask = 0x8888888888888888ULL; @@ -3607,11 +3610,17 @@ static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_in int src_offset = (i / 4) * blck_size_interleave; int dst_offset = i * blck_size_interleave; - uint64_t elems; - // Using memcpy to avoid unaligned memory accesses - memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t)); - elems ^= xor_mask; - memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t)); + // Bounds checking + if (dst_offset + sizeof(uint64_t) <= qs_size && + src_offset + sizeof(uint64_t) <= sizeof(in[src_id].qs)) { + uint64_t elems; + // Using memcpy to avoid unaligned memory accesses + memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t)); + elems ^= xor_mask; + memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t)); + } else { + GGML_ASSERT(false && "buffer overflow prevented in make_block_q4_0x4"); + } } } else if (blck_size_interleave == 4) { const uint32_t xor_mask = 0x88888888; @@ -3620,13 +3629,19 @@ static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_in int src_offset = (i / 4) * blck_size_interleave; int dst_offset = i * blck_size_interleave; - uint32_t elems; - memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t)); - elems ^= xor_mask; - memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t)); + // Bounds checking + if (dst_offset + sizeof(uint32_t) <= qs_size && + src_offset + sizeof(uint32_t) <= sizeof(in[src_id].qs)) { + uint32_t elems; + memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t)); + elems ^= xor_mask; + memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t)); + } else { + GGML_ASSERT(false && "buffer overflow prevented in make_block_q4_0x4"); + } } } else { - GGML_ASSERT(false); + GGML_ASSERT(false && "invalid block size interleave value"); } return out; From 6bd5fb2c064577d210b5aeaa133ab220a5d9a648 Mon Sep 17 00:00:00 2001 From: Ilham Syahid S Date: Tue, 4 Feb 2025 11:22:29 +0300 Subject: [PATCH 2/2] ggml-cpu: Simplify bounds checking in `make_block_q4_0x4` Refactor the memory copying logic in the `make_block_q4_0x4` function to remove explicit bounds checking Use compile-time constants and loop bounds to prevent potential buffer overflows. Signed-off-by: Ilham Syahid S --- src/ggml-cpu/ggml-cpu-aarch64.cpp | 36 +++++++++++-------------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/ggml-cpu/ggml-cpu-aarch64.cpp b/src/ggml-cpu/ggml-cpu-aarch64.cpp index 4df1523cc..78aa177f3 100644 --- a/src/ggml-cpu/ggml-cpu-aarch64.cpp +++ b/src/ggml-cpu/ggml-cpu-aarch64.cpp @@ -3601,44 +3601,32 @@ static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_in } const int end = QK4_0 * 2 / blck_size_interleave; - const size_t qs_size = sizeof(out.qs); + constexpr size_t qs_size = QK4_0 * 2; // Size of output qs array if (blck_size_interleave == 8) { const uint64_t xor_mask = 0x8888888888888888ULL; - for (int i = 0; i < end; ++i) { + for (int i = 0; i < end && (i + 1) * blck_size_interleave <= qs_size; ++i) { int src_id = i % 4; int src_offset = (i / 4) * blck_size_interleave; int dst_offset = i * blck_size_interleave; - // Bounds checking - if (dst_offset + sizeof(uint64_t) <= qs_size && - src_offset + sizeof(uint64_t) <= sizeof(in[src_id].qs)) { - uint64_t elems; - // Using memcpy to avoid unaligned memory accesses - memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t)); - elems ^= xor_mask; - memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t)); - } else { - GGML_ASSERT(false && "buffer overflow prevented in make_block_q4_0x4"); - } + uint64_t elems; + // Using memcpy to avoid unaligned memory accesses + memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t)); + elems ^= xor_mask; + memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t)); } } else if (blck_size_interleave == 4) { const uint32_t xor_mask = 0x88888888; - for (int i = 0; i < end; ++i) { + for (int i = 0; i < end && (i + 1) * blck_size_interleave <= qs_size; ++i) { int src_id = i % 4; int src_offset = (i / 4) * blck_size_interleave; int dst_offset = i * blck_size_interleave; - // Bounds checking - if (dst_offset + sizeof(uint32_t) <= qs_size && - src_offset + sizeof(uint32_t) <= sizeof(in[src_id].qs)) { - uint32_t elems; - memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t)); - elems ^= xor_mask; - memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t)); - } else { - GGML_ASSERT(false && "buffer overflow prevented in make_block_q4_0x4"); - } + uint32_t elems; + memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t)); + elems ^= xor_mask; + memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t)); } } else { GGML_ASSERT(false && "invalid block size interleave value");