From 51709290959fb1b29a5ef82ce74316c32231887c Mon Sep 17 00:00:00 2001
From: niansa
Date: Sun, 28 May 2023 17:36:56 +0200
Subject: [PATCH 01/12] Build variants with cuda/opencl

---
 gpt4all-backend/CMakeLists.txt | 47 +++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 80a3f000f14a..be4e808bb3af 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -27,6 +27,7 @@ project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+set(CMAKE_VERBOSE_MAKEFILE ON)
 set(BUILD_SHARED_LIBS ON)
 
 # Check for IPO support
@@ -38,18 +39,31 @@ else()
     message(STATUS "Interprocedural optimization support detected")
 endif()
 
+# llama.cpp base configuration
+set(LLAMA_LTO ${IPO_SUPPORTED})
 include(llama.cpp.cmake)
 
-set(BUILD_VARIANTS default avxonly)
+# Build variant list
+set(BUILD_VARIANTS default avxonly cuda opencl)
 if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
     set(BUILD_VARIANTS ${BUILD_VARIANTS} metal)
 endif()
 
-set(CMAKE_VERBOSE_MAKEFILE ON)
+# Detect CUDA
+find_package(CUDAToolkit)
+if (CUDAToolkit_FOUND)
+    list(APPEND BUILD_VARIANTS cuda)
+endif()
+
+# Detect opencl
+find_package(CLBlast)
+if (CLBlast_FOUND)
+    list(APPEND BUILD_VARIANTS opencl)
+endif()
 
 # Go through each build variant
 foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
-    # Determine flags
+    # avxonly configuration
     if (BUILD_VARIANT STREQUAL avxonly)
         set(GPT4ALL_ALLOW_NON_AVX NO)
     else()
@@ -59,19 +73,40 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
     set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
     set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})
 
+    # metal configuration
     if (BUILD_VARIANT STREQUAL metal)
         set(LLAMA_METAL YES)
+        set(LLAMA_GPU YES)
     else()
         set(LLAMA_METAL NO)
+        set(LLAMA_GPU NO)
+    endif()
+
+    # cuda configuration
+    if (BUILD_VARIANT STREQUAL cuda)
+        set(LLAMA_CUBLAS YES)
+        set(LLAMA_GPU YES)
+    else()
+        set(LLAMA_CUBLAS NO)
+        set(LLAMA_GPU NO)
+    endif()
+
+    # opencl configuration
+    if (BUILD_VARIANT STREQUAL opencl)
+        set(LLAMA_CLBLAST YES)
+        set(LLAMA_GPU YES)
+    else()
+        set(LLAMA_CLBLAST NO)
+        set(LLAMA_GPU NO)
     endif()
 
     # Include GGML
     set(LLAMA_K_QUANTS YES)
     include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
-    if (NOT LLAMA_METAL)
+    if (NOT LLAMA_GPU)
         set(LLAMA_K_QUANTS NO)
-        include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
         include_ggml(llama.cpp-230519 -230519-${BUILD_VARIANT} ON)
+        include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
     endif()
 
     # Function for preparing individual implementations
@@ -101,7 +136,7 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
         replit.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
     prepare_target(replit-mainline llama-mainline)
 
-    if (NOT LLAMA_METAL)
+    if (NOT GPU)
         add_library(llamamodel-230519-${BUILD_VARIANT} SHARED
             llamamodel.cpp llmodel_shared.cpp)
         target_compile_definitions(llamamodel-230519-${BUILD_VARIANT} PRIVATE

From 0fe4842fb396f89e8679c406f1d7f2c79f6fdd28 Mon Sep 17 00:00:00 2001
From: niansa
Date: Sun, 28 May 2023 17:38:56 +0200
Subject: [PATCH 02/12] Updated llama.cpp-mainline and set ngl

---
 gpt4all-backend/llamamodel.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 5fdc35b29d86..0e0a112cbb7f 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -39,6 +39,9 @@ struct gpt_params {
 #if LLAMA_DATE <= 230511
     int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
 #endif
+#if LLAMA_DATE >= 230519
+    int32_t n_gpu_layers = 32;
+#endif
 
 #if LLAMA_DATE >= 230519
     // sampling parameters
@@ -145,6 +148,8 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     d_ptr->params.use_mlock = true;
 #else
     d_ptr->params.use_mlock = params.use_mlock;
+#if LLAMA_DATE > 230519
+    d_ptr->params.n_gpu_layers = params.n_gpu_layers;
 #endif
 #if LLAMA_DATE <= 230511
     d_ptr->params.n_parts = params.n_parts;

From 82ed5e53bf18adec4988ceb22bc5bcafa02d176e Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 11:29:25 +0200
Subject: [PATCH 03/12] Fixed duplicate targets

---
 gpt4all-backend/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index be4e808bb3af..644ef5552ee9 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -44,7 +44,7 @@ set(LLAMA_LTO ${IPO_SUPPORTED})
 include(llama.cpp.cmake)
 
 # Build variant list
-set(BUILD_VARIANTS default avxonly cuda opencl)
+set(BUILD_VARIANTS default avxonly)
 if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
     set(BUILD_VARIANTS ${BUILD_VARIANTS} metal)
 endif()

From 5f45094eb7b5c0d9185d35b92584e319e4c09655 Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 11:46:50 +0200
Subject: [PATCH 04/12] enable_language(CUDA)

---
 gpt4all-backend/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 644ef5552ee9..018d2ad071a9 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -52,6 +52,7 @@ endif()
 # Detect CUDA
 find_package(CUDAToolkit)
 if (CUDAToolkit_FOUND)
+    enable_language(CUDA)
     list(APPEND BUILD_VARIANTS cuda)
 endif()

From 9f9e5e98ba0429e1d3029085995fd9e50cfce296 Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 12:05:35 +0200
Subject: [PATCH 05/12] Bump backend cmake_minimum_required to allow for C++20
 CUDA to work

---
 gpt4all-backend/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 018d2ad071a9..f79da81c3c31 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.16)
+cmake_minimum_required(VERSION 3.25.2)
 
 set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 if(APPLE)

From 30f669f77bf6f0038a24d179a805e57bcb736c19 Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 12:10:10 +0200
Subject: [PATCH 06/12] Set CMAKE_CUDA_STANDARD properly and revert
 cmake_minimum_required

---
 gpt4all-backend/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index f79da81c3c31..e6abb2cb0335 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.25.2)
+cmake_minimum_required(VERSION 3.16)
 
 set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 if(APPLE)
@@ -26,6 +26,7 @@ project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
 
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CUDA_STANDARD 11)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
 set(CMAKE_VERBOSE_MAKEFILE ON)
 set(BUILD_SHARED_LIBS ON)

From e603587938e0cbdb9ad4cf3755d1d77cb1f05615 Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 12:12:30 +0200
Subject: [PATCH 07/12] Fix syntax error from rebase

---
 gpt4all-backend/llamamodel.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 0e0a112cbb7f..86f745524848 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -148,6 +148,7 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     d_ptr->params.use_mlock = true;
 #else
     d_ptr->params.use_mlock = params.use_mlock;
+#endif
 #if LLAMA_DATE > 230519
     d_ptr->params.n_gpu_layers = params.n_gpu_layers;
 #endif

From cf931c69854c6d9d34a27fcfff1f5606b7cff41d Mon Sep 17 00:00:00 2001
From: User
Date: Wed, 5 Jul 2023 08:02:34 +0000
Subject: [PATCH 08/12] Added CUDA auto detection

---
 gpt4all-backend/CMakeLists.txt |  5 +++++
 gpt4all-backend/llmodel.cpp    | 14 ++++++++++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index e6abb2cb0335..1f5ebf94c812 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -175,5 +175,10 @@ set_target_properties(llmodel PROPERTIES
                               VERSION ${PROJECT_VERSION}
                               SOVERSION ${PROJECT_VERSION_MAJOR})
 
+if (CUDAToolkit_FOUND)
+    target_compile_definitions(llmodel PRIVATE LLMODEL_CUDA)
+    target_link_libraries(llmodel PRIVATE cudart)
+endif()
+
 set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
 set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
diff --git a/gpt4all-backend/llmodel.cpp b/gpt4all-backend/llmodel.cpp
index d7c721e2db3c..c9850c9cbf59 100644
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -7,9 +7,11 @@
 #include
 #include
 #include
+#include
 #include
 #include
-#include
+#include
+
 
 std::string s_implementations_search_path = ".";
@@ -148,13 +150,21 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVaria
 #endif
 
     if (!impl) {
-        //TODO: Auto-detect CUDA/OpenCL
+        // Auto-detect avxonly requirement
        if (buildVariant == "auto") {
            if (requires_avxonly()) {
                buildVariant = "avxonly";
            } else {
                buildVariant = "default";
            }
+            // Auto-detect CUDA
+#ifdef LLMODEL_CUDA
+            int cudaDeviceCount;
+            if (cudaGetDeviceCount(&cudaDeviceCount) == cudaSuccess
+                && cudaDeviceCount != 0) {
+                buildVariant = "cuda";
+            }
+#endif
        }
        impl = implementation(f, buildVariant);
        if (!impl) return nullptr;

From 3da84a27eca2ec50dc1739d3fc9a6ecd5402286a Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 13:52:20 +0200
Subject: [PATCH 09/12] Fixed name collision for replace()

---
 gpt4all-backend/utils.cpp | 5 +++--
 gpt4all-backend/utils.h   | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gpt4all-backend/utils.cpp b/gpt4all-backend/utils.cpp
index 5dc44fadb759..b3866a2859d3 100644
--- a/gpt4all-backend/utils.cpp
+++ b/gpt4all-backend/utils.cpp
@@ -1,9 +1,10 @@
 #include "utils.h"
 
+#include <string_view>
 #include
 #include
 
-void replace(std::string & str, const std::string & needle, const std::string & replacement) {
+void replace(std::string & str, std::string_view needle, std::string_view replacement) {
     size_t pos = 0;
     while ((pos = str.find(needle, pos)) != std::string::npos) {
         str.replace(pos, needle.length(), replacement);
@@ -325,4 +326,4 @@ gpt_vocab::id gpt_sample_top_k_top_p(
     int idx = dist(rng);
 
     return logits_id[idx].second;
-}
\ No newline at end of file
+}
diff --git a/gpt4all-backend/utils.h b/gpt4all-backend/utils.h
index b190643dc2bb..470337d8e890 100644
--- a/gpt4all-backend/utils.h
+++ b/gpt4all-backend/utils.h
@@ -58,7 +58,7 @@ struct gpt_vocab {
     }
 };
 
-void replace(std::string & str, const std::string & needle, const std::string & replacement);
+void replace(std::string & str, std::string_view needle, std::string_view replacement);
 
 // poor-man's JSON parsing
 std::map json_parse(const std::string & fname);

From abd861885c1cb6b704382a3bd50369f50cf8ad2a Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 13:58:04 +0200
Subject: [PATCH 10/12] Fixed a CMake variable naming mistake

---
 gpt4all-backend/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 1f5ebf94c812..2f093fa8da1f 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -138,12 +138,13 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
         replit.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
     prepare_target(replit-mainline llama-mainline)
 
-    if (NOT GPU)
+    if (NOT LLAMA_GPU)
         add_library(llamamodel-230519-${BUILD_VARIANT} SHARED
             llamamodel.cpp llmodel_shared.cpp)
         target_compile_definitions(llamamodel-230519-${BUILD_VARIANT} PRIVATE
             LLAMA_VERSIONS===2 LLAMA_DATE=230519)
         prepare_target(llamamodel-230519 llama-230519)
+
         add_library(llamamodel-230511-${BUILD_VARIANT} SHARED
             llamamodel.cpp llmodel_shared.cpp)
         target_compile_definitions(llamamodel-230511-${BUILD_VARIANT} PRIVATE

From 8b4819bed0d8e7ed9b4c5aca736e1f5950ecae97 Mon Sep 17 00:00:00 2001
From: niansa
Date: Wed, 5 Jul 2023 10:26:46 +0200
Subject: [PATCH 11/12] Added OpenCL auto-detection

---
 gpt4all-backend/CMakeLists.txt |  5 +++++
 gpt4all-backend/llmodel.cpp    | 14 ++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 2f093fa8da1f..f3c058f3d796 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -181,5 +181,10 @@ if (CUDAToolkit_FOUND)
     target_link_libraries(llmodel PRIVATE cudart)
 endif()
 
+if (CLBlast_FOUND)
+    target_compile_definitions(llmodel PRIVATE LLMODEL_OPENCL)
+    target_link_libraries(llmodel PRIVATE clblast)
+endif()
+
 set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
 set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
diff --git a/gpt4all-backend/llmodel.cpp b/gpt4all-backend/llmodel.cpp
index c9850c9cbf59..5347a5a4d75b 100644
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -10,7 +10,12 @@
 #include
 #include
 #include
+#ifdef LLMODEL_CUDA
 #include
+#endif
+#ifdef LLMODEL_OPENCL
+#include <CL/cl.h>
+#endif
 
 std::string s_implementations_search_path = ".";
@@ -164,6 +169,15 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVaria
                && cudaDeviceCount != 0) {
                buildVariant = "cuda";
            }
+#endif
+#ifdef LLMODEL_OPENCL
+            // Auto-detect OpenCL
+            unsigned clPlatformCount;
+            cl_platform_id platform_ids[16];
+            if (clGetPlatformIDs(16, platform_ids, &clPlatformCount) == CL_SUCCESS
+                && clPlatformCount != 0) {
+                buildVariant = "opencl";
+            }
 #endif
        }
        impl = implementation(f, buildVariant);

From 9b95acb1e16477b91c5ba41d1e319ff0ac8ce59e Mon Sep 17 00:00:00 2001
From: niansa/tuxifan
Date: Wed, 5 Jul 2023 17:27:42 +0200
Subject: [PATCH 12/12] Do not set LLAMA_LTO

Signed-off-by: niansa/tuxifan
---
 gpt4all-backend/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index f3c058f3d796..fb09c1d3b9d4 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -41,7 +41,6 @@ else()
 endif()
 
 # llama.cpp base configuration
-set(LLAMA_LTO ${IPO_SUPPORTED})
 include(llama.cpp.cmake)
 
 # Build variant list