57 changes: 52 additions & 5 deletions gpt4all-backend/CMakeLists.txt
@@ -27,7 +27,9 @@ project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
set(CMAKE_VERBOSE_MAKEFILE ON)
set(BUILD_SHARED_LIBS ON)

# Check for IPO support
@@ -39,18 +41,31 @@ else()
message(STATUS "Interprocedural optimization support detected")
endif()

# llama.cpp base configuration
include(llama.cpp.cmake)

# Build variant list
set(BUILD_VARIANTS default avxonly)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(BUILD_VARIANTS ${BUILD_VARIANTS} metal)
endif()

# Detect CUDA
find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
enable_language(CUDA)
list(APPEND BUILD_VARIANTS cuda)
endif()

# Detect OpenCL
find_package(CLBlast)
if (CLBlast_FOUND)
list(APPEND BUILD_VARIANTS opencl)
endif()

# Go through each build variant
foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# avxonly configuration
if (BUILD_VARIANT STREQUAL avxonly)
set(GPT4ALL_ALLOW_NON_AVX NO)
else()
@@ -60,19 +75,40 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})

# GPU backends; LLAMA_GPU marks any variant that targets a GPU
set(LLAMA_GPU NO)

# metal configuration
if (BUILD_VARIANT STREQUAL metal)
set(LLAMA_METAL YES)
set(LLAMA_GPU YES)
else()
set(LLAMA_METAL NO)
endif()

# cuda configuration
if (BUILD_VARIANT STREQUAL cuda)
set(LLAMA_CUBLAS YES)
set(LLAMA_GPU YES)
else()
set(LLAMA_CUBLAS NO)
endif()

# opencl configuration
if (BUILD_VARIANT STREQUAL opencl)
set(LLAMA_CLBLAST YES)
set(LLAMA_GPU YES)
else()
set(LLAMA_CLBLAST NO)
endif()

# Include GGML
set(LLAMA_K_QUANTS YES)
include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
if (NOT LLAMA_GPU)
set(LLAMA_K_QUANTS NO)
include_ggml(llama.cpp-230519 -230519-${BUILD_VARIANT} ON)
include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
endif()

# Function for preparing individual implementations
@@ -102,12 +138,13 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
replit.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(replit-mainline llama-mainline)

if (NOT LLAMA_GPU)
add_library(llamamodel-230519-${BUILD_VARIANT} SHARED
llamamodel.cpp llmodel_shared.cpp)
target_compile_definitions(llamamodel-230519-${BUILD_VARIANT} PRIVATE
LLAMA_VERSIONS===2 LLAMA_DATE=230519)
prepare_target(llamamodel-230519 llama-230519)

add_library(llamamodel-230511-${BUILD_VARIANT} SHARED
llamamodel.cpp llmodel_shared.cpp)
target_compile_definitions(llamamodel-230511-${BUILD_VARIANT} PRIVATE
@@ -143,5 +180,15 @@ set_target_properties(llmodel PROPERTIES
VERSION ${PROJECT_VERSION}
SOVERSION ${PROJECT_VERSION_MAJOR})

if (CUDAToolkit_FOUND)
target_compile_definitions(llmodel PRIVATE LLMODEL_CUDA)
target_link_libraries(llmodel PRIVATE CUDA::cudart)
endif()

if (CLBlast_FOUND)
target_compile_definitions(llmodel PRIVATE LLMODEL_OPENCL)
target_link_libraries(llmodel PRIVATE clblast)
endif()

set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
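For context on the variant fan-out above: each entry in BUILD_VARIANTS produces a separately named set of shared libraries (llamamodel-mainline-default, llamamodel-mainline-cuda, and so on), and the loader in llmodel.cpp picks among them by file-name suffix at runtime. A minimal sketch of such a lookup, assuming the scan directory and the -<variant> naming scheme are as described; the real logic lives in LLModel::Implementation:

#include <filesystem>
#include <string>
#include <vector>

// Hypothetical sketch: collect the implementation libraries whose file name
// ends in "-<variant>", e.g. "llamamodel-mainline-cuda". The directory layout
// and naming scheme are assumptions for illustration only.
std::vector<std::filesystem::path> librariesForVariant(
        const std::filesystem::path &dir, const std::string &variant) {
    std::vector<std::filesystem::path> out;
    for (const auto &entry : std::filesystem::directory_iterator(dir)) {
        const std::string stem = entry.path().stem().string();
        // require at least one leading character, then '-', then the variant
        if (stem.size() > variant.size() + 1
            && stem.compare(stem.size() - variant.size(), variant.size(), variant) == 0
            && stem[stem.size() - variant.size() - 1] == '-')
            out.push_back(entry.path());
    }
    return out;
}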
6 changes: 6 additions & 0 deletions gpt4all-backend/llamamodel.cpp
@@ -39,6 +39,9 @@ struct gpt_params {
#if LLAMA_DATE <= 230511
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
#endif
#if LLAMA_DATE >= 230519
int32_t n_gpu_layers = 32; // number of layers to offload to the GPU
#endif

#if LLAMA_DATE >= 230519
// sampling parameters
@@ -146,6 +149,9 @@ bool LLamaModel::loadModel(const std::string &modelPath)
#else
d_ptr->params.use_mlock = params.use_mlock;
#endif
#if LLAMA_DATE > 230519
d_ptr->params.n_gpu_layers = params.n_gpu_layers;
#endif
#if LLAMA_DATE <= 230511
d_ptr->params.n_parts = params.n_parts;
#endif
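A note on the LLAMA_DATE guards above: each llamamodel-<date> target compiles this same source with a different -DLLAMA_DATE=<yymmdd> (see the target_compile_definitions calls in CMakeLists.txt), so the struct only grows fields its llama.cpp snapshot supports. The forwarding guard uses > 230519 rather than >=, presumably because the 230519 snapshot's llama_context_params does not yet expose n_gpu_layers. A self-contained illustration of the pattern; field names are taken from the diff, other details are hypothetical:

#include <cstdint>
#include <cstdio>

#ifndef LLAMA_DATE
#define LLAMA_DATE 230519 // normally injected per target via -DLLAMA_DATE=...
#endif

struct gpt_params_sketch {
#if LLAMA_DATE <= 230511
    int32_t n_parts = -1;      // only the oldest snapshot splits models into parts
#endif
#if LLAMA_DATE >= 230519
    int32_t n_gpu_layers = 32; // GPU offload arrived with the newer snapshots
#endif
};

int main() {
    std::printf("compiled against llama.cpp snapshot %d\n", LLAMA_DATE);
}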
27 changes: 25 additions & 2 deletions gpt4all-backend/llmodel.cpp
@@ -7,9 +7,15 @@
#include <vector>
#include <fstream>
#include <filesystem>
#include <cassert>
#include <cstdlib>
#include <sstream>
#ifdef LLMODEL_CUDA
#include <cuda_runtime.h>
#endif
#ifdef LLMODEL_OPENCL
#include <clblast.h>
#endif
#ifdef _MSC_VER
#include <windows.h>
#include <processthreadsapi.h>
@@ -150,13 +156,30 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
#endif

if (!impl) {
// Auto-detect avxonly requirement
if (buildVariant == "auto") {
if (requires_avxonly()) {
buildVariant = "avxonly";
} else {
buildVariant = "default";
}
// Auto-detect CUDA
#ifdef LLMODEL_CUDA
int cudaDeviceCount;
if (cudaGetDeviceCount(&cudaDeviceCount) == cudaSuccess
&& cudaDeviceCount != 0) {
buildVariant = "cuda";
}
#endif
// Auto-detect OpenCL
#ifdef LLMODEL_OPENCL
cl_uint clPlatformCount;
cl_platform_id platform_ids[16];
if (clGetPlatformIDs(16, platform_ids, &clPlatformCount) == CL_SUCCESS
&& clPlatformCount != 0) {
buildVariant = "opencl";
}
#endif
}
impl = implementation(f, buildVariant);
if (!impl) return nullptr;
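The resulting precedence with buildVariant == "auto": first "avxonly" or "default" from CPU capabilities, then "cuda" if a CUDA device answers, then "opencl" if any OpenCL platform is present; because the OpenCL probe runs last, it wins when both GPU backends are compiled in and detected. A caller-side sketch; construct()'s second parameter is truncated above, so the buildVariant form here is an assumption:

#include <iostream>
#include "llmodel.h"

int main() {
    // "auto" triggers the avxonly/CUDA/OpenCL detection above;
    // the model path is a placeholder.
    LLModel *model = LLModel::Implementation::construct("ggml-model.bin", "auto");
    if (!model) {
        std::cerr << "no usable implementation found\n";
        return 1;
    }
    // ... model->loadModel(...), prompting, etc.
}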
5 changes: 3 additions & 2 deletions gpt4all-backend/utils.cpp
@@ -1,9 +1,10 @@
#include "utils.h"

#include <string_view>
#include <fstream>
#include <regex>

void replace(std::string & str, std::string_view needle, std::string_view replacement) {
size_t pos = 0;
while ((pos = str.find(needle, pos)) != std::string::npos) {
str.replace(pos, needle.length(), replacement);
@@ -325,4 +326,4 @@ gpt_vocab::id gpt_sample_top_k_top_p(
int idx = dist(rng);

return logits_id[idx].second;
}
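The switch from const std::string & to std::string_view lets callers pass literals and substrings without materializing temporary std::strings; the C++17 overloads of std::string::find and std::string::replace accept string_view arguments directly, so the loop body above compiles unchanged. A quick usage sketch, assuming utils.h is on the include path:

#include <iostream>
#include <string>
#include "utils.h"

int main() {
    std::string prompt = "Hello, %name%! Welcome, %name%!";
    replace(prompt, "%name%", "world"); // literals bind to string_view, no copies
    std::cout << prompt << '\n';        // Hello, world! Welcome, world!
}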
2 changes: 1 addition & 1 deletion gpt4all-backend/utils.h
@@ -58,7 +58,7 @@ struct gpt_vocab {
}
};

void replace(std::string & str, std::string_view needle, std::string_view replacement);

// poor-man's JSON parsing
std::map<std::string, int32_t> json_parse(const std::string & fname);