Added cuda and opencl support #746

Closed
wants to merge 13 commits
57 changes: 52 additions & 5 deletions gpt4all-backend/CMakeLists.txt
@@ -26,7 +26,9 @@ project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
set(CMAKE_VERBOSE_MAKEFILE ON)
set(BUILD_SHARED_LIBS ON)

# Check for IPO support
@@ -38,18 +40,31 @@ else()
message(STATUS "Interprocedural optimization support detected")
endif()

# llama.cpp base configuration
include(llama.cpp.cmake)

# Build variant list
set(BUILD_VARIANTS default avxonly)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(BUILD_VARIANTS ${BUILD_VARIANTS} metal)
endif()

set(CMAKE_VERBOSE_MAKEFILE ON)
# Detect CUDA
find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
enable_language(CUDA)
list(APPEND BUILD_VARIANTS cuda)
endif()

# Detect OpenCL
find_package(CLBlast)
if (CLBlast_FOUND)
list(APPEND BUILD_VARIANTS opencl)
endif()

# Go through each build variant
foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# Determine flags
# avxonly configuration
if (BUILD_VARIANT STREQUAL avxonly)
set(GPT4ALL_ALLOW_NON_AVX NO)
else()
@@ -59,19 +74,40 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})

# metal configuration
if (BUILD_VARIANT STREQUAL metal)
set(LLAMA_METAL YES)
set(LLAMA_GPU YES)
else()
set(LLAMA_METAL NO)
set(LLAMA_GPU NO)
endif()

# cuda configuration
if (BUILD_VARIANT STREQUAL cuda)
set(LLAMA_CUBLAS YES)
set(LLAMA_GPU YES)
else()
set(LLAMA_CUBLAS NO)
set(LLAMA_GPU NO)
endif()

# opencl configuration
if (BUILD_VARIANT STREQUAL opencl)
set(LLAMA_CLBLAST YES)
set(LLAMA_GPU YES)
else()
set(LLAMA_CLBLAST NO)
set(LLAMA_GPU NO)
endif()

# Include GGML
set(LLAMA_K_QUANTS YES)
include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
if (NOT LLAMA_METAL)
if (NOT LLAMA_GPU)
set(LLAMA_K_QUANTS NO)
include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
include_ggml(llama.cpp-230519 -230519-${BUILD_VARIANT} ON)
include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
endif()

# Function for preparing individual implementations
@@ -101,12 +137,13 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
replit.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(replit-mainline llama-mainline)

if (NOT LLAMA_METAL)
if (NOT LLAMA_GPU)
add_library(llamamodel-230519-${BUILD_VARIANT} SHARED
llamamodel.cpp llmodel_shared.cpp)
target_compile_definitions(llamamodel-230519-${BUILD_VARIANT} PRIVATE
LLAMA_VERSIONS===2 LLAMA_DATE=230519)
prepare_target(llamamodel-230519 llama-230519)

add_library(llamamodel-230511-${BUILD_VARIANT} SHARED
llamamodel.cpp llmodel_shared.cpp)
target_compile_definitions(llamamodel-230511-${BUILD_VARIANT} PRIVATE
@@ -138,5 +175,15 @@ set_target_properties(llmodel PROPERTIES
VERSION ${PROJECT_VERSION}
SOVERSION ${PROJECT_VERSION_MAJOR})

if (CUDAToolkit_FOUND)
target_compile_definitions(llmodel PRIVATE LLMODEL_CUDA)
target_link_libraries(llmodel PRIVATE cudart)
endif()

if (CLBlast_FOUND)
target_compile_definitions(llmodel PRIVATE LLMODEL_OPENCL)
target_link_libraries(llmodel PRIVATE clblast)
endif()

set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
6 changes: 6 additions & 0 deletions gpt4all-backend/llamamodel.cpp
@@ -39,6 +39,9 @@ struct gpt_params {
#if LLAMA_DATE <= 230511
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
#endif
#if LLAMA_DATE >= 230519
int32_t n_gpu_layers = 32;
#endif

#if LLAMA_DATE >= 230519
// sampling parameters
@@ -146,6 +149,9 @@ bool LLamaModel::loadModel(const std::string &modelPath)
#else
d_ptr->params.use_mlock = params.use_mlock;
#endif
#if LLAMA_DATE > 230519
d_ptr->params.n_gpu_layers = params.n_gpu_layers;
#endif
#if LLAMA_DATE <= 230511
d_ptr->params.n_parts = params.n_parts;
#endif
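
For context, n_gpu_layers is the knob llama.cpp uses to decide how many transformer layers the cuBLAS/CLBlast build keeps on the GPU (0 keeps everything on the CPU). Below is a rough, illustrative sketch of the hand-off performed above, using stand-in struct definitions since the real llama.cpp headers are not part of this diff; only n_gpu_layers and the assignment mirror the change.

// Abbreviated stand-in types; not the real llama.cpp API.
#include <cstdint>
#include <cstdio>

struct gpt_params_sketch {
    int32_t n_gpu_layers = 32;        // default introduced by this PR
};

struct llama_context_params_sketch {
    int32_t n_gpu_layers = 0;         // 0 = no layers offloaded to the GPU
};

int main() {
    gpt_params_sketch params;
    llama_context_params_sketch ctx_params;
    // Mirrors: d_ptr->params.n_gpu_layers = params.n_gpu_layers;
    ctx_params.n_gpu_layers = params.n_gpu_layers;
    std::printf("offloading %d layers to the GPU backend\n", ctx_params.n_gpu_layers);
    return 0;
}
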
28 changes: 26 additions & 2 deletions gpt4all-backend/llmodel.cpp
@@ -7,9 +7,16 @@
#include <vector>
#include <fstream>
#include <filesystem>
#include <sstream>
#include <cassert>
#include <cstdlib>
#include <sstream>
#ifdef LLMODEL_CUDA
#include <cuda_runtime.h>
#endif
#ifdef LLMODEL_OPENCL
#include <clblast.h>
#endif


std::string s_implementations_search_path = ".";

@@ -148,13 +155,30 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant
#endif

if (!impl) {
//TODO: Auto-detect CUDA/OpenCL
// Auto-detect avxonly requirement
if (buildVariant == "auto") {
if (requires_avxonly()) {
buildVariant = "avxonly";
} else {
buildVariant = "default";
}
// Auto-detect CUDA
#ifdef LLMODEL_CUDA
int cudaDeviceCount;
if (cudaGetDeviceCount(&cudaDeviceCount) == cudaSuccess
&& cudaDeviceCount != 0) {
buildVariant = "cuda";
}
#endif
#ifdef LLMODEL_OPENCL
// Auto-detect OpenCL
unsigned clPlatformCount;
cl_platform_id platform_ids[16];
if (clGetPlatformIDs(16, platform_ids, &clPlatformCount) == CL_SUCCESS
&& clPlatformCount != 0) {
buildVariant = "opencl";
}
#endif
}
impl = implementation(f, buildVariant);
if (!impl) return nullptr;
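
For reference, the selection logic above can be exercised on its own. The following is a minimal, self-contained sketch of the same detection order (CPU variant first, then CUDA, then OpenCL); it is not part of this diff, requires_avxonly() is replaced by a plain parameter, and it assumes the CUDA runtime and an OpenCL ICD loader are installed whenever the corresponding macros are defined.

// Standalone sketch of the auto-detection order used in LLModel::construct().
// Not part of the PR: compile with -DLLMODEL_CUDA and/or -DLLMODEL_OPENCL and
// link against cudart / OpenCL to enable the respective probes.
#include <cstdio>
#include <string>
#ifdef LLMODEL_CUDA
#include <cuda_runtime.h>
#endif
#ifdef LLMODEL_OPENCL
#include <CL/cl.h>
#endif

static std::string detectBuildVariant(bool requiresAvxOnly) {
    // CPU baseline first, mirroring the avxonly/default split above.
    std::string variant = requiresAvxOnly ? "avxonly" : "default";
#ifdef LLMODEL_CUDA
    int cudaDeviceCount = 0;
    if (cudaGetDeviceCount(&cudaDeviceCount) == cudaSuccess && cudaDeviceCount != 0)
        variant = "cuda";
#endif
#ifdef LLMODEL_OPENCL
    cl_uint clPlatformCount = 0;
    cl_platform_id platformIds[16];
    // Note: because this probe runs last, OpenCL wins over CUDA when both are present.
    if (clGetPlatformIDs(16, platformIds, &clPlatformCount) == CL_SUCCESS && clPlatformCount != 0)
        variant = "opencl";
#endif
    return variant;
}

int main() {
    std::printf("selected build variant: %s\n", detectBuildVariant(false).c_str());
    return 0;
}
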
5 changes: 3 additions & 2 deletions gpt4all-backend/utils.cpp
@@ -1,9 +1,10 @@
#include "utils.h"

#include <string_view>
#include <fstream>
#include <regex>

void replace(std::string & str, const std::string & needle, const std::string & replacement) {
void replace(std::string & str, std::string_view needle, std::string_view replacement) {
size_t pos = 0;
while ((pos = str.find(needle, pos)) != std::string::npos) {
str.replace(pos, needle.length(), replacement);
@@ -325,4 +326,4 @@ gpt_vocab::id gpt_sample_top_k_top_p(
int idx = dist(rng);

return logits_id[idx].second;
}
}
2 changes: 1 addition & 1 deletion gpt4all-backend/utils.h
@@ -58,7 +58,7 @@ struct gpt_vocab {
}
};

void replace(std::string & str, const std::string & needle, const std::string & replacement);
void replace(std::string & str, std::string_view needle, std::string_view replacement);

// poor-man's JSON parsing
std::map<std::string, int32_t> json_parse(const std::string & fname);
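
A quick usage sketch for the new std::string_view signature: callers can pass string literals directly, and no temporary std::string is constructed for the needle or the replacement. The loop body below is a plausible stand-in (the diff truncates the original function), so treat it as illustrative rather than the file's exact code.

// Illustrative only: a string_view-based replace() matching the new declaration.
#include <iostream>
#include <string>
#include <string_view>

void replace(std::string & str, std::string_view needle, std::string_view replacement) {
    size_t pos = 0;
    while ((pos = str.find(needle, pos)) != std::string::npos) {
        str.replace(pos, needle.length(), replacement);
        pos += replacement.length();   // skip past the inserted text to avoid re-matching it
    }
}

int main() {
    std::string prompt = "### Instruction: %1 ### Response:";
    replace(prompt, "%1", "Hello");    // no std::string temporaries for "%1" or "Hello"
    std::cout << prompt << '\n';
    return 0;
}
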