Commit ae8de6d

Authored by slaren, with co-authors ggerganov and yeahdongcn
ggml : build backends as libraries (ggml-org#10256)
* ggml : build backends as libraries

---------

Signed-off-by: Xiaodong Ye <[email protected]>
Co-authored-by: Georgi Gerganov <[email protected]>
Co-authored-by: R0CKSTAR <[email protected]>
1 parent 4a8ccb3 commit ae8de6d
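After this change, ggml is split into a base library plus one library per backend instead of a single monolithic libggml, which is why the Dockerfiles below start collecting every built .so. As a rough sketch of what a shared build produces afterwards (the exact library names are an assumption based on the per-backend layout, not taken from this page):

cmake -B build -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
cmake --build build --config Release -j$(nproc)
find build -name "*.so"
# expected, roughly: libggml-base.so, libggml-cpu.so, libggml-cuda.so,
# libggml.so, libllama.so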

File tree

191 files changed · +17728 −17066 lines changed


.devops/llama-cli-cuda.Dockerfile

+5 −4

@@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
         export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
     cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-cli -j$(nproc)
+    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libgomp1

-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
-COPY --from=build /app/build/bin/llama-cli /llama-cli
+COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-cli /

 ENTRYPOINT [ "/llama-cli" ]

.devops/llama-cli-musa.Dockerfile

+4 −3

@@ -16,15 +16,16 @@ WORKDIR /app
 COPY . .

 RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-cli -j$(nproc)
+    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libgomp1

-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-cli /llama-cli

 ENTRYPOINT [ "/llama-cli" ]

.devops/llama-server-cuda.Dockerfile

+4 −3

@@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
         export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
     cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-server -j$(nproc)
+    cmake --build build --config Release --target llama-server -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl

-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-server /llama-server

 # Must be set to 0.0.0.0 so it can listen to requests from host machine

.devops/llama-server-musa.Dockerfile

+4 −3

@@ -16,15 +16,16 @@ WORKDIR /app
 COPY . .

 RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-server -j$(nproc)
+    cmake --build build --config Release --target llama-server -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl

-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-server /llama-server

 # Must be set to 0.0.0.0 so it can listen to requests from host machine

.devops/nix/package.nix

+3 −3

@@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   };

   postPatch = ''
-    substituteInPlace ./ggml/src/ggml-metal.m \
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-    substituteInPlace ./ggml/src/ggml-metal.m \
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
   '';

@@ -173,7 +173,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
     (cmakeBool "GGML_NATIVE" false)
     (cmakeBool "GGML_BLAS" useBlas)
     (cmakeBool "GGML_CUDA" useCuda)
-    (cmakeBool "GGML_HIPBLAS" useRocm)
+    (cmakeBool "GGML_HIP" useRocm)
     (cmakeBool "GGML_METAL" useMetalKit)
     (cmakeBool "GGML_VULKAN" useVulkan)
     (cmakeBool "GGML_STATIC" enableStatic)
.github/workflows/build.yml

+5 −5

@@ -405,13 +405,13 @@ jobs:
       - name: Build with native CMake HIP support
         id: cmake_build
         run: |
-          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON
+          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
           cmake --build build --config Release -j $(nproc)

       - name: Build with legacy HIP support
         id: cmake_build_legacy_hip
         run: |
-          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
+          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
           cmake --build build2 --config Release -j $(nproc)

   ubuntu-22-cmake-sycl:

@@ -747,7 +747,7 @@ jobs:
         id: clone_kompute
         if: ${{ matrix.build == 'kompute-x64' }}
         run: |
-          git submodule update --init ggml/src/kompute
+          git submodule update --init ggml/src/ggml-kompute/kompute

       - name: Download OpenBLAS
         id: get_openblas

@@ -1014,7 +1014,7 @@ jobs:
         run: |
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
           cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

   windows-latest-cmake-hip-release:

@@ -1050,7 +1050,7 @@ jobs:
         run: |
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
           cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
           md "build\bin\rocblas\library\"
           cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"

.gitmodules

+1 −1

@@ -1,3 +1,3 @@
 [submodule "kompute"]
-	path = ggml/src/kompute
+	path = ggml/src/ggml-kompute/kompute
 	url = https://github.com/nomic-ai/kompute.git
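Because the submodule path moves, an existing clone needs its submodule configuration re-synced after pulling this commit; a short sketch, assuming a standard checkout:

git pull
git submodule sync
git submodule update --init ggml/src/ggml-kompute/kompute
# the stale ggml/src/kompute directory, if any, can then be removed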

CMakeLists.txt

−1

@@ -140,7 +140,6 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
 set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
 set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")

-
 # At the moment some compile definitions are placed within the ggml/src
 # directory but not exported on the `ggml` target. This could be improved by
 # determining _precisely_ which defines are necessary for the llama-config
