Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 16 additions & 27 deletions docs/backend/SYCL.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
**oneAPI** is an open ecosystem and a standards-based specification, supporting multiple architectures including but not limited to Intel CPUs, GPUs and FPGAs. The key components of the oneAPI ecosystem include:

- **DPCPP** *(Data Parallel C++)*: The primary oneAPI SYCL implementation, which includes the icpx/icx Compilers.
- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. oneMKL and oneDNN)*.
- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. Intel oneMKL, oneMath and oneDNN)*.
- **oneAPI LevelZero**: A high-performance, low-level interface for fine-grained control over Intel iGPUs and dGPUs.
- **Nvidia & AMD Plugins**: These are plugins extending oneAPI's DPCPP support to SYCL on Nvidia and AMD GPU targets.

Expand Down Expand Up @@ -228,27 +228,27 @@ Upon a successful installation, SYCL is enabled for the available intel devices,
**oneAPI Plugin**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). Users should also make sure the plugin version matches that of the installed base toolkit *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup.


**oneMKL for cuBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *cuBLAS* backend enabled is thus required to run it on Nvidia GPUs.
**oneMath for cuBlas**: The current Intel oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. [oneMath](https://github.com/uxlfoundation/oneMath) is used instead to dispatch to *cuBLAS* on Nvidia GPUs.

```sh
git clone https://github.com/oneapi-src/oneMKL
cd oneMKL
cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
cmake --build buildWithCublas --config Release
git clone https://github.com/uxlfoundation/oneMath
cd oneMath
cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas -DCMAKE_INSTALL_PREFIX:PATH=install
cmake --build buildWithCublas --target install --config Release
```

- **Adding support to AMD GPUs**

**oneAPI Plugin**: In order to enable SYCL support on AMD GPUs, please install the [Codeplay oneAPI Plugin for AMD GPUs](https://developer.codeplay.com/products/oneapi/amd/download). As with Nvidia GPUs, the user should also make sure the plugin version matches the installed base toolkit.

**oneMKL for rocBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* doesn't contain the rocBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *rocBLAS* backend enabled is thus required to run it on AMD GPUs.
**oneMath for rocBlas**: The current Intel oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the rocBLAS backend. [oneMath](https://github.com/uxlfoundation/oneMath) is used instead to dispatch to *rocBLAS* on AMD GPUs.

```sh
git clone https://github.com/oneapi-src/oneMKL
cd oneMKL
# Find your HIPTARGET with rocminfo, under the key 'Name:'
cmake -B buildWithrocBLAS -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_ROCBLAS_BACKEND=ON -DHIPTARGETS=${HIPTARGET} -DTARGET_DOMAINS=blas
cmake --build buildWithrocBLAS --config Release
git clone https://github.com/uxlfoundation/oneMath
cd oneMath
# Find your HIP_TARGETS with rocminfo, under the key 'Name:'
cmake -B buildWithrocBLAS -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_ROCBLAS_BACKEND=ON -DHIP_TARGETS=${HIP_TARGETS} -DTARGET_DOMAINS=blas -DCMAKE_INSTALL_PREFIX:PATH=install
cmake --build buildWithrocBLAS --target install --config Release
```

3. **Verify installation and environment**
Expand Down Expand Up @@ -316,21 +316,15 @@ cmake --build build --config Release -j -v
#### Nvidia GPU

```sh
# Export relevant ENV variables
export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LIBRARY_PATH
export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_DIR
export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR

# Build LLAMA with Nvidia BLAS acceleration through SYCL
# Setting GGML_SYCL_DEVICE_ARCH is optional but can improve performance
GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture

# Option 1: Use FP32 (recommended for better performance in most cases)
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DoneMath_DIR=/path/to/oneMath/buildWithCublas/install/lib/cmake/oneMath

# Option 2: Use FP16
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DoneMath_DIR=/path/to/oneMath/buildWithCublas/install/lib/cmake/oneMath -DGGML_SYCL_F16=ON

# build all binary
cmake --build build --config Release -j -v
Expand All @@ -339,18 +333,13 @@ cmake --build build --config Release -j -v
#### AMD GPU

```sh
# Export relevant ENV variables
export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LIBRARY_PATH
export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE_DIR

# Build LLAMA with rocBLAS acceleration through SYCL

## AMD
# Use FP32, FP16 is not supported
# Find your GGML_SYCL_DEVICE_ARCH with rocminfo, under the key 'Name:'
GGML_SYCL_DEVICE_ARCH=gfx90a # Example architecture
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DoneMath_DIR=/path/to/oneMath/buildWithrocBLAS/install/lib/cmake/oneMath

# build all binary
cmake --build build --config Release -j -v
Expand Down Expand Up @@ -659,7 +648,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
| Name | Value | Function |
|--------------------|---------------------------------------|---------------------------------------------|
| GGML_SYCL | ON (mandatory) | Enable build with SYCL code path.<br>FP32 path - recommended for better performance than FP16 on quantized models|
| GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA \| AMD | Set the SYCL target device type. |
| GGML_SYCL_TARGET | INTEL *(default)* \| INTEL_CPU \| INTEL_GPU \| NVIDIA \| AMD | Set the SYCL target device type. |
| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD) | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
| GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. |
| CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. |
Expand Down
2 changes: 1 addition & 1 deletion examples/sycl/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ List all SYCL devices with ID, compute capability, max work group size, etc.

1. Build the llama.cpp for SYCL for the specified target *(using GGML_SYCL_TARGET)*.

2. Enable oneAPI running environment *(if GGML_SYCL_TARGET is set to INTEL -default-)*
2. Enable oneAPI running environment *(if GGML_SYCL_TARGET is set to INTEL -default-, INTEL_CPU or INTEL_GPU)*

```
source /opt/intel/oneapi/setvars.sh
Expand Down
2 changes: 1 addition & 1 deletion ggml/cmake/ggml-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ if (NOT GGML_SHARED_LIB)

if (GGML_SYCL)
find_package(DNNL)
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
if (${DNNL_FOUND} AND GGML_SYCL_TARGET MATCHES "INTEL.*")
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl)
endif()
if (WIN32)
Expand Down
99 changes: 77 additions & 22 deletions ggml/src/ggml-sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
message(STATUS "GGML_SYCL_TARGET=${GGML_SYCL_TARGET}")

if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA|AMD)$")
if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL.*|NVIDIA|AMD)$")
message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, NVIDIA, or AMD")
endif()

Expand Down Expand Up @@ -30,8 +30,6 @@ if (GGML_SYCL_F16)
add_compile_definitions(GGML_SYCL_F16)
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")

if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
Expand All @@ -51,36 +49,93 @@ target_sources(ggml-sycl PRIVATE ${GGML_HEADERS_SYCL} ${GGML_SOURCES_SYCL})
find_package(DNNL)
message("-- DNNL found:" ${DNNL_FOUND})

if (GGML_SYCL_TARGET STREQUAL "INTEL")
if (GGML_SYCL_TARGET MATCHES "INTEL.*")
add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND})
else()
add_compile_definitions(GGML_SYCL_DNNL=0)
endif()

if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
if (${DNNL_FOUND} AND GGML_SYCL_TARGET MATCHES "INTEL.*")
target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
endif()

if (WIN32)
find_package(IntelSYCL REQUIRED)
find_package(MKL REQUIRED)
target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
find_package(IntelSYCL)
if (IntelSYCL_FOUND)
# Use oneAPI CMake when possible
target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
else()
if (GGML_SYCL_TARGET STREQUAL "INTEL")
target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
add_compile_definitions(GGML_SYCL_NVIDIA)
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl_blas_cublas)
# Fallback to the simplest way of enabling SYCL when using intel/llvm nightly for instance
target_compile_options(ggml-sycl PRIVATE "-fsycl")
target_link_options(ggml-sycl PRIVATE "-fsycl")
endif()

target_compile_options(ggml-sycl PRIVATE "-Wno-narrowing")

find_package(oneMath QUIET)
if (NOT oneMath_FOUND)
message("-- oneMath not found: oneMath will be automatically downloaded")
# Use FetchContent to automatically pull and build oneMath
include(FetchContent)
set(BUILD_FUNCTIONAL_TESTS False)
set(BUILD_EXAMPLES False)
set(TARGET_DOMAINS blas)
if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
set(ENABLE_MKLCPU_BACKEND False)
set(ENABLE_MKLGPU_BACKEND False)
set(ENABLE_CUBLAS_BACKEND True)
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
if (NOT GGML_SYCL_DEVICE_ARCH)
message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
set(ENABLE_MKLCPU_BACKEND False)
set(ENABLE_MKLGPU_BACKEND False)
set(ENABLE_ROCBLAS_BACKEND True)
endif()
FetchContent_Declare(
ONEMATH
GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
GIT_TAG develop
)
FetchContent_MakeAvailable(ONEMATH)
# Create alias to match with find_package targets name
function(onemath_alias target)
if (TARGET ${target})
# Silence verbose warnings from external libraries
target_compile_options(${target} PRIVATE -Wno-uninitialized -Wno-unused-parameter -Wno-unused-variable -Wno-cast-qual)
add_library(ONEMATH::${target} ALIAS ${target})
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa")
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
endfunction()
onemath_alias(onemath)
onemath_alias(onemath_blas_mklcpu)
onemath_alias(onemath_blas_mklgpu)
onemath_alias(onemath_blas_cublas)
onemath_alias(onemath_blas_rocblas)
endif()

# Select the oneMath BLAS backend, the SYCL offload target and the vendor
# compile definitions for ggml-sycl based on GGML_SYCL_TARGET.
#
# The explicitly listed targets link one specific oneMath backend library, so
# the BLAS backend is chosen at compile time (better performance). Any other
# accepted value falls through to the generic oneMath runtime dispatcher.
if (GGML_SYCL_TARGET STREQUAL "INTEL_CPU")
    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_mklcpu)
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_INTEL GGML_SYCL_INTEL_CPU)
elseif (GGML_SYCL_TARGET STREQUAL "INTEL_GPU")
    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_mklgpu)
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_INTEL GGML_SYCL_INTEL_GPU)
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_cublas)
    # The offload target must be passed to both the compile and the link step.
    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
    if (NOT GGML_SYCL_DEVICE_ARCH)
        # "ERROR" is not a message() mode keyword: CMake would treat it as part
        # of the text and keep configuring. FATAL_ERROR stops configuration here.
        message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
    endif()
    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
    # The offload target must be passed to both the compile and the link step.
    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_AMD)
else()
    # Fallback to oneMath runtime dispatcher
    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath)
    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GENERIC)
endif()


if (GGML_SYCL_DEVICE_ARCH)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
endif()
# When a device architecture is requested, forward it to the SYCL target
# backend for both the compile and the link step of ggml-sycl.
if (GGML_SYCL_DEVICE_ARCH)
    set(ggml_sycl_offload_arch_flags -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
    target_compile_options(ggml-sycl PRIVATE ${ggml_sycl_offload_arch_flags})
    target_link_options(ggml-sycl PRIVATE ${ggml_sycl_offload_arch_flags})
    unset(ggml_sycl_offload_arch_flags)
endif()
Loading
Loading