ggml-org · NeoZhangJianyu · Apr 1, 2025 · Jan 10, 2025 · Jan 23, 2025 · Jan 29, 2025
diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md
@@ -20,7 +20,7 @@
 **oneAPI** is an open ecosystem and a standard-based specification, supporting multiple architectures including but not limited to intel CPUs, GPUs and FPGAs. The key components of the oneAPI ecosystem include:
 
 - **DPCPP** *(Data Parallel C++)*: The primary oneAPI SYCL implementation, which includes the icpx/icx Compilers.
-- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. oneMKL and oneDNN)*.
+- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. Intel oneMKL, oneMath and oneDNN)*.
 - **oneAPI LevelZero**: A high performance low level interface for fine-grained control over intel iGPUs and dGPUs.
 - **Nvidia & AMD Plugins**: These are plugins extending oneAPI's DPCPP support to SYCL on Nvidia and AMD GPU targets.
 
@@ -227,30 +227,10 @@ Upon a successful installation, SYCL is enabled for the available intel devices,
 
 **oneAPI Plugin**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). User should also make sure the plugin version matches the installed base toolkit one *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup.
 
-
-**oneMKL for cuBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *cuBLAS* backend enabled is thus required to run it on Nvidia GPUs.
-
-```sh
-git clone https://github.com/oneapi-src/oneMKL
-cd oneMKL
-cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
-cmake --build buildWithCublas --config Release
-```
-
 - **Adding support to AMD GPUs**
 
 **oneAPI Plugin**: In order to enable SYCL support on AMD GPUs, please install the [Codeplay oneAPI Plugin for AMD GPUs](https://developer.codeplay.com/products/oneapi/amd/download). As with Nvidia GPUs, the user should also make sure the plugin version matches the installed base toolkit.
 
-**oneMKL for rocBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* doesn't contain the rocBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *rocBLAS* backend enabled is thus required to run it on AMD GPUs.
-
-```sh
-git clone https://github.com/oneapi-src/oneMKL
-cd oneMKL
-# Find your HIPTARGET with rocminfo, under the key 'Name:'
-cmake -B buildWithrocBLAS -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_ROCBLAS_BACKEND=ON -DHIPTARGETS=${HIPTARGET} -DTARGET_DOMAINS=blas
-cmake --build buildWithrocBLAS --config Release
-```
-
 3. **Verify installation and environment**
 
 In order to check the available SYCL devices on the machine, please use the `sycl-ls` command.
@@ -291,6 +271,8 @@ For AMD GPUs we should expect at least one SYCL-HIP device [`hip:gpu`]:
 
 ### II. Build llama.cpp
 
+The SYCL backend depends on [oneMath](https://github.com/uxlfoundation/oneMath). By default, oneMath is automatically downloaded and built along with the project. To use an existing oneMath build instead, set the CMake flag `-DoneMath_DIR=/path/to/oneMath/install/lib/cmake/oneMath`.
+
 #### Intel GPU
 
 ```
@@ -316,12 +298,6 @@ cmake --build build --config Release -j -v
 #### Nvidia GPU
 
 ```sh
-# Export relevant ENV variables
-export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LD_LIBRARY_PATH
-export LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LIBRARY_PATH
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_DIR
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
-
 # Build LLAMA with Nvidia BLAS acceleration through SYCL
 # Setting GGML_SYCL_DEVICE_ARCH is optional but can improve performance
 GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture
@@ -339,11 +315,6 @@ cmake --build build --config Release -j -v
 #### AMD GPU
 
 ```sh
-# Export relevant ENV variables
-export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LD_LIBRARY_PATH
-export LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LIBRARY_PATH
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE_DIR
-
 # Build LLAMA with rocBLAS acceleration through SYCL
 
 ## AMD

diff --git a/ggml/src/ggml-sycl/CMakeLists.txt b/ggml/src/ggml-sycl/CMakeLists.txt
@@ -30,8 +30,6 @@ if (GGML_SYCL_F16)
     add_compile_definitions(GGML_SYCL_F16)
 endif()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")
-
 if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
     add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
 elseif (GGML_SYCL_TARGET STREQUAL "AMD")
@@ -51,36 +49,93 @@ target_sources(ggml-sycl PRIVATE ${GGML_HEADERS_SYCL} ${GGML_SOURCES_SYCL})
 find_package(DNNL)
 message("-- DNNL found:" ${DNNL_FOUND})
 
-if (GGML_SYCL_TARGET STREQUAL "INTEL")
+if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
     add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND})
+    target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
 else()
     add_compile_definitions(GGML_SYCL_DNNL=0)
 endif()
 
-if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
-    target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
+find_package(IntelSYCL)
+if (IntelSYCL_FOUND)
+    # Use oneAPI CMake when possible
+    target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
+else()
+    # Fall back to the simplest way of enabling SYCL, e.g. when using an intel/llvm nightly build
+    target_compile_options(ggml-sycl PRIVATE "-fsycl")
+    target_link_options(ggml-sycl PRIVATE "-fsycl")
 endif()
 
-if (WIN32)
-    find_package(IntelSYCL REQUIRED)
-    find_package(MKL REQUIRED)
-    target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
-else()
-    if (GGML_SYCL_TARGET STREQUAL "INTEL")
-        target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
-    elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
-        add_compile_definitions(GGML_SYCL_NVIDIA)
-        target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl_blas_cublas)
+target_compile_options(ggml-sycl PRIVATE "-Wno-narrowing")
+
+find_package(oneMath QUIET)
+if (NOT oneMath_FOUND)
+    message("-- oneMath not found: oneMath will be automatically downloaded")
+    # Use FetchContent to automatically pull and build oneMath
+    include(FetchContent)
+    set(BUILD_FUNCTIONAL_TESTS False)
+    set(BUILD_EXAMPLES False)
+    set(TARGET_DOMAINS blas)
+    if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+        set(ENABLE_MKLCPU_BACKEND False)
+        set(ENABLE_MKLGPU_BACKEND False)
+        set(ENABLE_CUBLAS_BACKEND True)
     elseif (GGML_SYCL_TARGET STREQUAL "AMD")
-        if (NOT GGML_SYCL_DEVICE_ARCH)
-            message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
+        set(ENABLE_MKLCPU_BACKEND False)
+        set(ENABLE_MKLGPU_BACKEND False)
+        set(ENABLE_ROCBLAS_BACKEND True)
+        # Ensure setting a string variable here is not overridden by oneMath CACHE variables
+        cmake_policy(SET CMP0126 NEW)
+        # Setting the device architecture is only needed and useful for AMD devices in oneMath
+        set(HIP_TARGETS ${GGML_SYCL_DEVICE_ARCH} CACHE STRING "oneMath HIP target" FORCE)
+    endif()
+    FetchContent_Declare(
+        ONEMATH
+        GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
+        GIT_TAG c255b1b4c41e2ee3059455c1f96a965d6a62568a
+    )
+    FetchContent_MakeAvailable(ONEMATH)
+    # Create aliases that match the target names provided by find_package
+    function(onemath_alias target)
+        if (TARGET ${target}_obj)
+            # Silence verbose warnings from external libraries
+            target_compile_options(${target}_obj PRIVATE -w)
+        endif()
+        if (TARGET ${target})
+            add_library(ONEMATH::${target} ALIAS ${target})
         endif()
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa")
-        target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
+    endfunction()
+    onemath_alias(onemath)
+    onemath_alias(onemath_blas_mklcpu)
+    onemath_alias(onemath_blas_mklgpu)
+    onemath_alias(onemath_blas_cublas)
+    onemath_alias(onemath_blas_rocblas)
+endif()
+
+# oneMath compile-time dispatching is used below for better performance
+if (GGML_SYCL_TARGET STREQUAL "INTEL")
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_mklgpu)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_INTEL)
+elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_cublas)
+    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
+    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
+elseif (GGML_SYCL_TARGET STREQUAL "AMD")
+    if (NOT GGML_SYCL_DEVICE_ARCH) # the AMD (HIP) target requires an explicit device architecture
+        message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
     endif()
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
+    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
+    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_AMD)
+else()
+    # Fall back to the oneMath runtime dispatcher
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GENERIC)
+endif()
 
-    if (GGML_SYCL_DEVICE_ARCH)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
-  endif()
+if (GGML_SYCL_DEVICE_ARCH)
+    target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
+    target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
 endif()