ggml-org · NeoZhangJianyu · Apr 1, 2025 · Jan 10, 2025 · Jan 23, 2025 · Jan 29, 2025
diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md
@@ -20,7 +20,7 @@
 **oneAPI** is an open ecosystem and a standard-based specification, supporting multiple architectures including but not limited to intel CPUs, GPUs and FPGAs. The key components of the oneAPI ecosystem include:
 
 - **DPCPP** *(Data Parallel C++)*: The primary oneAPI SYCL implementation, which includes the icpx/icx Compilers.
-- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. oneMKL and oneDNN)*.
+- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. Intel oneMKL, oneMath and oneDNN)*.
 - **oneAPI LevelZero**: A high performance low level interface for fine-grained control over intel iGPUs and dGPUs.
 - **Nvidia & AMD Plugins**: These are plugins extending oneAPI's DPCPP support to SYCL on Nvidia and AMD GPU targets.
 
@@ -228,27 +228,27 @@ Upon a successful installation, SYCL is enabled for the available intel devices,
 **oneAPI Plugin**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). User should also make sure the plugin version matches the installed base toolkit one *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup.
 
 
-**oneMKL for cuBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *cuBLAS* backend enabled is thus required to run it on Nvidia GPUs.
+**oneMath for cuBLAS**: The current Intel oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. [oneMath](https://github.com/uxlfoundation/oneMath) is used instead to dispatch to *cuBLAS* on Nvidia GPUs.
 
 ```sh
-git clone https://github.com/oneapi-src/oneMKL
-cd oneMKL
-cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
-cmake --build buildWithCublas --config Release
+git clone https://github.com/uxlfoundation/oneMath
+cd oneMath
+cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas -DCMAKE_INSTALL_PREFIX:PATH=buildWithCublas/install
+cmake --build buildWithCublas --target install --config Release
 ```
 
 - **Adding support to AMD GPUs**
 
 **oneAPI Plugin**: In order to enable SYCL support on AMD GPUs, please install the [Codeplay oneAPI Plugin for AMD GPUs](https://developer.codeplay.com/products/oneapi/amd/download). As with Nvidia GPUs, the user should also make sure the plugin version matches the installed base toolkit.
 
-**oneMKL for rocBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* doesn't contain the rocBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *rocBLAS* backend enabled is thus required to run it on AMD GPUs.
+**oneMath for rocBLAS**: The current Intel oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the rocBLAS backend. [oneMath](https://github.com/uxlfoundation/oneMath) is used instead to dispatch to *rocBLAS* on AMD GPUs.
 
 ```sh
-git clone https://github.com/oneapi-src/oneMKL
-cd oneMKL
-# Find your HIPTARGET with rocminfo, under the key 'Name:'
-cmake -B buildWithrocBLAS -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_ROCBLAS_BACKEND=ON -DHIPTARGETS=${HIPTARGET} -DTARGET_DOMAINS=blas
-cmake --build buildWithrocBLAS --config Release
+git clone https://github.com/uxlfoundation/oneMath
+cd oneMath
+# Find your HIP_TARGETS with rocminfo, under the key 'Name:'
+cmake -B buildWithrocBLAS -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_ROCBLAS_BACKEND=ON -DHIP_TARGETS=${HIP_TARGETS} -DTARGET_DOMAINS=blas -DCMAKE_INSTALL_PREFIX:PATH=buildWithrocBLAS/install
+cmake --build buildWithrocBLAS --target install --config Release
 ```
 
 3. **Verify installation and environment**
@@ -316,21 +316,15 @@ cmake --build build --config Release -j -v
 #### Nvidia GPU
 
 ```sh
-# Export relevant ENV variables
-export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LD_LIBRARY_PATH
-export LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LIBRARY_PATH
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_DIR
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
-
 # Build LLAMA with Nvidia BLAS acceleration through SYCL
 # Setting GGML_SYCL_DEVICE_ARCH is optional but can improve performance
 GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture
 
 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DoneMath_DIR=/path/to/oneMath/buildWithCublas/install/lib/cmake/oneMath
 
 # Option 2: Use FP16
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DoneMath_DIR=/path/to/oneMath/buildWithCublas/install/lib/cmake/oneMath -DGGML_SYCL_F16=ON
 
 # build all binary
 cmake --build build --config Release -j -v
@@ -339,18 +333,13 @@ cmake --build build --config Release -j -v
 #### AMD GPU
 
 ```sh
-# Export relevant ENV variables
-export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LD_LIBRARY_PATH
-export LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LIBRARY_PATH
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE_DIR
-
 # Build LLAMA with rocBLAS acceleration through SYCL
 
 ## AMD
 # Use FP32, FP16 is not supported
 # Find your GGML_SYCL_DEVICE_ARCH with rocminfo, under the key 'Name:'
 GGML_SYCL_DEVICE_ARCH=gfx90a # Example architecture
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DoneMath_DIR=/path/to/oneMath/buildWithrocBLAS/install/lib/cmake/oneMath
 
 # build all binary
 cmake --build build --config Release -j -v
@@ -659,7 +648,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 | Name               | Value                                 | Function                                    |
 |--------------------|---------------------------------------|---------------------------------------------|
 | GGML_SYCL          | ON (mandatory)                        | Enable build with SYCL code path.<br>FP32 path - recommended for better perforemance than FP16 on quantized model|
-| GGML_SYCL_TARGET   | INTEL *(default)* \| NVIDIA \| AMD    | Set the SYCL target device type.            |
+| GGML_SYCL_TARGET   | INTEL *(default)* \| INTEL_CPU \| INTEL_GPU \| NVIDIA \| AMD    | Set the SYCL target device type.            |
 | GGML_SYCL_DEVICE_ARCH | Optional (except for AMD)          | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
 | GGML_SYCL_F16      | OFF *(default)* \|ON *(optional)*     | Enable FP16 build with SYCL code path.      |
 | CMAKE_C_COMPILER   | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path.      |

diff --git a/examples/sycl/README.md b/examples/sycl/README.md
@@ -14,7 +14,7 @@ List all SYCL devices with ID, compute capability, max work group size, ect.
 
 1. Build the llama.cpp for SYCL for the specified target *(using GGML_SYCL_TARGET)*.
 
-2. Enable oneAPI running environment *(if GGML_SYCL_TARGET is set to INTEL -default-)*
+2. Enable oneAPI running environment *(if GGML_SYCL_TARGET is set to INTEL -default-, INTEL_CPU or INTEL_GPU)*
 
 ```
 source /opt/intel/oneapi/setvars.sh

diff --git a/ggml/cmake/ggml-config.cmake.in b/ggml/cmake/ggml-config.cmake.in
@@ -78,7 +78,7 @@ if (NOT GGML_SHARED_LIB)
 
     if (GGML_SYCL)
         find_package(DNNL)
-        if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
+        if (DNNL_FOUND AND GGML_SYCL_TARGET MATCHES "^INTEL(_CPU|_GPU)?$")
             list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl)
         endif()
         if (WIN32)

diff --git a/ggml/src/ggml-sycl/CMakeLists.txt b/ggml/src/ggml-sycl/CMakeLists.txt
@@ -1,6 +1,6 @@
 message(STATUS  "GGML_SYCL_TARGET=${GGML_SYCL_TARGET}")
 
-if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA|AMD)$")
-    message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, NVIDIA, or AMD")
+if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|INTEL_CPU|INTEL_GPU|NVIDIA|AMD)$")
+    message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, INTEL_CPU, INTEL_GPU, NVIDIA, or AMD")
 endif()
 
@@ -30,8 +30,6 @@ if (GGML_SYCL_F16)
     add_compile_definitions(GGML_SYCL_F16)
 endif()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")
-
 if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
     add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
 elseif (GGML_SYCL_TARGET STREQUAL "AMD")
@@ -51,36 +49,93 @@ target_sources(ggml-sycl PRIVATE ${GGML_HEADERS_SYCL} ${GGML_SOURCES_SYCL})
 find_package(DNNL)
 message("-- DNNL found:" ${DNNL_FOUND})
 
-if (GGML_SYCL_TARGET STREQUAL "INTEL")
+if (GGML_SYCL_TARGET MATCHES "^INTEL(_CPU|_GPU)?$")
     add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND})
 else()
     add_compile_definitions(GGML_SYCL_DNNL=0)
 endif()
 
-if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
+if (DNNL_FOUND AND GGML_SYCL_TARGET MATCHES "^INTEL(_CPU|_GPU)?$")
     target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
 endif()
 
-if (WIN32)
-    find_package(IntelSYCL REQUIRED)
-    find_package(MKL REQUIRED)
-    target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
+find_package(IntelSYCL)
+if (IntelSYCL_FOUND)
+    # Preferred path: the IntelSYCL package was found, so link its imported IntelSYCL::SYCL_CXX target
+    target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
 else()
-    if (GGML_SYCL_TARGET STREQUAL "INTEL")
-        target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
-    elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
-        add_compile_definitions(GGML_SYCL_NVIDIA)
-        target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl_blas_cublas)
+    # Fallback when the IntelSYCL package is not found (e.g. an intel/llvm nightly): pass -fsycl directly to both the compile and link steps
+    target_compile_options(ggml-sycl PRIVATE "-fsycl")
+    target_link_options(ggml-sycl PRIVATE "-fsycl")
+endif()
+
+target_compile_options(ggml-sycl PRIVATE "-Wno-narrowing")
+
+find_package(oneMath QUIET)
+if (NOT oneMath_FOUND)
+    message(STATUS "oneMath not found: oneMath will be automatically downloaded")
+    # No installed oneMath package: pull and build it in-tree with FetchContent, BLAS domain only
+    include(FetchContent)
+    set(BUILD_FUNCTIONAL_TESTS False)
+    set(BUILD_EXAMPLES False)
+    set(TARGET_DOMAINS blas)
+    if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+        set(ENABLE_MKLCPU_BACKEND False)
+        set(ENABLE_MKLGPU_BACKEND False)
+        set(ENABLE_CUBLAS_BACKEND True)
     elseif (GGML_SYCL_TARGET STREQUAL "AMD")
-        if (NOT GGML_SYCL_DEVICE_ARCH)
-            message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
+        set(ENABLE_MKLCPU_BACKEND False)
+        set(ENABLE_MKLGPU_BACKEND False)
+        set(ENABLE_ROCBLAS_BACKEND True)
+    endif()
+    FetchContent_Declare(
+        ONEMATH
+        GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
+        GIT_TAG develop # NOTE(review): moving branch; pin a release tag or commit hash for reproducible builds
+    )
+    FetchContent_MakeAvailable(ONEMATH)
+    # onemath_alias(<target>): if <target> exists, silence its warnings and expose it as ONEMATH::<target>, the name the link step below uses
+    function(onemath_alias target)
+        if (TARGET ${target})
+            # Silence verbose warnings from external libraries
+            target_compile_options(${target} PRIVATE -Wno-uninitialized -Wno-unused-parameter -Wno-unused-variable -Wno-cast-qual)
+            add_library(ONEMATH::${target} ALIAS ${target})
         endif()
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa")
-        target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
+    endfunction()
+    onemath_alias(onemath)
+    onemath_alias(onemath_blas_mklcpu)
+    onemath_alias(onemath_blas_mklgpu)
+    onemath_alias(onemath_blas_cublas)
+    onemath_alias(onemath_blas_rocblas)
+endif()
+
+# Link the backend-specific oneMath library (compile-time dispatch, better performance) when the target is explicit
+if (GGML_SYCL_TARGET STREQUAL "INTEL_CPU")
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_mklcpu)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_INTEL GGML_SYCL_INTEL_CPU)
+elseif (GGML_SYCL_TARGET STREQUAL "INTEL_GPU")
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_mklgpu)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_INTEL GGML_SYCL_INTEL_GPU)
+elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_cublas)
+    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
+    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
+elseif (GGML_SYCL_TARGET STREQUAL "AMD")
+    if (NOT GGML_SYCL_DEVICE_ARCH)
+        message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
     endif()
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
+    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
+    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_AMD)
+else()
+    # Any other accepted target value: link the oneMath runtime dispatcher
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GENERIC)
+endif()
 
-    if (GGML_SYCL_DEVICE_ARCH)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
-  endif()
+if (GGML_SYCL_DEVICE_ARCH)
+    target_compile_options(ggml-sycl PRIVATE "SHELL:-Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}") # SHELL: keeps the flag pair together through option de-duplication
+    target_link_options(ggml-sycl PRIVATE "SHELL:-Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
 endif()