ARM-software
diff --git a/‎Android.bp
Lines changed: 2 additions & 3 deletions b/‎Android.bp
Lines changed: 2 additions & 3 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 13 additions & 8 deletions b/‎CMakeLists.txt
Lines changed: 13 additions & 8 deletions
diff --git a/‎README.md
Lines changed: 12 additions & 12 deletions b/‎README.md
Lines changed: 12 additions & 12 deletions
diff --git a/‎SConscript
Lines changed: 2 additions & 2 deletions b/‎SConscript
Lines changed: 2 additions & 2 deletions
diff --git a/‎SConstruct
Lines changed: 6 additions & 2 deletions b/‎SConstruct
Lines changed: 6 additions & 2 deletions
diff --git a/‎arm_compute/core/utils/DataTypeUtils.h
Lines changed: 3 additions & 0 deletions b/‎arm_compute/core/utils/DataTypeUtils.h
Lines changed: 3 additions & 0 deletions
diff --git a/‎arm_compute/runtime/CL/CLScheduler.h
Lines changed: 6 additions & 4 deletions b/‎arm_compute/runtime/CL/CLScheduler.h
Lines changed: 6 additions & 4 deletions
diff --git a/‎arm_compute/runtime/CL/CLTensorAllocator.h
Lines changed: 7 additions & 4 deletions b/‎arm_compute/runtime/CL/CLTensorAllocator.h
Lines changed: 7 additions & 4 deletions
diff --git a/‎arm_compute/runtime/ITensorAllocator.h
Lines changed: 10 additions & 4 deletions b/‎arm_compute/runtime/ITensorAllocator.h
Lines changed: 10 additions & 4 deletions
diff --git a/‎arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
Lines changed: 0 additions & 1 deletion b/‎arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
Lines changed: 0 additions & 1 deletion
diff --git a/‎arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
Lines changed: 2 additions & 1 deletion b/‎arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
Lines changed: 2 additions & 1 deletion
diff --git a/‎arm_compute/runtime/NEON/functions/NEReverse.h
Lines changed: 5 additions & 10 deletions b/‎arm_compute/runtime/NEON/functions/NEReverse.h
Lines changed: 5 additions & 10 deletions
@@ -202,12 +202,9 @@ cc_library_static {
         "src/core/AccessWindowAutoPadding.cpp",
         "src/core/AccessWindowStatic.cpp",
         "src/core/AccessWindowTranspose.cpp",
-        "src/core/CL/CLCommandBuffer.cpp",
-        "src/core/CL/CLCompatCommandBuffer.cpp",
         "src/core/CL/CLCompileContext.cpp",
         "src/core/CL/CLHelpers.cpp",
         "src/core/CL/CLKernelLibrary.cpp",
-        "src/core/CL/CLMutableCommandBuffer.cpp",
         "src/core/CL/CLUtils.cpp",
         "src/core/CL/DefaultLWSHeuristics.cpp",
         "src/core/CL/ICLKernel.cpp",
@@ -466,6 +463,7 @@ cc_library_static {
         "src/cpu/kernels/activation/generic/neon/qasymm8.cpp",
         "src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp",
         "src/cpu/kernels/activation/generic/neon/qsymm16.cpp",
+        "src/cpu/kernels/activation/heuristics/CpuActivationKernelHeuristics.cpp",
         "src/cpu/kernels/add/generic/neon/fp16.cpp",
         "src/cpu/kernels/add/generic/neon/fp32.cpp",
         "src/cpu/kernels/add/generic/neon/impl.cpp",
@@ -1032,6 +1030,7 @@ cc_library_static {
         "src/runtime/experimental/operators/CpuGemmConv2d.cpp",
         "src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp",
         "src/runtime/experimental/operators/CpuMul.cpp",
+        "src/runtime/experimental/operators/CpuSoftmax.cpp",
         "src/runtime/experimental/operators/CpuSub.cpp",
         "src/runtime/experimental/operators/CpuTranspose.cpp",
         "src/runtime/experimental/operators/CpuWinogradConv2d.cpp",
 
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
 project(
   ArmCompute
-  VERSION 41.0.0
+  VERSION 42.0.0
   DESCRIPTION
     "The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
   LANGUAGES C CXX ASM)
@@ -138,11 +138,12 @@ if(ARM_COMPUTE_OPENMP)
 endif()
 
 # ---------------------------------------------------------------------
-# SVE Library
+# SVE Object Library
 
-add_library(arm_compute_sve "")
+add_library(arm_compute_sve OBJECT "")
 target_compile_options(arm_compute_sve
-                       PRIVATE "-march=armv8.2-a+sve+fp16+dotprod")
+                       PRIVATE "-march=armv8.2-a+sve+fp16+dotprod"
+                       PRIVATE "-fPIC")
 target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_BF16)
 target_compile_definitions(arm_compute_sve PRIVATE ENABLE_SVE)
 target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_SVE)
@@ -160,11 +161,12 @@ target_include_directories(
          src/core/NEON/kernels/arm_gemm/merges)
 
 # ---------------------------------------------------------------------
-# SVE2 Library
+# SVE2 Object Library
 
-add_library(arm_compute_sve2 "")
+add_library(arm_compute_sve2 OBJECT "")
 target_compile_options(arm_compute_sve2
-                       PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod")
+                       PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod"
+                       PRIVATE "-fPIC")
 target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE2)
 target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_BF16)
 target_compile_definitions(arm_compute_sve2 PRIVATE ENABLE_SVE)
@@ -205,8 +207,11 @@ target_include_directories(
 target_compile_options(arm_compute PUBLIC ${COMMON_CXX_FLAGS})
 
 add_library(ArmCompute::Core ALIAS arm_compute)
+
+# arm_compute_sve and arm_compute_sve2 obj files will not be public in the arm_compute.so
 target_link_libraries(
-  arm_compute PUBLIC arm_compute_sve arm_compute_sve2)
+  arm_compute PRIVATE $<TARGET_OBJECTS:arm_compute_sve>
+              PRIVATE $<TARGET_OBJECTS:arm_compute_sve2>)
 
 # ---------------------------------------------------------------------
 # Graph Library
 
@@ -9,7 +9,7 @@
  <img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
 </div>
 
-# Compute Library ![](https://img.shields.io/badge/latest_release-24.08.1-green)
+# Compute Library ![](https://img.shields.io/badge/latest_release-24.09-green)
 
 
 The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
 <br>
 
 ## Documentation
-[![Documentation](https://img.shields.io/badge/documentation-24.08.1-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/index.xhtml)
+[![Documentation](https://img.shields.io/badge/documentation-24.09-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/index.xhtml)
 
 > Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
 
@@ -50,22 +50,22 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
 
 | Platform       | Operating System | Release archive (Download) |
 | -------------- | ---------------- | -------------------------- |
-| Raspberry Pi 4 | Linux® 32bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-bin.tar.gz) |
-| Raspberry Pi 4 | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) |
-| Odroid N2      | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
-| HiKey960       | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
+| Raspberry Pi 4 | Linux® 32bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-bin.tar.gz) |
+| Raspberry Pi 4 | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) |
+| Odroid N2      | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
+| HiKey960       | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
 
 <br>
 
 | Architecture | Operating System | Release archive (Download) |
 | ------------ | ---------------- | -------------------------- |
-| armv7        | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-gpu-bin.tar.gz) |
-| arm64-v8a    | Android™          | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-android-aarch64-cpu-gpu-bin.tar.gz) |
-| arm64-v8a    | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
+| armv7        | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-gpu-bin.tar.gz) |
+| arm64-v8a    | Android™          | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-android-aarch64-cpu-gpu-bin.tar.gz) |
+| arm64-v8a    | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
 
 <br>
 
-Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.08.1-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.08.1)
+Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.09-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.09)
 
 Pre-build binaries are generated with the following security / good coding practices related flags:
 > -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
@@ -107,13 +107,13 @@ Pre-build binaries are generated with the following security / good coding pract
 
 ## Experimental builds
 
-**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/how_to_build.xhtml) for more details.
+**⚠ Important** Bazel and CMake builds are experimental CPU-only builds; please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/how_to_build.xhtml) for more details.
 
 <br>
 
 ## How to contribute
 
-Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/contribution_guidelines.xhtml).
+Contributions to the Compute Library are more than welcome. If you are interested in contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/contribution_guidelines.xhtml).
 
 ### Developer Certificate of Origin (DCO)
 Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)
 
@@ -33,8 +33,8 @@ import codecs
 import platform
 import SCons
 
-VERSION = "v24.08.1"
-LIBRARY_VERSION_MAJOR = 41
+VERSION = "v24.09"
+LIBRARY_VERSION_MAJOR = 42
 LIBRARY_VERSION_MINOR = 0
 LIBRARY_VERSION_PATCH = 0
 SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
 
@@ -281,8 +281,12 @@ if env['cppthreads']:
 
 if env['openmp']:
     env.Append(CPPDEFINES = [('ARM_COMPUTE_OPENMP_SCHEDULER', 1)])
-    env.Append(CXXFLAGS = ['-fopenmp'])
-    env.Append(LINKFLAGS = ['-fopenmp'])
+    if not 'windows' in env['os']:
+        env.Append(CXXFLAGS = ['-fopenmp'])
+        env.Append(LINKFLAGS = ['-fopenmp'])
+    else:
+        env.Append(CXXFLAGS = ['-openmp'])
+        env.Append(LINKFLAGS = ['libomp.lib'])
 
 # Validate and define state
 if env['estate'] == 'auto':
 
@@ -97,9 +97,12 @@ inline size_t element_size_from_data_type(DataType dt)
         case DataType::S32:
         case DataType::F32:
             return 4;
+        case DataType::F64:
         case DataType::U64:
         case DataType::S64:
             return 8;
+        case DataType::SIZET:
+            return sizeof(size_t); // portable
         default:
             ARM_COMPUTE_ERROR("Undefined element size for given data type");
             return 0;
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2022 Arm Limited.
+ * Copyright (c) 2016-2022, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CLSCHEDULER_H
-#define ARM_COMPUTE_CLSCHEDULER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H
+#define ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLTypes.h"
@@ -211,6 +211,8 @@ class CLScheduler final
     bool                    _job_chaining_enabled;
     int                     _job_chaining_size;
     int                     _job_chaining_count;
+    unsigned int            _enqueue_count;
+    unsigned int            _flush_count;
 };
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CLSCHEDULER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CLTENSORALLOCATOR_H
-#define ARM_COMPUTE_CLTENSORALLOCATOR_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H
+#define ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H
 
 #include "arm_compute/core/CL/CLTypes.h"
 #include "arm_compute/core/CL/OpenCL.h"
@@ -106,6 +106,9 @@ class CLTensorAllocator : public ITensorAllocator
      *
      */
     void free() override;
+
+    bool is_allocated() const override;
+
     /** Import an existing memory as a tensor's backing memory
      *
      * @warning memory should have been created under the same context that Compute Library uses.
@@ -156,4 +159,4 @@ class CLTensorAllocator : public ITensorAllocator
     CLInt32Array       _offset;                  /**< Offsets array in case of quantized per channel data type */
 };
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CLTENSORALLOCATOR_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_ITENSORALLOCATOR_H
-#define ARM_COMPUTE_ITENSORALLOCATOR_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H
+#define ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H
 
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
@@ -91,6 +91,12 @@ class ITensorAllocator
      */
     virtual void free() = 0;
 
+    /** Return whether the tensor is currently allocated.
+     *
+     * @return true if the tensor is allocated, false otherwise.
+     */
+    virtual bool is_allocated() const = 0;
+
 protected:
     /** Interface to be implemented by the child class to lock the memory allocation for the CPU to access.
      *
@@ -106,4 +112,4 @@ class ITensorAllocator
     size_t      _alignment{};            /**< Tensor's alignment in bytes */
 };
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_ITENSORALLOCATOR_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H
@@ -67,7 +67,6 @@ class NEArithmeticSubtraction : public IFunction
      * |QASYMM8        |QASYMM8        |QASYMM8        |
      * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
      * |QSYMM16        |QSYMM16        |QASYMM16       |
-     * |QSYMM16        |QSYMM16        |S32            |
      * |U8             |U8             |U8             |
      * |S16            |S16            |S16            |
      * |S32            |S32            |S32            |
 
@@ -69,8 +69,9 @@ class NEPixelWiseMultiplication : public IFunction
      * |U8             |S16            |S16            |
      * |S16            |U8             |S16            |
      * |S16            |S16            |S16            |
+     * |S32            |S32            |S32            |
      * |F16            |F16            |F16            |
-     * |F32            |S32            |F32            |
+     * |F32            |F32            |F32            |
      *
      * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
      *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
 
@@ -42,11 +42,11 @@ class NEReverse : public INESimpleFunctionNoBorder
      * - All
      *
      * Valid data type configurations:
-     * |src0           |src1           |dst            |
-     * |:--------------|:--------------|:--------------|
-     * |All            |U32, S32       |All            |
+     * |src0                        |src1           |dst                         |
+     * |:---------------------------|:--------------|:---------------------------|
+     * |All except SIZET <= 32-bits |U32, S32       |All except SIZET <= 32-bits |
      *
-     * @param[in]  input             Input tensor. Data types supported: All
+     * @param[in]  input             Input tensor. Data types supported: All except SIZET <= 32-bit data types
      * @param[out] output            Output tensor. Data type supported: Same as @p input
      * @param[in]  axis              Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
      * @param[in]  use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis
@@ -60,12 +60,7 @@ class NEReverse : public INESimpleFunctionNoBorder
     void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis = false);
     /** Static function to check if given info will lead to a valid configuration of NEReverseKernel
      *
-     * @param[in] input             Input tensor info. Data types supported: All
-     * @param[in] output            Output tensor info. Data type supported: Same as @p input
-     * @param[in] axis              Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
-     * @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis
-     *
-     * @return a status
+     * Similar to @ref NEReverse::configure()
      */
     static Status validate(const ITensorInfo *input,
                            const ITensorInfo *output,