Skip to content

Commit c61bd33

Browse files
author
Jenkins
committed
Compute Library v24.09
1 parent de7288c commit c61bd33

File tree

124 files changed

+3554
-1397
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

124 files changed

+3554
-1397
lines changed

Android.bp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,12 +202,9 @@ cc_library_static {
202202
"src/core/AccessWindowAutoPadding.cpp",
203203
"src/core/AccessWindowStatic.cpp",
204204
"src/core/AccessWindowTranspose.cpp",
205-
"src/core/CL/CLCommandBuffer.cpp",
206-
"src/core/CL/CLCompatCommandBuffer.cpp",
207205
"src/core/CL/CLCompileContext.cpp",
208206
"src/core/CL/CLHelpers.cpp",
209207
"src/core/CL/CLKernelLibrary.cpp",
210-
"src/core/CL/CLMutableCommandBuffer.cpp",
211208
"src/core/CL/CLUtils.cpp",
212209
"src/core/CL/DefaultLWSHeuristics.cpp",
213210
"src/core/CL/ICLKernel.cpp",
@@ -466,6 +463,7 @@ cc_library_static {
466463
"src/cpu/kernels/activation/generic/neon/qasymm8.cpp",
467464
"src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp",
468465
"src/cpu/kernels/activation/generic/neon/qsymm16.cpp",
466+
"src/cpu/kernels/activation/heuristics/CpuActivationKernelHeuristics.cpp",
469467
"src/cpu/kernels/add/generic/neon/fp16.cpp",
470468
"src/cpu/kernels/add/generic/neon/fp32.cpp",
471469
"src/cpu/kernels/add/generic/neon/impl.cpp",
@@ -1032,6 +1030,7 @@ cc_library_static {
10321030
"src/runtime/experimental/operators/CpuGemmConv2d.cpp",
10331031
"src/runtime/experimental/operators/CpuGemmDirectConv2d.cpp",
10341032
"src/runtime/experimental/operators/CpuMul.cpp",
1033+
"src/runtime/experimental/operators/CpuSoftmax.cpp",
10351034
"src/runtime/experimental/operators/CpuSub.cpp",
10361035
"src/runtime/experimental/operators/CpuTranspose.cpp",
10371036
"src/runtime/experimental/operators/CpuWinogradConv2d.cpp",

CMakeLists.txt

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2828
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
2929
project(
3030
ArmCompute
31-
VERSION 41.0.0
31+
VERSION 42.0.0
3232
DESCRIPTION
3333
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
3434
LANGUAGES C CXX ASM)
@@ -138,11 +138,12 @@ if(ARM_COMPUTE_OPENMP)
138138
endif()
139139

140140
# ---------------------------------------------------------------------
141-
# SVE Library
141+
# SVE Object Library
142142

143-
add_library(arm_compute_sve "")
143+
add_library(arm_compute_sve OBJECT "")
144144
target_compile_options(arm_compute_sve
145-
PRIVATE "-march=armv8.2-a+sve+fp16+dotprod")
145+
PRIVATE "-march=armv8.2-a+sve+fp16+dotprod"
146+
PRIVATE "-fPIC")
146147
target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_BF16)
147148
target_compile_definitions(arm_compute_sve PRIVATE ENABLE_SVE)
148149
target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_SVE)
@@ -160,11 +161,12 @@ target_include_directories(
160161
src/core/NEON/kernels/arm_gemm/merges)
161162

162163
# ---------------------------------------------------------------------
163-
# SVE2 Library
164+
# SVE2 Object Library
164165

165-
add_library(arm_compute_sve2 "")
166+
add_library(arm_compute_sve2 OBJECT "")
166167
target_compile_options(arm_compute_sve2
167-
PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod")
168+
PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod"
169+
PRIVATE "-fPIC")
168170
target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE2)
169171
target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_BF16)
170172
target_compile_definitions(arm_compute_sve2 PRIVATE ENABLE_SVE)
@@ -205,8 +207,11 @@ target_include_directories(
205207
target_compile_options(arm_compute PUBLIC ${COMMON_CXX_FLAGS})
206208

207209
add_library(ArmCompute::Core ALIAS arm_compute)
210+
211+
# arm_compute_sve and arm_compute_sve2 obj files will not be public in the arm_compute.so
208212
target_link_libraries(
209-
arm_compute PUBLIC arm_compute_sve arm_compute_sve2)
213+
arm_compute PRIVATE $<TARGET_OBJECTS:arm_compute_sve>
214+
PRIVATE $<TARGET_OBJECTS:arm_compute_sve2>)
210215

211216
# ---------------------------------------------------------------------
212217
# Graph Library

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
1010
</div>
1111

12-
# Compute Library ![](https://img.shields.io/badge/latest_release-24.08.1-green)
12+
# Compute Library ![](https://img.shields.io/badge/latest_release-24.09-green)
1313

1414

1515
The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPU architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
3737
<br>
3838

3939
## Documentation
40-
[![Documentation](https://img.shields.io/badge/documentation-24.08.1-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/index.xhtml)
40+
[![Documentation](https://img.shields.io/badge/documentation-24.09-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/index.xhtml)
4141

4242
> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
4343
@@ -50,22 +50,22 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
5050

5151
| Platform | Operating System | Release archive (Download) |
5252
| -------------- | ---------------- | -------------------------- |
53-
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-bin.tar.gz) |
54-
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) |
55-
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
56-
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
53+
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-bin.tar.gz) |
54+
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) |
55+
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
56+
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
5757

5858
<br>
5959

6060
| Architecture | Operating System | Release archive (Download) |
6161
| ------------ | ---------------- | -------------------------- |
62-
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-armv7a-cpu-gpu-bin.tar.gz) |
63-
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-android-aarch64-cpu-gpu-bin.tar.gz) |
64-
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.08.1/arm_compute-v24.08.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
62+
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-armv7a-cpu-gpu-bin.tar.gz) |
63+
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-android-aarch64-cpu-gpu-bin.tar.gz) |
64+
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.09/arm_compute-v24.09-linux-aarch64-cpu-gpu-bin.tar.gz) |
6565

6666
<br>
6767

68-
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.08.1-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.08.1)
68+
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.09-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.09)
6969

7070
Pre-built binaries are generated with the following security / good coding practices related flags:
7171
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
@@ -107,13 +107,13 @@ Pre-build binaries are generated with the following security / good coding pract
107107

108108
## Experimental builds
109109

110-
**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/how_to_build.xhtml) for more details.
110+
**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/how_to_build.xhtml) for more details.
111111

112112
<br>
113113

114114
## How to contribute
115115

116-
Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.08.1/contribution_guidelines.xhtml).
116+
Contributions to the Compute Library are more than welcome. If you are interested in contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.09/contribution_guidelines.xhtml).
117117

118118
### Developer Certificate of Origin (DCO)
119119
Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)

SConscript

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ import codecs
3333
import platform
3434
import SCons
3535

36-
VERSION = "v24.08.1"
37-
LIBRARY_VERSION_MAJOR = 41
36+
VERSION = "v24.09"
37+
LIBRARY_VERSION_MAJOR = 42
3838
LIBRARY_VERSION_MINOR = 0
3939
LIBRARY_VERSION_PATCH = 0
4040
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)

SConstruct

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,12 @@ if env['cppthreads']:
281281

282282
if env['openmp']:
283283
env.Append(CPPDEFINES = [('ARM_COMPUTE_OPENMP_SCHEDULER', 1)])
284-
env.Append(CXXFLAGS = ['-fopenmp'])
285-
env.Append(LINKFLAGS = ['-fopenmp'])
284+
if not 'windows' in env['os']:
285+
env.Append(CXXFLAGS = ['-fopenmp'])
286+
env.Append(LINKFLAGS = ['-fopenmp'])
287+
else:
288+
env.Append(CXXFLAGS = ['-openmp'])
289+
env.Append(LINKFLAGS = ['libomp.lib'])
286290

287291
# Validate and define state
288292
if env['estate'] == 'auto':

arm_compute/core/utils/DataTypeUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,12 @@ inline size_t element_size_from_data_type(DataType dt)
9797
case DataType::S32:
9898
case DataType::F32:
9999
return 4;
100+
case DataType::F64:
100101
case DataType::U64:
101102
case DataType::S64:
102103
return 8;
104+
case DataType::SIZET:
105+
return sizeof(size_t); // portable
103106
default:
104107
ARM_COMPUTE_ERROR("Undefined element size for given data type");
105108
return 0;

arm_compute/runtime/CL/CLScheduler.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2022 Arm Limited.
2+
* Copyright (c) 2016-2022, 2024 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -21,8 +21,8 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24-
#ifndef ARM_COMPUTE_CLSCHEDULER_H
25-
#define ARM_COMPUTE_CLSCHEDULER_H
24+
#ifndef ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H
25+
#define ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H
2626

2727
#include "arm_compute/core/CL/CLHelpers.h"
2828
#include "arm_compute/core/CL/CLTypes.h"
@@ -211,6 +211,8 @@ class CLScheduler final
211211
bool _job_chaining_enabled;
212212
int _job_chaining_size;
213213
int _job_chaining_count;
214+
unsigned int _enqueue_count;
215+
unsigned int _flush_count;
214216
};
215217
} // namespace arm_compute
216-
#endif /* ARM_COMPUTE_CLSCHEDULER_H */
218+
#endif // ACL_ARM_COMPUTE_RUNTIME_CL_CLSCHEDULER_H

arm_compute/runtime/CL/CLTensorAllocator.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2021 Arm Limited.
2+
* Copyright (c) 2016-2021, 2024 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -21,8 +21,8 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24-
#ifndef ARM_COMPUTE_CLTENSORALLOCATOR_H
25-
#define ARM_COMPUTE_CLTENSORALLOCATOR_H
24+
#ifndef ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H
25+
#define ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H
2626

2727
#include "arm_compute/core/CL/CLTypes.h"
2828
#include "arm_compute/core/CL/OpenCL.h"
@@ -106,6 +106,9 @@ class CLTensorAllocator : public ITensorAllocator
106106
*
107107
*/
108108
void free() override;
109+
110+
bool is_allocated() const override;
111+
109112
/** Import an existing memory as a tensor's backing memory
110113
*
111114
* @warning memory should have been created under the same context that Compute Library uses.
@@ -156,4 +159,4 @@ class CLTensorAllocator : public ITensorAllocator
156159
CLInt32Array _offset; /**< Offsets array in case of quantized per channel data type */
157160
};
158161
} // namespace arm_compute
159-
#endif /* ARM_COMPUTE_CLTENSORALLOCATOR_H */
162+
#endif // ACL_ARM_COMPUTE_RUNTIME_CL_CLTENSORALLOCATOR_H

arm_compute/runtime/ITensorAllocator.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2021 Arm Limited.
2+
* Copyright (c) 2016-2021, 2024 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -21,8 +21,8 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24-
#ifndef ARM_COMPUTE_ITENSORALLOCATOR_H
25-
#define ARM_COMPUTE_ITENSORALLOCATOR_H
24+
#ifndef ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H
25+
#define ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H
2626

2727
#include "arm_compute/core/TensorInfo.h"
2828
#include "arm_compute/core/Types.h"
@@ -91,6 +91,12 @@ class ITensorAllocator
9191
*/
9292
virtual void free() = 0;
9393

94+
/** Return whether the tensor is currently allocated.
95+
*
96+
* @return true if the tensor is allocated, false otherwise.
97+
*/
98+
virtual bool is_allocated() const = 0;
99+
94100
protected:
95101
/** Interface to be implemented by the child class to lock the memory allocation for the CPU to access.
96102
*
@@ -106,4 +112,4 @@ class ITensorAllocator
106112
size_t _alignment{}; /**< Tensor's alignment in bytes */
107113
};
108114
} // namespace arm_compute
109-
#endif /*ARM_COMPUTE_ITENSORALLOCATOR_H */
115+
#endif // ACL_ARM_COMPUTE_RUNTIME_ITENSORALLOCATOR_H

arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ class NEArithmeticSubtraction : public IFunction
6767
* |QASYMM8 |QASYMM8 |QASYMM8 |
6868
* |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
6969
* |QSYMM16 |QSYMM16 |QASYMM16 |
70-
* |QSYMM16 |QSYMM16 |S32 |
7170
* |U8 |U8 |U8 |
7271
* |S16 |S16 |S16 |
7372
* |S32 |S32 |S32 |

arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,9 @@ class NEPixelWiseMultiplication : public IFunction
6969
* |U8 |S16 |S16 |
7070
* |S16 |U8 |S16 |
7171
* |S16 |S16 |S16 |
72+
* |S32 |S32 |S32 |
7273
* |F16 |F16 |F16 |
73-
* |F32 |S32 |F32 |
74+
* |F32 |F32 |F32 |
7475
*
7576
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
7677
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.

arm_compute/runtime/NEON/functions/NEReverse.h

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ class NEReverse : public INESimpleFunctionNoBorder
4242
* - All
4343
*
4444
* Valid data type configurations:
45-
* |src0 |src1 |dst |
46-
* |:--------------|:--------------|:--------------|
47-
* |All |U32, S32 |All |
45+
* |src0 |src1 |dst |
46+
* |:---------------------------|:--------------|:---------------------------|
47+
* |All except SIZET <= 32-bits |U32, S32 |All except SIZET <= 32-bits |
4848
*
49-
* @param[in] input Input tensor. Data types supported: All
49+
* @param[in] input Input tensor. Data types supported: All except SIZET <= 32-bit data types
5050
* @param[out] output Output tensor. Data type supported: Same as @p input
5151
* @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
5252
* @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis
@@ -60,12 +60,7 @@ class NEReverse : public INESimpleFunctionNoBorder
6060
void configure(const ITensor *input, ITensor *output, const ITensor *axis, const bool use_inverted_axis = false);
6161
/** Static function to check if given info will lead to a valid configuration of NEReverseKernel
6262
*
63-
* @param[in] input Input tensor info. Data types supported: All
64-
* @param[in] output Output tensor info. Data type supported: Same as @p input
65-
* @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32/S32
66-
* @param[in] use_inverted_axis Reverse ACL axis indices convention, if true, (inverted)axis = (tensor_rank - 1) - axis
67-
*
68-
* @return a status
63+
* Similar to @ref NEReverse::configure()
6964
*/
7065
static Status validate(const ITensorInfo *input,
7166
const ITensorInfo *output,

0 commit comments

Comments
 (0)