Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XNN_FLAG_CONSISTENT_ARITHMETIC #8077

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions include/xnnpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ extern "C" {
/// Retain reduced dimensions with length 1.
#define XNN_FLAG_KEEP_DIMS 0x00000040

/// This flag indicates that XNNPACK should attempt to produce numerically
/// consistent results within a specific build of XNNPACK. It causes XNNPACK
/// to avoid codepaths whose results would be numerically inconsistent with
/// any other codepath available in the same compiled XNNPACK library.
#define XNN_FLAG_CONSISTENT_ARITHMETIC 0x00000100

// Next unused flag value: 0x00000200.

/// The number of entries in an array of xnn_quantization_params that XNNPACK may read beyond array bounds.
Expand Down Expand Up @@ -1680,11 +1686,11 @@ enum xnn_status xnn_define_copy(
/// dimensions is added to it.
/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a
/// subgraph.
/// @param num_outputs - The number of output tensors to generate. The input tensor will be evenly split into
/// this number of output tensors along the `split_dim`. Each output tensor will have
/// the same dimensions as the input tensor, except for the `split_dim`, which will be
/// @param num_outputs - The number of output tensors to generate. The input tensor will be evenly split into
/// this number of output tensors along the `split_dim`. Each output tensor will have
/// the same dimensions as the input tensor, except for the `split_dim`, which will be
/// divided evenly between the outputs.
/// @param outputs - An array of Value IDs for the output tensors. Each output tensor must be an N-dimensional
/// @param outputs - An array of Value IDs for the output tensors. Each output tensor must be an N-dimensional
/// tensor defined in the @a subgraph with the same shape as the input tensor, except along the
/// `split_dim` dimension, which will be split evenly among the output tensors. The number of
/// output tensors corresponds to the value of `num_outputs`.
Expand Down
1,389 changes: 700 additions & 689 deletions src/configs/gemm-config.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/operators/batch-matrix-multiply-nc.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ enum xnn_status xnn_create_batch_matrix_multiply_nc_bf16_f32(

enum xnn_status xnn_create_batch_matrix_multiply_nc_f32(
uint32_t flags, xnn_operator_t* batch_matrix_multiply_op_out) {
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config();
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config(flags);
if (gemm_config == NULL) {
xnn_log_error(
"failed to create %s operator: unsupported hardware configuration",
Expand Down
2 changes: 1 addition & 1 deletion src/operators/convolution-nhwc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1784,7 +1784,7 @@ enum xnn_status xnn_create_convolution2d_nhwc_f32(
return xnn_status_unsupported_hardware;
}

const struct xnn_gemm_config* gemm_nr2_config = xnn_init_f32_gemm_nr2_config();
const struct xnn_gemm_config* gemm_nr2_config = xnn_init_f32_gemm_nr2_config(flags);
if (gemm_nr2_config == NULL) {
xnn_log_error("failed to create %s operator: unsupported hardware configuration",
xnn_operator_type_to_string(xnn_operator_type_convolution_nhwc_f32));
Expand Down
2 changes: 1 addition & 1 deletion src/operators/deconvolution-nhwc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,7 @@ enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
return xnn_status_unsupported_hardware;
}

const struct xnn_gemm_config* gemm_nr2_config = xnn_init_f32_gemm_nr2_config();
const struct xnn_gemm_config* gemm_nr2_config = xnn_init_f32_gemm_nr2_config(flags);
if (gemm_nr2_config == NULL) {
xnn_log_error("failed to create %s operator: unsupported hardware configuration",
xnn_operator_type_to_string(xnn_operator_type_deconvolution_nhwc_f32));
Expand Down
4 changes: 2 additions & 2 deletions src/operators/dynamic-fully-connected-nc.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ enum xnn_status create_dynamic_fully_connected_nc_f32(
enum xnn_status xnn_create_dynamic_fully_connected_nc_f32(
float output_min, float output_max, uint32_t flags,
xnn_operator_t* dynamic_fully_connected_op_out) {
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config();
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config(flags);
if (gemm_config == NULL) {
xnn_log_error(
"failed to create %s operator: unsupported hardware configuration",
Expand All @@ -275,7 +275,7 @@ enum xnn_status xnn_create_dynamic_fully_connected_nc_f32(
}

const struct xnn_gemm_config* gemm_nr2_config =
xnn_init_f32_gemm_nr2_config();
xnn_init_f32_gemm_nr2_config(flags);

return create_dynamic_fully_connected_nc_f32(
output_min, output_max, flags, gemm_config, gemm_nr2_config,
Expand Down
4 changes: 2 additions & 2 deletions src/operators/fully-connected-nc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1658,14 +1658,14 @@ enum xnn_status xnn_create_fully_connected_nc_f32(
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out) {
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config();
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config(flags);
if (gemm_config == NULL) {
xnn_log_error("failed to create %s operator: unsupported hardware configuration",
xnn_operator_type_to_string(xnn_operator_type_fully_connected_nc_f32));
return xnn_status_unsupported_hardware;
}

const struct xnn_gemm_config* gemm_nr2_config = xnn_init_f32_gemm_nr2_config();
const struct xnn_gemm_config* gemm_nr2_config = xnn_init_f32_gemm_nr2_config(flags);

// Select microkernel configuration based on output channels
if (gemm_nr2_config != NULL) {
Expand Down
2 changes: 1 addition & 1 deletion src/operators/scaled-dot-product-attention-nhtc.c
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ enum xnn_status xnn_create_scaled_dot_product_attention_nhtc_f32(
const enum xnn_operator_type operator_type = xnn_operator_type_scaled_dot_product_attention_nhtc_f32;
enum xnn_status status = xnn_status_unsupported_hardware;

const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config();
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_config(flags);
if (gemm_config == NULL) {
xnn_log_error("failed to create %s operator: unsupported hardware configuration",
xnn_operator_type_to_string(operator_type));
Expand Down
7 changes: 7 additions & 0 deletions src/runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,13 @@ enum xnn_status xnn_create_runtime_v4(
}
}

if (flags & XNN_FLAG_CONSISTENT_ARITHMETIC) {
for (size_t i = 0; i < subgraph->num_nodes; i++) {
struct xnn_node* node = subgraph->nodes + i;
node->flags |= XNN_FLAG_CONSISTENT_ARITHMETIC;
}
}

if (flags & XNN_FLAG_TRANSIENT_INDIRECTION_BUFFER) {
for (size_t i = 0; i < subgraph->num_nodes; i++) {
struct xnn_node* node = subgraph->nodes + i;
Expand Down
4 changes: 2 additions & 2 deletions src/xnnpack/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,8 @@ static inline bool xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel

XNN_INTERNAL const struct xnn_gemm_config* xnn_init_bf16_f32_gemm_config();
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f16_gemm_config();
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_gemm_config();
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_gemm_nr2_config();
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_gemm_config(uint32_t flags);
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_gemm_nr2_config(uint32_t flags);
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_igemm_config();
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_qc8w_gemm_config();
XNN_INTERNAL const struct xnn_gemm_config* xnn_init_f32_qc4w_gemm_config();
Expand Down
3 changes: 2 additions & 1 deletion test/operators/convert-nc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,8 @@ class ConvertOperatorTester {
xnnpack::ReplicableRandomDevice rng;

// The parameters of the GEMM config are used as packing parameters.
const struct xnn_gemm_config* gemm_config = xnn_init_f32_gemm_nr2_config();
const struct xnn_gemm_config* gemm_config =
xnn_init_f32_gemm_nr2_config(/*flags=*/0);

xnnpack::Buffer<float> input(XNN_EXTRA_BYTES / sizeof(float) +
(batch_size() - 1) * input_stride() +
Expand Down
Loading