Skip to content

Commit

Permalink
Merge branch 'master' into xnn_raddextexp
Browse files Browse the repository at this point in the history
  • Loading branch information
nitheshsrikanth-mcw authored Nov 4, 2024
2 parents e853c77 + 1fed338 commit 433a325
Show file tree
Hide file tree
Showing 2,694 changed files with 65,779 additions and 184,957 deletions.
230 changes: 206 additions & 24 deletions BUILD.bazel

Large diffs are not rendered by default.

325 changes: 135 additions & 190 deletions CMakeLists.txt

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ http_archive(
# Google Test framework, used by most unit-tests.
http_archive(
name = "com_google_googletest",
sha256 = "5cb522f1427558c6df572d6d0e1bf0fd076428633d080e88ad5312be0b6a8859",
strip_prefix = "googletest-e23cdb78e9fef1f69a9ef917f447add5638daf2a",
urls = ["https://github.com/google/googletest/archive/e23cdb78e9fef1f69a9ef917f447add5638daf2a.zip"],
sha256 = "4f45c2213c1ad4de211b15721db4b05c31dc54e244ab26f397345631da327624",
strip_prefix = "googletest-df1544bcee0c7ce35cd5ea0b3eb8cc81855a4140",
urls = ["https://github.com/google/googletest/archive/df1544bcee0c7ce35cd5ea0b3eb8cc81855a4140.zip"],
)
# LINT.ThenChange(cmake/DownloadGoogleTest.cmake)

Expand Down Expand Up @@ -87,10 +87,10 @@ http_archive(
# cpuinfo library, used for detecting processor characteristics
http_archive(
name = "cpuinfo",
sha256 = "ca31f17a86e4db01b5fc05efa1807ddc84c02ba4611464b67e185e8210bf096b",
strip_prefix = "cpuinfo-1e83a2fdd3102f65c6f1fb602c1b320486218a99",
sha256 = "a57f2b93704fe49ad37fa3ef5ce8208443d6b589b89653341332bcf51aa6dde3",
strip_prefix = "cpuinfo-dff2616ddd49122b63abcf44d2c097483b77f861",
urls = [
"https://github.com/pytorch/cpuinfo/archive/1e83a2fdd3102f65c6f1fb602c1b320486218a99.zip",
"https://github.com/pytorch/cpuinfo/archive/dff2616ddd49122b63abcf44d2c097483b77f861.zip",
],
)
# LINT.ThenChange(cmake/DownloadCpuinfo.cmake)
Expand All @@ -99,10 +99,10 @@ http_archive(
# KleidiAI library, used for ARM microkernels.
http_archive(
name = "KleidiAI",
sha256 = "d8f2b5bf6eba7ab8fe3cedd97c4adc967c1befa69a6f4c4f6cbb3c102a7dd3c9",
strip_prefix = "kleidiai-32384cde728f444afdb92eecbb65e293fc6a6315",
sha256 = "6682b7a2795c711c1dd23ada552675b6514523e991043753648f2cad826f588f",
strip_prefix = "kleidiai-382b07835c43fcb0401cb4dab3c8fb85eaf187b6",
urls = [
"https://gitlab.arm.com/kleidi/kleidiai/-/archive/32384cde728f444afdb92eecbb65e293fc6a6315/kleidiai-32384cde728f444afdb92eecbb65e293fc6a6315.zip",
"https://gitlab.arm.com/kleidi/kleidiai/-/archive/382b07835c43fcb0401cb4dab3c8fb85eaf187b6/kleidiai-382b07835c43fcb0401cb4dab3c8fb85eaf187b6.zip"
],
)
# LINT.ThenChange(cmake/DownloadKleidiAI.cmake)
Expand Down
149 changes: 58 additions & 91 deletions bench/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ MICROKERNEL_BENCHMARK_DEPS = [
":bench_utils",
"//:aligned_allocator",
"//:all_microkernels",
"//:buffer",
"//:common",
"//:datatype",
"//:hardware_config",
"//:math",
"//:microkernels_h",
Expand All @@ -44,11 +46,15 @@ OPERATOR_BENCHMARK_DEPS = [
":bench_utils",
"//:XNNPACK",
"//:aligned_allocator",
"//:buffer",
"//:cache",
"//:common",
"//:datatype",
"//:math",
]

############################### Helper libraries ###############################

xnnpack_cxx_library(
name = "bench_utils",
srcs = ["utils.cc"],
Expand Down Expand Up @@ -96,8 +102,6 @@ cc_library(
],
)

######################### Benchmarks for micro-kernels #########################

xnnpack_cxx_library(
name = "gemm_benchmark",
srcs = [
Expand All @@ -113,6 +117,29 @@ xnnpack_cxx_library(
],
)

xnnpack_cxx_library(
name = "packw_benchmark",
hdrs = [
"packw-benchmark.h",
],
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
"@com_google_benchmark//:benchmark",
],
)

xnnpack_cxx_library(
name = "bgemm",
hdrs = [
"bgemm.h",
],
deps = MICROKERNEL_BENCHMARK_DEPS + [
"@com_google_benchmark//:benchmark",
],
)

######################### Benchmarks for micro-kernels #########################

[xnnpack_benchmark(
name = "%s_bench" % kernel,
srcs = [
Expand Down Expand Up @@ -165,12 +192,12 @@ xnnpack_cxx_library(
xnnpack_benchmark(
name = "f32_bgemm_bench",
srcs = [
"bgemm.h",
"f32-bgemm.cc",
],
copts = xnnpack_optional_ruy_copts(),
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
"//:allocator",
] + xnnpack_optional_ruy_deps(),
)
Expand All @@ -190,28 +217,18 @@ xnnpack_benchmark(
]),
)

[xnnpack_benchmark(
name = "%s_bench" % kernel,
srcs = [
"%s.cc" % kernel.replace("_", "-"),
"vcvt-benchmark.h",
],
deps = MICROKERNEL_BENCHMARK_DEPS,
) for kernel in [
"qs8_f16_vcvt",
"qs8_f32_vcvt",
"qs8_vcvt",
"qs16_qs8_vcvt",
"qu8_f32_vcvt",
"qu8_vcvt",
"f16_f32_vcvt",
"f16_qs8_vcvt",
"f32_f16_vcvt",
"f32_qs8_vcvt",
"f32_qu8_vcvt",
"s32_f32_vcvt",
"u32_f32_vcvt",
]]
xnnpack_benchmark(
name = "qp8_f32_qb4w_gemm",
srcs = ["qp8-f32-qb4w-gemm.cc"],
defines = xnnpack_kleidiai_defines(),
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":gemm_benchmark",
"//:isa_checks",
] + xnnpack_if_kleidiai_enabled([
"@KleidiAI//kai/ukernels/matmul",
]),
)

[xnnpack_benchmark(
name = "%s_bench" % kernel,
Expand Down Expand Up @@ -239,12 +256,10 @@ xnnpack_benchmark(
],
deps = MICROKERNEL_BENCHMARK_DEPS,
) for kernel in [
"f16_gavgpool_cw",
"f16_raddstoreexpminusmax",
"f16_rmax",
"f16_rminmax",
"f16_rmin",
"f32_gavgpool_cw",
"f32_raddexpminusmax",
"f32_raddextexp",
"f32_raddstoreexpminusmax",
Expand All @@ -263,29 +278,12 @@ xnnpack_benchmark(
srcs = [
"%s.cc" % kernel.replace("_", "-"),
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS,
) for kernel in [
"xx_transposev",
"xN_transposec",
]]

xnnpack_benchmark(
name = "qs8_requantization_bench",
srcs = [
"qs8-requantization.cc",
],
deps = MICROKERNEL_BENCHMARK_DEPS + ["//:requantization_stubs"],
)

xnnpack_benchmark(
name = "qu8_requantization_bench",
srcs = [
"qu8-requantization.cc",
],
deps = MICROKERNEL_BENCHMARK_DEPS + ["//:requantization_stubs"],
)

xnnpack_benchmark(
name = "qs8_dwconv_bench",
srcs = [
Expand Down Expand Up @@ -470,23 +468,23 @@ xnnpack_benchmark(
xnnpack_cxx_library(
name = "packq_benchmark",
srcs = [
"bgemm.h",
"packq-benchmark.cc",
],
hdrs = ["packq-benchmark.h"],
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
"@com_google_benchmark//:benchmark",
],
)

xnnpack_benchmark(
name = "x8_packq_bench",
srcs = [
"bgemm.h",
"x8-packq.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packq_benchmark",
"//:allocator",
],
Expand All @@ -495,84 +493,64 @@ xnnpack_benchmark(
xnnpack_benchmark(
name = "x8_packw_bench",
srcs = [
"bgemm.h",
"packw-benchmark.h",
"x8-packw.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packw_benchmark",
"//:allocator",
],
)

xnnpack_benchmark(
name = "qs8_packw_bench",
srcs = [
"bgemm.h",
"packw-benchmark.h",
"qs8-packw.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packw_benchmark",
"//:allocator",
],
)

xnnpack_benchmark(
name = "x16_packw_bench",
srcs = [
"bgemm.h",
"packw-benchmark.h",
"x16-packw.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packw_benchmark",
"//:allocator",
],
)

xnnpack_benchmark(
name = "x32_packw_bench",
srcs = [
"bgemm.h",
"packw-benchmark.h",
"x32-packw.cc",
],
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":bgemm",
":packw_benchmark",
"//:allocator",
],
)

########################### Benchmarks for operators ###########################

[xnnpack_benchmark(
name = "%s_bench" % op,
srcs = [
"%s.cc" % op.replace("_", "-"),
"unary_operator.h",
],
xnnpack_benchmark(
name = "unary_bench",
srcs = ["unary.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = ["nowin32"],
deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
) for op in [
"abs",
"bankers_rounding",
"ceiling",
"convert",
"elu",
"floor",
"leaky_relu",
"negate",
"hardswish",
"reciprocal_square_root",
"sigmoid",
"softmax",
"square",
"square_root",
"tanh",
"truncation",
]]
)

xnnpack_benchmark(
name = "average_pooling_bench",
Expand Down Expand Up @@ -600,9 +578,7 @@ xnnpack_benchmark(
name = "convolution_bench",
srcs = ["convolution.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = xnnpack_slow_benchmark_tags() + [
"nowin32",
],
tags = xnnpack_slow_benchmark_tags() + ["nowin32"],
deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)

Expand All @@ -622,13 +598,6 @@ xnnpack_benchmark(
deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)

xnnpack_benchmark(
name = "global_average_pooling_bench",
srcs = ["global-average-pooling.cc"],
tags = xnnpack_slow_benchmark_tags(),
deps = OPERATOR_BENCHMARK_DEPS,
)

xnnpack_benchmark(
name = "max_pooling_bench",
srcs = ["max-pooling.cc"],
Expand All @@ -647,8 +616,6 @@ xnnpack_benchmark(
name = "scaled_dot_product_attention_bench",
srcs = ["scaled-dot-product-attention.cc"],
copts = xnnpack_optional_tflite_copts(),
tags = xnnpack_slow_benchmark_tags() + [
"nowin32",
],
tags = xnnpack_slow_benchmark_tags() + ["nowin32"],
deps = OPERATOR_BENCHMARK_DEPS + xnnpack_optional_tflite_deps(),
)
Loading

0 comments on commit 433a325

Please sign in to comment.