-
Notifications
You must be signed in to change notification settings - Fork 559
MetalPerformancePrimitives tvOS xcode26.4 b3
Alex Soto edited this page Mar 11, 2026
·
1 revision
# MetalPerformancePrimitives.framework
diff -ruN /Applications/Xcode_26.4.0-beta2.app/Contents/Developer/Platforms/AppleTVOS.platform/Developer/SDKs/AppleTVOS.sdk/System/Library/Frameworks/MetalPerformancePrimitives.framework/Headers/__impl/MPPTensorOpsMatMul2dImpl.h /Applications/Xcode_26.4.0-beta3.app/Contents/Developer/Platforms/AppleTVOS.platform/Developer/SDKs/AppleTVOS.sdk/System/Library/Frameworks/MetalPerformancePrimitives.framework/Headers/__impl/MPPTensorOpsMatMul2dImpl.h
--- /Applications/Xcode_26.4.0-beta2.app/Contents/Developer/Platforms/AppleTVOS.platform/Developer/SDKs/AppleTVOS.sdk/System/Library/Frameworks/MetalPerformancePrimitives.framework/Headers/__impl/MPPTensorOpsMatMul2dImpl.h 2026-02-16 03:54:25
+++ /Applications/Xcode_26.4.0-beta3.app/Contents/Developer/Platforms/AppleTVOS.platform/Developer/SDKs/AppleTVOS.sdk/System/Library/Frameworks/MetalPerformancePrimitives.framework/Headers/__impl/MPPTensorOpsMatMul2dImpl.h 2026-03-04 01:10:24
@@ -3765,14 +3765,19 @@
static_assert((descriptor.m % 16) == 0 || (descriptor.n % 16) == 0, "At least one of M or N must be a multiple of 16");
if constexpr (descriptor.k != static_cast<int>(metal::dynamic_extent) && descriptor.k != dynamic_length_v<int>) {
+
+#if __HAVE_INT4B_FORMAT_TYPE__
if constexpr (metal::is_same_v<leftValueType, metal::int4b_format> || metal::is_same_v<leftValueType, metal::uint4b_format> ||
metal::is_same_v<rightValueType, metal::int4b_format> || metal::is_same_v<rightValueType, metal::uint4b_format>)
{
static_assert((descriptor.k % 32) == 0, "K must be dynamic or a multiple of 32 with sub-byte element types");
}
else
+#endif
+ {
static_assert((descriptor.k % 16) == 0, "K must be dynamic or a multiple of 16");
}
+ }
}
else {
// Single thread scope
@@ -5298,6 +5303,7 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, half>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_f16_dv_i4_dv_f16(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5318,6 +5324,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_f16_dv_i4_dv_f32(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5338,6 +5346,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, half>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_f16_dv_ui4_dv_f16(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5358,6 +5368,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_f16_dv_ui4_dv_f32(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5378,6 +5390,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, int8_t> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, int32_t>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_i8_dv_i4_dv_i32(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5398,6 +5412,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, uint8_t> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, int32_t>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_ui8_dv_ui4_dv_i32(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5418,6 +5434,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, bfloat>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_b16_dv_i4_dv_b16(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5438,6 +5456,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, bfloat>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_b16_dv_ui4_dv_b16(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5458,6 +5478,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_b16_dv_i4_dv_f32(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5478,6 +5500,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_dv_b16_dv_ui4_dv_f32(desc, left, leftDescType, right, rightDescType, destination, destinationDescType, threads);
@@ -5498,6 +5522,7 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationValueType>, "Unsupported type");
}
@@ -5954,6 +5979,7 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, half>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_f16_dv_i4_f16(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -5966,6 +5992,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_f16_dv_i4_f32(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -5978,6 +6006,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, half>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_f16_dv_ui4_f16(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -5990,6 +6020,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_f16_dv_ui4_f32(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -6002,6 +6034,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, int8_t> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, int32_t>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_i8_dv_i4_i32(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -6014,6 +6048,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, uint8_t> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, int32_t>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_ui8_dv_ui4_i32(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -6026,6 +6062,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, bfloat>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_b16_dv_i4_b16(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -6038,6 +6076,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, bfloat>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_b16_dv_ui4_b16(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -6050,6 +6090,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_b16_dv_i4_f32(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -6062,6 +6104,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<leftPtrType> && __tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_dv_b16_dv_ui4_f32(desc, left, leftDescType, right, rightDescType, destination, threads);
@@ -6074,6 +6118,7 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationValueType>, "Unsupported type");
}
@@ -7293,6 +7338,7 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, half>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_f16_dv_i4_dv_f16(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7305,6 +7351,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_f16_dv_i4_dv_f32(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7317,6 +7365,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, half>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_f16_dv_ui4_dv_f16(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7329,6 +7379,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_f16_dv_ui4_dv_f32(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7341,6 +7393,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, int8_t> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, int32_t>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_i8_dv_i4_dv_i32(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7353,6 +7407,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, uint8_t> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, int32_t>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_ui8_dv_ui4_dv_i32(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7365,6 +7421,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, bfloat>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_b16_dv_i4_dv_b16(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7377,6 +7435,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, bfloat>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_b16_dv_ui4_dv_b16(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7389,6 +7449,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_b16_dv_i4_dv_f32(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7401,6 +7463,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType> && __tensor_ops_detail::__is_device_addrspace_v<destinationPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_b16_dv_ui4_dv_f32(desc, left, right, rightDescType, destination, destinationDescType, threads);
@@ -7413,6 +7477,7 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationValueType>, "Unsupported type");
}
@@ -7720,6 +7785,7 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, half>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_f16_dv_i4_f16(desc, left, right, rightDescType, destination, threads);
@@ -7728,6 +7794,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_f16_dv_i4_f32(desc, left, right, rightDescType, destination, threads);
@@ -7736,6 +7804,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, half>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_f16_dv_ui4_f16(desc, left, right, rightDescType, destination, threads);
@@ -7744,6 +7814,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, half> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_f16_dv_ui4_f32(desc, left, right, rightDescType, destination, threads);
@@ -7752,6 +7824,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, int8_t> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, int32_t>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_i8_dv_i4_i32(desc, left, right, rightDescType, destination, threads);
@@ -7760,6 +7834,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, uint8_t> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, int32_t>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_ui8_dv_ui4_i32(desc, left, right, rightDescType, destination, threads);
@@ -7768,6 +7844,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, bfloat>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_b16_dv_i4_b16(desc, left, right, rightDescType, destination, threads);
@@ -7776,6 +7854,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, bfloat>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_b16_dv_ui4_b16(desc, left, right, rightDescType, destination, threads);
@@ -7784,6 +7864,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::int4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_b16_dv_i4_f32(desc, left, right, rightDescType, destination, threads);
@@ -7792,6 +7874,8 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
+#if __HAVE_INT4B_FORMAT_TYPE__
else if constexpr (__tensor_ops_detail::__is_same_v<leftValueType, bfloat> && __tensor_ops_detail::__is_same_v<rightValueType, metal::uint4b_format> && __tensor_ops_detail::__is_same_v<destinationValueType, float>) {
if constexpr (__tensor_ops_detail::__is_device_addrspace_v<rightPtrType>)
__tensorops_impl_matmul2d_op_run_cooperative_b16_dv_ui4_f32(desc, left, right, rightDescType, destination, threads);
@@ -7800,6 +7884,7 @@
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationPtrType>, "Unsupported address space");
}
+#endif
else
static_assert(__tensor_ops_detail::__assert_false_v<destinationValueType>, "Unsupported type");
}