[HLSL] Implement f32tof16() intrinsic #172469

tcorringham · 2025-12-16T12:26:53Z

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests.

Fixes #99113

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests. Fixes llvm#99113

llvmbot · 2025-12-16T12:27:24Z

@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-hlsl

@llvm/pr-subscribers-clang-codegen

Author: Tim Corringham (tcorringham)

Changes

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests.

Fixes #99113

Patch is 31.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172469.diff

17 Files Affected:

(modified) clang/include/clang/Basic/Builtins.td (+6)
(modified) clang/lib/CodeGen/CGHLSLBuiltins.cpp (+56)
(modified) clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h (+21)
(modified) clang/lib/Sema/SemaHLSL.cpp (+23)
(added) clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl (+27)
(added) clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll (+67)
(added) clang/test/CodeGenHLSL/builtins/f32tof16.hlsl (+27)
(added) clang/test/CodeGenHLSL/builtins/f32tof16.ll (+67)
(added) clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl (+134)
(modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+3)
(modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+2)
(modified) llvm/lib/Target/DirectX/DXIL.td (+9)
(modified) llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp (+2)
(modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+3)
(added) llvm/test/CodeGen/DirectX/f32tof16.ll (+57)
(added) llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll (+10)
(added) llvm/test/CodeGen/SPIRV/packhalf2x16.ll (+15)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index aab2418511399..e5fe97f952d06 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5272,6 +5272,12 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLF32ToF16 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f32tof16"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 317e64d595243..e0ee9ab47f92f 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -211,6 +211,59 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
   llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
 }
 
+static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.IntTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasFloatingRepresentation())
+    llvm_unreachable(
+        "f32tof16 operand must have a float representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f32tof16");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV PackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto PackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply insert the scalar in the first element
+      // of the 2 element float vector
+      Value *Float2 = Constant::getNullValue(PackType);
+      Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
+      Value *Result = CGF.Builder.CreateIntrinsic(
+          ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+      return Result;
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling packhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
+        Value *Float2 = Constant::getNullValue(PackType);
+        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+        Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0);
+        Value *Res = CGF.Builder.CreateIntrinsic(
+          CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+      }
+      return Result;
+    }
+  }
+
+  llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -676,6 +729,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
     return handleElementwiseF16ToF32(*this, E);
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    return handleElementwiseF32ToF16(*this, E);
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index f58150ed61106..8905dd9f897ca 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1073,6 +1073,27 @@ float3 f16tof32(uint3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
 float4 f16tof32(uint4);
 
+//===----------------------------------------------------------------------===//
+// f32tof16 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn uint f16tof32(float x)
+/// \brief Returns the float arg value converted to half in the low 16 bits of
+/// the uint return value
+/// \param x The float to be converted to half.
+///
+/// The return value is a uint containing the converted half value in the low
+/// 16 bits.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint f32tof16(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint2 f32tof16(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint3 f32tof16(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint4 f32tof16(float4);
+
 //===----------------------------------------------------------------------===//
 // firstbitlow builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 28744ff0ff42e..b8c558c3e73ac 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2877,6 +2877,20 @@ static bool CheckAllArgTypesAreCorrect(
   return false;
 }
 
+static bool CheckFloatRepresentation(Sema *S, SourceLocation Loc,
+                                     int ArgOrdinal,
+                                     clang::QualType PassedType) {
+  clang::QualType BaseType =
+      PassedType->isVectorType()
+          ? PassedType->castAs<clang::VectorType>()->getElementType()
+          : PassedType;
+  if (!BaseType->isFloat32Type())
+    return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
+           << ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0
+           << /* float */ 1 << PassedType;
+  return false;
+}
+
 static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
                                            int ArgOrdinal,
                                            clang::QualType PassedType) {
@@ -3564,6 +3578,15 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+                                   CheckFloatRepresentation))
+      return true;
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().UnsignedIntTy);
+    break;
+  }
   }
   return false;
 }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
new file mode 100644
index 0000000000000..ede6d5c0f3236
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll
new file mode 100644
index 0000000000000..047423a1ed9a6
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl'
+source_filename = "clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl"
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
+target triple = "dxilv1.3-pc-shadermodel6.3-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+  ret i32 %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8
+  %0 = load <2 x float>, ptr %p0.addr, align 8
+  %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <3 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+  ret <4 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!dx.valver = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 8}
+!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git 082f296ac09ca7c183aac27883cc6489250db75d)"}
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
new file mode 100644
index 0000000000000..008f495ef869c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return f32tof16(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.ll b/clang/test/CodeGenHLSL/builtins/f32tof16.ll
new file mode 100644
index 0000000000000..2e92b73ee5cfb
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'f32tof16.hlsl'
+source_filename = "f32tof16.hlsl"
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
+target triple = "dxilv1.3-pc-shadermodel6.3-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+  ret i32 %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8
+  %0 = load <2 x float>, ptr %p0.addr, align 8
+  %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <3 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+  ret <4 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!dx.valver = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 8}
+!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git a5e8e77f7ccd15945eb432a3619e57f9600c142a)"}
diff --git a/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
new file mode 100644
index 0000000000000..cd95602b413c5
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint builtin_f32tof16_too_few_arg() {
+  return __builtin_hlsl_elementwise_f32tof16();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+uint builtin_f32tof16_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+uint builtin_f32tof16_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool')}}
+}
+
+uint builtin_f32tof16_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool4' (aka 'vector<bool, 4>')}}
+}
+
+uint builtin_f32tof16_short(short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'short')}}
+}
+
+uint builtin_f32tof16_unsigned_short(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned short')}}
+}
+
+uint builtin_f32tof16_int(int p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+}
+
+uint builtin_f32tof16_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'long')}}
+}
+
+uint2 builtin_f32tof16_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+uint builtin_f32tof16_half(half p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a...
[truncated]

llvmbot · 2025-12-16T12:27:24Z

@llvm/pr-subscribers-backend-directx

Author: Tim Corringham (tcorringham)

Changes

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests.

Fixes #99113

Patch is 31.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172469.diff

17 Files Affected:

(modified) clang/include/clang/Basic/Builtins.td (+6)
(modified) clang/lib/CodeGen/CGHLSLBuiltins.cpp (+56)
(modified) clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h (+21)
(modified) clang/lib/Sema/SemaHLSL.cpp (+23)
(added) clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl (+27)
(added) clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll (+67)
(added) clang/test/CodeGenHLSL/builtins/f32tof16.hlsl (+27)
(added) clang/test/CodeGenHLSL/builtins/f32tof16.ll (+67)
(added) clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl (+134)
(modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+3)
(modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+2)
(modified) llvm/lib/Target/DirectX/DXIL.td (+9)
(modified) llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp (+2)
(modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+3)
(added) llvm/test/CodeGen/DirectX/f32tof16.ll (+57)
(added) llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll (+10)
(added) llvm/test/CodeGen/SPIRV/packhalf2x16.ll (+15)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index aab2418511399..e5fe97f952d06 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5272,6 +5272,12 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLF32ToF16 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f32tof16"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 317e64d595243..e0ee9ab47f92f 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -211,6 +211,59 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
   llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
 }
 
+static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.IntTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasFloatingRepresentation())
+    llvm_unreachable(
+        "f32tof16 operand must have a float representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f32tof16");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV PackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto PackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply insert the scalar in the first element
+      // of the 2 element float vector
+      Value *Float2 = Constant::getNullValue(PackType);
+      Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
+      Value *Result = CGF.Builder.CreateIntrinsic(
+          ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+      return Result;
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling packhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
+        Value *Float2 = Constant::getNullValue(PackType);
+        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+        Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0);
+        Value *Res = CGF.Builder.CreateIntrinsic(
+          CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+      }
+      return Result;
+    }
+  }
+
+  llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -676,6 +729,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
     return handleElementwiseF16ToF32(*this, E);
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    return handleElementwiseF32ToF16(*this, E);
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index f58150ed61106..8905dd9f897ca 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1073,6 +1073,27 @@ float3 f16tof32(uint3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
 float4 f16tof32(uint4);
 
+//===----------------------------------------------------------------------===//
+// f32tof16 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn uint f16tof32(float x)
+/// \brief Returns the float arg value converted to half in the low 16 bits of
+/// the uint return value
+/// \param x The float to be converted to half.
+///
+/// The return value is a uint containing the converted half value in the low
+/// 16 bits.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint f32tof16(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint2 f32tof16(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint3 f32tof16(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint4 f32tof16(float4);
+
 //===----------------------------------------------------------------------===//
 // firstbitlow builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 28744ff0ff42e..b8c558c3e73ac 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2877,6 +2877,20 @@ static bool CheckAllArgTypesAreCorrect(
   return false;
 }
 
+static bool CheckFloatRepresentation(Sema *S, SourceLocation Loc,
+                                     int ArgOrdinal,
+                                     clang::QualType PassedType) {
+  clang::QualType BaseType =
+      PassedType->isVectorType()
+          ? PassedType->castAs<clang::VectorType>()->getElementType()
+          : PassedType;
+  if (!BaseType->isFloat32Type())
+    return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
+           << ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0
+           << /* float */ 1 << PassedType;
+  return false;
+}
+
 static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
                                            int ArgOrdinal,
                                            clang::QualType PassedType) {
@@ -3564,6 +3578,15 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+                                   CheckFloatRepresentation))
+      return true;
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().UnsignedIntTy);
+    break;
+  }
   }
   return false;
 }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
new file mode 100644
index 0000000000000..ede6d5c0f3236
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll
new file mode 100644
index 0000000000000..047423a1ed9a6
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl'
+source_filename = "clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl"
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
+target triple = "dxilv1.3-pc-shadermodel6.3-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+  ret i32 %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8
+  %0 = load <2 x float>, ptr %p0.addr, align 8
+  %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <3 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+  ret <4 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!dx.valver = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 8}
+!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git 082f296ac09ca7c183aac27883cc6489250db75d)"}
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
new file mode 100644
index 0000000000000..008f495ef869c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return f32tof16(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.ll b/clang/test/CodeGenHLSL/builtins/f32tof16.ll
new file mode 100644
index 0000000000000..2e92b73ee5cfb
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'f32tof16.hlsl'
+source_filename = "f32tof16.hlsl"
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
+target triple = "dxilv1.3-pc-shadermodel6.3-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+  ret i32 %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8
+  %0 = load <2 x float>, ptr %p0.addr, align 8
+  %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <3 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+  ret <4 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!dx.valver = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 8}
+!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git a5e8e77f7ccd15945eb432a3619e57f9600c142a)"}
diff --git a/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
new file mode 100644
index 0000000000000..cd95602b413c5
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint builtin_f32tof16_too_few_arg() {
+  return __builtin_hlsl_elementwise_f32tof16();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+uint builtin_f32tof16_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+uint builtin_f32tof16_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool')}}
+}
+
+uint builtin_f32tof16_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool4' (aka 'vector<bool, 4>')}}
+}
+
+uint builtin_f32tof16_short(short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'short')}}
+}
+
+uint builtin_f32tof16_unsigned_short(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned short')}}
+}
+
+uint builtin_f32tof16_int(int p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+}
+
+uint builtin_f32tof16_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'long')}}
+}
+
+uint2 builtin_f32tof16_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+uint builtin_f32tof16_half(half p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a...
[truncated]

llvmbot · 2025-12-16T12:27:24Z

@llvm/pr-subscribers-backend-spir-v

Author: Tim Corringham (tcorringham)

Changes

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests.

Fixes #99113

Patch is 31.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172469.diff

17 Files Affected:

(modified) clang/include/clang/Basic/Builtins.td (+6)
(modified) clang/lib/CodeGen/CGHLSLBuiltins.cpp (+56)
(modified) clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h (+21)
(modified) clang/lib/Sema/SemaHLSL.cpp (+23)
(added) clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl (+27)
(added) clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll (+67)
(added) clang/test/CodeGenHLSL/builtins/f32tof16.hlsl (+27)
(added) clang/test/CodeGenHLSL/builtins/f32tof16.ll (+67)
(added) clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl (+134)
(modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+3)
(modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+2)
(modified) llvm/lib/Target/DirectX/DXIL.td (+9)
(modified) llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp (+2)
(modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+3)
(added) llvm/test/CodeGen/DirectX/f32tof16.ll (+57)
(added) llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll (+10)
(added) llvm/test/CodeGen/SPIRV/packhalf2x16.ll (+15)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index aab2418511399..e5fe97f952d06 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5272,6 +5272,12 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLF32ToF16 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f32tof16"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 317e64d595243..e0ee9ab47f92f 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -211,6 +211,59 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
   llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
 }
 
+static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.IntTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasFloatingRepresentation())
+    llvm_unreachable(
+        "f32tof16 operand must have a float representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f32tof16");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV PackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto PackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply insert the scalar in the first element
+      // of the 2 element float vector
+      Value *Float2 = Constant::getNullValue(PackType);
+      Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
+      Value *Result = CGF.Builder.CreateIntrinsic(
+          ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+      return Result;
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling packhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
+        Value *Float2 = Constant::getNullValue(PackType);
+        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+        Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0);
+        Value *Res = CGF.Builder.CreateIntrinsic(
+          CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+      }
+      return Result;
+    }
+  }
+
+  llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -676,6 +729,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
     return handleElementwiseF16ToF32(*this, E);
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    return handleElementwiseF32ToF16(*this, E);
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index f58150ed61106..8905dd9f897ca 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1073,6 +1073,27 @@ float3 f16tof32(uint3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
 float4 f16tof32(uint4);
 
+//===----------------------------------------------------------------------===//
+// f32tof16 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn uint f16tof32(float x)
+/// \brief Returns the float arg value converted to half in the low 16 bits of
+/// the uint return value
+/// \param x The float to be converted to half.
+///
+/// The return value is a uint containing the converted half value in the low
+/// 16 bits.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint f32tof16(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint2 f32tof16(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint3 f32tof16(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint4 f32tof16(float4);
+
 //===----------------------------------------------------------------------===//
 // firstbitlow builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 28744ff0ff42e..b8c558c3e73ac 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2877,6 +2877,20 @@ static bool CheckAllArgTypesAreCorrect(
   return false;
 }
 
+static bool CheckFloatRepresentation(Sema *S, SourceLocation Loc,
+                                     int ArgOrdinal,
+                                     clang::QualType PassedType) {
+  clang::QualType BaseType =
+      PassedType->isVectorType()
+          ? PassedType->castAs<clang::VectorType>()->getElementType()
+          : PassedType;
+  if (!BaseType->isFloat32Type())
+    return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
+           << ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0
+           << /* float */ 1 << PassedType;
+  return false;
+}
+
 static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
                                            int ArgOrdinal,
                                            clang::QualType PassedType) {
@@ -3564,6 +3578,15 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+                                   CheckFloatRepresentation))
+      return true;
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().UnsignedIntTy);
+    break;
+  }
   }
   return false;
 }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
new file mode 100644
index 0000000000000..ede6d5c0f3236
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll
new file mode 100644
index 0000000000000..047423a1ed9a6
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl'
+source_filename = "clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl"
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
+target triple = "dxilv1.3-pc-shadermodel6.3-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+  ret i32 %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8
+  %0 = load <2 x float>, ptr %p0.addr, align 8
+  %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <3 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+  ret <4 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!dx.valver = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 8}
+!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git 082f296ac09ca7c183aac27883cc6489250db75d)"}
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
new file mode 100644
index 0000000000000..008f495ef869c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return f32tof16(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.ll b/clang/test/CodeGenHLSL/builtins/f32tof16.ll
new file mode 100644
index 0000000000000..2e92b73ee5cfb
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'f32tof16.hlsl'
+source_filename = "f32tof16.hlsl"
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
+target triple = "dxilv1.3-pc-shadermodel6.3-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+  ret i32 %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8
+  %0 = load <2 x float>, ptr %p0.addr, align 8
+  %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <3 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+  ret <4 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!dx.valver = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 8}
+!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git a5e8e77f7ccd15945eb432a3619e57f9600c142a)"}
diff --git a/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
new file mode 100644
index 0000000000000..cd95602b413c5
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint builtin_f32tof16_too_few_arg() {
+  return __builtin_hlsl_elementwise_f32tof16();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+uint builtin_f32tof16_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+uint builtin_f32tof16_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool')}}
+}
+
+uint builtin_f32tof16_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool4' (aka 'vector<bool, 4>')}}
+}
+
+uint builtin_f32tof16_short(short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'short')}}
+}
+
+uint builtin_f32tof16_unsigned_short(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned short')}}
+}
+
+uint builtin_f32tof16_int(int p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+}
+
+uint builtin_f32tof16_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'long')}}
+}
+
+uint2 builtin_f32tof16_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+uint builtin_f32tof16_half(half p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a...
[truncated]

llvmbot · 2025-12-16T12:27:24Z

@llvm/pr-subscribers-llvm-ir

Author: Tim Corringham (tcorringham)

Changes

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests.

Fixes #99113

Patch is 31.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172469.diff

17 Files Affected:

(modified) clang/include/clang/Basic/Builtins.td (+6)
(modified) clang/lib/CodeGen/CGHLSLBuiltins.cpp (+56)
(modified) clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h (+21)
(modified) clang/lib/Sema/SemaHLSL.cpp (+23)
(added) clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl (+27)
(added) clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll (+67)
(added) clang/test/CodeGenHLSL/builtins/f32tof16.hlsl (+27)
(added) clang/test/CodeGenHLSL/builtins/f32tof16.ll (+67)
(added) clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl (+134)
(modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+3)
(modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+2)
(modified) llvm/lib/Target/DirectX/DXIL.td (+9)
(modified) llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp (+2)
(modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+3)
(added) llvm/test/CodeGen/DirectX/f32tof16.ll (+57)
(added) llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll (+10)
(added) llvm/test/CodeGen/SPIRV/packhalf2x16.ll (+15)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index aab2418511399..e5fe97f952d06 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5272,6 +5272,12 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLF32ToF16 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f32tof16"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 317e64d595243..e0ee9ab47f92f 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -211,6 +211,59 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
   llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
 }
 
+static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.IntTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasFloatingRepresentation())
+    llvm_unreachable(
+        "f32tof16 operand must have a float representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f32tof16");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV PackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto PackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply insert the scalar in the first element
+      // of the 2 element float vector
+      Value *Float2 = Constant::getNullValue(PackType);
+      Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
+      Value *Result = CGF.Builder.CreateIntrinsic(
+          ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+      return Result;
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling packhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
+        Value *Float2 = Constant::getNullValue(PackType);
+        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+        Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0);
+        Value *Res = CGF.Builder.CreateIntrinsic(
+          CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+      }
+      return Result;
+    }
+  }
+
+  llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -676,6 +729,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
     return handleElementwiseF16ToF32(*this, E);
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    return handleElementwiseF32ToF16(*this, E);
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index f58150ed61106..8905dd9f897ca 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1073,6 +1073,27 @@ float3 f16tof32(uint3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
 float4 f16tof32(uint4);
 
+//===----------------------------------------------------------------------===//
+// f32tof16 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn uint f16tof32(float x)
+/// \brief Returns the float arg value converted to half in the low 16 bits of
+/// the uint return value
+/// \param x The float to be converted to half.
+///
+/// The return value is a uint containing the converted half value in the low
+/// 16 bits.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint f32tof16(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint2 f32tof16(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint3 f32tof16(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint4 f32tof16(float4);
+
 //===----------------------------------------------------------------------===//
 // firstbitlow builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 28744ff0ff42e..b8c558c3e73ac 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2877,6 +2877,20 @@ static bool CheckAllArgTypesAreCorrect(
   return false;
 }
 
+static bool CheckFloatRepresentation(Sema *S, SourceLocation Loc,
+                                     int ArgOrdinal,
+                                     clang::QualType PassedType) {
+  clang::QualType BaseType =
+      PassedType->isVectorType()
+          ? PassedType->castAs<clang::VectorType>()->getElementType()
+          : PassedType;
+  if (!BaseType->isFloat32Type())
+    return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
+           << ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0
+           << /* float */ 1 << PassedType;
+  return false;
+}
+
 static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
                                            int ArgOrdinal,
                                            clang::QualType PassedType) {
@@ -3564,6 +3578,15 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+                                   CheckFloatRepresentation))
+      return true;
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().UnsignedIntTy);
+    break;
+  }
   }
   return false;
 }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
new file mode 100644
index 0000000000000..ede6d5c0f3236
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll
new file mode 100644
index 0000000000000..047423a1ed9a6
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl'
+source_filename = "clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl"
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
+target triple = "dxilv1.3-pc-shadermodel6.3-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+  ret i32 %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8
+  %0 = load <2 x float>, ptr %p0.addr, align 8
+  %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <3 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+  ret <4 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!dx.valver = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 8}
+!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git 082f296ac09ca7c183aac27883cc6489250db75d)"}
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
new file mode 100644
index 0000000000000..008f495ef869c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return f32tof16(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.ll b/clang/test/CodeGenHLSL/builtins/f32tof16.ll
new file mode 100644
index 0000000000000..2e92b73ee5cfb
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'f32tof16.hlsl'
+source_filename = "f32tof16.hlsl"
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
+target triple = "dxilv1.3-pc-shadermodel6.3-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4
+  %0 = load float, ptr %p0.addr, align 4
+  %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+  ret i32 %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8
+  %0 = load <2 x float>, ptr %p0.addr, align 8
+  %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <3 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16
+  %0 = load <4 x float>, ptr %p0.addr, align 16
+  %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+  ret <4 x i32> %hlsl.f32tof16
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0}
+!dx.valver = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 8}
+!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git a5e8e77f7ccd15945eb432a3619e57f9600c142a)"}
diff --git a/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
new file mode 100644
index 0000000000000..cd95602b413c5
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint builtin_f32tof16_too_few_arg() {
+  return __builtin_hlsl_elementwise_f32tof16();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+uint builtin_f32tof16_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+uint builtin_f32tof16_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool')}}
+}
+
+uint builtin_f32tof16_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool4' (aka 'vector<bool, 4>')}}
+}
+
+uint builtin_f32tof16_short(short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'short')}}
+}
+
+uint builtin_f32tof16_unsigned_short(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned short')}}
+}
+
+uint builtin_f32tof16_int(int p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+}
+
+uint builtin_f32tof16_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'long')}}
+}
+
+uint2 builtin_f32tof16_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+uint builtin_f32tof16_half(half p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a...
[truncated]

github-actions · 2025-12-16T12:28:34Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Fix git-clang-format errors

clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll

clang/test/CodeGenHLSL/builtins/f32tof16.ll

alsepkow

Reviewed together with @inbelic. Leaving our feedback for some minor changes.

alsepkow · 2026-01-08T23:47:05Z

clang/lib/CodeGen/CGHLSLBuiltins.cpp

+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =


nit: Because NumElements is only used for SPIRV would it make more sense to declare it and set it after the isDXIL block on 229?

I assume you just copied the logic from handleElementWiseF16ToF32, could also change it there.

Actually NumElements is used when building the ResType for vectors which is used for DXIL too.

clang/lib/CodeGen/CGHLSLBuiltins.cpp

clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h

llvm/lib/Target/DirectX/DXIL.td

alsepkow · 2026-01-09T00:12:48Z

clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl

+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {


We'll need to add the SPIRV variants of these tests.

alsepkow · 2026-01-09T00:16:26Z

llvm/test/CodeGen/SPIRV/packhalf2x16.ll

+; CHECK: [[P0:%.*]] = OpFunctionParameter [[FLOAT2]]
+; CHECK: [[PACK:%.*]] = OpExtInst [[UINT]] [[SET]] PackHalf2x16 [[P0]]
+; CHECK: OpReturnValue [[PACK]]
+define hidden spir_func noundef i32 @_Z9test_funcj(<2 x float> noundef %0) local_unnamed_addr #0 {


Should we also add a scalar test case?

As far as I'm aware there isn't a scalar case - the argument to packhalf2x16 can only be a 2 x float vector

llvm/test/CodeGen/DirectX/f32tof16.ll

Resolve issues raised in review.

Accidentally introduced formatting error by using clang-tidy

farzonl · 2026-01-26T15:42:04Z

Hi Tim I apologize I didn’t get to review this pr. That said our process is to get approvals before merging.

tcorringham · 2026-01-27T11:18:36Z

Sorry Farzon, I thought you had reviewed it, and I just assumed it was approved when it became mergeable.

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests. Fixes llvm#99113 --------- Co-authored-by: Tim Corringham <[email protected]>

[HLSL] Implement f32tof16() intrinsic

6f2b5ae

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests. Fixes llvm#99113

tcorringham requested a review from farzonl December 16, 2025 12:26

Fix code formatting

5f6848d

Fix git-clang-format errors

farzonl reviewed Jan 8, 2026

View reviewed changes

clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll Outdated Show resolved Hide resolved

farzonl reviewed Jan 8, 2026

View reviewed changes

clang/test/CodeGenHLSL/builtins/f32tof16.ll Outdated Show resolved Hide resolved

alsepkow reviewed Jan 9, 2026

View reviewed changes

Tim Corringham added 3 commits January 20, 2026 10:49

Merge remote-tracking branch 'origin/main' into f32tof16

f41dae5

Address review comments

452f074

Resolve issues raised in review.

Fix clang-format error

3064ac9

Accidentally introduced formatting error by using clang-tidy

tcorringham merged commit d5f4055 into llvm:main Jan 26, 2026
13 checks passed

tcorringham deleted the f32tof16 branch January 26, 2026 15:06

[HLSL] Implement f32tof16() intrinsic #172469

[HLSL] Implement f32tof16() intrinsic #172469

Uh oh!

Conversation

tcorringham commented Dec 16, 2025

Uh oh!

llvmbot commented Dec 16, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Dec 16, 2025

Uh oh!

llvmbot commented Dec 16, 2025

Uh oh!

llvmbot commented Dec 16, 2025

Uh oh!

github-actions bot commented Dec 16, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

alsepkow left a comment

Choose a reason for hiding this comment

Uh oh!

alsepkow Jan 8, 2026

Choose a reason for hiding this comment

Uh oh!

tcorringham Jan 20, 2026

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

alsepkow Jan 9, 2026

Choose a reason for hiding this comment

Uh oh!

alsepkow Jan 9, 2026

Choose a reason for hiding this comment

Uh oh!

tcorringham Jan 20, 2026

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

farzonl commented Jan 26, 2026

Uh oh!

tcorringham commented Jan 27, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Dec 16, 2025 •

edited

Loading

github-actions bot commented Dec 16, 2025 •

edited

Loading