Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 27 additions & 18 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}

foreach Cmp = ["cmpeq", "cmplt", "cmple", "cmpunord", "cmpneq", "cmpnlt",
"cmpnle", "cmpord", "min", "max"] in {
"cmpnle", "cmpord"] in {
let Features = "sse" in {
def Cmp#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def Cmp#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
Expand Down Expand Up @@ -149,6 +149,10 @@ let Features = "sse", Header = "xmmintrin.h", Attributes = [NoThrow, RequireDecl

// Builtins requiring the "sse" feature with a required vector width of 128
// that carry the Constexpr attribute: shufps plus the min/max builtins, all
// operating on 4 x float vectors.
let Features = "sse", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def minps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def minss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def maxps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def maxss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
}

let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -237,6 +241,11 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;

def minpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
def minsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
def maxpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
def maxsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;

def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
def psllqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
Expand Down Expand Up @@ -451,6 +460,10 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
def maxpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def maxps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
def minpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def minps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
Expand All @@ -465,10 +478,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
def cvttps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
foreach Op = ["max", "min"] in {
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
}
}

let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -1009,10 +1018,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def cvtpd2dq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">;
def cvtps2udq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
def cvtpd2udq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">;
def minps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
def minpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
def maxps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
Expand All @@ -1023,6 +1028,10 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def minps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
def minpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
def maxps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
}

let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
Expand Down Expand Up @@ -3374,23 +3383,20 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<5
def subph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
def mulph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
def divph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
}

// 512-bit max/min builtins on 32 x _Float16 vectors, requiring "avx512fp16"
// and carrying the Constexpr attribute. Each takes two vector operands
// followed by a trailing _Constant int operand.
let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def maxph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
def minph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
}

let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def minph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
}

let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def minph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
}

let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def maxph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
}

let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def minph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
def maxph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
}

Expand All @@ -3399,6 +3405,9 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<1
def divsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
def mulsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
def subsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
}

// maxsh/minsh round+mask builtins, requiring "avx512fp16" with a required
// vector width of 128 and carrying the Constexpr attribute. Each takes three
// 8 x _Float16 vectors, an unsigned char, and a trailing _Constant int.
let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def maxsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
def minsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
}
Expand Down
74 changes: 74 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2433,6 +2433,46 @@ static bool interp__builtin_elementwise_int_unaryop(
return true;
}

// Evaluates a two-operand floating-point vector builtin element by element.
//
// The call must have two or three arguments; the first two are vectors with
// the same floating-point element type and element count. When a third
// argument is present it is popped as an integer and forwarded to Fn as the
// optional RoundingMode. Fn is invoked once per element pair and its result
// is written to the matching element of the destination vector, which is only
// peeked (it stays on the stack).
//
// Returns false as soon as an input element is NaN, infinity, or denormal;
// returns true once every element has been written and the destination has
// been marked fully initialized.
static bool interp__builtin_elementwise_fp_binop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APFloat(const APFloat &, const APFloat &,
std::optional<APSInt> RoundingMode)>
Fn) {
assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3));
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
assert(VT->getElementType()->isFloatingType());
unsigned NumElems = VT->getNumElements();

// Vector case.
// Both operands must be vectors agreeing in element type and element count.
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
assert(VT->getElementType() ==
Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
assert(VT->getNumElements() ==
Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());

// Operands come off the stack last-argument-first: the optional integer
// argument, then the second vector, then the first vector.
std::optional<APSInt> RoundingMode = std::nullopt;
if (Call->getNumArgs() == 3)
RoundingMode = popToAPSInt(S, Call->getArg(2));

const Pointer &BPtr = S.Stk.pop<Pointer>();
const Pointer &APtr = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) {
using T = PrimConv<PT_Float>::T;
APFloat ElemA = APtr.elem<T>(ElemIdx).getAPFloat();
APFloat ElemB = BPtr.elem<T>(ElemIdx).getAPFloat();
// Special values are rejected here, before Fn is consulted for this pair.
if (ElemA.isNaN() || ElemA.isInfinity() || ElemA.isDenormal() ||
ElemB.isNaN() || ElemB.isInfinity() || ElemB.isDenormal())
return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think eventually we're going to have to move this into the callback - and possibly allow the callbacks to return false - but this should be enough for min/max cases

Dst.elem<T>(ElemIdx) = static_cast<T>(Fn(ElemA, ElemB, RoundingMode));
}

Dst.initializeAllElements();

return true;
}

static bool interp__builtin_elementwise_int_binop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
Expand Down Expand Up @@ -5662,6 +5702,40 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
});
}

// Floating-point vector minimum builtins (ps/pd/ph at every listed width).
// The optional third lambda argument is accepted but ignored.
case clang::X86::BI__builtin_ia32_minps:
case clang::X86::BI__builtin_ia32_minpd:
case clang::X86::BI__builtin_ia32_minph128:
case clang::X86::BI__builtin_ia32_minph256:
case clang::X86::BI__builtin_ia32_minps256:
case clang::X86::BI__builtin_ia32_minpd256:
case clang::X86::BI__builtin_ia32_minps512:
case clang::X86::BI__builtin_ia32_minpd512:
case clang::X86::BI__builtin_ia32_minph512:
  return interp__builtin_elementwise_fp_binop(
      S, OpPC, Call,
      [](const APFloat &LHS, const APFloat &RHS, std::optional<APSInt>) {
        // Two zeros always yield the second operand, whatever their signs.
        // NOTE(review): presumably mirrors the x86 MIN* rule for two zero
        // inputs - confirm against the Intel SDM.
        const bool BothZero = LHS.isZero() && RHS.isZero();
        if (!BothZero)
          return llvm::minimum(LHS, RHS);
        return RHS;
      });

// Floating-point vector maximum builtins (ps/pd/ph at every listed width).
// The optional third lambda argument is accepted but ignored.
case clang::X86::BI__builtin_ia32_maxps:
case clang::X86::BI__builtin_ia32_maxpd:
case clang::X86::BI__builtin_ia32_maxph128:
case clang::X86::BI__builtin_ia32_maxph256:
case clang::X86::BI__builtin_ia32_maxps256:
case clang::X86::BI__builtin_ia32_maxpd256:
case clang::X86::BI__builtin_ia32_maxps512:
case clang::X86::BI__builtin_ia32_maxpd512:
case clang::X86::BI__builtin_ia32_maxph512:
  return interp__builtin_elementwise_fp_binop(
      S, OpPC, Call,
      [](const APFloat &LHS, const APFloat &RHS, std::optional<APSInt>) {
        // Two zeros always yield the second operand, whatever their signs.
        // NOTE(review): presumably mirrors the x86 MAX* rule for two zero
        // inputs - confirm against the Intel SDM.
        const bool BothZero = LHS.isZero() && RHS.isZero();
        if (!BothZero)
          return llvm::maximum(LHS, RHS);
        return RHS;
      });

default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
Expand Down
68 changes: 68 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12278,6 +12278,42 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), SourceLen), E);
};

auto EvaluateFpBinOpExpr =
[&](llvm::function_ref<APFloat(const APFloat &, const APFloat &,
std::optional<APSInt>)>
Fn) {
assert(E->getNumArgs() == 2 || E->getNumArgs() == 3);
APValue A, B;
if (!EvaluateAsRValue(Info, E->getArg(0), A) ||
!EvaluateAsRValue(Info, E->getArg(1), B))
return false;

assert(A.isVector() && B.isVector());
assert(A.getVectorLength() == B.getVectorLength());

std::optional<APSInt> RoundingMode;
if (E->getNumArgs() == 3) {
APSInt Imm;
if (!EvaluateInteger(E->getArg(2), Imm, Info))
return false;
RoundingMode = Imm;
}

unsigned NumElems = A.getVectorLength();
SmallVector<APValue, 4> ResultElements;
ResultElements.reserve(NumElems);

for (unsigned EltNum = 0; EltNum < NumElems; ++EltNum) {
const APFloat &EltA = A.getVectorElt(EltNum).getFloat();
const APFloat &EltB = B.getVectorElt(EltNum).getFloat();
if (EltA.isNaN() || EltA.isInfinity() || EltA.isDenormal() ||
EltB.isNaN() || EltB.isInfinity() || EltB.isDenormal())
return false;
ResultElements.push_back(APValue(Fn(EltA, EltB, RoundingMode)));
}
return Success(APValue(ResultElements.data(), NumElems), E);
};

auto EvalSelectScalar = [&](unsigned Len) -> bool {
APSInt Mask;
APValue AVal, WVal;
Expand Down Expand Up @@ -14311,6 +14347,38 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}

// Floating-point vector minimum builtins (ps/pd/ph at every listed width).
// The optional third lambda argument is accepted but ignored.
case clang::X86::BI__builtin_ia32_minps:
case clang::X86::BI__builtin_ia32_minpd:
case clang::X86::BI__builtin_ia32_minps256:
case clang::X86::BI__builtin_ia32_minpd256:
case clang::X86::BI__builtin_ia32_minps512:
case clang::X86::BI__builtin_ia32_minpd512:
case clang::X86::BI__builtin_ia32_minph128:
case clang::X86::BI__builtin_ia32_minph256:
case clang::X86::BI__builtin_ia32_minph512:
  return EvaluateFpBinOpExpr(
      [](const APFloat &LHS, const APFloat &RHS, std::optional<APSInt>) {
        // Two zeros always yield the second operand, whatever their signs.
        // NOTE(review): presumably mirrors the x86 MIN* rule for two zero
        // inputs - confirm against the Intel SDM.
        const bool BothZero = LHS.isZero() && RHS.isZero();
        if (!BothZero)
          return llvm::minimum(LHS, RHS);
        return RHS;
      });

// Floating-point vector maximum builtins (ps/pd/ph at every listed width).
// The optional third lambda argument is accepted but ignored.
case clang::X86::BI__builtin_ia32_maxps:
case clang::X86::BI__builtin_ia32_maxpd:
case clang::X86::BI__builtin_ia32_maxps256:
case clang::X86::BI__builtin_ia32_maxpd256:
case clang::X86::BI__builtin_ia32_maxps512:
case clang::X86::BI__builtin_ia32_maxpd512:
case clang::X86::BI__builtin_ia32_maxph128:
case clang::X86::BI__builtin_ia32_maxph256:
case clang::X86::BI__builtin_ia32_maxph512:
  return EvaluateFpBinOpExpr(
      [](const APFloat &LHS, const APFloat &RHS, std::optional<APSInt>) {
        // Two zeros always yield the second operand, whatever their signs.
        // NOTE(review): presumably mirrors the x86 MAX* rule for two zero
        // inputs - confirm against the Intel SDM.
        const bool BothZero = LHS.isZero() && RHS.isZero();
        if (!BothZero)
          return llvm::maximum(LHS, RHS);
        return RHS;
      });

case clang::X86::BI__builtin_ia32_vcvtps2ph:
case clang::X86::BI__builtin_ia32_vcvtps2ph256: {
APValue SrcVec;
Expand Down
Loading