22#include "llvm/IR/IntrinsicsAMDGPU.h"
27#define DEBUG_TYPE "amdgpu-simplifylib"
33 cl::desc(
"Enable pre-link mode optimizations"),
38 cl::desc(
"Comma separated list of functions to replace with native, or all"),
42#define MATH_PI numbers::pi
43#define MATH_E numbers::e
44#define MATH_SQRT2 numbers::sqrt2
45#define MATH_SQRT1_2 numbers::inv_sqrt2
58 bool AllNative =
false;
79 bool parseFunctionName(
const StringRef &FMangledName, FuncInfo &FInfo);
81 bool TDOFold(
CallInst *CI,
const FuncInfo &FInfo);
92 const FuncInfo &FInfo);
98 bool sincosUseNative(
CallInst *aCI,
const FuncInfo &FInfo);
101 bool evaluateScalarMathFunc(
const FuncInfo &FInfo,
APFloat &Res0,
103 bool evaluateCall(
CallInst *aCI,
const FuncInfo &FInfo);
107 std::tuple<Value *, Value *, Value *> insertSinCos(
Value *Arg,
117 const FuncInfo &FInfo);
124 bool shouldReplaceLibcallWithIntrinsic(
const CallInst *CI,
125 bool AllowMinSizeF32 =
false,
126 bool AllowF64 =
false,
127 bool AllowStrictFP =
false);
133 bool AllowMinSizeF32 =
false,
134 bool AllowF64 =
false,
135 bool AllowStrictFP =
false);
143 I->replaceAllUsesWith(With);
144 I->eraseFromParent();
164template <
typename IRB>
166 const Twine &Name =
"") {
167 CallInst *R =
B.CreateCall(Callee, Arg, Name);
169 R->setCallingConv(
F->getCallingConv());
173template <
typename IRB>
176 CallInst *R =
B.CreateCall(Callee, {Arg1, Arg2}, Name);
178 R->setCallingConv(
F->getCallingConv());
188 {FT->getParamType(0), PowNExpTy},
false);
426 : AMDGPULibFunc::getFunction(
M, fInfo);
430 Module *M,
const FuncInfo &fInfo, FuncInfo &newInfo,
432 assert(NewFunc != FastVariant);
436 newInfo = AMDGPULibFunc(FastVariant, fInfo);
437 if (FunctionCallee NewCallee = getFunction(M, newInfo))
441 newInfo = AMDGPULibFunc(NewFunc, fInfo);
442 return getFunction(M, newInfo);
445bool AMDGPULibCalls::parseFunctionName(
const StringRef &FMangledName,
466bool AMDGPULibCalls::useNativeFunc(
const StringRef F)
const {
471 AllNative = useNativeFunc(
"all") ||
476bool AMDGPULibCalls::sincosUseNative(
CallInst *aCI,
const FuncInfo &FInfo) {
477 bool native_sin = useNativeFunc(
"sin");
478 bool native_cos = useNativeFunc(
"cos");
480 if (native_sin && native_cos) {
495 if (sinExpr && cosExpr) {
503 <<
" with native version of sin/cos");
518 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.
isMangled() ||
521 !(AllNative || useNativeFunc(FInfo.
getName()))) {
526 return sincosUseNative(aCI, FInfo);
535 <<
" with native version");
547 const FuncInfo &FInfo) {
549 if (!Callee->isDeclaration())
552 assert(Callee->hasName() &&
"Invalid read_pipe/write_pipe function");
553 auto *M = Callee->getParent();
554 std::string Name = std::string(Callee->getName());
556 if (NumArg != 4 && NumArg != 6)
562 if (!PacketSize || !PacketAlign)
567 if (Alignment !=
Size)
570 unsigned PtrArgLoc = CI->
arg_size() - 3;
575 for (
unsigned I = 0;
I != PtrArgLoc; ++
I)
579 Name = Name +
"_" + std::to_string(
Size);
588 for (
unsigned I = 0;
I != PtrArgLoc; ++
I)
590 Args.push_back(PtrArg);
592 auto *NCI =
B.CreateCall(
F, Args);
605 if (!Callee || Callee->isIntrinsic() || CI->
isNoBuiltin())
609 if (!parseFunctionName(Callee->getName(), FInfo))
619 if (TDOFold(CI, FInfo))
624 B.setIsFPConstrained(
true);
635 B.setFastMathFlags(FMF);
640 switch (FInfo.
getId()) {
644 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::exp,
649 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::exp2,
654 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log,
659 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log2,
664 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log10,
667 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::minnum,
670 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::maxnum,
673 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fma,
true,
676 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fmuladd,
679 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fabs,
true,
682 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::copysign,
685 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::floor,
true,
688 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::ceil,
true,
691 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::trunc,
true,
694 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::rint,
true,
697 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::round,
true,
700 if (!shouldReplaceLibcallWithIntrinsic(CI,
true,
true))
706 Value *SplatArg1 =
B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
712 {CI->getType(), CI->getArgOperand(1)->getType()}));
718 return tryOptimizePow(FPOp,
B, FInfo);
721 if (fold_pow(FPOp,
B, FInfo))
728 Module *M = Callee->getParent();
736 if (!shouldReplaceLibcallWithIntrinsic(CI))
742 if (fold_pow(FPOp,
B, FInfo))
749 Module *M = Callee->getParent();
757 if (!shouldReplaceLibcallWithIntrinsic(CI))
763 if (fold_rootn(FPOp,
B, FInfo))
769 Module *M = Callee->getParent();
771 if (
FunctionCallee RootnFastFunc = getFunction(M, RootnFastInfo)) {
781 return tryReplaceLibcallWithSimpleIntrinsic(
782 B, CI, Intrinsic::sqrt,
true,
true,
false);
785 return fold_sincos(FPOp,
B, FInfo);
791 switch (FInfo.
getId()) {
796 return fold_read_write_pipe(CI,
B, FInfo);
815 ConstValues.
push_back(ConstantFP::get(ElemTy, APF));
820bool AMDGPULibCalls::TDOFold(
CallInst *CI,
const FuncInfo &FInfo) {
826 int const sz = (int)tr.
size();
836 for (
int eltNo = 0; eltNo < vecSize; ++eltNo) {
839 auto MatchingRow =
llvm::find_if(tr, [eltval](
const TableEntry &entry) {
842 if (MatchingRow == tr.
end())
847 LLVM_DEBUG(
errs() <<
"AMDIC: " << *CI <<
" ---> " << *NewValues <<
"\n");
854 for (
int i = 0; i < sz; ++i) {
855 if (CF->isExactlyValue(tr[i].input)) {
856 Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
857 LLVM_DEBUG(
errs() <<
"AMDIC: " << *CI <<
" ---> " << *nval <<
"\n");
870#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
879 const FuncInfo &FInfo) {
886 "fold_pow: encounter a wrong function call");
888 Module *
M =
B.GetInsertBlock()->getModule();
894 const APInt *CINT =
nullptr;
899 int ci_opr1 = (CINT ? (int)CINT->
getSExtValue() : 0x1111111);
901 if ((CF && CF->
isZero()) || (CINT && ci_opr1 == 0)) {
904 Constant *cnval = ConstantFP::get(eltType, 1.0);
913 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
"\n");
919 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
" * "
921 Value *nval =
B.CreateFMul(opr0, opr0,
"__pow2");
925 if ((CF && CF->
isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
927 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> 1 / " << *opr0 <<
"\n");
928 Constant *cnval = ConstantFP::get(eltType, 1.0);
932 Value *nval =
B.CreateFDiv(cnval, opr0,
"__powrecip");
940 if (FunctionCallee FPExpr =
944 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << FInfo.getName()
945 <<
'(' << *opr0 <<
")\n");
963 int ival = (int)dval;
964 if ((
double)ival == dval) {
967 ci_opr1 = 0x11111111;
972 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
973 if (abs_opr1 <= 12) {
977 cnval = ConstantFP::get(eltType, 1.0);
983 Value *valx2 =
nullptr;
985 while (abs_opr1 > 0) {
986 valx2 = valx2 ?
B.CreateFMul(valx2, valx2,
"__powx2") : opr0;
988 nval = nval ?
B.CreateFMul(nval, valx2,
"__powprod") : valx2;
995 cnval = ConstantFP::get(eltType, 1.0);
999 nval =
B.CreateFDiv(cnval, nval,
"__1powprod");
1002 << ((ci_opr1 < 0) ?
"1/prod(" :
"prod(") << *opr0
1013 FunctionCallee ExpExpr;
1014 if (ShouldUseIntrinsic)
1023 bool needlog =
false;
1024 bool needabs =
false;
1025 bool needcopysign =
false;
1036 V =
log2(std::abs(V));
1037 cnval = ConstantFP::get(eltType, V);
1055 "Wrong vector size detected");
1060 if (V < 0.0) needcopysign =
true;
1061 V =
log2(std::abs(V));
1066 for (
double D : DVal)
1088 nval =
B.CreateFAbs(opr0,
nullptr,
"__fabs");
1090 nval = cnval ? cnval : opr0;
1093 FunctionCallee LogExpr;
1094 if (ShouldUseIntrinsic) {
1109 opr1 =
B.CreateSIToFP(opr1, nval->
getType(),
"pownI2F");
1111 nval =
B.CreateFMul(opr1, nval,
"__ylogx");
1113 CallInst *Exp2Call =
CreateCallEx(
B, ExpExpr, nval,
"__exp2");
1129 opr_n =
B.CreateZExtOrTrunc(opr_n, nTy,
"__ytou");
1131 opr_n =
B.CreateFPToSI(opr1, nTy,
"__ytou");
1134 Value *sign =
B.CreateShl(opr_n,
size-1,
"__yeven");
1135 sign =
B.CreateAnd(
B.CreateBitCast(opr0, nTy), sign,
"__pow_sign");
1137 nval =
B.CreateCopySign(nval,
B.CreateBitCast(sign, nval->
getType()),
1138 nullptr,
"__pow_sign");
1142 <<
"exp2(" << *opr1 <<
" * log2(" << *opr0 <<
"))\n");
1149 const FuncInfo &FInfo) {
1153 const APInt *CINT =
nullptr;
1157 Function *Parent =
B.GetInsertBlock()->getParent();
1160 if (ci_opr1 == 1 && !Parent->
hasFnAttribute(Attribute::StrictFP)) {
1164 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
'\n');
1169 Module *
M =
B.GetInsertBlock()->getModule();
1173 shouldReplaceLibcallWithIntrinsic(CI,
1177 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> sqrt(" << *opr0 <<
")\n");
1179 CallInst *NewCall =
B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1184 MDBuilder MDHelper(
M->getContext());
1185 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->
getFPAccuracy(), 2.0f));
1186 NewCall->
setMetadata(LLVMContext::MD_fpmath, FPMD);
1193 if (FunctionCallee FPExpr =
1195 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> cbrt(" << *opr0
1201 }
else if (ci_opr1 == -1) {
1202 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> 1.0 / " << *opr0 <<
"\n");
1203 Value *nval =
B.CreateFDiv(ConstantFP::get(opr0->
getType(), 1.0),
1210 if (ci_opr1 == -2 &&
1211 shouldReplaceLibcallWithIntrinsic(CI,
1218 MDBuilder MDHelper(
M->getContext());
1219 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->
getFPAccuracy(), 2.0f));
1225 CallInst *Sqrt =
B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1227 B.CreateFDiv(ConstantFP::get(opr0->
getType(), 1.0), Sqrt));
1232 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> rsqrt(" << *opr0
1243 Value *TruncY =
B.CreateUnaryIntrinsic(Intrinsic::trunc,
Y);
1244 return B.CreateFCmpOEQ(TruncY,
Y);
1249 auto *HalfY =
B.CreateFMul(
Y, ConstantFP::get(
Y->getType(), 0.5));
1257 Value *NotEvenY =
B.CreateNot(IsEvenY);
1258 return B.CreateAnd(IsIntY, NotEvenY);
1263 auto *fabsVal =
B.CreateFAbs(val);
1269 Value *AbsX =
B.CreateFAbs(
X);
1270 Value *LogAbsX =
B.CreateUnaryIntrinsic(Intrinsic::log2, AbsX);
1271 Value *YTimesLogX =
B.CreateFMul(
Y, LogAbsX);
1272 return B.CreateUnaryIntrinsic(Intrinsic::exp2, YTimesLogX);
1282 Constant *One = ConstantFP::get(
X->getType(), 1.0);
1292 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1293 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1297 Value *condNegX =
B.CreateFCmpOLT(
X, Zero);
1298 Value *condNotIntY =
B.CreateNot(IsIntY);
1299 Value *condNaN =
B.CreateAnd(condNegX, condNotIntY);
1300 Ret =
B.CreateSelect(condNaN, QNaN, Ret);
1309 Value *YIsNegInf =
B.CreateFCmpUNE(
Y, AY);
1312 Value *AxEqOne =
B.CreateFCmpOEQ(AX, One);
1313 Value *AxLtOne =
B.CreateFCmpOLT(AX, One);
1314 Value *XorCond =
B.CreateXor(AxLtOne, YIsNegInf);
1316 B.CreateSelect(AxEqOne, AX,
B.CreateSelect(XorCond, Zero, AY));
1317 Ret =
B.CreateSelect(YIsInf, SelInf, Ret);
1321 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1322 Value *AxInfOrZero =
B.CreateOr(XIsInf, XEqZero);
1323 Value *YLtZero =
B.CreateFCmpOLT(
Y, Zero);
1324 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1325 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1326 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1327 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1328 Ret =
B.CreateSelect(AxInfOrZero, Copysign, Ret);
1331 Value *isUnordered =
B.CreateFCmpUNO(
X,
Y);
1332 return B.CreateSelect(isUnordered, QNaN, Ret);
1335 Value *YIsNeg =
B.CreateFCmpOLT(
Y, Zero);
1336 Value *IZ =
B.CreateSelect(YIsNeg, PInf, Zero);
1337 Value *ZI =
B.CreateSelect(YIsNeg, Zero, PInf);
1339 Value *YEqZero =
B.CreateFCmpOEQ(
Y, Zero);
1340 Value *SelZeroCase =
B.CreateSelect(YEqZero, QNaN, IZ);
1341 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1342 Value *Ret =
B.CreateSelect(XEqZero, SelZeroCase, ExpYLnX);
1344 Value *XEqInf =
B.CreateFCmpOEQ(
X, PInf);
1345 Value *YNeZero =
B.CreateFCmpUNE(
Y, Zero);
1346 Value *CondInfCase =
B.CreateAnd(XEqInf, YNeZero);
1347 Ret =
B.CreateSelect(CondInfCase, ZI, Ret);
1350 Value *XNeOne =
B.CreateFCmpUNE(
X, One);
1351 Value *CondInfY =
B.CreateAnd(IsInfY, XNeOne);
1352 Value *XLtOne =
B.CreateFCmpOLT(
X, One);
1353 Value *SelInfYCase =
B.CreateSelect(XLtOne, IZ, ZI);
1354 Ret =
B.CreateSelect(CondInfY, SelInfYCase, Ret);
1356 Value *IsUnordered =
B.CreateFCmpUNO(
X,
Y);
1357 return B.CreateSelect(IsUnordered, QNaN, Ret);
1360 Constant *ZeroI = ConstantInt::get(
Y->getType(), 0);
1363 Value *OneI = ConstantInt::get(
Y->getType(), 1);
1364 Value *YAnd1 =
B.CreateAnd(
Y, OneI);
1365 Value *IsOddY =
B.CreateICmpNE(YAnd1, ZeroI);
1368 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1369 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1372 Value *FabsX =
B.CreateFAbs(
X);
1373 Value *XIsInf =
B.CreateFCmpOEQ(FabsX, PInf);
1374 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1375 Value *InfOrZero =
B.CreateOr(XIsInf, XEqZero);
1378 Value *YLtZero =
B.CreateICmpSLT(
Y, ZeroI);
1379 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1380 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1383 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1384 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1386 return B.CreateSelect(InfOrZero, Copysign, Ret);
1389 Constant *ZeroI = ConstantInt::get(
Y->getType(), 0);
1392 Value *YAnd1 =
B.CreateAnd(
Y, ConstantInt::get(
Y->getType(), 1));
1393 Value *IsOddY =
B.CreateICmpNE(YAnd1, ZeroI);
1396 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1397 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1400 Value *FabsX =
B.CreateFAbs(
X);
1401 Value *IsInfX =
B.CreateFCmpOEQ(FabsX, PInf);
1402 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1403 Value *CondInfOrZero =
B.CreateOr(IsInfX, XEqZero);
1406 Value *YLtZero =
B.CreateICmpSLT(
Y, ZeroI);
1407 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1408 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1411 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1412 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1414 Ret =
B.CreateSelect(CondInfOrZero, Copysign, Ret);
1417 Value *XIsNeg =
B.CreateFCmpOLT(
X, Zero);
1418 Value *NotOddY =
B.CreateNot(IsOddY);
1419 Value *CondNegAndNotOdd =
B.CreateAnd(XIsNeg, NotOddY);
1420 Value *YEqZero =
B.CreateICmpEQ(
Y, ZeroI);
1421 Value *CondBad =
B.CreateOr(CondNegAndNotOdd, YEqZero);
1422 return B.CreateSelect(CondBad, QNaN, Ret);
1447 Constant *One = ConstantFP::get(
X->getType(), 1.0);
1450 Value *XEqOne =
B.CreateFCmpOEQ(
X, One);
1451 Y =
B.CreateSelect(XEqOne, One,
Y);
1455 X =
B.CreateSelect(YEqZero, One,
X);
1473 Value *YEqZero =
B.CreateICmpEQ(
Y, ConstantInt::get(
Y->getType(), 0));
1476 X =
B.CreateSelect(YEqZero, ConstantFP::get(
X->getType(), 1.0),
X);
1478 Value *CastY =
B.CreateSIToFP(
Y,
X->getType());
1485 Value *CastY =
B.CreateSIToFP(
Y,
X->getType());
1488 Value *RcpY =
B.CreateFDiv(ConstantFP::get(
X->getType(), 1.0), CastY);
1500 const FuncInfo &FInfo) {
1510 FunctionCallee PowrFunc = getFloatFastVariant(
1518 SQ.getWithInstruction(
Call))) {
1520 return fold_pow(FPOp,
B, PowrInfo) ||
true;
1533 FunctionCallee PownFunc = getFloatFastVariant(
1541 B.CreateFPToSI(FPOp->
getOperand(1), PownType->getParamType(1));
1544 1, AttributeFuncs::typeIncompatible(CastedArg->
getType(),
1548 return fold_pow(FPOp,
B, PownInfo) ||
true;
1552 if (fold_pow(FPOp,
B, FInfo))
1561 if (FunctionCallee PowFastFunc = getFunction(M, PowFastInfo)) {
1563 return fold_pow(FPOp,
B, PowFastInfo) ||
true;
1572 const FuncInfo &FInfo) {
1575 FuncInfo nf = FInfo;
1577 return getFunction(M, nf);
1583bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(
const CallInst *CI,
1584 bool AllowMinSizeF32,
1586 bool AllowStrictFP) {
1601 if (!AllowStrictFP && ParentF->
hasFnAttribute(Attribute::StrictFP))
1604 if (IsF32 && !AllowMinSizeF32 && ParentF->
hasMinSize())
1609void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(
IRBuilder<> &
B,
1617 if (Arg0VecTy && !Arg1VecTy) {
1618 Value *SplatRHS =
B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1620 }
else if (!Arg0VecTy && Arg1VecTy) {
1621 Value *SplatLHS =
B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1627 CI->
getModule(), IntrID, {CI->getType()}));
1631bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1633 bool AllowF64,
bool AllowStrictFP) {
1634 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1637 replaceLibCallWithSimpleIntrinsic(
B, CI, IntrID);
1641std::tuple<Value *, Value *, Value *>
1645 Function *
F =
B.GetInsertBlock()->getParent();
1646 B.SetInsertPointPastAllocas(
F);
1648 AllocaInst *
Alloc =
B.CreateAlloca(Arg->
getType(),
nullptr,
"__sincos_");
1655 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1658 B.SetCurrentDebugLocation(
DL);
1666 Value *CastAlloc =
B.CreateAddrSpaceCast(
Alloc, CosPtrTy);
1674 return {SinCos, LoadCos, SinCos};
1679 const FuncInfo &fInfo) {
1698 Function *
F =
B.GetInsertBlock()->getParent();
1704 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1708 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1711 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1712 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1713 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1722 const std::string PairName = PartnerInfo.mangle();
1726 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1727 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1731 MDNode *FPMath = CI->
getMetadata(LLVMContext::MD_fpmath);
1735 for (User* U : CArgVal->
users()) {
1744 bool Handled =
true;
1746 if (UCallee->
getName() == SinName)
1748 else if (UCallee->
getName() == CosName)
1750 else if (UCallee->
getName() == SinCosPrivateName ||
1751 UCallee->
getName() == SinCosGenericName)
1759 FMF &= OtherOp->getFastMathFlags();
1768 B.setFastMathFlags(FMF);
1769 B.setDefaultFPMathTag(FPMath);
1771 B.SetCurrentDebugLocation(DbgLoc);
1773 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF,
B, FSinCos);
1776 for (CallInst *
C : Calls)
1777 C->replaceAllUsesWith(Res);
1782 replaceTrigInsts(SinCalls, Sin);
1783 replaceTrigInsts(CosCalls, Cos);
1784 replaceTrigInsts(SinCosCalls, SinCos);
1791bool AMDGPULibCalls::evaluateScalarMathFunc(
const FuncInfo &FInfo,
1797 double opr0 = 0.0, opr1 = 0.0;
1812 switch (FInfo.getId()) {
1822 Res0 =
APFloat{log(opr0 + sqrt(opr0 * opr0 - 1.0))};
1835 Res0 =
APFloat{log(opr0 + sqrt(opr0 * opr0 + 1.0))};
1848 Res0 =
APFloat{(log(opr0 + 1.0) - log(opr0 - 1.0)) / 2.0};
1857 APFloat{(opr0 < 0.0) ? -
pow(-opr0, 1.0 / 3.0) :
pow(opr0, 1.0 / 3.0)};
1889 Res0 =
APFloat{log(opr0) / log(2.0)};
1893 Res0 =
APFloat{log(opr0) / log(10.0)};
1897 Res0 =
APFloat{1.0 / sqrt(opr0)};
1932 double val = (double)iopr1->getSExtValue();
1941 double val = (double)iopr1->getSExtValue();
1958bool AMDGPULibCalls::evaluateCall(
CallInst *aCI,
const FuncInfo &FInfo) {
1959 int numArgs = (int)aCI->
arg_size();
1983 if (FuncVecSize == 1) {
1984 if (!evaluateScalarMathFunc(FInfo, Val0.
emplace_back(0.0),
1991 for (
int i = 0; i < FuncVecSize; ++i) {
1994 if (!evaluateScalarMathFunc(FInfo, Val0.
emplace_back(0.0),
2002 if (FuncVecSize == 1) {
2003 nval0 = ConstantFP::get(aCI->
getType(), Val0[0]);
2005 nval1 = ConstantFP::get(aCI->
getType(), Val1[0]);
2012 if (hasTwoResults) {
2015 "math function with ptr arg not supported yet");
2026 Simplifier.initNativeFuncs();
2031 F.printAsOperand(
dbgs(),
false,
F.getParent());
dbgs() <<
'\n';);
2033 for (
auto &BB :
F) {
2040 if (Simplifier.fold(CI))
2054 Simplifier.initNativeFuncs();
2057 for (
auto &BB :
F) {
2062 if (CI && Simplifier.useNative(CI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static Constant * getConstantFloatVector(const ArrayRef< APFloat > Values, const Type *Ty)
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
static Value * emitIsInf(IRBuilder<> &B, Value *val)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static Value * emitFastExpYLnx(IRBuilder<> &B, Value *X, Value *Y)
static Value * emitIsInteger(IRBuilder<> &B, Value *Y)
static Value * emitIsEvenInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static Value * emitPowFixup(IRBuilder<> &B, Value *X, Value *Y, Value *ExpYLnX, PowKind Kind)
Emit special case management epilog code for fast pow, powr, pown, and rootn expansions.
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static Value * emitIsOddInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Machine Check Debug Module
FunctionAnalysisManager FAM
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
static void replaceCall(Instruction *I, Value *With)
AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM)
bool useNative(CallInst *CI)
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULIbFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
Param * getLeads()
Get leading parameters for mangled lib functions.
ENamePrefix getPrefix() const
static constexpr roundingMode rmNearestTiesToEven
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
A function analysis which provides an AssumptionCache.
static LLVM_ABI Attribute getWithNoFPClass(LLVMContext &Context, FPClassTest Mask)
InstListType::iterator iterator
Instruction iterators...
void setCallingConv(CallingConv::ID CC)
void removeParamAttrs(unsigned ArgNo, const AttributeMask &AttrsToRemove)
Removes the attributes from the given argument.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
AttributeSet getParamAttributes(unsigned ArgNo) const
Return the param attributes for this call.
bool isNoInline() const
Return true if the call should not be inlined.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI APFloat getElementAsAPFloat(uint64_t i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
LLVM_ABI Constant * getElementAsConstant(uint64_t i) const
Return a Constant for a specified index's element.
LLVM_ABI uint64_t getNumElements() const
Return the number of elements in the array or vector.
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
const APFloat & getValueAPF() const
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
Analysis pass which computes a DominatorTree.
Utility class for floating point operations which can have information about relaxed accuracy require...
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Analysis pass providing the TargetLibraryInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_ABI APInt pow(const APInt &X, int64_t N)
Compute X^N for N>=0.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
bool match(Val *V, const Pattern &P)
ap_match< APFloat > m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto dyn_cast_or_null(const Y &Val)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool isKnownIntegral(const Value *V, const SimplifyQuery &SQ, FastMathFlags FMF)
Return true if the floating-point value V is known to be an integer value.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.