22#include "llvm/IR/IntrinsicsAMDGPU.h"
27#define DEBUG_TYPE "amdgpu-simplifylib"
33 cl::desc(
"Enable pre-link mode optimizations"),
38 cl::desc(
"Comma separated list of functions to replace with native, or all"),
42#define MATH_PI numbers::pi
43#define MATH_E numbers::e
44#define MATH_SQRT2 numbers::sqrt2
45#define MATH_SQRT1_2 numbers::inv_sqrt2
58 bool AllNative =
false;
79 bool parseFunctionName(
const StringRef &FMangledName, FuncInfo &FInfo);
81 bool TDOFold(
CallInst *CI,
const FuncInfo &FInfo);
92 const FuncInfo &FInfo);
98 bool sincosUseNative(
CallInst *aCI,
const FuncInfo &FInfo);
101 bool evaluateScalarMathFunc(
const FuncInfo &FInfo,
double &Res0,
double &Res1,
103 bool evaluateCall(
CallInst *aCI,
const FuncInfo &FInfo);
107 std::tuple<Value *, Value *, Value *> insertSinCos(
Value *Arg,
117 const FuncInfo &FInfo);
124 bool shouldReplaceLibcallWithIntrinsic(
const CallInst *CI,
125 bool AllowMinSizeF32 =
false,
126 bool AllowF64 =
false,
127 bool AllowStrictFP =
false);
133 bool AllowMinSizeF32 =
false,
134 bool AllowF64 =
false,
135 bool AllowStrictFP =
false);
143 I->replaceAllUsesWith(With);
144 I->eraseFromParent();
164template <
typename IRB>
166 const Twine &Name =
"") {
167 CallInst *R =
B.CreateCall(Callee, Arg, Name);
169 R->setCallingConv(
F->getCallingConv());
173template <
typename IRB>
176 CallInst *R =
B.CreateCall(Callee, {Arg1, Arg2}, Name);
178 R->setCallingConv(
F->getCallingConv());
188 {FT->getParamType(0), PowNExpTy},
false);
426 : AMDGPULibFunc::getFunction(
M, fInfo);
430 Module *M,
const FuncInfo &fInfo, FuncInfo &newInfo,
432 assert(NewFunc != FastVariant);
436 newInfo = AMDGPULibFunc(FastVariant, fInfo);
437 if (FunctionCallee NewCallee = getFunction(M, newInfo))
441 newInfo = AMDGPULibFunc(NewFunc, fInfo);
442 return getFunction(M, newInfo);
445bool AMDGPULibCalls::parseFunctionName(
const StringRef &FMangledName,
466bool AMDGPULibCalls::useNativeFunc(
const StringRef F)
const {
471 AllNative = useNativeFunc(
"all") ||
476bool AMDGPULibCalls::sincosUseNative(
CallInst *aCI,
const FuncInfo &FInfo) {
477 bool native_sin = useNativeFunc(
"sin");
478 bool native_cos = useNativeFunc(
"cos");
480 if (native_sin && native_cos) {
495 if (sinExpr && cosExpr) {
503 <<
" with native version of sin/cos");
518 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.
isMangled() ||
521 !(AllNative || useNativeFunc(FInfo.
getName()))) {
526 return sincosUseNative(aCI, FInfo);
535 <<
" with native version");
547 const FuncInfo &FInfo) {
549 if (!Callee->isDeclaration())
552 assert(Callee->hasName() &&
"Invalid read_pipe/write_pipe function");
553 auto *M = Callee->getParent();
554 std::string Name = std::string(Callee->getName());
556 if (NumArg != 4 && NumArg != 6)
562 if (!PacketSize || !PacketAlign)
567 if (Alignment !=
Size)
570 unsigned PtrArgLoc = CI->
arg_size() - 3;
575 for (
unsigned I = 0;
I != PtrArgLoc; ++
I)
579 Name = Name +
"_" + std::to_string(
Size);
588 for (
unsigned I = 0;
I != PtrArgLoc; ++
I)
590 Args.push_back(PtrArg);
592 auto *NCI =
B.CreateCall(
F, Args);
605 if (!Callee || Callee->isIntrinsic() || CI->
isNoBuiltin())
609 if (!parseFunctionName(Callee->getName(), FInfo))
619 if (TDOFold(CI, FInfo))
624 B.setIsFPConstrained(
true);
635 B.setFastMathFlags(FMF);
640 switch (FInfo.
getId()) {
644 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::exp,
649 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::exp2,
654 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log,
659 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log2,
664 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log10,
667 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::minnum,
670 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::maxnum,
673 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fma,
true,
676 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fmuladd,
679 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fabs,
true,
682 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::copysign,
685 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::floor,
true,
688 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::ceil,
true,
691 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::trunc,
true,
694 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::rint,
true,
697 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::round,
true,
700 if (!shouldReplaceLibcallWithIntrinsic(CI,
true,
true))
706 Value *SplatArg1 =
B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
712 {CI->getType(), CI->getArgOperand(1)->getType()}));
717 return tryOptimizePow(FPOp,
B, FInfo);
720 if (fold_pow(FPOp,
B, FInfo))
727 Module *M = Callee->getParent();
735 if (!shouldReplaceLibcallWithIntrinsic(CI))
741 if (fold_pow(FPOp,
B, FInfo))
748 Module *M = Callee->getParent();
756 if (!shouldReplaceLibcallWithIntrinsic(CI))
762 if (fold_rootn(FPOp,
B, FInfo))
768 Module *M = Callee->getParent();
770 if (
FunctionCallee RootnFastFunc = getFunction(M, RootnFastInfo)) {
780 return tryReplaceLibcallWithSimpleIntrinsic(
781 B, CI, Intrinsic::sqrt,
true,
true,
false);
784 return fold_sincos(FPOp,
B, FInfo);
790 switch (FInfo.
getId()) {
795 return fold_read_write_pipe(CI,
B, FInfo);
804bool AMDGPULibCalls::TDOFold(
CallInst *CI,
const FuncInfo &FInfo) {
810 int const sz = (int)tr.
size();
816 for (
int eltNo = 0; eltNo <
getVecSize(FInfo); ++eltNo) {
818 CV->getElementAsConstant((
unsigned)eltNo));
819 assert(eltval &&
"Non-FP arguments in math function!");
821 for (
int i=0; i < sz; ++i) {
837 for (
double D : DVal)
845 LLVM_DEBUG(
errs() <<
"AMDIC: " << *CI <<
" ---> " << *nval <<
"\n");
852 for (
int i = 0; i < sz; ++i) {
853 if (CF->isExactlyValue(tr[i].input)) {
854 Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
855 LLVM_DEBUG(
errs() <<
"AMDIC: " << *CI <<
" ---> " << *nval <<
"\n");
868#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
877 const FuncInfo &FInfo) {
884 "fold_pow: encounter a wrong function call");
886 Module *
M =
B.GetInsertBlock()->getModule();
892 const APInt *CINT =
nullptr;
897 int ci_opr1 = (CINT ? (int)CINT->
getSExtValue() : 0x1111111);
899 if ((CF && CF->
isZero()) || (CINT && ci_opr1 == 0)) {
902 Constant *cnval = ConstantFP::get(eltType, 1.0);
911 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
"\n");
917 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
" * "
919 Value *nval =
B.CreateFMul(opr0, opr0,
"__pow2");
923 if ((CF && CF->
isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
925 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> 1 / " << *opr0 <<
"\n");
926 Constant *cnval = ConstantFP::get(eltType, 1.0);
930 Value *nval =
B.CreateFDiv(cnval, opr0,
"__powrecip");
938 if (FunctionCallee FPExpr =
942 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << FInfo.getName()
943 <<
'(' << *opr0 <<
")\n");
961 int ival = (int)dval;
962 if ((
double)ival == dval) {
965 ci_opr1 = 0x11111111;
970 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
971 if (abs_opr1 <= 12) {
975 cnval = ConstantFP::get(eltType, 1.0);
981 Value *valx2 =
nullptr;
983 while (abs_opr1 > 0) {
984 valx2 = valx2 ?
B.CreateFMul(valx2, valx2,
"__powx2") : opr0;
986 nval = nval ?
B.CreateFMul(nval, valx2,
"__powprod") : valx2;
993 cnval = ConstantFP::get(eltType, 1.0);
997 nval =
B.CreateFDiv(cnval, nval,
"__1powprod");
1000 << ((ci_opr1 < 0) ?
"1/prod(" :
"prod(") << *opr0
1011 FunctionCallee ExpExpr;
1012 if (ShouldUseIntrinsic)
1021 bool needlog =
false;
1022 bool needabs =
false;
1023 bool needcopysign =
false;
1034 V =
log2(std::abs(V));
1035 cnval = ConstantFP::get(eltType, V);
1053 "Wrong vector size detected");
1058 if (V < 0.0) needcopysign =
true;
1059 V =
log2(std::abs(V));
1064 for (
double D : DVal)
1086 nval =
B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0,
nullptr,
"__fabs");
1088 nval = cnval ? cnval : opr0;
1091 FunctionCallee LogExpr;
1092 if (ShouldUseIntrinsic) {
1107 opr1 =
B.CreateSIToFP(opr1, nval->
getType(),
"pownI2F");
1109 nval =
B.CreateFMul(opr1, nval,
"__ylogx");
1111 CallInst *Exp2Call =
CreateCallEx(
B, ExpExpr, nval,
"__exp2");
1127 opr_n =
B.CreateZExtOrTrunc(opr_n, nTy,
"__ytou");
1129 opr_n =
B.CreateFPToSI(opr1, nTy,
"__ytou");
1132 Value *sign =
B.CreateShl(opr_n,
size-1,
"__yeven");
1133 sign =
B.CreateAnd(
B.CreateBitCast(opr0, nTy), sign,
"__pow_sign");
1135 nval =
B.CreateCopySign(nval,
B.CreateBitCast(sign, nval->
getType()),
1136 nullptr,
"__pow_sign");
1140 <<
"exp2(" << *opr1 <<
" * log2(" << *opr0 <<
"))\n");
1147 const FuncInfo &FInfo) {
1151 const APInt *CINT =
nullptr;
1155 Function *Parent =
B.GetInsertBlock()->getParent();
1158 if (ci_opr1 == 1 && !Parent->
hasFnAttribute(Attribute::StrictFP)) {
1162 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
'\n');
1167 Module *
M =
B.GetInsertBlock()->getModule();
1171 shouldReplaceLibcallWithIntrinsic(CI,
1175 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> sqrt(" << *opr0 <<
")\n");
1177 CallInst *NewCall =
B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1182 MDBuilder MDHelper(
M->getContext());
1183 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->
getFPAccuracy(), 2.0f));
1184 NewCall->
setMetadata(LLVMContext::MD_fpmath, FPMD);
1191 if (FunctionCallee FPExpr =
1193 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> cbrt(" << *opr0
1199 }
else if (ci_opr1 == -1) {
1200 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> 1.0 / " << *opr0 <<
"\n");
1201 Value *nval =
B.CreateFDiv(ConstantFP::get(opr0->
getType(), 1.0),
1208 if (ci_opr1 == -2 &&
1209 shouldReplaceLibcallWithIntrinsic(CI,
1216 MDBuilder MDHelper(
M->getContext());
1217 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->
getFPAccuracy(), 2.0f));
1223 CallInst *Sqrt =
B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1225 B.CreateFDiv(ConstantFP::get(opr0->
getType(), 1.0), Sqrt));
1230 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> rsqrt(" << *opr0
1241 Value *TruncY =
B.CreateUnaryIntrinsic(Intrinsic::trunc,
Y);
1242 return B.CreateFCmpOEQ(TruncY,
Y);
1247 auto *HalfY =
B.CreateFMul(
Y, ConstantFP::get(
Y->getType(), 0.5));
1255 Value *NotEvenY =
B.CreateNot(IsEvenY);
1256 return B.CreateAnd(IsIntY, NotEvenY);
1261 auto *fabsVal =
B.CreateUnaryIntrinsic(Intrinsic::fabs, val);
1267 Value *AbsX =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X);
1268 Value *LogAbsX =
B.CreateUnaryIntrinsic(Intrinsic::log2, AbsX);
1269 Value *YTimesLogX =
B.CreateFMul(
Y, LogAbsX);
1270 return B.CreateUnaryIntrinsic(Intrinsic::exp2, YTimesLogX);
1280 Constant *One = ConstantFP::get(
X->getType(), 1.0);
1290 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1291 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1295 Value *condNegX =
B.CreateFCmpOLT(
X, Zero);
1296 Value *condNotIntY =
B.CreateNot(IsIntY);
1297 Value *condNaN =
B.CreateAnd(condNegX, condNotIntY);
1298 Ret =
B.CreateSelect(condNaN, QNaN, Ret);
1306 Value *AY =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
Y);
1307 Value *YIsNegInf =
B.CreateFCmpUNE(
Y, AY);
1309 Value *AX =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X);
1310 Value *AxEqOne =
B.CreateFCmpOEQ(AX, One);
1311 Value *AxLtOne =
B.CreateFCmpOLT(AX, One);
1312 Value *XorCond =
B.CreateXor(AxLtOne, YIsNegInf);
1314 B.CreateSelect(AxEqOne, AX,
B.CreateSelect(XorCond, Zero, AY));
1315 Ret =
B.CreateSelect(YIsInf, SelInf, Ret);
1319 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1320 Value *AxInfOrZero =
B.CreateOr(XIsInf, XEqZero);
1321 Value *YLtZero =
B.CreateFCmpOLT(
Y, Zero);
1322 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1323 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1324 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1325 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1326 Ret =
B.CreateSelect(AxInfOrZero, Copysign, Ret);
1329 Value *isUnordered =
B.CreateFCmpUNO(
X,
Y);
1330 return B.CreateSelect(isUnordered, QNaN, Ret);
1333 Value *YIsNeg =
B.CreateFCmpOLT(
Y, Zero);
1334 Value *IZ =
B.CreateSelect(YIsNeg, PInf, Zero);
1335 Value *ZI =
B.CreateSelect(YIsNeg, Zero, PInf);
1337 Value *YEqZero =
B.CreateFCmpOEQ(
Y, Zero);
1338 Value *SelZeroCase =
B.CreateSelect(YEqZero, QNaN, IZ);
1339 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1340 Value *Ret =
B.CreateSelect(XEqZero, SelZeroCase, ExpYLnX);
1342 Value *XEqInf =
B.CreateFCmpOEQ(
X, PInf);
1343 Value *YNeZero =
B.CreateFCmpUNE(
Y, Zero);
1344 Value *CondInfCase =
B.CreateAnd(XEqInf, YNeZero);
1345 Ret =
B.CreateSelect(CondInfCase, ZI, Ret);
1348 Value *XNeOne =
B.CreateFCmpUNE(
X, One);
1349 Value *CondInfY =
B.CreateAnd(IsInfY, XNeOne);
1350 Value *XLtOne =
B.CreateFCmpOLT(
X, One);
1351 Value *SelInfYCase =
B.CreateSelect(XLtOne, IZ, ZI);
1352 Ret =
B.CreateSelect(CondInfY, SelInfYCase, Ret);
1354 Value *IsUnordered =
B.CreateFCmpUNO(
X,
Y);
1355 return B.CreateSelect(IsUnordered, QNaN, Ret);
1358 Constant *ZeroI = ConstantInt::get(
Y->getType(), 0);
1361 Value *OneI = ConstantInt::get(
Y->getType(), 1);
1362 Value *YAnd1 =
B.CreateAnd(
Y, OneI);
1363 Value *IsOddY =
B.CreateICmpNE(YAnd1, ZeroI);
1366 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1367 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1370 Value *FabsX =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X);
1371 Value *XIsInf =
B.CreateFCmpOEQ(FabsX, PInf);
1372 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1373 Value *InfOrZero =
B.CreateOr(XIsInf, XEqZero);
1376 Value *YLtZero =
B.CreateICmpSLT(
Y, ZeroI);
1377 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1378 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1381 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1382 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1384 return B.CreateSelect(InfOrZero, Copysign, Ret);
1387 Constant *ZeroI = ConstantInt::get(
Y->getType(), 0);
1390 Value *YAnd1 =
B.CreateAnd(
Y, ConstantInt::get(
Y->getType(), 1));
1391 Value *IsOddY =
B.CreateICmpNE(YAnd1, ZeroI);
1394 Value *SelSign =
B.CreateSelect(IsOddY,
X, One);
1395 Value *Ret =
B.CreateCopySign(ExpYLnX, SelSign);
1398 Value *FabsX =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X);
1399 Value *IsInfX =
B.CreateFCmpOEQ(FabsX, PInf);
1400 Value *XEqZero =
B.CreateFCmpOEQ(
X, Zero);
1401 Value *CondInfOrZero =
B.CreateOr(IsInfX, XEqZero);
1404 Value *YLtZero =
B.CreateICmpSLT(
Y, ZeroI);
1405 Value *XorZeroInf =
B.CreateXor(XEqZero, YLtZero);
1406 Value *SelVal =
B.CreateSelect(XorZeroInf, Zero, PInf);
1409 Value *SelSign2 =
B.CreateSelect(IsOddY,
X, Zero);
1410 Value *Copysign =
B.CreateCopySign(SelVal, SelSign2);
1412 Ret =
B.CreateSelect(CondInfOrZero, Copysign, Ret);
1415 Value *XIsNeg =
B.CreateFCmpOLT(
X, Zero);
1416 Value *NotOddY =
B.CreateNot(IsOddY);
1417 Value *CondNegAndNotOdd =
B.CreateAnd(XIsNeg, NotOddY);
1418 Value *YEqZero =
B.CreateICmpEQ(
Y, ZeroI);
1419 Value *CondBad =
B.CreateOr(CondNegAndNotOdd, YEqZero);
1420 return B.CreateSelect(CondBad, QNaN, Ret);
1445 Constant *One = ConstantFP::get(
X->getType(), 1.0);
1448 Value *XEqOne =
B.CreateFCmpOEQ(
X, One);
1449 Y =
B.CreateSelect(XEqOne, One,
Y);
1453 X =
B.CreateSelect(YEqZero, One,
X);
1471 Value *YEqZero =
B.CreateICmpEQ(
Y, ConstantInt::get(
Y->getType(), 0));
1474 X =
B.CreateSelect(YEqZero, ConstantFP::get(
X->getType(), 1.0),
X);
1476 Value *CastY =
B.CreateSIToFP(
Y,
X->getType());
1483 Value *CastY =
B.CreateSIToFP(
Y,
X->getType());
1484 Value *RcpY =
B.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, CastY);
1495 const FuncInfo &FInfo) {
1505 FunctionCallee PowrFunc = getFloatFastVariant(
1513 SQ.getWithInstruction(
Call))) {
1515 return fold_pow(FPOp,
B, PowrInfo) ||
true;
1528 FunctionCallee PownFunc = getFloatFastVariant(
1536 B.CreateFPToSI(FPOp->
getOperand(1), PownType->getParamType(1));
1539 1, AttributeFuncs::typeIncompatible(CastedArg->
getType(),
1543 return fold_pow(FPOp,
B, PownInfo) ||
true;
1547 if (fold_pow(FPOp,
B, FInfo))
1556 if (FunctionCallee PowFastFunc = getFunction(M, PowFastInfo)) {
1558 return fold_pow(FPOp,
B, PowFastInfo) ||
true;
1567 const FuncInfo &FInfo) {
1570 FuncInfo nf = FInfo;
1572 return getFunction(M, nf);
1578bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(
const CallInst *CI,
1579 bool AllowMinSizeF32,
1581 bool AllowStrictFP) {
1596 if (!AllowStrictFP && ParentF->
hasFnAttribute(Attribute::StrictFP))
1599 if (IsF32 && !AllowMinSizeF32 && ParentF->
hasMinSize())
1604void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(
IRBuilder<> &
B,
1612 if (Arg0VecTy && !Arg1VecTy) {
1613 Value *SplatRHS =
B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1615 }
else if (!Arg0VecTy && Arg1VecTy) {
1616 Value *SplatLHS =
B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1622 CI->
getModule(), IntrID, {CI->getType()}));
1625bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1627 bool AllowF64,
bool AllowStrictFP) {
1628 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1631 replaceLibCallWithSimpleIntrinsic(
B, CI, IntrID);
1635std::tuple<Value *, Value *, Value *>
1639 Function *
F =
B.GetInsertBlock()->getParent();
1640 B.SetInsertPointPastAllocas(
F);
1642 AllocaInst *
Alloc =
B.CreateAlloca(Arg->
getType(),
nullptr,
"__sincos_");
1649 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1652 B.SetCurrentDebugLocation(
DL);
1660 Value *CastAlloc =
B.CreateAddrSpaceCast(
Alloc, CosPtrTy);
1668 return {SinCos, LoadCos, SinCos};
1673 const FuncInfo &fInfo) {
1692 Function *
F =
B.GetInsertBlock()->getParent();
1698 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1702 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1705 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1706 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1707 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1716 const std::string PairName = PartnerInfo.mangle();
1720 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1721 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1725 MDNode *FPMath = CI->
getMetadata(LLVMContext::MD_fpmath);
1729 for (User* U : CArgVal->
users()) {
1738 bool Handled =
true;
1740 if (UCallee->
getName() == SinName)
1742 else if (UCallee->
getName() == CosName)
1744 else if (UCallee->
getName() == SinCosPrivateName ||
1745 UCallee->
getName() == SinCosGenericName)
1753 FMF &= OtherOp->getFastMathFlags();
1762 B.setFastMathFlags(FMF);
1763 B.setDefaultFPMathTag(FPMath);
1765 B.SetCurrentDebugLocation(DbgLoc);
1767 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF,
B, FSinCos);
1770 for (CallInst *
C : Calls)
1771 C->replaceAllUsesWith(Res);
1776 replaceTrigInsts(SinCalls, Sin);
1777 replaceTrigInsts(CosCalls, Cos);
1778 replaceTrigInsts(SinCosCalls, SinCos);
1785bool AMDGPULibCalls::evaluateScalarMathFunc(
const FuncInfo &FInfo,
double &Res0,
1791 double opr0 = 0.0, opr1 = 0.0;
1806 switch (FInfo.getId()) {
1807 default :
return false;
1815 Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1828 Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1841 Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1849 Res0 = (opr0 < 0.0) ? -
pow(-opr0, 1.0/3.0) :
pow(opr0, 1.0/3.0);
1869 Res0 =
pow(2.0, opr0);
1873 Res0 =
pow(10.0, opr0);
1881 Res0 = log(opr0) / log(2.0);
1885 Res0 = log(opr0) / log(10.0);
1889 Res0 = 1.0 / sqrt(opr0);
1919 Res0 =
pow(opr0, opr1);
1924 double val = (double)iopr1->getSExtValue();
1925 Res0 =
pow(opr0, val);
1933 double val = (double)iopr1->getSExtValue();
1934 Res0 =
pow(opr0, 1.0 / val);
1950bool AMDGPULibCalls::evaluateCall(
CallInst *aCI,
const FuncInfo &FInfo) {
1951 int numArgs = (int)aCI->
arg_size();
1972 double DVal0[16], DVal1[16];
1975 if (FuncVecSize == 1) {
1976 if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
1982 for (
int i = 0; i < FuncVecSize; ++i) {
1985 if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
1993 if (FuncVecSize == 1) {
1994 nval0 = ConstantFP::get(aCI->
getType(), DVal0[0]);
1996 nval1 = ConstantFP::get(aCI->
getType(), DVal1[0]);
2000 for (
int i = 0; i < FuncVecSize; ++i)
2004 if (hasTwoResults) {
2005 for (
int i = 0; i < FuncVecSize; ++i)
2013 if (hasTwoResults) {
2020 if (hasTwoResults) {
2023 "math function with ptr arg not supported yet");
2034 Simplifier.initNativeFuncs();
2039 F.printAsOperand(
dbgs(),
false,
F.getParent());
dbgs() <<
'\n';);
2041 for (
auto &BB :
F) {
2048 if (Simplifier.fold(CI))
2062 Simplifier.initNativeFuncs();
2065 for (
auto &BB :
F) {
2070 if (CI && Simplifier.useNative(CI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
static Value * emitIsInf(IRBuilder<> &B, Value *val)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static Value * emitFastExpYLnx(IRBuilder<> &B, Value *X, Value *Y)
static Value * emitIsInteger(IRBuilder<> &B, Value *Y)
static Value * emitIsEvenInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static Value * emitPowFixup(IRBuilder<> &B, Value *X, Value *Y, Value *ExpYLnX, PowKind Kind)
Emit special case management epilog code for fast pow, powr, pown, and rootn expansions.
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static Value * emitIsOddInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Machine Check Debug Module
FunctionAnalysisManager FAM
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
static void replaceCall(Instruction *I, Value *With)
AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM)
bool useNative(CallInst *CI)
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULibFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
Param * getLeads()
Get leading parameters for mangled lib functions.
ENamePrefix getPrefix() const
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
static LLVM_ABI Attribute getWithNoFPClass(LLVMContext &Context, FPClassTest Mask)
InstListType::iterator iterator
Instruction iterators...
void removeParamAttrs(unsigned ArgNo, const AttributeMask &AttrsToRemove)
Removes the attributes from the given argument.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
AttributeSet getParamAttributes(unsigned ArgNo) const
Return the param attributes for this call.
bool isNoInline() const
Return true if the call should not be inlined.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI APFloat getElementAsAPFloat(uint64_t i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
LLVM_ABI Constant * getElementAsConstant(uint64_t i) const
Return a Constant for a specified index's element.
LLVM_ABI uint64_t getNumElements() const
Return the number of elements in the array or vector.
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
This is an important base class in LLVM.
static LLVM_ABI DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
Analysis pass which computes a DominatorTree.
Utility class for floating point operations which can have information about relaxed accuracy require...
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetLibraryInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_ABI APInt pow(const APInt &X, int64_t N)
Compute X^N for N>=0.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
bool match(Val *V, const Pattern &P)
ap_match< APFloat > m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto dyn_cast_or_null(const Y &Val)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool isKnownIntegral(const Value *V, const SimplifyQuery &SQ, FastMathFlags FMF)
Return true if the floating-point value V is known to be an integer value.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0.0.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.