27#define DEBUG_TYPE "amdgpu-simplifylib"
33 cl::desc(
"Enable pre-link mode optimizations"),
38 cl::desc(
"Comma separated list of functions to replace with native, or all"),
42#define MATH_PI numbers::pi
43#define MATH_E numbers::e
44#define MATH_SQRT2 numbers::sqrt2
45#define MATH_SQRT1_2 numbers::inv_sqrt2
57 bool UnsafeFPMath =
false;
60 bool AllNative =
false;
84 bool evaluateScalarMathFunc(
const FuncInfo &FInfo,
double &Res0,
double &Res1,
90 std::tuple<Value *, Value *, Value *> insertSinCos(
Value *Arg,
107 bool shouldReplaceLibcallWithIntrinsic(
const CallInst *CI,
108 bool AllowMinSizeF32 =
false,
109 bool AllowF64 =
false,
110 bool AllowStrictFP =
false);
116 bool AllowMinSizeF32 =
false,
117 bool AllowF64 =
false,
118 bool AllowStrictFP =
false);
127 I->replaceAllUsesWith(With);
128 I->eraseFromParent();
149template <
typename IRB>
153 if (
Function *
F = dyn_cast<Function>(Callee.getCallee()))
154 R->setCallingConv(
F->getCallingConv());
158template <
typename IRB>
162 if (
Function *
F = dyn_cast<Function>(Callee.getCallee()))
163 R->setCallingConv(
F->getCallingConv());
169 if (
VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
170 PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
172 return FunctionType::get(FT->getReturnType(),
173 {FT->getParamType(0), PowNExpTy},
false);
331 case AMDGPULibFunc::EI_DIVIDE:
332 case AMDGPULibFunc::EI_COS:
333 case AMDGPULibFunc::EI_EXP:
334 case AMDGPULibFunc::EI_EXP2:
335 case AMDGPULibFunc::EI_EXP10:
336 case AMDGPULibFunc::EI_LOG:
337 case AMDGPULibFunc::EI_LOG2:
338 case AMDGPULibFunc::EI_LOG10:
339 case AMDGPULibFunc::EI_POWR:
340 case AMDGPULibFunc::EI_RECIP:
341 case AMDGPULibFunc::EI_RSQRT:
342 case AMDGPULibFunc::EI_SIN:
343 case AMDGPULibFunc::EI_SINCOS:
344 case AMDGPULibFunc::EI_SQRT:
345 case AMDGPULibFunc::EI_TAN:
366 case AMDGPULibFunc::EI_NCOS:
373 case AMDGPULibFunc::EI_NEXP2:
378 case AMDGPULibFunc::EI_NLOG2:
381 case AMDGPULibFunc::EI_NRSQRT:
383 case AMDGPULibFunc::EI_NSIN:
387 case AMDGPULibFunc::EI_NSQRT:
414bool AMDGPULibCalls::parseFunctionName(
const StringRef &FMangledName,
420 return UnsafeFPMath || FPOp->
isFast();
424 return UnsafeFPMath ||
435 UnsafeFPMath =
F.getFnAttribute(
"unsafe-fp-math").getValueAsBool();
441bool AMDGPULibCalls::useNativeFunc(
const StringRef F)
const {
446 AllNative = useNativeFunc(
"all") ||
451bool AMDGPULibCalls::sincosUseNative(
CallInst *aCI,
const FuncInfo &FInfo) {
452 bool native_sin = useNativeFunc(
"sin");
453 bool native_cos = useNativeFunc(
"cos");
455 if (native_sin && native_cos) {
470 if (sinExpr && cosExpr) {
478 <<
" with native version of sin/cos");
493 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.
isMangled() ||
496 !(AllNative || useNativeFunc(FInfo.
getName()))) {
501 return sincosUseNative(aCI, FInfo);
510 <<
" with native version");
522 const FuncInfo &FInfo) {
524 if (!Callee->isDeclaration())
527 assert(Callee->hasName() &&
"Invalid read_pipe/write_pipe function");
528 auto *M = Callee->getParent();
529 std::string
Name = std::string(Callee->getName());
531 if (NumArg != 4 && NumArg != 6)
537 if (!PacketSize || !PacketAlign)
542 if (Alignment !=
Size)
545 unsigned PtrArgLoc = CI->
arg_size() - 3;
550 for (
unsigned I = 0;
I != PtrArgLoc; ++
I)
563 for (
unsigned I = 0;
I != PtrArgLoc; ++
I)
565 Args.push_back(PtrArg);
567 auto *NCI =
B.CreateCall(
F, Args);
578 if (isa<PoisonValue>(V))
580 if (isa<UndefValue>(V))
583 if (
const ConstantFP *CF = dyn_cast<ConstantFP>(V))
584 return CF->getValueAPF().isInteger();
586 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
587 const Constant *CV = dyn_cast<Constant>(V);
589 unsigned NumElts = VFVTy->getNumElements();
590 for (
unsigned i = 0; i != NumElts; ++i) {
594 if (isa<PoisonValue>(Elt))
597 const ConstantFP *CFP = dyn_cast<ConstantFP>(Elt);
609 switch (
I->getOpcode()) {
610 case Instruction::SIToFP:
611 case Instruction::UIToFP:
619 case Instruction::Call: {
622 case Intrinsic::trunc:
623 case Intrinsic::floor:
624 case Intrinsic::ceil:
625 case Intrinsic::rint:
626 case Intrinsic::nearbyint:
627 case Intrinsic::round:
628 case Intrinsic::roundeven:
648 if (!Callee || Callee->isIntrinsic() || CI->
isNoBuiltin())
652 if (!parseFunctionName(Callee->getName(), FInfo))
662 if (TDOFold(CI, FInfo))
667 B.setIsFPConstrained(
true);
678 B.setFastMathFlags(FMF);
683 switch (FInfo.
getId()) {
687 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::exp,
692 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::exp2,
697 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log,
702 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log2,
707 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::log10,
710 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::minnum,
713 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::maxnum,
716 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fma,
true,
719 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fmuladd,
722 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::fabs,
true,
725 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::copysign,
728 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::floor,
true,
731 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::ceil,
true,
734 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::trunc,
true,
737 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::rint,
true,
740 return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::round,
true,
743 if (!shouldReplaceLibcallWithIntrinsic(CI,
true,
true))
748 VecTy && !isa<VectorType>(Arg1->
getType())) {
749 Value *SplatArg1 =
B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
755 {CI->getType(), CI->getArgOperand(1)->getType()}));
759 Module *M = Callee->getParent();
762 CallInst *Call = cast<CallInst>(FPOp);
768 FPOp->getOperand(0), 0,
770 Call->setCalledFunction(PowrFunc);
771 return fold_pow(FPOp,
B, PowrInfo) ||
true;
776 FPOp->getFastMathFlags())) {
785 B.CreateFPToSI(FPOp->getOperand(1), PownType->
getParamType(1));
787 Call->removeParamAttrs(
789 Call->getParamAttributes(1)));
790 Call->setCalledFunction(PownFunc);
791 Call->setArgOperand(1, CastedArg);
792 return fold_pow(FPOp,
B, PownInfo) ||
true;
796 return fold_pow(FPOp,
B, FInfo);
800 return fold_pow(FPOp,
B, FInfo);
802 return fold_rootn(FPOp,
B, FInfo);
805 return tryReplaceLibcallWithSimpleIntrinsic(
806 B, CI, Intrinsic::sqrt,
true,
true,
false);
809 return fold_sincos(FPOp,
B, FInfo);
815 switch (FInfo.
getId()) {
820 return fold_read_write_pipe(CI,
B, FInfo);
829bool AMDGPULibCalls::TDOFold(
CallInst *CI,
const FuncInfo &FInfo) {
835 int const sz = (int)tr.
size();
841 for (
int eltNo = 0; eltNo <
getVecSize(FInfo); ++eltNo) {
843 CV->getElementAsConstant((
unsigned)eltNo));
844 assert(eltval &&
"Non-FP arguments in math function!");
846 for (
int i=0; i < sz; ++i) {
862 for (
double D : DVal)
870 LLVM_DEBUG(
errs() <<
"AMDIC: " << *CI <<
" ---> " << *nval <<
"\n");
876 if (
ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
877 for (
int i = 0; i < sz; ++i) {
878 if (CF->isExactlyValue(tr[i].input)) {
879 Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
880 LLVM_DEBUG(
errs() <<
"AMDIC: " << *CI <<
" ---> " << *nval <<
"\n");
893#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
902 const FuncInfo &FInfo) {
906 "fold_pow: encounter a wrong function call");
908 Module *
M =
B.GetInsertBlock()->getModule();
914 const APInt *CINT =
nullptr;
919 int ci_opr1 = (CINT ? (int)CINT->
getSExtValue() : 0x1111111);
921 if ((CF && CF->
isZero()) || (CINT && ci_opr1 == 0)) {
924 Constant *cnval = ConstantFP::get(eltType, 1.0);
933 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
"\n");
939 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
" * "
941 Value *nval =
B.CreateFMul(opr0, opr0,
"__pow2");
945 if ((CF && CF->
isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
947 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> 1 / " << *opr0 <<
"\n");
948 Constant *cnval = ConstantFP::get(eltType, 1.0);
952 Value *nval =
B.CreateFDiv(cnval, opr0,
"__powrecip");
964 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << FInfo.getName()
965 <<
'(' << *opr0 <<
")\n");
983 int ival = (int)dval;
984 if ((
double)ival == dval) {
987 ci_opr1 = 0x11111111;
992 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
993 if (abs_opr1 <= 12) {
997 cnval = ConstantFP::get(eltType, 1.0);
1003 Value *valx2 =
nullptr;
1005 while (abs_opr1 > 0) {
1006 valx2 = valx2 ?
B.CreateFMul(valx2, valx2,
"__powx2") : opr0;
1008 nval = nval ?
B.CreateFMul(nval, valx2,
"__powprod") : valx2;
1015 cnval = ConstantFP::get(eltType, 1.0);
1019 nval =
B.CreateFDiv(cnval, nval,
"__1powprod");
1022 << ((ci_opr1 < 0) ?
"1/prod(" :
"prod(") << *opr0
1034 if (ShouldUseIntrinsic)
1043 bool needlog =
false;
1044 bool needabs =
false;
1045 bool needcopysign =
false;
1056 V =
log2(std::abs(V));
1057 cnval = ConstantFP::get(eltType, V);
1072 "Wrong vector size detected");
1077 if (V < 0.0) needcopysign =
true;
1078 V =
log2(std::abs(V));
1083 for (
double D : DVal)
1103 nval =
B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0,
nullptr,
"__fabs");
1105 nval = cnval ? cnval : opr0;
1109 if (ShouldUseIntrinsic) {
1123 opr1 =
B.CreateSIToFP(opr1, nval->
getType(),
"pownI2F");
1125 nval =
B.CreateFMul(opr1, nval,
"__ylogx");
1134 opr_n =
B.CreateZExtOrTrunc(opr_n, nTy,
"__ytou");
1136 opr_n =
B.CreateFPToSI(opr1, nTy,
"__ytou");
1138 Value *sign =
B.CreateShl(opr_n, size-1,
"__yeven");
1139 sign =
B.CreateAnd(
B.CreateBitCast(opr0, nTy), sign,
"__pow_sign");
1140 nval =
B.CreateOr(
B.CreateBitCast(nval, nTy), sign);
1141 nval =
B.CreateBitCast(nval, opr0->
getType());
1145 <<
"exp2(" << *opr1 <<
" * log2(" << *opr0 <<
"))\n");
1152 const FuncInfo &FInfo) {
1156 const APInt *CINT =
nullptr;
1160 Function *Parent =
B.GetInsertBlock()->getParent();
1163 if (ci_opr1 == 1 && !Parent->
hasFnAttribute(Attribute::StrictFP)) {
1167 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> " << *opr0 <<
'\n');
1172 Module *
M =
B.GetInsertBlock()->getModule();
1174 CallInst *CI = cast<CallInst>(FPOp);
1176 shouldReplaceLibcallWithIntrinsic(CI,
1180 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> sqrt(" << *opr0 <<
")\n");
1182 CallInst *NewCall =
B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1189 NewCall->
setMetadata(LLVMContext::MD_fpmath, FPMD);
1198 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> cbrt(" << *opr0
1204 }
else if (ci_opr1 == -1) {
1205 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> 1.0 / " << *opr0 <<
"\n");
1206 Value *nval =
B.CreateFDiv(ConstantFP::get(opr0->
getType(), 1.0),
1213 if (ci_opr1 == -2 &&
1214 shouldReplaceLibcallWithIntrinsic(CI,
1228 CallInst *Sqrt =
B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1230 B.CreateFDiv(ConstantFP::get(opr0->
getType(), 1.0), Sqrt));
1235 LLVM_DEBUG(
errs() <<
"AMDIC: " << *FPOp <<
" ---> rsqrt(" << *opr0
1246 const FuncInfo &FInfo) {
1249 FuncInfo nf = FInfo;
1251 return getFunction(M, nf);
1257bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(
const CallInst *CI,
1258 bool AllowMinSizeF32,
1260 bool AllowStrictFP) {
1275 if (!AllowStrictFP && ParentF->
hasFnAttribute(Attribute::StrictFP))
1278 if (IsF32 && !AllowMinSizeF32 && ParentF->
hasMinSize())
1283void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(
IRBuilder<> &
B,
1291 if (Arg0VecTy && !Arg1VecTy) {
1292 Value *SplatRHS =
B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1294 }
else if (!Arg0VecTy && Arg1VecTy) {
1295 Value *SplatLHS =
B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1301 CI->
getModule(), IntrID, {CI->getType()}));
1304bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1306 bool AllowF64,
bool AllowStrictFP) {
1307 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1310 replaceLibCallWithSimpleIntrinsic(
B, CI, IntrID);
1314std::tuple<Value *, Value *, Value *>
1318 Function *
F =
B.GetInsertBlock()->getParent();
1319 B.SetInsertPointPastAllocas(
F);
1323 if (
Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
1328 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1331 B.SetCurrentDebugLocation(
DL);
1339 Value *CastAlloc =
B.CreateAddrSpaceCast(
Alloc, CosPtrTy);
1347 return {SinCos, LoadCos, SinCos};
1352 const FuncInfo &fInfo) {
1364 CallInst *CI = cast<CallInst>(FPOp);
1366 Function *
F =
B.GetInsertBlock()->getParent();
1372 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1376 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1379 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1380 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1381 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1390 const std::string PairName = PartnerInfo.mangle();
1394 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1395 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1404 CallInst *XI = dyn_cast<CallInst>(U);
1412 bool Handled =
true;
1414 if (UCallee->
getName() == SinName)
1416 else if (UCallee->
getName() == CosName)
1418 else if (UCallee->
getName() == SinCosPrivateName ||
1419 UCallee->
getName() == SinCosGenericName)
1426 auto *OtherOp = cast<FPMathOperator>(XI);
1427 FMF &= OtherOp->getFastMathFlags();
1436 B.setFastMathFlags(FMF);
1437 B.setDefaultFPMathTag(FPMath);
1439 B.SetCurrentDebugLocation(DbgLoc);
1441 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF,
B, FSinCos);
1445 C->replaceAllUsesWith(Res);
1450 replaceTrigInsts(SinCalls, Sin);
1451 replaceTrigInsts(CosCalls, Cos);
1452 replaceTrigInsts(SinCosCalls, SinCos);
1459bool AMDGPULibCalls::evaluateScalarMathFunc(
const FuncInfo &FInfo,
double &Res0,
1465 double opr0 = 0.0, opr1 = 0.0;
1466 ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1467 ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1480 switch (FInfo.getId()) {
1481 default :
return false;
1489 Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1502 Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1515 Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1523 Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1543 Res0 = pow(2.0, opr0);
1547 Res0 = pow(10.0, opr0);
1555 Res0 = log(opr0) / log(2.0);
1559 Res0 = log(opr0) / log(10.0);
1563 Res0 = 1.0 / sqrt(opr0);
1593 Res0 = pow(opr0, opr1);
1597 if (
ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1598 double val = (double)iopr1->getSExtValue();
1599 Res0 = pow(opr0, val);
1606 if (
ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1607 double val = (double)iopr1->getSExtValue();
1608 Res0 = pow(opr0, 1.0 / val);
1624bool AMDGPULibCalls::evaluateCall(
CallInst *aCI,
const FuncInfo &FInfo) {
1625 int numArgs = (int)aCI->
arg_size();
1632 if ((copr0 = dyn_cast<Constant>(aCI->
getArgOperand(0))) ==
nullptr)
1637 if ((copr1 = dyn_cast<Constant>(aCI->
getArgOperand(1))) ==
nullptr) {
1646 double DVal0[16], DVal1[16];
1649 if (FuncVecSize == 1) {
1650 if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
1656 for (
int i = 0; i < FuncVecSize; ++i) {
1659 if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
1667 if (FuncVecSize == 1) {
1668 nval0 = ConstantFP::get(aCI->
getType(), DVal0[0]);
1670 nval1 = ConstantFP::get(aCI->
getType(), DVal1[0]);
1673 SmallVector <float, 0> FVal0, FVal1;
1674 for (
int i = 0; i < FuncVecSize; ++i)
1678 if (hasTwoResults) {
1679 for (
int i = 0; i < FuncVecSize; ++i)
1687 if (hasTwoResults) {
1694 if (hasTwoResults) {
1697 "math function with ptr arg not supported yet");
1711 bool Changed =
false;
1714 F.printAsOperand(
dbgs(),
false,
F.getParent());
dbgs() <<
'\n';);
1716 for (
auto &BB :
F) {
1723 if (Simplifier.
fold(CI))
1740 bool Changed =
false;
1741 for (
auto &BB :
F) {
static bool isKnownIntegral(const Value *V, const DataLayout &DL, FastMathFlags FMF)
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
AMD GCN specific subclass of TargetSubtarget.
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
static void replaceCall(Instruction *I, Value *With)
bool useNative(CallInst *CI)
void initFunction(Function &F, FunctionAnalysisManager &FAM)
bool isUnsafeMath(const FPMathOperator *FPOp) const
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULIbFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
Param * getLeads()
Get leading parameters for mangled lib functions.
ENamePrefix getPrefix() const
double convertToDouble() const
Converts this APFloat to host double value.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
float convertToFloat() const
Converts this APFloat to host float value.
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
InstListType::iterator iterator
Instruction iterators...
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
bool isNoInline() const
Return true if the call should not be inlined.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
unsigned getNumElements() const
Return the number of elements in the array or vector.
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
APFloat getElementAsAPFloat(unsigned i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValue() const
const APFloat & getValueAPF() const
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
This is an important base class in LLVM.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Utility class for floating point operations which can have information about relaxed accuracy require...
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
const ParentTy * getParent() const
self_iterator getIterator()
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
AttributeMask typeIncompatible(Type *Ty, AttributeSet AS, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
@ C
The default llvm calling convention, compatible with C.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
apfloat_match m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool isKnownNeverInfinity(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
bool isKnownNeverInfOrNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point value can never contain a NaN or infinity.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool cannotBeOrderedLessThanZero(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.