#include "llvm/IR/IntrinsicsAMDGPU.h"
#define DEBUG_TYPE "amdgpu-simplifylib"
    cl::desc("Enable pre-link mode optimizations"),
    cl::desc("Comma separated list of functions to replace with native, or all"),
#define MATH_PI      numbers::pi
#define MATH_E       numbers::e
#define MATH_SQRT2   numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2
  bool UnsafeFPMath = false;
  bool AllNative = false;
  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1,
  std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
  bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
                                         bool AllowMinSizeF32 = false,
                                         bool AllowF64 = false,
                                         bool AllowStrictFP = false);
                                              bool AllowMinSizeF32 = false,
                                              bool AllowF64 = false,
                                              bool AllowStrictFP = false);
  I->replaceAllUsesWith(With);
  I->eraseFromParent();
template <typename IRB>
  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
    R->setCallingConv(F->getCallingConv());
template <typename IRB>
  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
    R->setCallingConv(F->getCallingConv());
  if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
    PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
  return FunctionType::get(FT->getReturnType(),
                           {FT->getParamType(0), PowNExpTy}, false);
  case AMDGPULibFunc::EI_DIVIDE:
  case AMDGPULibFunc::EI_COS:
  case AMDGPULibFunc::EI_EXP:
  case AMDGPULibFunc::EI_EXP2:
  case AMDGPULibFunc::EI_EXP10:
  case AMDGPULibFunc::EI_LOG:
  case AMDGPULibFunc::EI_LOG2:
  case AMDGPULibFunc::EI_LOG10:
  case AMDGPULibFunc::EI_POWR:
  case AMDGPULibFunc::EI_RECIP:
  case AMDGPULibFunc::EI_RSQRT:
  case AMDGPULibFunc::EI_SIN:
  case AMDGPULibFunc::EI_SINCOS:
  case AMDGPULibFunc::EI_SQRT:
  case AMDGPULibFunc::EI_TAN:
  case AMDGPULibFunc::EI_NCOS:
  case AMDGPULibFunc::EI_NEXP2:
  case AMDGPULibFunc::EI_NLOG2:
  case AMDGPULibFunc::EI_NRSQRT:
  case AMDGPULibFunc::EI_NSIN:
  case AMDGPULibFunc::EI_NSQRT:
bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
  return UnsafeFPMath || FPOp->isFast();
  return UnsafeFPMath ||
  UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool();
bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
  AllNative = useNativeFunc("all") ||
bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
  bool native_sin = useNativeFunc("sin");
  bool native_cos = useNativeFunc("cos");
  if (native_sin && native_cos) {
  if (sinExpr && cosExpr) {
                      << " with native version of sin/cos");
  if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
      !(AllNative || useNativeFunc(FInfo.getName()))) {
    return sincosUseNative(aCI, FInfo);
                    << " with native version");
                                          const FuncInfo &FInfo) {
  if (!Callee->isDeclaration())
  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
  auto *M = Callee->getParent();
  std::string Name = std::string(Callee->getName());
  if (NumArg != 4 && NumArg != 6)
  if (!PacketSize || !PacketAlign)
  if (Alignment != Size)
  unsigned PtrArgLoc = CI->arg_size() - 3;
  for (unsigned I = 0; I != PtrArgLoc; ++I)
  auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
  for (unsigned I = 0; I != PtrArgLoc; ++I)
  Args.push_back(BCast);
  auto *NCI = B.CreateCall(F, Args);
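The fold_read_write_pipe fragments above specialize OpenCL read_pipe/write_pipe calls whose packet size and alignment are compile-time constants. A minimal standalone sketch of that selection logic follows; the helper name pickPipeVariant and the supported-size list are assumptions for illustration, not the pass's actual API.

#include <string>

// Hedged sketch: pick a size-specialized pipe builtin (e.g. "__read_pipe_2"
// becomes "__read_pipe_2_4" for a 4-byte packet) when the packet size equals
// its alignment and is one of the handled widths; otherwise keep the generic
// form. Names and the width list are illustrative.
static std::string pickPipeVariant(const std::string &Base, unsigned NumArgs,
                                   unsigned Size, unsigned Align) {
  if ((NumArgs != 4 && NumArgs != 6) || Size != Align)
    return Base;                                  // no specialization
  switch (Size) {
  case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128:
    return Base + "_" + std::to_string(Size);     // size-specialized variant
  default:
    return Base;
  }
}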
  if (isa<UndefValue>(V))
  if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
    return CF->getValueAPF().isInteger();
  for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
    Constant *ConstElt = CDV->getElementAsConstant(i);
    if (isa<UndefValue>(ConstElt))
    const ConstantFP *CFP = dyn_cast<ConstantFP>(ConstElt);
  switch (I->getOpcode()) {
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Call: {
    case Intrinsic::trunc:
    case Intrinsic::floor:
    case Intrinsic::ceil:
    case Intrinsic::rint:
    case Intrinsic::nearbyint:
    case Intrinsic::round:
    case Intrinsic::roundeven:
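isKnownIntegral treats int-to-fp conversions and the rounding intrinsics listed above as producers of integral values. The short self-check below illustrates that property on plain C++ doubles; it demonstrates the invariant the pass relies on (e.g. when narrowing pow to pown), not the LLVM helper itself.

#include <cassert>
#include <cmath>
#include <initializer_list>

// Hedged illustration: every result of these rounding operations is integral.
int main() {
  for (double v : {2.75, -3.5, 0.25}) {
    for (double r : {std::trunc(v), std::floor(v), std::ceil(v),
                     std::rint(v), std::nearbyint(v), std::round(v)})
      assert(r == static_cast<double>(static_cast<long long>(r)));
  }
  return 0;
}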
  if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
  if (!parseFunctionName(Callee->getName(), FInfo))
  if (TDOFold(CI, FInfo))
  B.setFastMathFlags(FMF);
  switch (FInfo.getId()) {
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
    if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
        VecTy && !isa<VectorType>(Arg1->getType())) {
      Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
          {CI->getType(), CI->getArgOperand(1)->getType()}));
    Module *M = Callee->getParent();
    CallInst *Call = cast<CallInst>(FPOp);
                              TLInfo, 0, AC, Call, DT)) {
      Call->setCalledFunction(PowrFunc);
      return fold_pow(FPOp, B, PowrInfo) || true;
                             FPOp->getFastMathFlags())) {
          B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
      Call->removeParamAttrs(
      Call->setCalledFunction(PownFunc);
      Call->setArgOperand(1, CastedArg);
      return fold_pow(FPOp, B, PownInfo) || true;
    return fold_pow(FPOp, B, FInfo);
    return fold_pow(FPOp, B, FInfo);
    return fold_rootn(FPOp, B, FInfo);
    return fold_sqrt(FPOp, B, FInfo);
    return fold_sincos(FPOp, B, FInfo);
  switch (FInfo.getId()) {
    return fold_read_write_pipe(CI, B, FInfo);
bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
  int const sz = (int)tr.size();
    for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
          CV->getElementAsConstant((unsigned)eltNo));
      assert(eltval && "Non-FP arguments in math function!");
      for (int i=0; i < sz; ++i) {
      for (unsigned i = 0; i < DVal.size(); ++i) {
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
    for (int i = 0; i < sz; ++i) {
      if (CF->isExactlyValue(tr[i].input)) {
        LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
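TDOFold, shown above, is table-driven constant folding: when a constant argument exactly matches a table input, the call is replaced with the tabulated result. A minimal sketch of that lookup follows; the two-entry sin table and the name tdoFold are made up for illustration.

#include <optional>

// Hedged sketch of table-driven folding; the entries (sin(0) = 0,
// sin(pi/2) = 1) are examples, not the pass's actual tables.
struct TableEntry { double result, input; };
static const TableEntry tbl_sin_demo[] = {{0.0, 0.0}, {1.0, 1.5707963267948966}};

static std::optional<double> tdoFold(double Arg) {
  for (const TableEntry &E : tbl_sin_demo)
    if (Arg == E.input)        // exact match only, like isExactlyValue()
      return E.result;
  return std::nullopt;         // no table hit: leave the call alone
}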
                             const FuncInfo &FInfo) {
         "fold_pow: encounter a wrong function call");
  Module *M = B.GetInsertBlock()->getModule();
  const APInt *CINT = nullptr;
  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
    Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
    Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
                      << '(' << *opr0 << ")\n");
      int ival = (int)dval;
      if ((double)ival == dval) {
        ci_opr1 = 0x11111111;
  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
  if (abs_opr1 <= 12) {
    Value *valx2 = nullptr;
    while (abs_opr1 > 0) {
      valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
        nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
      nval = B.CreateFDiv(cnval, nval, "__1powprod");
                      << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
  if (ShouldUseIntrinsic)
  bool needlog = false;
  bool needabs = false;
  bool needcopysign = false;
      V = log2(std::abs(V));
           "Wrong vector size detected");
      if (V < 0.0) needcopysign = true;
      V = log2(std::abs(V));
      for (unsigned i=0; i < DVal.size(); ++i) {
      nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
    nval = cnval ? cnval : opr0;
  if (ShouldUseIntrinsic) {
    opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
  nval = B.CreateFMul(opr1, nval, "__ylogx");
    if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
      opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
      opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
    Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
    sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
    nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
    nval = B.CreateBitCast(nval, opr0->getType());
                    << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
                               const FuncInfo &FInfo) {
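A scalar-semantics restatement of the fold_pow strategy above: trivial exponents first, a multiply chain for small integer exponents, otherwise exp2(y * log2(|x|)) with the sign restored for odd integer exponents. foldPowSketch is a hypothetical name; the pass emits the corresponding IR rather than calling libm, and only under relaxed FP semantics.

#include <cmath>

// Hedged sketch of fold_pow for an integer exponent n.
static double foldPowSketch(double x, int n) {
  if (n == 0)  return 1.0;
  if (n == 1)  return x;
  if (n == 2)  return x * x;
  if (n == -1) return 1.0 / x;
  unsigned absN = (n < 0) ? -n : n;
  if (absN <= 12) {                            // multiply chain by squaring
    double prod = 1.0, pow2 = x;
    for (; absN; absN >>= 1, pow2 *= pow2)
      if (absN & 1) prod *= pow2;
    return (n < 0) ? 1.0 / prod : prod;
  }
  double mag = std::exp2(n * std::log2(std::fabs(x)));   // exp2(y * log2|x|)
  return (x < 0.0 && (n & 1)) ? -mag : mag;               // odd n keeps the sign
}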
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
  Module *M = B.GetInsertBlock()->getModule();
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0
  } else if (ci_opr1 == 3) {
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
  } else if (ci_opr1 == -1) {
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
  } else if (ci_opr1 == -2) {
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
                                      const FuncInfo &FInfo) {
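The fold_rootn fragments above only handle a few special exponents; rootn(x, n) is x^(1/n). A scalar restatement of those cases, with the hypothetical name foldRootnSketch; every other exponent is left to the library call.

#include <cmath>
#include <optional>

// Hedged sketch of the rootn special cases.
static std::optional<double> foldRootnSketch(double x, int n) {
  switch (n) {
  case 1:  return x;                     // rootn(x, 1)  -> x
  case 2:  return std::sqrt(x);          // rootn(x, 2)  -> sqrt(x)
  case 3:  return std::cbrt(x);          // rootn(x, 3)  -> cbrt(x)
  case -1: return 1.0 / x;               // rootn(x, -1) -> 1 / x
  case -2: return 1.0 / std::sqrt(x);    // rootn(x, -2) -> rsqrt(x)
  default: return std::nullopt;          // keep the original call
  }
}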
  FuncInfo nf = FInfo;
  return getFunction(M, nf);
bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
                                                       bool AllowMinSizeF32,
                                                       bool AllowStrictFP) {
  if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
  if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
  if (Arg0VecTy && !Arg1VecTy) {
    Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
  } else if (!Arg0VecTy && Arg1VecTy) {
    Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
    bool AllowF64, bool AllowStrictFP) {
  if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
  replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
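Replacement with an LLVM intrinsic is gated by the checks above: strictfp callers are skipped unless explicitly allowed, and f32 forms are skipped in minimum-size functions unless allowed. The boolean sketch below captures only that gate; shouldReplaceSketch and its parameters are illustrative and omit the type and fast-math checks the real predicate also performs.

// Hedged sketch of the gating logic only.
static bool shouldReplaceSketch(bool IsF32, bool CallerIsStrictFP,
                                bool CallerHasMinSize, bool AllowMinSizeF32,
                                bool AllowStrictFP) {
  if (!AllowStrictFP && CallerIsStrictFP)
    return false;            // leave strictfp code alone by default
  if (IsF32 && !AllowMinSizeF32 && CallerHasMinSize)
    return false;            // f32 expansion may grow -Oz functions
  return true;
}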
                              const FuncInfo &FInfo) {
  Module *M = B.GetInsertBlock()->getModule();
                    << "sqrt(" << *opr0 << ")\n");
std::tuple<Value *, Value *, Value *>
  Function *F = B.GetInsertBlock()->getParent();
  B.SetInsertPointPastAllocas(F);
  if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
    B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
  B.SetCurrentDebugLocation(DL);
  Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);
  return {SinCos, LoadCos, SinCos};
                                const FuncInfo &fInfo) {
  CallInst *CI = cast<CallInst>(FPOp);
  Function *F = B.GetInsertBlock()->getParent();
  SinCosLibFuncPrivate.getLeads()[0].PtrKind =
  SinCosLibFuncGeneric.getLeads()[0].PtrKind =
  FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
  FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
  FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
  const std::string PairName = PartnerInfo.mangle();
  const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
  const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
    CallInst *XI = dyn_cast<CallInst>(U);
    bool Handled = true;
    if (UCallee->getName() == SinName)
    else if (UCallee->getName() == CosName)
    else if (UCallee->getName() == SinCosPrivateName ||
             UCallee->getName() == SinCosGenericName)
    auto *OtherOp = cast<FPMathOperator>(XI);
    FMF &= OtherOp->getFastMathFlags();
  B.setFastMathFlags(FMF);
  B.setDefaultFPMathTag(FPMath);
  B.SetCurrentDebugLocation(DbgLoc);
  auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
      C->replaceAllUsesWith(Res);
  replaceTrigInsts(SinCalls, Sin);
  replaceTrigInsts(CosCalls, Cos);
  replaceTrigInsts(SinCosCalls, SinCos);
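fold_sincos walks the other users of the same argument and, when both sin(x) and cos(x) (or an existing sincos) appear, rewrites them onto one sincos call whose cosine result goes through a private-address-space slot. The scalar sketch below shows the payoff; evalBoth is a hypothetical stand-in for that combined call.

#include <cmath>
#include <utility>

// Hedged sketch: one shared evaluation replaces separate sin() and cos()
// calls on the same argument.
static std::pair<double, double> evalBoth(double x) {
  return {std::sin(x), std::cos(x)};   // stands in for one sincos(x, &c) call
}
static double beforeFold(double x) { return std::sin(x) + std::cos(x); }
static double afterFold(double x) {
  auto [s, c] = evalBoth(x);           // argument evaluated once, both results reused
  return s + c;
}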
bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
  double opr0 = 0.0, opr1 = 0.0;
  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
  switch (FInfo.getId()) {
  default : return false;
    Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
    Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
    Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
    Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
    Res0 = pow(2.0, opr0);
    Res0 = pow(10.0, opr0);
    Res0 = log(opr0) / log(2.0);
    Res0 = log(opr0) / log(10.0);
    Res0 = 1.0 / sqrt(opr0);
    Res0 = pow(opr0, opr1);
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
      Res0 = pow(opr0, val);
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
      Res0 = pow(opr0, 1.0 / val);
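These cases evaluate calls with constant arguments on the host using the identities visible above, for example acosh(x) = log(x + sqrt(x*x - 1)), exp2(x) = pow(2, x), and rootn(x, n) = pow(x, 1/n). The small check below confirms a few of those identities against host libm; it is a sanity illustration, not part of the pass.

#include <cassert>
#include <cmath>

// Hedged illustration: the folding identities agree with libm on the host.
int main() {
  double x = 2.5;
  assert(std::fabs(std::log(x + std::sqrt(x * x - 1.0)) - std::acosh(x)) < 1e-12);
  assert(std::fabs(std::pow(2.0, x) - std::exp2(x)) < 1e-12);
  assert(std::fabs(std::pow(8.0, 1.0 / 3.0) - 2.0) < 1e-12);   // rootn(8, 3) == 2
  return 0;
}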
bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
  int numArgs = (int)aCI->arg_size();
  if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
    if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
  double DVal0[16], DVal1[16];
  if (FuncVecSize == 1) {
    if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
    for (int i = 0; i < FuncVecSize; ++i) {
      if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
  if (FuncVecSize == 1) {
    SmallVector <float, 0> FVal0, FVal1;
    for (int i = 0; i < FuncVecSize; ++i)
    if (hasTwoResults) {
      for (int i = 0; i < FuncVecSize; ++i)
    if (hasTwoResults) {
  if (hasTwoResults) {
         "math function with ptr arg not supported yet");
  bool Changed = false;
             F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
  for (auto &BB : F) {
      if (Simplifier.fold(CI))
  bool Changed = false;
  for (auto &BB : F) {
static bool isKnownIntegral(const Value *V, const DataLayout &DL, FastMathFlags FMF)
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static const TableEntry tbl_exp2[]
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
AMD GCN specific subclass of TargetSubtarget.
FunctionAnalysisManager FAM
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
static void replaceCall(Instruction *I, Value *With)
bool useNative(CallInst *CI)
void initFunction(Function &F, FunctionAnalysisManager &FAM)
bool isUnsafeMath(const FPMathOperator *FPOp) const
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULibFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const FunctionType *FuncTy) const
Param * getLeads()
Get leading parameters for mangled lib functions.
ENamePrefix getPrefix() const
double convertToDouble() const
Converts this APFloat to host double value.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
float convertToFloat() const
Converts this APFloat to host float value.
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isNoInline() const
Return true if the call should not be inlined.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
unsigned arg_size() const
AttributeList getAttributes() const
Return the parameter attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
unsigned getNumElements() const
Return the number of elements in the array or vector.
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
APFloat getElementAsAPFloat(unsigned i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValue() const
const APFloat & getValueAPF() const
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
This is an important base class in LLVM.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Utility class for floating point operations which can have information about relaxed accuracy require...
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
Convenience struct for specifying and reasoning about fast-math flags.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
Base class of all SIMD vector types.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
apfloat_match m_APFloatAllowUndef(const APFloat *&Res)
Match APFloat while allowing undefs in splat vector constants.
apint_match m_APIntAllowUndef(const APInt *&Res)
Match APInt while allowing undefs in splat vector constants.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool isKnownNeverInfinity(const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
bool cannotBeOrderedLessThanZero(const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isKnownNeverInfOrNaN(const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the floating-point value can never contain a NaN or infinity.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.