Go to the documentation of this file.
25 #include "llvm/IR/IntrinsicsARM.h"
43 #define DEBUG_TYPE "armtti"
47 cl::desc("Enable the generation of masked loads and stores"));
51 cl::desc("Disable the generation of low-overhead loops"));
55 cl::desc("Enable the generation of WLS loops"));
73 unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
74 ? MemAlign
75 : IntrAlign->getLimitedValue();
88 const FeatureBitset &CallerBits =
89 TM.getSubtargetImpl(*Caller)->getFeatureBits();
90 const FeatureBitset &CalleeBits =
91 TM.getSubtargetImpl(*Callee)->getFeatureBits();
94 bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
95 (CalleeBits & ~InlineFeaturesAllowed);
98 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
99 (CalleeBits & InlineFeaturesAllowed);
100 return MatchExact && MatchSubset;
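The fragment above (lines 88-100) decides whether a callee may be inlined by comparing subtarget feature bits: features outside InlineFeaturesAllowed must match exactly, and within the allowed set the callee may not require a feature the caller lacks. A minimal standalone sketch of that bitmask rule, using plain 64-bit masks instead of LLVM's FeatureBitset (areFeaturesInlineCompatible and AllowedMask are illustrative names, not from this file):

#include <cassert>
#include <cstdint>

// Illustration of the exact-match / subset-match rule above. CallerBits and
// CalleeBits stand in for FeatureBitset; AllowedMask for InlineFeaturesAllowed.
static bool areFeaturesInlineCompatible(uint64_t CallerBits, uint64_t CalleeBits,
                                        uint64_t AllowedMask) {
  // Features not on the "allowed to differ" list must be identical.
  bool MatchExact = (CallerBits & ~AllowedMask) == (CalleeBits & ~AllowedMask);
  // Within the allowed list, the callee may not need anything the caller lacks.
  bool MatchSubset =
      ((CallerBits & CalleeBits) & AllowedMask) == (CalleeBits & AllowedMask);
  return MatchExact && MatchSubset;
}

int main() {
  const uint64_t Allowed = 0x0F; // the low four feature bits may differ
  // Callee uses an allowed feature the caller also has: compatible.
  assert(areFeaturesInlineCompatible(/*Caller=*/0x13, /*Callee=*/0x11, Allowed));
  // Callee requires an allowed feature the caller lacks: not compatible.
  assert(!areFeaturesInlineCompatible(/*Caller=*/0x10, /*Callee=*/0x12, Allowed));
  // A feature outside the allowed set differs: not compatible.
  assert(!areFeaturesInlineCompatible(/*Caller=*/0x30, /*Callee=*/0x10, Allowed));
  return 0;
}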
121 using namespace PatternMatch;
126 case Intrinsic::arm_neon_vld1: {
136 case Intrinsic::arm_neon_vld2:
137 case Intrinsic::arm_neon_vld3:
138 case Intrinsic::arm_neon_vld4:
139 case Intrinsic::arm_neon_vld2lane:
140 case Intrinsic::arm_neon_vld3lane:
141 case Intrinsic::arm_neon_vld4lane:
142 case Intrinsic::arm_neon_vst1:
143 case Intrinsic::arm_neon_vst2:
144 case Intrinsic::arm_neon_vst3:
145 case Intrinsic::arm_neon_vst4:
146 case Intrinsic::arm_neon_vst2lane:
147 case Intrinsic::arm_neon_vst3lane:
148 case Intrinsic::arm_neon_vst4lane: {
154 MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
164 case Intrinsic::arm_mve_pred_i2v: {
167 if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
173 if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
177 if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
178 if (CI->getValue().trunc(16).isAllOnesValue()) {
180 cast<FixedVectorType>(II.getType())->getNumElements(),
193 case Intrinsic::arm_mve_pred_v2i: {
196 if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
210 case Intrinsic::arm_mve_vadc:
211 case Intrinsic::arm_mve_vadc_predicated: {
213 (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
215 "Bad type for intrinsic!");
224 case Intrinsic::arm_mve_vmldava: {
226 if (I->hasOneUse()) {
227 auto *User = cast<Instruction>(*I->user_begin());
231 Value *OpX = I->getOperand(4);
232 Value *OpY = I->getOperand(5);
238 {I->getOperand(0), I->getOperand(1),
239 I->getOperand(2), OpZ, OpX, OpY});
262 if ((SImmVal >= 0 && SImmVal < 65536) ||
269 if ((SImmVal >= 0 && SImmVal < 65536) ||
276 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
303 C->getValue() == Imm && Imm.isNegative() && (-Imm).isPowerOf2()) {
305 auto isSSatMin = [&](Value *MinInst) {
306 if (isa<SelectInst>(MinInst)) {
307 Value *MinLHS, *MinRHS;
335 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
336 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
340 if (Opcode == Instruction::And) {
342 if (Imm == 255 || Imm == 65535)
354 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
357 if (ST->isThumb2() && NegImm < 1<<12)
360 if (ST->isThumb() && NegImm < 1<<8)
374 (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
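Several of the immediate-cost checks above are plain range tests: a non-negative value below 65536 can be materialized with a single MOVW, and for a compare against a negative immediate the code asks whether the negated value fits a 12-bit (Thumb-2) or 8-bit (Thumb) immediate so the compare can become a CMN. A hedged sketch of just those range tests (fitsMovw and negatedImmFitsCmn are illustrative helpers; the real code also accepts shifter-operand encodings via ARM_AM::getSOImmVal / getT2SOImmVal):

#include <cassert>
#include <cstdint>

// Non-negative 16-bit values can be built with one MOVW.
static bool fitsMovw(int64_t SImmVal) {
  return SImmVal >= 0 && SImmVal < 65536;
}

// For `icmp x, -C`, ask whether C fits the immediate field of a CMN.
static bool negatedImmFitsCmn(int64_t SImmVal, bool IsThumb2) {
  if (SImmVal >= 0)
    return false;
  uint64_t NegImm = static_cast<uint64_t>(-SImmVal);
  return IsThumb2 ? NegImm < (1u << 12) : NegImm < (1u << 8);
}

int main() {
  assert(fitsMovw(65535) && !fitsMovw(65536) && !fitsMovw(-1));
  assert(negatedImmFitsCmn(-4095, /*IsThumb2=*/true));
  assert(!negatedImmFitsCmn(-4096, /*IsThumb2=*/true));
  assert(negatedImmFitsCmn(-255, /*IsThumb2=*/false));
  return 0;
}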
399 assert(ISD && "Invalid opcode");
402 auto AdjustCost = [&CostKind](int Cost) {
404 return Cost == 0 ? 0 : 1;
407 auto IsLegalFPType = [this](EVT VT) {
425 (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
426 Opcode == Instruction::SExt)) ||
428 (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
429 IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
453 return AdjustCost(Entry->Cost);
473 if (const auto *Entry =
485 if (const auto *Entry =
502 if (const auto *Entry =
513 if (const auto *Entry =
538 auto *User = cast<Instruction>(*I->user_begin());
543 return AdjustCost(Entry->Cost);
561 return AdjustCost(LT.first * Entry->Cost);
654 return AdjustCost(Entry->Cost);
684 return AdjustCost(Entry->Cost);
715 return AdjustCost(Entry->Cost);
752 if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
755 return Lanes * CallCost;
786 return AdjustCost(Entry->Cost);
804 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
805 Opcode == Instruction::ExtractElement)) {
808 if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
819 Opcode == Instruction::ExtractElement)) {
826 cast<FixedVectorType>(ValTy)->getNumElements() / 2;
866 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
868 Sel = cast<Instruction>(Sel->user_back());
871 const Value *LHS, *RHS;
931 (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
932 cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
934 FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
957 if (LT.second.getVectorNumElements() > 2) {
959 return LT.first * BaseCost +
981 unsigned NumVectorInstToHideOverhead = 10;
982 int MaxMergeDistance = 64;
987 return NumVectorInstToHideOverhead;
1000 switch (II->getIntrinsicID()) {
1001 case Intrinsic::arm_mve_vctp8:
1002 case Intrinsic::arm_mve_vctp16:
1003 case Intrinsic::arm_mve_vctp32:
1004 case Intrinsic::arm_mve_vctp64:
1017 if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
1019 if (VecTy->getNumElements() == 2)
1024 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
1029 return (EltWidth == 32 && Alignment >= 4) ||
1030 (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
1046 if (isa<VectorType>(Ty))
1050 return ((EltWidth == 32 && Alignment >= 4) ||
1051 (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
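isLegalMaskedLoad/Store and isLegalMaskedGather above both end in the same element-width/alignment rule: 32-bit elements need at least 4-byte alignment, 16-bit elements at least 2-byte alignment, and 8-bit elements are always acceptable. A small sketch of just that predicate (eltWidthAndAlignmentOK is an illustrative name; the real hooks first check for MVE and reject unsupported vector shapes):

#include <cassert>

// Illustrative predicate mirroring the EltWidth/Alignment rule above.
static bool eltWidthAndAlignmentOK(unsigned EltWidthBits, unsigned AlignBytes) {
  return (EltWidthBits == 32 && AlignBytes >= 4) ||
         (EltWidthBits == 16 && AlignBytes >= 2) ||
         EltWidthBits == 8;
}

int main() {
  assert(eltWidthAndAlignmentOK(32, 4));
  assert(!eltWidthAndAlignmentOK(32, 2)); // under-aligned 32-bit elements
  assert(eltWidthAndAlignmentOK(8, 1));   // byte elements: any alignment
  assert(!eltWidthAndAlignmentOK(64, 8)); // 64-bit elements are rejected here
  return 0;
}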
1059 unsigned DstAddrSpace = ~0u;
1060 unsigned SrcAddrSpace = ~0u;
1061 const Function *F = I->getParent()->getParent();
1063 if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
1064 ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
1069 const unsigned Size = C->getValue().getZExtValue();
1070 const Align DstAlign = *MC->getDestAlign();
1071 const Align SrcAlign = *MC->getSourceAlign();
1075 DstAddrSpace = MC->getDestAddressSpace();
1076 SrcAddrSpace = MC->getSourceAddressSpace();
1078 else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
1079 ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
1084 const unsigned Size = C->getValue().getZExtValue();
1085 const Align DstAlign = *MS->getDestAlign();
1089 DstAddrSpace = MS->getDestAddressSpace();
1094 unsigned Limit, Factor = 2;
1095 switch(I->getIntrinsicID()) {
1099 case Intrinsic::memmove:
1102 case Intrinsic::memset:
1113 std::vector<EVT> MemOps;
1114 if (getTLI()->findOptimalMemOpLowering(
1115 MemOps, Limit, MOp, DstAddrSpace,
1116 SrcAddrSpace, F->getAttributes()))
1117 return MemOps.size() * Factor;
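getNumMemOps asks the target's findOptimalMemOpLowering how a fixed-size memcpy/memmove/memset would be split into legal chunks and multiplies the chunk count by Factor (2 for copies, which need a load and a store per chunk; 1 for memset). The toy below illustrates only the counting idea with a greedy largest-chunk-first split, which is an assumption for illustration; the real lowering picks legal value types and respects alignment, address spaces, and per-target store limits:

#include <cassert>
#include <cstdint>

// Greedy split of Size bytes into power-of-two chunks of at most MaxChunk
// bytes. Stand-in for findOptimalMemOpLowering, which picks legal EVTs instead.
static unsigned countChunks(uint64_t Size, unsigned MaxChunk) {
  unsigned Chunks = 0;
  for (unsigned C = MaxChunk; C >= 1 && Size > 0; C /= 2) {
    Chunks += Size / C;
    Size %= C;
  }
  return Chunks;
}

// Cost in "memory operations": loads plus stores for copies, stores for memset.
static unsigned numMemOps(uint64_t Size, bool IsMemset) {
  const unsigned Factor = IsMemset ? 1 : 2;
  return countChunks(Size, /*MaxChunk=*/4) * Factor;
}

int main() {
  // 10 bytes split as 4+4+2: 3 chunks -> 6 ops for memcpy, 3 stores for memset.
  assert(numMemOps(10, /*IsMemset=*/false) == 6);
  assert(numMemOps(10, /*IsMemset=*/true) == 3);
  return 0;
}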
1153 if (const auto *Entry =
1155 return LT.first * Entry->Cost;
1175 if (const auto *Entry =
1177 return LT.first * Entry->Cost;
1201 return LT.first * Entry->Cost;
1218 return LT.first * Entry->Cost *
1241 switch (ISDOpcode) {
1255 const unsigned FunctionCallDivCost = 20;
1256 const unsigned ReciprocalDivCost = 10;
1299 return LT.first * Entry->Cost;
1303 Opd1PropInfo, Opd2PropInfo);
1321 auto LooksLikeAFreeShift = [&]() {
1331 switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
1333 case Instruction::Sub:
1334 case Instruction::And:
1335 case Instruction::Xor:
1336 case Instruction::Or:
1337 case Instruction::ICmp:
1343 if (LooksLikeAFreeShift())
1357 return LT.first * BaseCost;
1360 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
1361 unsigned Num = VTy->getNumElements();
1387 (Alignment && *Alignment != Align(16)) &&
1388 cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
1392 return LT.first * 4;
1399 isa<FPExtInst>(*I->user_begin())) ||
1405 : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
1428 if (!isa<FixedVectorType>(Src))
1433 return cast<FixedVectorType>(Src)->getNumElements() * 8;
1439 bool UseMaskForCond, bool UseMaskForGaps) {
1440 assert(Factor >= 2 && "Invalid interleave factor");
1441 assert(isa<VectorType>(VecTy) && "Expect a vector type");
1446 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
1447 !UseMaskForCond && !UseMaskForGaps) {
1448 unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
1457 if (NumElts % Factor == 0 &&
1469 return 2 * BaseCost;
1474 UseMaskForCond, UseMaskForGaps);
1478 const Value *Ptr, bool VariableMask,
1482 using namespace PatternMatch;
1488 auto *VTy = cast<FixedVectorType>(DataTy);
1492 unsigned NumElems = VTy->getNumElements();
1493 unsigned EltSize = VTy->getScalarSizeInBits();
1494 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
1501 unsigned VectorCost =
1502 NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
1505 unsigned ScalarCost = NumElems * LT.first +
1509 if (EltSize < 8 || Alignment < EltSize / 8)
1512 unsigned ExtSize = EltSize;
1519 match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
1521 const User *Us = *I->users().begin();
1522 if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
1525 cast<Instruction>(Us)->getType()->getScalarSizeInBits();
1526 if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
1527 (TypeSize == 16 && EltSize == 8)) &&
1536 match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
1537 (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
1539 unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
1540 if (((EltSize == 16 && TypeSize == 32) ||
1547 if (ExtSize * NumElems != 128 || NumElems < 4)
1556 if (ExtSize != 8 && ExtSize != 16)
1559 if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
1560 Ptr = BC->getOperand(0);
1561 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1562 if (GEP->getNumOperands() != 2)
1566 if (Scale != 1 && Scale * 8 != ExtSize)
1569 if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
1570 if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
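The gather/scatter path above stays vectorized only when the (possibly extended or truncated) element size times the lane count is exactly one 128-bit MVE vector with at least four lanes, and when the GEP offsets are scaled by either 1 or the element size in bytes. A hedged sketch of those two predicates (fitsOneMVEGather and gepScaleOK are illustrative helpers, not this file's API):

#include <cassert>

// One MVE gather/scatter handles exactly 128 bits of (extended) payload,
// with a minimum of four lanes.
static bool fitsOneMVEGather(unsigned ExtSizeBits, unsigned NumElems) {
  return ExtSizeBits * NumElems == 128 && NumElems >= 4;
}

// Offsets in the GEP may be unscaled (Scale == 1) or scaled by the
// element size in bytes (Scale * 8 == ExtSizeBits).
static bool gepScaleOK(unsigned Scale, unsigned ExtSizeBits) {
  return Scale == 1 || Scale * 8 == ExtSizeBits;
}

int main() {
  assert(fitsOneMVEGather(32, 4));  // v4i32
  assert(fitsOneMVEGather(16, 8));  // v8i16, or v8i8 extended to 16 bits
  assert(!fitsOneMVEGather(32, 2)); // too few lanes
  assert(!fitsOneMVEGather(64, 2)); // 64-bit elements are not handled
  assert(gepScaleOK(1, 32) && gepScaleOK(4, 32) && !gepScaleOK(2, 32));
  return 0;
}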
1579 bool IsPairwiseForm,
1622 switch (ICA.getID()) {
1623 case Intrinsic::get_active_lane_mask:
1634 case Intrinsic::sadd_sat:
1635 case Intrinsic::ssub_sat:
1636 case Intrinsic::uadd_sat:
1637 case Intrinsic::usub_sat: {
1642 std::pair<int, MVT> LT =
1685 if (!F->isIntrinsic())
1689 if (F->getName().startswith("llvm.arm"))
1692 switch (F->getIntrinsicID()) {
1695 case Intrinsic::sin:
1696 case Intrinsic::cos:
1697 case Intrinsic::pow:
1698 case Intrinsic::log:
1699 case Intrinsic::log10:
1701 case Intrinsic::exp:
1702 case Intrinsic::exp2:
1704 case Intrinsic::sqrt:
1705 case Intrinsic::fabs:
1706 case Intrinsic::copysign:
1710 case Intrinsic::rint:
1711 case Intrinsic::nearbyint:
1713 case Intrinsic::canonicalize:
1714 case Intrinsic::lround:
1715 case Intrinsic::llround:
1716 case Intrinsic::lrint:
1717 case Intrinsic::llrint:
1718 if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
1720 if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
1726 case Intrinsic::masked_store:
1727 case Intrinsic::masked_load:
1728 case Intrinsic::masked_gather:
1729 case Intrinsic::masked_scatter:
1731 case Intrinsic::sadd_with_overflow:
1732 case Intrinsic::uadd_with_overflow:
1733 case Intrinsic::ssub_with_overflow:
1734 case Intrinsic::usub_with_overflow:
1735 case Intrinsic::sadd_sat:
1736 case Intrinsic::uadd_sat:
1737 case Intrinsic::ssub_sat:
1738 case Intrinsic::usub_sat:
1753 if (auto *Call = dyn_cast<CallInst>(&I)) {
1754 if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
1755 switch(II->getIntrinsicID()) {
1757 case Intrinsic::memset:
1758 case Intrinsic::memmove:
1761 if (const Function *F = Call->getCalledFunction())
1770 switch (I.getOpcode()) {
1773 case Instruction::FPToSI:
1774 case Instruction::FPToUI:
1775 case Instruction::SIToFP:
1776 case Instruction::UIToFP:
1777 case Instruction::FPTrunc:
1778 case Instruction::FPExt:
1808 switch (I.getOpcode()) {
1811 case Instruction::Alloca:
1815 case Instruction::PHI:
1822 if (I.getType()->isDoubleTy() && !ST->hasFP64())
1849 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
1854 const SCEV *TripCountSCEV =
1860 LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
1868 if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
1869 switch (Call->getIntrinsicID()) {
1872 case Intrinsic::start_loop_iterations:
1873 case Intrinsic::test_set_loop_iterations:
1874 case Intrinsic::loop_decrement:
1875 case Intrinsic::loop_decrement_reg:
1885 bool IsTailPredLoop = false;
1886 auto ScanLoop = [&](Loop *L) {
1888 for (auto &I : *BB) {
1890 isa<InlineAsm>(I)) {
1894 if (auto *II = dyn_cast<IntrinsicInst>(&I))
1896 II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
1897 II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
1898 II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
1899 II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
1900 II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
1907 for (auto Inner : *L)
1908 if (!ScanLoop(Inner))
1930 if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
1933 if (isa<FCmpInst>(&I))
1938 if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))
1942 if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
1943 if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
1947 if (isa<TruncInst>(&I))
1948 if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
1967 LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
1978 bool ReductionsDisabled =
1982 for (auto *I : LiveOuts) {
1983 if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
1984 !I->getType()->isHalfTy()) {
1985 LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
1986 "live-out value\n");
1989 if (ReductionsDisabled) {
2002 if (isa<PHINode>(&I))
2010 if (T->isPointerTy())
2011 T = T->getPointerElementType();
2013 if (T->getScalarSizeInBits() > 32) {
2017 if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
2018 Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
2020 if (NextStride == 1) {
2025 } else if (NextStride == -1 ||
2029 << "Consecutive strides of 2 found, vld2/vstr2 can't "
2030 "be tail-predicated\n.");
2038 const SCEV *PtrScev =
2040 if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
2041 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
2047 "tail-predicate\n.");
2053 LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
2076 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
2085 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2093 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2099 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2137 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2141 if (ExitingBlocks.size() > 2)
2157 for (auto &I : *BB) {
2160 if (I.getType()->isVectorTy())
2163 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
2206 return ScalarBits <= 64;