25 #include "llvm/IR/IntrinsicsARM.h"
43 #define DEBUG_TYPE "armtti"
47 cl::desc("Enable the generation of masked loads and stores"));
51 cl::desc("Disable the generation of low-overhead loops"));
55 cl::desc("Enable the generation of WLS loops"));
73 unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign ? MemAlign : IntrAlign->getLimitedValue();
89 const FeatureBitset &CallerBits = TM.getSubtargetImpl(*Caller)->getFeatureBits();
91 const FeatureBitset &CalleeBits = TM.getSubtargetImpl(*Callee)->getFeatureBits();
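// Features outside InlineFeaturesAllowed must match exactly between caller and
// callee; features inside it only need to be a subset of the caller's.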
94 bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
95 (CalleeBits & ~InlineFeaturesAllowed);
98 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
99 (CalleeBits & InlineFeaturesAllowed);
100 return MatchExact && MatchSubset;
106 if (ST->hasMVEIntegerOps())
121 using namespace PatternMatch;
126 case Intrinsic::arm_neon_vld1: {
136 case Intrinsic::arm_neon_vld2:
137 case Intrinsic::arm_neon_vld3:
138 case Intrinsic::arm_neon_vld4:
139 case Intrinsic::arm_neon_vld2lane:
140 case Intrinsic::arm_neon_vld3lane:
141 case Intrinsic::arm_neon_vld4lane:
142 case Intrinsic::arm_neon_vst1:
143 case Intrinsic::arm_neon_vst2:
144 case Intrinsic::arm_neon_vst3:
145 case Intrinsic::arm_neon_vst4:
146 case Intrinsic::arm_neon_vst2lane:
147 case Intrinsic::arm_neon_vst3lane:
148 case Intrinsic::arm_neon_vst4lane: {
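// Each of these NEON load/store intrinsics takes its alignment as the last
// argument; raise it to the alignment that can be proven for the pointer.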
152 unsigned AlignArg = II.arg_size() - 1;
154 MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
164 case Intrinsic::arm_mve_pred_i2v: {
167 if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
173 if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
177 if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
178 if (CI->getValue().trunc(16).isAllOnes()) {
180 cast<FixedVectorType>(II.getType())->getNumElements(),
193 case Intrinsic::arm_mve_pred_v2i: {
196 if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
210 case Intrinsic::arm_mve_vadc:
211 case Intrinsic::arm_mve_vadc_predicated: {
213 (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
215 "Bad type for intrinsic!");
224 case Intrinsic::arm_mve_vmldava: {
226 if (I->hasOneUse()) {
227 auto *User = cast<Instruction>(*I->user_begin());
231 Value *OpX = I->getOperand(4);
232 Value *OpY = I->getOperand(5);
238 {I->getOperand(0), I->getOperand(1),
239 I->getOperand(2), OpZ, OpX, OpY});
255 SimplifyAndSetOp) const {
260 auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
261 unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
262 unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
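// These narrowing intrinsics write only the top or the bottom lanes of the
// result; the remaining lanes are passed through from the first operand, so
// only those elements of it are demanded.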
269 SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
279 case Intrinsic::arm_mve_vcvt_narrow:
280 SimplifyNarrowInstrTopBottom(2);
282 case Intrinsic::arm_mve_vqmovn:
283 SimplifyNarrowInstrTopBottom(4);
285 case Intrinsic::arm_mve_vshrn:
286 SimplifyNarrowInstrTopBottom(7);
303 if (!ST->isThumb()) {
304 if ((SImmVal >= 0 && SImmVal < 65536) ||
308 return ST->hasV6T2Ops() ? 2 : 3;
311 if ((SImmVal >= 0 && SImmVal < 65536) ||
315 return ST->hasV6T2Ops() ? 2 : 3;
318 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
348 auto isSSatMin = [&](Value *MinInst) {
349 if (isa<SelectInst>(MinInst)) {
350 Value *MinLHS, *MinRHS;
363 return cast<Instruction>(Inst->getOperand(1))->getOperand(1);
378 if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
382 return isa<FPToSIInst>(FP);
393 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
394 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
400 if (Opcode == Instruction::GetElementPtr && Idx != 0)
403 if (Opcode == Instruction::And) {
405 if (Imm == 255 || Imm == 65535)
417 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
420 if (ST->isThumb2() && NegImm < 1<<12)
423 if (ST->isThumb() && NegImm < 1<<8)
429 if (Opcode == Instruction::Xor && Imm.isAllOnes())
434 if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
437 (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
446 if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
460 (ST->hasNEON() || ST->hasMVEIntegerOps())) {
476 assert(ISD && "Invalid opcode");
481 return Cost == 0 ? 0 : 1;
484 auto IsLegalFPType = [this](EVT VT) {
487 (EltVT == MVT::f64 && ST->hasFP64()) ||
488 (EltVT == MVT::f16 && ST->hasFullFP16());
501 if ((ST->hasMVEIntegerOps() &&
502 (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
503 Opcode == Instruction::SExt)) ||
504 (ST->hasMVEFloatOps() &&
505 (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
506 IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
530 return AdjustCost(Entry->Cost);
549 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
550 if (const auto *Entry =
561 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
562 if (const auto *Entry =
578 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
579 if (const auto *Entry =
589 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
590 if (const auto *Entry =
599 I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
615 auto *User = cast<Instruction>(*I->user_begin());
620 return AdjustCost(Entry->Cost);
625 if (Src->isVectorTy() && ST->hasNEON() &&
638 return AdjustCost(LT.first * Entry->Cost);
727 if (SrcTy.isVector() && ST->hasNEON()) {
731 return AdjustCost(Entry->Cost);
761 return AdjustCost(Entry->Cost);
788 if (SrcTy.isInteger() && ST->hasNEON()) {
792 return AdjustCost(Entry->Cost);
813 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
830 if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
833 return Lanes * CallCost;
864 return AdjustCost(Entry->Cost);
867 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
878 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
882 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
883 Opcode == Instruction::ExtractElement)) {
886 if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
893 return std::max<InstructionCost>(
897 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
898 Opcode == Instruction::ExtractElement)) {
902 std::pair<InstructionCost, MVT> LT =
945 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
947 Sel = cast<Instruction>(Sel->user_back());
1005 std::pair<InstructionCost, MVT> LT =
1010 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
1011 (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
1012 cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
1014 FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
1019 if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
1030 std::pair<InstructionCost, MVT> LT =
1038 if (LT.second.getVectorNumElements() > 2) {
1040 return LT.first * BaseCost +
1049 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
1063 unsigned NumVectorInstToHideOverhead = 10;
1064 int MaxMergeDistance = 64;
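// If the stride of a vector access is not a small constant, the address
// computation cannot be folded into the access, so report a high cost that
// takes several vector instructions to hide.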
1066 if (ST->hasNEON()) {
1069 return NumVectorInstToHideOverhead;
1083 case Intrinsic::arm_mve_vctp8:
1084 case Intrinsic::arm_mve_vctp16:
1085 case Intrinsic::arm_mve_vctp32:
1086 case Intrinsic::arm_mve_vctp64:
1099 if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
1101 if (VecTy->getNumElements() == 2)
1106 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
1111 return (EltWidth == 32 && Alignment >= 4) ||
1112 (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
1120 return ((EltWidth == 32 && Alignment >= 4) ||
1121 (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
1129 unsigned DstAddrSpace = ~0u;
1130 unsigned SrcAddrSpace = ~0u;
1131 const Function *F = I->getParent()->getParent();
1133 if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
1134 ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
1139 const unsigned Size = C->getValue().getZExtValue();
1140 const Align DstAlign = *MC->getDestAlign();
1141 const Align SrcAlign = *MC->getSourceAlign();
1143 MOp = MemOp::Copy(Size, /*DstAlignCanChange=*/false, DstAlign, SrcAlign,
1145 DstAddrSpace = MC->getDestAddressSpace();
1146 SrcAddrSpace = MC->getSourceAddressSpace();
1148 else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
1149 ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
1154 const unsigned Size = C->getValue().getZExtValue();
1155 const Align DstAlign = *MS->getDestAlign();
1159 DstAddrSpace = MS->getDestAddressSpace();
1164 unsigned Limit, Factor = 2;
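// A memcpy/memmove needs a load and a store per chunk (Factor == 2); a memset
// only needs the store, so it lowers the factor to 1.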
1165 switch (I->getIntrinsicID()) {
1169 case Intrinsic::memmove:
1172 case Intrinsic::memset:
1183 std::vector<EVT> MemOps;
1184 if (getTLI()->findOptimalMemOpLowering(
1185 MemOps, Limit, MOp, DstAddrSpace,
1186 SrcAddrSpace, F->getAttributes()))
1187 return MemOps.size() * Factor;
1208 if (ST->hasNEON()) {
1225 if (const auto *Entry =
1227 return LT.first * Entry->Cost;
1246 if (const auto *Entry =
1248 return LT.first * Entry->Cost;
1272 return LT.first * Entry->Cost;
1275 if (ST->hasMVEIntegerOps()) {
1288 return LT.first * Entry->Cost *
1292 if (!Mask.empty()) {
1294 if (Mask.size() <= LT.second.getVectorNumElements() &&
1301 int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
1318 switch (ISDOpcode) {
1331 if (ST->hasNEON()) {
1332 const unsigned FunctionCallDivCost = 20;
1333 const unsigned ReciprocalDivCost = 10;
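// NEON has no vector integer divide; vector divisions are costed as per-lane
// library calls, or as a cheaper estimate for some of the small types.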
1376 return LT.first * Entry->Cost;
1379 Opcode, Ty,
CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
1397 auto LooksLikeAFreeShift = [&]() {
1407 switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
1409 case Instruction::Sub:
1410 case Instruction::And:
1411 case Instruction::Xor:
1412 case Instruction::Or:
1413 case Instruction::ICmp:
1419 if (LooksLikeAFreeShift())
1425 if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
1433 return LT.first * BaseCost;
1436 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
1437 unsigned Num = VTy->getNumElements();
1463 if (ST->hasNEON() && Src->isVectorTy() &&
1464 (Alignment && *Alignment != Align(16)) &&
1465 cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
1469 return LT.first * 4;
1474 if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
1476 isa<FPExtInst>(*I->user_begin())) ||
1482 : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
1488 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
1499 if (ST->hasMVEIntegerOps()) {
1505 if (!isa<FixedVectorType>(Src))
1510 return cast<FixedVectorType>(Src)->getNumElements() * 8;
1516 bool UseMaskForCond, bool UseMaskForGaps) {
1517 assert(Factor >= 2 && "Invalid interleave factor");
1518 assert(isa<VectorType>(VecTy) && "Expect a vector type");
1523 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
1524 !UseMaskForCond && !UseMaskForGaps) {
1525 unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
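// vldN/vstN only support legal 64- or 128-bit vector types; wider accesses
// are matched to more than one vldN/vstN instruction.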
1534 if (NumElts % Factor == 0 &&
1543 if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
1546 return 2 * BaseCost;
1551 UseMaskForCond, UseMaskForGaps);
1555 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1557 using namespace PatternMatch;
1563 auto *VTy = cast<FixedVectorType>(DataTy);
1567 unsigned NumElems = VTy->getNumElements();
1568 unsigned EltSize = VTy->getScalarSizeInBits();
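// MVE gathers/scatters are modelled as if the element accesses were
// serialised, so the vector cost below is the per-element cost multiplied by
// the number of elements.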
1569 std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
1577 NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
1584 if (EltSize < 8 || Alignment < EltSize / 8)
1587 unsigned ExtSize = EltSize;
1594 match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
1596 const User *Us = *I->users().begin();
1597 if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
1600 cast<Instruction>(Us)->getType()->getScalarSizeInBits();
1601 if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
1602 (TypeSize == 16 && EltSize == 8)) &&
1611 match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
1612 (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
1614 unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
1615 if (((EltSize == 16 && TypeSize == 32) ||
1622 if (ExtSize * NumElems != 128 || NumElems < 4)
1631 if (ExtSize != 8 && ExtSize != 16)
1634 if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
1635 Ptr = BC->getOperand(0);
1636 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1637 if (GEP->getNumOperands() != 2)
1641 if (Scale != 1 && Scale * 8 != ExtSize)
1644 if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
1645 if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
1686 std::pair<InstructionCost, MVT> LT =
1700 (LT.second == MVT::v8i16 && RevVTSize <= (IsMLA ? 64u : 32u)) ||
1712 switch (ICA.getID()) {
1713 case Intrinsic::get_active_lane_mask:
1721 if (ST->hasMVEIntegerOps())
1724 case Intrinsic::sadd_sat:
1725 case Intrinsic::ssub_sat:
1726 case Intrinsic::uadd_sat:
1727 case Intrinsic::usub_sat: {
1728 if (!ST->hasMVEIntegerOps())
1748 if (!ST->hasMVEIntegerOps())
1760 if (!ST->hasMVEFloatOps())
1768 case Intrinsic::fptosi_sat:
1769 case Intrinsic::fptoui_sat: {
1772 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
1782 if (ST->hasMVEFloatOps() &&
1790 (ST->hasFullFP16() && LT.second == MVT::f16) ||
1791 (ST->hasMVEFloatOps() &&
1795 LT.second.getScalarSizeInBits());
1800 LegalTy, {LegalTy, LegalTy});
1804 LegalTy, {LegalTy, LegalTy});
1806 return LT.first * Cost;
1816 if (!F->isIntrinsic())
1820 if (F->getName().startswith("llvm.arm"))
1823 switch (F->getIntrinsicID()) {
1826 case Intrinsic::sin:
1827 case Intrinsic::cos:
1828 case Intrinsic::pow:
1829 case Intrinsic::log:
1830 case Intrinsic::log10:
1832 case Intrinsic::exp:
1833 case Intrinsic::exp2:
1835 case Intrinsic::sqrt:
1836 case Intrinsic::fabs:
1837 case Intrinsic::copysign:
1841 case Intrinsic::rint:
1842 case Intrinsic::nearbyint:
1844 case Intrinsic::canonicalize:
1845 case Intrinsic::lround:
1846 case Intrinsic::llround:
1847 case Intrinsic::lrint:
1848 case Intrinsic::llrint:
1849 if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
1851 if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
1857 case Intrinsic::masked_store:
1858 case Intrinsic::masked_load:
1859 case Intrinsic::masked_gather:
1860 case Intrinsic::masked_scatter:
1861 return !ST->hasMVEIntegerOps();
1862 case Intrinsic::sadd_with_overflow:
1863 case Intrinsic::uadd_with_overflow:
1864 case Intrinsic::ssub_with_overflow:
1865 case Intrinsic::usub_with_overflow:
1866 case Intrinsic::sadd_sat:
1867 case Intrinsic::uadd_sat:
1868 case Intrinsic::ssub_sat:
1869 case Intrinsic::usub_sat:
1884 if (auto *Call = dyn_cast<CallInst>(&I)) {
1885 if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
1888 case Intrinsic::memset:
1889 case Intrinsic::memmove:
1892 if (const Function *F = Call->getCalledFunction())
1901 switch (I.getOpcode()) {
1904 case Instruction::FPToSI:
1905 case Instruction::FPToUI:
1906 case Instruction::SIToFP:
1907 case Instruction::UIToFP:
1908 case Instruction::FPTrunc:
1909 case Instruction::FPExt:
1939 switch (I.getOpcode()) {
1942 case Instruction::Alloca:
1946 case Instruction::PHI:
1953 if (I.getType()->isDoubleTy() && !ST->hasFP64())
1957 if (I.getType()->isHalfTy() && !ST->hasFullFP16())
1980 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
1985 const SCEV *TripCountSCEV =
1991 LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
1999 if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
2000 switch (Call->getIntrinsicID()) {
2003 case Intrinsic::start_loop_iterations:
2004 case Intrinsic::test_start_loop_iterations:
2005 case Intrinsic::loop_decrement:
2006 case Intrinsic::loop_decrement_reg:
2016 bool IsTailPredLoop = false;
2017 auto ScanLoop = [&](Loop *L) {
2019 for (auto &I : *BB) {
2021 isa<InlineAsm>(I)) {
2025 if (auto *II = dyn_cast<IntrinsicInst>(&I))
2038 for (auto Inner : *L)
2039 if (!ScanLoop(Inner))
2061 if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
2070 if (auto *II = dyn_cast<IntrinsicInst>(&I))
2078 if (isa<FCmpInst>(&I))
2083 if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))
2087 if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
2088 if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
2092 if (isa<TruncInst>(&I))
2093 if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
2112 LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
2123 bool ReductionsDisabled =
2127 for (auto *I : LiveOuts) {
2128 if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
2129 !I->getType()->isHalfTy()) {
2130 LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
2131 "live-out value\n");
2134 if (ReductionsDisabled) {
2147 if (isa<PHINode>(&I))
2155 if (T->getScalarSizeInBits() > 32) {
2159 if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
2162 int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L);
2163 if (NextStride == 1) {
2168 }
else if (NextStride == -1 ||
2172 <<
"Consecutive strides of 2 found, vld2/vstr2 can't "
2173 "be tail-predicated\n.");
2182 if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
2183 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
2189 "tail-predicate\n.");
2195 LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
2213 if (!ST->hasMVEIntegerOps())
2218 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
2227 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2235 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2241 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2280 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2284 if (ExitingBlocks.size() > 2)
2300 for (auto &I : *BB) {
2303 if (I.getType()->isVectorTy())
2306 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
2328 unsigned ExitingValues = 0;
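// Count the LCSSA PHIs in the exit blocks that are not simple GEPs; each such
// live-out value makes unrolling less profitable.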
2331 for (auto *Exit : ExitBlocks) {
2334 unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
2335 return PH.getNumOperands() != 1 ||
2336 !isa<GetElementPtrInst>(PH.getOperand(0));
2338 ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
2369 if (!ST->hasMVEIntegerOps())
2375 return ScalarBits <= 64;
2383 if (!ST->hasMVEIntegerOps())