#include "llvm/IR/IntrinsicsARM.h"

#define DEBUG_TYPE "armtti"

    cl::desc("Enable the generation of masked loads and stores"));

    cl::desc("Disable the generation of low-overhead loops"));

    cl::desc("Enable the generation of WLS loops"));
  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();

      PointerType::get(II.getType(), 0));
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
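  // This is the core of ARMTTIImpl::areInlineCompatible: the caller and
  // callee must match exactly on every feature bit outside
  // InlineFeaturesAllowed, and the callee may only rely on "allowed" features
  // that the caller also provides; only then is inlining permitted.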
  if (ST->hasMVEIntegerOps())

  if (L->getHeader()->getParent()->hasOptSize())

      L->getNumBlocks() == 1)

std::optional<Instruction *>
  using namespace PatternMatch;
  case Intrinsic::arm_neon_vld1: {

  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned AlignArg = II.arg_size() - 1;
    MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
  case Intrinsic::arm_mve_pred_i2v: {
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(

    if (match(Arg,
              m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(

    if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
      if (CI->getValue().trunc(16).isAllOnes()) {
            cast<FixedVectorType>(II.getType())->getNumElements(),

  case Intrinsic::arm_mve_pred_v2i: {
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
  case Intrinsic::arm_mve_vadc:
  case Intrinsic::arm_mve_vadc_predicated: {
        (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
           "Bad type for intrinsic!");

  case Intrinsic::arm_mve_vmldava: {
    if (I->hasOneUse()) {
      auto *User = cast<Instruction>(*I->user_begin());
        Value *OpX = I->getOperand(4);
        Value *OpY = I->getOperand(5);
            {I->getOperand(0), I->getOperand(1),
             I->getOperand(2), OpZ, OpX, OpY});
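        // The vmldava fold above: when the reduction result has a single add
        // user, the intrinsic is rebuilt with the added value (OpZ) as its
        // accumulator operand so the separate add can be removed.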
                                 SimplifyAndSetOp) const {
  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
    unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
    unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
    SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);

  case Intrinsic::arm_mve_vcvt_narrow:
    SimplifyNarrowInstrTopBottom(2);
  case Intrinsic::arm_mve_vqmovn:
    SimplifyNarrowInstrTopBottom(4);
  case Intrinsic::arm_mve_vshrn:
    SimplifyNarrowInstrTopBottom(7);
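  // The narrowing top/bottom intrinsics handled above write only the odd
  // (top) or even (bottom) result lanes; the other lanes pass through from
  // operand 0, so only those pass-through lanes of operand 0 are reported as
  // demanded.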
  if (Bits == 0 || Imm.getActiveBits() >= 64)

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
      return ST->hasV6T2Ops() ? 2 : 3;
    if ((SImmVal >= 0 && SImmVal < 65536) ||
      return ST->hasV6T2Ops() ? 2 : 3;
    if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))

  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
      C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
    auto isSSatMin = [&](Value *MinInst) {
      if (isa<SelectInst>(MinInst)) {
        Value *MinLHS, *MinRHS;
      return cast<Instruction>(Inst->getOperand(1))->getOperand(1);

  if (Imm.getBitWidth() != 64 ||
  if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
    return isa<FPToSIInst>(FP);
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&

  if (Opcode == Instruction::GetElementPtr && Idx != 0)

  if (Opcode == Instruction::And) {
    if (Imm == 255 || Imm == 65535)

  if (Opcode == Instruction::Add)

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1 << 12)
    if (ST->isThumb() && NegImm < 1 << 8)

  if (Opcode == Instruction::Xor && Imm.isAllOnes())

  if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
      (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&

  if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
      (ST->hasNEON() || ST->hasMVEIntegerOps())) {
  assert(ISD && "Invalid opcode");

    return Cost == 0 ? 0 : 1;

  auto IsLegalFPType = [this](EVT VT) {
           (EltVT == MVT::f64 && ST->hasFP64()) ||
           (EltVT == MVT::f16 && ST->hasFullFP16());

  if ((ST->hasMVEIntegerOps() &&
       (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
        Opcode == Instruction::SExt)) ||
      (ST->hasMVEFloatOps() &&
       (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
       IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
      return AdjustCost(Entry->Cost);

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =

  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =

  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =

      I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
    auto *User = cast<Instruction>(*I->user_begin());
      return AdjustCost(Entry->Cost);
  if (Src->isVectorTy() && ST->hasNEON() &&
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return AdjustCost(LT.first * Entry->Cost);

  if (SrcTy.isVector() && ST->hasNEON()) {
        return AdjustCost(Entry->Cost);

        return AdjustCost(Entry->Cost);

  if (SrcTy.isInteger() && ST->hasNEON()) {
        return AdjustCost(Entry->Cost);

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
      return Lanes * CallCost;

        return AdjustCost(Entry->Cost);
  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()

  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&

  if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
                        Opcode == Instruction::ExtractElement)) {
    if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
      return std::max<InstructionCost>(

  if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
                                 Opcode == Instruction::ExtractElement)) {
    std::pair<InstructionCost, MVT> LT =
  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
    Sel = cast<Instruction>(Sel->user_back());
      IID = Intrinsic::abs;
      IID = Intrinsic::smin;
      IID = Intrinsic::smax;
      IID = Intrinsic::umin;
      IID = Intrinsic::umax;
      IID = Intrinsic::minnum;
      IID = Intrinsic::maxnum;

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
      (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
      cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
    FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
    if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
    if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
      return LT.first * BaseCost +

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (ST->hasNEON()) {
      return NumVectorInstToHideOverhead;

  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64:

  if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
    if (VecTy->getNumElements() == 2)
    if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())

  return (EltWidth == 32 && Alignment >= 4) ||
         (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);

  return ((EltWidth == 32 && Alignment >= 4) ||
          (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
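  // Both returns encode the same MVE rule: 32-bit elements need at least
  // 4-byte alignment, 16-bit elements need 2-byte alignment, and 8-bit
  // elements are always acceptable.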
  unsigned DstAddrSpace = ~0u;
  unsigned SrcAddrSpace = ~0u;
  const Function *F = I->getParent()->getParent();

  if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = *MC->getDestAlign();
    const Align SrcAlign = *MC->getSourceAlign();
    DstAddrSpace = MC->getDestAddressSpace();
    SrcAddrSpace = MC->getSourceAddressSpace();
  else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = *MS->getDestAlign();
    DstAddrSpace = MS->getDestAddressSpace();

  unsigned Limit, Factor = 2;
  switch (I->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:

  std::vector<EVT> MemOps;
  if (getTLI()->findOptimalMemOpLowering(
          MemOps, Limit, MOp, DstAddrSpace,
          SrcAddrSpace, F->getAttributes()))
    return MemOps.size() * Factor;
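  // getNumMemOps asks the target lowering how many load/store operations this
  // memcpy/memmove/memset would expand into (bounded by Limit) and multiplies
  // by Factor, which prices each copied chunk as a load plus a store.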
  if (ST->hasNEON()) {
    if (const auto *Entry =
      return LT.first * Entry->Cost;

    if (const auto *Entry =
      return LT.first * Entry->Cost;

      return LT.first * Entry->Cost;

  if (ST->hasMVEIntegerOps()) {
      return LT.first * Entry->Cost *

  if (!Mask.empty()) {
    if (LT.second.isVector() &&
        Mask.size() <= LT.second.getVectorNumElements() &&

  int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()

  switch (ISDOpcode) {

  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
    const unsigned ReciprocalDivCost = 10;

    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

      Opcode, Ty, CostKind, Op1Info, Op2Info);
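  // LooksLikeAFreeShift: most ARM data-processing instructions can take a
  // shifted-register operand, so a shift whose only user is one of the
  // opcodes below can usually be folded into that user and treated as free.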
  auto LooksLikeAFreeShift = [&]() {
    switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Xor:
    case Instruction::Or:
    case Instruction::ICmp:

  if (LooksLikeAFreeShift())

  if (ST->hasMVEIntegerOps() && Ty->isVectorTy())

    return LT.first * BaseCost;

  if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
    unsigned Num = VTy->getNumElements();
  if (ST->hasNEON() && Src->isVectorTy() &&
      (Alignment && *Alignment != Align(16)) &&
      cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
    return LT.first * 4;

  if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
      ((Opcode == Instruction::Load && I->hasOneUse() &&
        isa<FPExtInst>(*I->user_begin())) ||
       (Opcode == Instruction::Store && isa<FPTruncInst>(I->getOperand(0))))) {
        Opcode == Instruction::Load
            : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()

  if (ST->hasMVEIntegerOps()) {
    if (!isa<FixedVectorType>(Src))
    return cast<FixedVectorType>(Src)->getNumElements() * 8;
                                 bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {
    unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();

    if (NumElts % Factor == 0 &&

    if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
      return 2 * BaseCost;
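      // An MVE VLD2/VST2 of a small enough integer vector is modelled as two
      // contiguous vector memory operations, hence twice the base cost.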
                                UseMaskForCond, UseMaskForGaps);

    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  using namespace PatternMatch;

  auto *VTy = cast<FixedVectorType>(DataTy);

  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();

         NumElems * LT.first +

  if (EltSize < 8 || Alignment < EltSize / 8)

  unsigned ExtSize = EltSize;

  if ((I->getOpcode() == Instruction::Load ||
       match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
    const User *Us = *I->users().begin();
    if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
          cast<Instruction>(Us)->getType()->getScalarSizeInBits();
      if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
           (TypeSize == 16 && EltSize == 8)) &&

  if ((I->getOpcode() == Instruction::Store ||
       match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
      (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
    unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
    if (((EltSize == 16 && TypeSize == 32) ||

  if (ExtSize * NumElems != 128 || NumElems < 4)

  if (ExtSize != 8 && ExtSize != 16)

  if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
    Ptr = BC->getOperand(0);
  if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
    if (GEP->getNumOperands() != 2)
    if (Scale != 1 && Scale * 8 != ExtSize)
    if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
      if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
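  // The checks above describe the gathers/scatters MVE can actually form:
  // loads that are immediately sign/zero-extended (or stores of a truncate)
  // may use a narrower memory element, the whole access must legalise to a
  // single 128-bit operation, and the offsets must come from a suitably
  // scaled, zero-extended index.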
                             std::optional<FastMathFlags> FMF,

    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,

      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))

      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
  switch (ICA.getID()) {
  case Intrinsic::get_active_lane_mask:
    if (ST->hasMVEIntegerOps())
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    if (!ST->hasMVEIntegerOps())
  case Intrinsic::abs:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::umin:
  case Intrinsic::umax: {
    if (!ST->hasMVEIntegerOps())
  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    if (!ST->hasMVEFloatOps())
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    if (ST->hasMVEFloatOps() &&
        (ST->hasFP64() && LT.second == MVT::f64) ||
        (ST->hasFullFP16() && LT.second == MVT::f16) ||
        (ST->hasMVEFloatOps() &&
                                    LT.second.getScalarSizeInBits());
                                          LegalTy, {LegalTy, LegalTy});
                                          LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost;
  if (!F->isIntrinsic())

  if (F->getName().startswith("llvm.arm"))

  switch (F->getIntrinsicID()) {
  case Intrinsic::powi:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::exp:
  case Intrinsic::exp2:

  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::canonicalize:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
    if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())

  case Intrinsic::masked_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter:
    return !ST->hasMVEIntegerOps();
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
  if (auto *Call = dyn_cast<CallInst>(&I)) {
    if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
      case Intrinsic::memcpy:
      case Intrinsic::memset:
      case Intrinsic::memmove:
    if (const Function *F = Call->getCalledFunction())

  switch (I.getOpcode()) {
  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:

  switch (I.getOpcode()) {
  case Instruction::Alloca:
  case Instruction::Load:
  case Instruction::Store:
  case Instruction::Select:
  case Instruction::PHI:

  if (I.getType()->isDoubleTy() && !ST->hasFP64())

  if (I.getType()->isHalfTy() && !ST->hasFullFP16())
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {

  const SCEV *TripCountSCEV =

    LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");

    if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
      switch (Call->getIntrinsicID()) {
      case Intrinsic::start_loop_iterations:
      case Intrinsic::test_start_loop_iterations:
      case Intrinsic::loop_decrement:
      case Intrinsic::loop_decrement_reg:

  bool IsTailPredLoop = false;
  auto ScanLoop = [&](Loop *L) {
    for (auto *BB : L->getBlocks()) {
      for (auto &I : *BB) {
            isa<InlineAsm>(I)) {
        if (auto *II = dyn_cast<IntrinsicInst>(&I))

  for (auto *Inner : *L)
    if (!ScanLoop(Inner))
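  // ScanLoop walks every instruction of the loop (and each inner loop above)
  // and gives up on forming a hardware loop when it meets inline asm or
  // something that may be lowered to a call, since calls can clobber LR,
  // which the low-overhead loop instructions depend on.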
  if (isa<ICmpInst>(&I) && ++ICmpCount > 1)

  if (auto *II = dyn_cast<IntrinsicInst>(&I))

  if (isa<FCmpInst>(&I))

  if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))

  if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
    if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))

  if (isa<TruncInst>(&I))
    if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))

  LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");

  bool ReductionsDisabled =

  for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
      LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
                           "live-out value\n");
    if (ReductionsDisabled) {

    for (Instruction &I : BB->instructionsWithoutDebug()) {
      if (isa<PHINode>(&I))
      if (T->getScalarSizeInBits() > 32) {
      if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
        if (NextStride == 1) {
        } else if (NextStride == -1 ||
                   << "Consecutive strides of 2 found, vld2/vstr2 can't "
                      "be tail-predicated\n.");
        if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
          const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
                     "tail-predicate\n.");

  LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
  if (!ST->hasMVEIntegerOps())

  if (L->getNumBlocks() > 1) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "

  assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

  if (L->getHeader()->getParent()->hasOptSize())

  L->getExitingBlocks(ExitingBlocks);
             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");

  if (ExitingBlocks.size() > 2)
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

  unsigned ExitingValues = 0;
  L->getExitBlocks(ExitBlocks);
  for (auto *Exit : ExitBlocks) {
    unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
      return PH.getNumOperands() != 1 ||
             !isa<GetElementPtrInst>(PH.getOperand(0));
    ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
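    // ExitingValues ends up as the largest number of non-trivial live-out
    // PHIs (anything other than a lone GEP operand) across the exit blocks;
    // the surrounding code uses it to scale back how aggressively the loop
    // is unrolled.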
  if (!ST->hasMVEIntegerOps())

  case Instruction::Add:
    return ScalarBits <= 64;

  if (!ST->hasMVEIntegerOps())

                                bool HasBaseReg, int64_t Scale,
                                unsigned AddrSpace) const {

    return AM.Scale < 0 ? 1 : 0;

  return ST->isThumb2() || ST->hasV8MBaselineOps();