26#include "llvm/IR/IntrinsicsARM.h"
45#define DEBUG_TYPE "armtti"
49 cl::desc(
"Enable the generation of masked loads and stores"));
53 cl::desc(
"Disable the generation of low-overhead loops"));
57 cl::desc(
"Enable the generation of WLS loops"));
75 unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
77 : IntrAlign->getLimitedValue();
83 PointerType::get(II.
getType(), 0));
91 TM.getSubtargetImpl(*Caller)->getFeatureBits();
93 TM.getSubtargetImpl(*Callee)->getFeatureBits();
96 bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
97 (CalleeBits & ~InlineFeaturesAllowed);
100 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
101 (CalleeBits & InlineFeaturesAllowed);
102 return MatchExact && MatchSubset;
108 if (ST->hasMVEIntegerOps())
111 if (L->getHeader()->getParent()->hasOptSize())
115 L->getNumBlocks() == 1)
121std::optional<Instruction *>
123 using namespace PatternMatch;
128 case Intrinsic::arm_neon_vld1: {
138 case Intrinsic::arm_neon_vld2:
139 case Intrinsic::arm_neon_vld3:
140 case Intrinsic::arm_neon_vld4:
141 case Intrinsic::arm_neon_vld2lane:
142 case Intrinsic::arm_neon_vld3lane:
143 case Intrinsic::arm_neon_vld4lane:
144 case Intrinsic::arm_neon_vst1:
145 case Intrinsic::arm_neon_vst2:
146 case Intrinsic::arm_neon_vst3:
147 case Intrinsic::arm_neon_vst4:
148 case Intrinsic::arm_neon_vst2lane:
149 case Intrinsic::arm_neon_vst3lane:
150 case Intrinsic::arm_neon_vst4lane: {
154 unsigned AlignArg = II.
arg_size() - 1;
156 MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
166 case Intrinsic::arm_mve_pred_i2v: {
169 if (
match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
175 if (
match(Arg,
m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
179 if (
auto *CI = dyn_cast<ConstantInt>(XorMask)) {
180 if (CI->getValue().trunc(16).isAllOnes()) {
182 cast<FixedVectorType>(II.
getType())->getNumElements(),
195 case Intrinsic::arm_mve_pred_v2i: {
198 if (
match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
214 case Intrinsic::arm_mve_vadc:
215 case Intrinsic::arm_mve_vadc_predicated: {
217 (II.
getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
219 "Bad type for intrinsic!");
228 case Intrinsic::arm_mve_vmldava: {
230 if (
I->hasOneUse()) {
231 auto *
User = cast<Instruction>(*
I->user_begin());
235 Value *OpX =
I->getOperand(4);
236 Value *OpY =
I->getOperand(5);
242 {
I->getOperand(0),
I->getOperand(1),
243 I->getOperand(2), OpZ, OpX, OpY});
259 SimplifyAndSetOp)
const {
264 auto SimplifyNarrowInstrTopBottom =[&](
unsigned TopOpc) {
265 unsigned NumElts = cast<FixedVectorType>(II.
getType())->getNumElements();
266 unsigned IsTop = cast<ConstantInt>(II.
getOperand(TopOpc))->getZExtValue();
273 SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
283 case Intrinsic::arm_mve_vcvt_narrow:
284 SimplifyNarrowInstrTopBottom(2);
286 case Intrinsic::arm_mve_vqmovn:
287 SimplifyNarrowInstrTopBottom(4);
289 case Intrinsic::arm_mve_vshrn:
290 SimplifyNarrowInstrTopBottom(7);
302 if (Bits == 0 || Imm.getActiveBits() >= 64)
305 int64_t SImmVal = Imm.getSExtValue();
306 uint64_t ZImmVal = Imm.getZExtValue();
307 if (!ST->isThumb()) {
308 if ((SImmVal >= 0 && SImmVal < 65536) ||
312 return ST->hasV6T2Ops() ? 2 : 3;
315 if ((SImmVal >= 0 && SImmVal < 65536) ||
319 return ST->hasV6T2Ops() ? 2 : 3;
322 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
334 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
350 C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
352 auto isSSatMin = [&](
Value *MinInst) {
353 if (isa<SelectInst>(MinInst)) {
354 Value *MinLHS, *MinRHS;
367 return cast<Instruction>(Inst->
getOperand(1))->getOperand(1);
378 if (Imm.getBitWidth() != 64 ||
382 if (!
FP && isa<ICmpInst>(Inst) && Inst->
hasOneUse())
386 return isa<FPToSIInst>(
FP);
397 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
398 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
404 if (Opcode == Instruction::GetElementPtr &&
Idx != 0)
407 if (Opcode == Instruction::And) {
409 if (Imm == 255 || Imm == 65535)
416 if (Opcode == Instruction::Add)
421 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
423 int64_t NegImm = -Imm.getSExtValue();
424 if (ST->
isThumb2() && NegImm < 1<<12)
427 if (ST->isThumb() && NegImm < 1<<8)
433 if (Opcode == Instruction::Xor && Imm.isAllOnes())
438 if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->
isThumb2()) &&
441 (isa<ICmpInst>(Inst) && Inst->
hasOneUse() &&
450 if (Inst && Opcode == Instruction::ICmp &&
Idx == 1 && Imm.isAllOnes()) {
464 (ST->hasNEON() || ST->hasMVEIntegerOps())) {
480 assert(ISD &&
"Invalid opcode");
485 return Cost == 0 ? 0 : 1;
488 auto IsLegalFPType = [
this](
EVT VT) {
491 (EltVT == MVT::f64 && ST->hasFP64()) ||
492 (EltVT == MVT::f16 && ST->hasFullFP16());
505 if ((ST->hasMVEIntegerOps() &&
506 (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
507 Opcode == Instruction::SExt)) ||
508 (ST->hasMVEFloatOps() &&
509 (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
510 IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
534 return AdjustCost(Entry->Cost);
553 if (SrcTy.
isVector() && ST->hasMVEIntegerOps()) {
554 if (
const auto *Entry =
565 if (SrcTy.
isVector() && ST->hasMVEFloatOps()) {
566 if (
const auto *Entry =
582 if (SrcTy.
isVector() && ST->hasMVEIntegerOps()) {
583 if (
const auto *Entry =
593 if (SrcTy.
isVector() && ST->hasMVEFloatOps()) {
594 if (
const auto *Entry =
603 I &&
I->hasOneUse() && ST->hasNEON() && SrcTy.
isVector()) {
606 {
ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
607 {
ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
609 {
ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
610 {
ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
612 {
ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
613 {
ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
615 {
ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
616 {
ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
619 auto *
User = cast<Instruction>(*
I->user_begin());
624 return AdjustCost(Entry->Cost);
629 if (Src->isVectorTy() && ST->hasNEON() &&
641 if (
const auto *Entry =
CostTableLookup(NEONFltDblTbl, ISD, LT.second))
642 return AdjustCost(LT.first * Entry->Cost);
731 if (SrcTy.
isVector() && ST->hasNEON()) {
735 return AdjustCost(Entry->Cost);
765 return AdjustCost(Entry->Cost);
792 if (SrcTy.
isInteger() && ST->hasNEON()) {
796 return AdjustCost(Entry->Cost);
817 if (SrcTy.
isVector() && ST->hasMVEIntegerOps()) {
834 if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
837 return Lanes * CallCost;
868 return AdjustCost(Entry->Cost);
871 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
884 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
888 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
889 Opcode == Instruction::ExtractElement)) {
892 if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
899 return std::max<InstructionCost>(
904 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
905 Opcode == Instruction::ExtractElement)) {
909 std::pair<InstructionCost, MVT> LT =
952 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
954 Sel = cast<Instruction>(Sel->
user_back());
962 IID = Intrinsic::abs;
965 IID = Intrinsic::smin;
968 IID = Intrinsic::smax;
971 IID = Intrinsic::umin;
974 IID = Intrinsic::umax;
977 IID = Intrinsic::minnum;
980 IID = Intrinsic::maxnum;
998 {
ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
1016 if (ST->hasMVEIntegerOps() && ValTy->
isVectorTy() &&
1017 (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
1018 cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
1020 FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
1025 if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
1045 if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
1047 return LT.first * BaseCost +
1057 if (ST->hasMVEIntegerOps() && ValTy->
isVectorTy())
1071 unsigned NumVectorInstToHideOverhead = 10;
1072 int MaxMergeDistance = 64;
1074 if (ST->hasNEON()) {
1077 return NumVectorInstToHideOverhead;
1091 case Intrinsic::arm_mve_vctp8:
1092 case Intrinsic::arm_mve_vctp16:
1093 case Intrinsic::arm_mve_vctp32:
1094 case Intrinsic::arm_mve_vctp64:
1107 if (
auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
1109 if (VecTy->getNumElements() == 2)
1114 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
1119 return (EltWidth == 32 && Alignment >= 4) ||
1120 (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
1128 return ((EltWidth == 32 && Alignment >= 4) ||
1129 (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
1137 unsigned DstAddrSpace = ~0u;
1138 unsigned SrcAddrSpace = ~0u;
1139 const Function *
F =
I->getParent()->getParent();
1141 if (
const auto *MC = dyn_cast<MemTransferInst>(
I)) {
1142 ConstantInt *
C = dyn_cast<ConstantInt>(MC->getLength());
1147 const unsigned Size =
C->getValue().getZExtValue();
1148 const Align DstAlign = *MC->getDestAlign();
1149 const Align SrcAlign = *MC->getSourceAlign();
1153 DstAddrSpace = MC->getDestAddressSpace();
1154 SrcAddrSpace = MC->getSourceAddressSpace();
1156 else if (
const auto *MS = dyn_cast<MemSetInst>(
I)) {
1157 ConstantInt *
C = dyn_cast<ConstantInt>(MS->getLength());
1162 const unsigned Size =
C->getValue().getZExtValue();
1163 const Align DstAlign = *MS->getDestAlign();
1167 DstAddrSpace = MS->getDestAddressSpace();
1172 unsigned Limit, Factor = 2;
1173 switch(
I->getIntrinsicID()) {
1174 case Intrinsic::memcpy:
1177 case Intrinsic::memmove:
1180 case Intrinsic::memset:
1191 std::vector<EVT> MemOps;
1192 if (getTLI()->findOptimalMemOpLowering(
1193 MemOps, Limit, MOp, DstAddrSpace,
1194 SrcAddrSpace,
F->getAttributes()))
1195 return MemOps.size() * Factor;
1220 if (IsExtractSubvector)
1222 if (ST->hasNEON()) {
1239 if (
const auto *Entry =
1241 return LT.first * Entry->Cost;
1260 if (
const auto *Entry =
1262 return LT.first * Entry->Cost;
1286 return LT.first * Entry->Cost;
1289 if (ST->hasMVEIntegerOps()) {
1302 return LT.first * Entry->Cost *
1306 if (!Mask.empty()) {
1308 if (LT.second.isVector() &&
1309 Mask.size() <= LT.second.getVectorNumElements() &&
1317 if (IsExtractSubvector)
1319 int BaseCost = ST->hasMVEIntegerOps() && Tp->
isVectorTy()
1336 switch (ISDOpcode) {
1349 if (ST->hasNEON()) {
1350 const unsigned FunctionCallDivCost = 20;
1351 const unsigned ReciprocalDivCost = 10;
1357 {
ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1358 {
ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1359 {
ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
1360 {
ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
1361 {
ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1362 {
ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1363 {
ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
1364 {
ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
1365 {
ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
1366 {
ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
1367 {
ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
1368 {
ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
1369 {
ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
1370 {
ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
1371 {
ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
1372 {
ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
1374 {
ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1375 {
ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1376 {
ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
1377 {
ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
1378 {
ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1379 {
ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1380 {
ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
1381 {
ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
1382 {
ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1383 {
ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1384 {
ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
1385 {
ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
1386 {
ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1387 {
ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1388 {
ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
1389 {
ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
1393 if (
const auto *Entry =
CostTableLookup(CostTbl, ISDOpcode, LT.second))
1394 return LT.first * Entry->Cost;
1397 Opcode, Ty,
CostKind, Op1Info, Op2Info);
1414 auto LooksLikeAFreeShift = [&]() {
1424 switch (cast<Instruction>(CxtI->
user_back())->getOpcode()) {
1425 case Instruction::Add:
1426 case Instruction::Sub:
1427 case Instruction::And:
1428 case Instruction::Xor:
1429 case Instruction::Or:
1430 case Instruction::ICmp:
1436 if (LooksLikeAFreeShift())
1442 if (ST->hasMVEIntegerOps() && Ty->
isVectorTy())
1450 return LT.first * BaseCost;
1453 if (
auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
1454 unsigned Num = VTy->getNumElements();
1482 if (ST->hasNEON() && Src->isVectorTy() &&
1483 (Alignment && *Alignment !=
Align(16)) &&
1484 cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
1488 return LT.first * 4;
1493 if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) &&
I &&
1494 ((Opcode == Instruction::Load &&
I->hasOneUse() &&
1495 isa<FPExtInst>(*
I->user_begin())) ||
1496 (Opcode == Instruction::Store && isa<FPTruncInst>(
I->getOperand(0))))) {
1499 Opcode == Instruction::Load
1501 : cast<Instruction>(
I->getOperand(0))->getOperand(0)->getType();
1507 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
1518 if (ST->hasMVEIntegerOps()) {
1524 if (!isa<FixedVectorType>(Src))
1529 return cast<FixedVectorType>(Src)->getNumElements() * 8;
1535 bool UseMaskForCond,
bool UseMaskForGaps) {
1536 assert(Factor >= 2 &&
"Invalid interleave factor");
1537 assert(isa<VectorType>(VecTy) &&
"Expect a vector type");
1542 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
1543 !UseMaskForCond && !UseMaskForGaps) {
1544 unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
1553 if (NumElts % Factor == 0 &&
1562 if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
1565 return 2 * BaseCost;
1570 UseMaskForCond, UseMaskForGaps);
1574 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
1576 using namespace PatternMatch;
1582 auto *VTy = cast<FixedVectorType>(DataTy);
1586 unsigned NumElems = VTy->getNumElements();
1587 unsigned EltSize = VTy->getScalarSizeInBits();
1602 NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
1608 if (EltSize < 8 || Alignment < EltSize / 8)
1611 unsigned ExtSize = EltSize;
1617 if ((
I->getOpcode() == Instruction::Load ||
1618 match(
I, m_Intrinsic<Intrinsic::masked_gather>())) &&
1620 const User *Us = *
I->users().begin();
1621 if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
1624 cast<Instruction>(Us)->getType()->getScalarSizeInBits();
1625 if (((
TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
1626 (
TypeSize == 16 && EltSize == 8)) &&
1634 if ((
I->getOpcode() == Instruction::Store ||
1635 match(
I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
1636 (
T = dyn_cast<TruncInst>(
I->getOperand(0)))) {
1638 unsigned TypeSize =
T->getOperand(0)->getType()->getScalarSizeInBits();
1639 if (((EltSize == 16 &&
TypeSize == 32) ||
1646 if (ExtSize * NumElems != 128 || NumElems < 4)
1655 if (ExtSize != 8 && ExtSize != 16)
1658 if (
const auto *BC = dyn_cast<BitCastInst>(
Ptr))
1659 Ptr = BC->getOperand(0);
1660 if (
const auto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr)) {
1661 if (
GEP->getNumOperands() != 2)
1665 if (Scale != 1 && Scale * 8 != ExtSize)
1668 if (
const auto *ZExt = dyn_cast<ZExtInst>(
GEP->getOperand(1))) {
1669 if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
1679 std::optional<FastMathFlags> FMF,
1691 (EltSize == 64 && ST->hasFP64()) ||
1692 (EltSize == 16 && ST->hasFullFP16()))) {
1693 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1694 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1697 NumElts * EltSize > VecLimit) {
1711 ExtractCost = NumElts / 2;
1713 return VecCost + ExtractCost +
1719 (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
1720 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1722 ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1724 while (
isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1732 NumElts * EltSize == 64) {
1741 return VecCost + ExtractCost +
1764 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *ValTy,
1784 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1785 (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
1786 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1815 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1816 (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
1817 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1833 if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
1837 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1839 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1841 while (
isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1856 ExtractCost = cast<FixedVectorType>(Ty)->getNumElements() / 2;
1859 {Ty->getElementType(), Ty->getElementType()},
1861 return VecCost + ExtractCost +
1865 if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
1866 IID == Intrinsic::umin || IID == Intrinsic::umax) {
1887 switch (ICA.
getID()) {
1888 case Intrinsic::get_active_lane_mask:
1896 if (ST->hasMVEIntegerOps())
1899 case Intrinsic::sadd_sat:
1900 case Intrinsic::ssub_sat:
1901 case Intrinsic::uadd_sat:
1902 case Intrinsic::usub_sat: {
1903 if (!ST->hasMVEIntegerOps())
1908 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
1909 LT.second == MVT::v16i8) {
1918 case Intrinsic::abs:
1919 case Intrinsic::smin:
1920 case Intrinsic::smax:
1921 case Intrinsic::umin:
1922 case Intrinsic::umax: {
1923 if (!ST->hasMVEIntegerOps())
1928 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
1929 LT.second == MVT::v16i8)
1933 case Intrinsic::minnum:
1934 case Intrinsic::maxnum: {
1935 if (!ST->hasMVEFloatOps())
1939 if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
1943 case Intrinsic::fptosi_sat:
1944 case Intrinsic::fptoui_sat: {
1947 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
1951 if ((ST->
hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
1952 (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
1953 (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
1957 if (ST->hasMVEFloatOps() &&
1958 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
1963 if (((ST->
hasVFP2Base() && LT.second == MVT::f32) ||
1964 (ST->hasFP64() && LT.second == MVT::f64) ||
1965 (ST->hasFullFP16() && LT.second == MVT::f16) ||
1966 (ST->hasMVEFloatOps() &&
1967 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
1970 LT.second.getScalarSizeInBits());
1975 LegalTy, {LegalTy, LegalTy});
1979 LegalTy, {LegalTy, LegalTy});
1981 return LT.first *
Cost;
1991 if (!
F->isIntrinsic())
1995 if (
F->getName().starts_with(
"llvm.arm"))
1998 switch (
F->getIntrinsicID()) {
2000 case Intrinsic::powi:
2001 case Intrinsic::sin:
2002 case Intrinsic::cos:
2003 case Intrinsic::pow:
2004 case Intrinsic::log:
2005 case Intrinsic::log10:
2006 case Intrinsic::log2:
2007 case Intrinsic::exp:
2008 case Intrinsic::exp2:
2010 case Intrinsic::sqrt:
2011 case Intrinsic::fabs:
2012 case Intrinsic::copysign:
2013 case Intrinsic::floor:
2014 case Intrinsic::ceil:
2015 case Intrinsic::trunc:
2016 case Intrinsic::rint:
2017 case Intrinsic::nearbyint:
2018 case Intrinsic::round:
2019 case Intrinsic::canonicalize:
2020 case Intrinsic::lround:
2021 case Intrinsic::llround:
2022 case Intrinsic::lrint:
2023 case Intrinsic::llrint:
2024 if (
F->getReturnType()->isDoubleTy() && !ST->hasFP64())
2026 if (
F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
2032 case Intrinsic::masked_store:
2033 case Intrinsic::masked_load:
2034 case Intrinsic::masked_gather:
2035 case Intrinsic::masked_scatter:
2036 return !ST->hasMVEIntegerOps();
2037 case Intrinsic::sadd_with_overflow:
2038 case Intrinsic::uadd_with_overflow:
2039 case Intrinsic::ssub_with_overflow:
2040 case Intrinsic::usub_with_overflow:
2041 case Intrinsic::sadd_sat:
2042 case Intrinsic::uadd_sat:
2043 case Intrinsic::ssub_sat:
2044 case Intrinsic::usub_sat:
2059 if (
auto *Call = dyn_cast<CallInst>(&
I)) {
2060 if (
auto *II = dyn_cast<IntrinsicInst>(Call)) {
2062 case Intrinsic::memcpy:
2063 case Intrinsic::memset:
2064 case Intrinsic::memmove:
2067 if (
const Function *
F = Call->getCalledFunction())
2076 switch (
I.getOpcode()) {
2079 case Instruction::FPToSI:
2080 case Instruction::FPToUI:
2081 case Instruction::SIToFP:
2082 case Instruction::UIToFP:
2083 case Instruction::FPTrunc:
2084 case Instruction::FPExt:
2114 switch (
I.getOpcode()) {
2117 case Instruction::Alloca:
2118 case Instruction::Load:
2119 case Instruction::Store:
2120 case Instruction::Select:
2121 case Instruction::PHI:
2128 if (
I.getType()->isDoubleTy() && !ST->hasFP64())
2132 if (
I.getType()->isHalfTy() && !ST->hasFullFP16())
2155 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2160 const SCEV *TripCountSCEV =
2166 LLVM_DEBUG(
dbgs() <<
"ARMHWLoops: Trip count does not fit into 32bits\n");
2174 if (
auto *Call = dyn_cast<IntrinsicInst>(&
I)) {
2175 switch (Call->getIntrinsicID()) {
2178 case Intrinsic::start_loop_iterations:
2179 case Intrinsic::test_start_loop_iterations:
2180 case Intrinsic::loop_decrement:
2181 case Intrinsic::loop_decrement_reg:
2191 bool IsTailPredLoop =
false;
2192 auto ScanLoop = [&](
Loop *L) {
2193 for (
auto *BB : L->getBlocks()) {
2194 for (
auto &
I : *BB) {
2196 isa<InlineAsm>(
I)) {
2200 if (
auto *II = dyn_cast<IntrinsicInst>(&
I))
2213 for (
auto *Inner : *L)
2214 if (!ScanLoop(Inner))
2236 if (isa<ICmpInst>(&
I) && ++ICmpCount > 1)
2245 if (
auto *II = dyn_cast<IntrinsicInst>(&
I))
2253 if (isa<FCmpInst>(&
I))
2258 if (isa<FPExtInst>(&
I) || isa<FPTruncInst>(&
I))
2262 if (isa<SExtInst>(&
I) || isa<ZExtInst>(&
I) )
2263 if (!
I.getOperand(0)->hasOneUse() || !isa<LoadInst>(
I.getOperand(0)))
2267 if (isa<TruncInst>(&
I) )
2268 if (!
I.hasOneUse() || !isa<StoreInst>(*
I.user_begin()))
2287 LLVM_DEBUG(
dbgs() <<
"Tail-predication: checking allowed instructions\n");
2298 bool ReductionsDisabled =
2302 for (
auto *
I : LiveOuts) {
2303 if (!
I->getType()->isIntegerTy() && !
I->getType()->isFloatTy() &&
2304 !
I->getType()->isHalfTy()) {
2305 LLVM_DEBUG(
dbgs() <<
"Don't tail-predicate loop with non-integer/float "
2306 "live-out value\n");
2309 if (ReductionsDisabled) {
2321 for (
Instruction &
I : BB->instructionsWithoutDebug()) {
2322 if (isa<PHINode>(&
I))
2330 if (
T->getScalarSizeInBits() > 32) {
2334 if (isa<StoreInst>(
I) || isa<LoadInst>(
I)) {
2338 if (NextStride == 1) {
2343 }
else if (NextStride == -1 ||
2347 <<
"Consecutive strides of 2 found, vld2/vstr2 can't "
2348 "be tail-predicated\n.");
2357 if (
auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
2358 const SCEV *Step = AR->getStepRecurrence(*PSE.
getSE());
2364 "tail-predicate\n.");
2370 LLVM_DEBUG(
dbgs() <<
"tail-predication: all instructions allowed!\n");
2383 if (!ST->hasMVEIntegerOps())
2390 if (L->getNumBlocks() > 1) {
2391 LLVM_DEBUG(
dbgs() <<
"preferPredicateOverEpilogue: not a single block "
2396 assert(L->isInnermost() &&
"preferPredicateOverEpilogue: inner-loop expected");
2401 LLVM_DEBUG(
dbgs() <<
"preferPredicateOverEpilogue: hardware-loop is not "
2412 LLVM_DEBUG(
dbgs() <<
"preferPredicateOverEpilogue: hardware-loop is not "
2419 LLVM_DEBUG(
dbgs() <<
"preferPredicateOverEpilogue: hardware-loop is not "
2446 return isa<IntrinsicInst>(I) &&
2447 cast<IntrinsicInst>(I).getIntrinsicID() ==
2448 Intrinsic::get_active_lane_mask;
2458 if (L->getHeader()->getParent()->hasOptSize())
2462 L->getExitingBlocks(ExitingBlocks);
2464 <<
"Blocks: " << L->getNumBlocks() <<
"\n"
2465 <<
"Exit blocks: " << ExitingBlocks.
size() <<
"\n");
2469 if (ExitingBlocks.
size() > 2)
2474 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
2484 for (
auto *BB : L->getBlocks()) {
2485 for (
auto &
I : *BB) {
2488 if (
I.getType()->isVectorTy())
2491 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
2513 unsigned ExitingValues = 0;
2515 L->getExitBlocks(ExitBlocks);
2516 for (
auto *Exit : ExitBlocks) {
2519 unsigned LiveOuts =
count_if(Exit->phis(), [](
auto &PH) {
2520 return PH.getNumOperands() != 1 ||
2521 !isa<GetElementPtrInst>(PH.getOperand(0));
2523 ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
2554 if (!ST->hasMVEIntegerOps())
2559 case Instruction::Add:
2560 return ScalarBits <= 64;
2568 if (!ST->hasMVEIntegerOps())
2575 bool HasBaseReg, int64_t Scale,
2576 unsigned AddrSpace)
const {
2584 return AM.
Scale < 0 ? 1 : 0;
2595 return ST->
isThumb2() || ST->hasV8MBaselineOps();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides the interface for the instcombine pass implementation.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
This file defines the LoopVectorizationLegality class.
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool isThumb1Only() const
bool hasFPARMv8Base() const
unsigned getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind) const
bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr)
bool maybeLoweredToCall(Instruction &I)
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
bool isLegalMaskedStore(Type *DataTy, Align Alignment)
bool isLegalMaskedLoad(Type *DataTy, Align Alignment)
InstructionCost getMemcpyCost(const Instruction *I)
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLoweredToCall(const Function *F)
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
int getNumMemOps(const IntrinsicInst *I) const
Given a memcpy/memset/memmove instruction, return the number of memory operations performed,...
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool hasArmWideBranch(bool Thumb) const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalMaskedGather(Type *Ty, Align Alignment)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM.
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool isProfitableLSRChainElement(Instruction *I)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool useSoftFloat() const override
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
ConstantInt * getTrue()
Get the constant value for i1 true.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, unsigned Depth=0)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
AssumptionCache & getAssumptionCache() const
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Drive the analysis of memory accesses in the loop.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
LoopInfo * getLoopInfo() const
DominatorTree * getDominatorTree() const
AssumptionCache * getAssumptionCache() const
const LoopAccessInfo * getLAI() const
ScalarEvolution * getScalarEvolution() const
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
APInt getUnsignedRangeMax(const SCEV *S)
Determine the max of the unsigned range for a particular SCEV.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Primary interface to the complete machine description for the target machine.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
@ C
The default llvm calling convention, compatible with C.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
@ ForceEnabledNoReductions
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_FMAXNUM
Floating point maxnum.
@ SPF_UMIN
Unsigned minimum.
@ SPF_UMAX
Unsigned maximum.
@ SPF_SMAX
Signed maximum.
@ SPF_FMINNUM
Floating point minnum.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
@ Data
Use predicate only to mask operations on data in the loop.
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
SelectPatternFlavor Flavor
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.