#include "llvm/IR/IntrinsicsARM.h"

#define DEBUG_TYPE "armtti"

    cl::desc("Enable the generation of masked loads and stores"));

    cl::desc("Disable the generation of low-overhead loops"));

    cl::desc("Enable the generation of WLS loops"));
  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));

  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();

                                   PointerType::get(II.getType(), 0));
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
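// Intent of the two checks above: any feature outside InlineFeaturesAllowed
// must match exactly between caller and callee, and within the allowed set the
// callee's features must be a subset of the caller's; otherwise inlining could
// pull instructions into the caller that its subtarget cannot execute.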
  if (ST->hasMVEIntegerOps())

  if (L->getHeader()->getParent()->hasOptSize())

      L->getNumBlocks() == 1)
std::optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  using namespace PatternMatch;
  case Intrinsic::arm_neon_vld1: {

  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned AlignArg = II.arg_size() - 1;
    Value *AlignArgOp = II.getArgOperand(AlignArg);
    MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
  case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II.getArgOperand(0);
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(

    if (match(Arg,
              m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(

      if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
        if (CI->getValue().trunc(16).isAllOnes()) {

              cast<FixedVectorType>(II.getType())->getNumElements(),
  case Intrinsic::arm_mve_pred_v2i: {
    Value *Arg = II.getArgOperand(0);
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(

    if (II.getMetadata(LLVMContext::MD_range))

    if (auto CurrentRange = II.getRange()) {
      if (Range == CurrentRange)

    II.addRetAttr(Attribute::NoUndef);
  case Intrinsic::arm_mve_vadc:
  case Intrinsic::arm_mve_vadc_predicated: {
    unsigned CarryOp =
        (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
    assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
           "Bad type for intrinsic!");
  case Intrinsic::arm_mve_vmldava: {
    if (I->hasOneUse()) {
      auto *User = cast<Instruction>(*I->user_begin());

        Value *OpX = I->getOperand(4);
        Value *OpY = I->getOperand(5);

            {I->getOperand(0), I->getOperand(1),
             I->getOperand(2), OpZ, OpX, OpY});
    SimplifyAndSetOp) const {

  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
    unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
    unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();

    SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);

  switch (II.getIntrinsicID()) {

  case Intrinsic::arm_mve_vcvt_narrow:
    SimplifyNarrowInstrTopBottom(2);

  case Intrinsic::arm_mve_vqmovn:
    SimplifyNarrowInstrTopBottom(4);

  case Intrinsic::arm_mve_vshrn:
    SimplifyNarrowInstrTopBottom(7);
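// Note on the calls above: TopOpc is the index of the constant top/bottom flag
// operand of each MVE narrowing intrinsic; depending on that flag only the
// even or odd lanes of the narrowed source are demanded, which is what
// SimplifyNarrowInstrTopBottom propagates through SimplifyAndSetOp.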
  if (Bits == 0 || Imm.getActiveBits() >= 64)

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||

    return ST->hasV6T2Ops() ? 2 : 3;

    if ((SImmVal >= 0 && SImmVal < 65536) ||

    return ST->hasV6T2Ops() ? 2 : 3;

  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))

  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
      C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {

    auto isSSatMin = [&](Value *MinInst) {
      if (isa<SelectInst>(MinInst)) {
        Value *MinLHS, *MinRHS;

      return cast<Instruction>(Inst->getOperand(1))->getOperand(1);

  if (Imm.getBitWidth() != 64 ||

  if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())

  return isa<FPToSIInst>(FP);
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&

  if (Opcode == Instruction::GetElementPtr && Idx != 0)

  if (Opcode == Instruction::And) {
    if (Imm == 255 || Imm == 65535)

  if (Opcode == Instruction::Add)

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1<<12)
    if (ST->isThumb() && NegImm < 1<<8)

  if (Opcode == Instruction::Xor && Imm.isAllOnes())

  if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
      (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&

  if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
      (ST->hasNEON() || ST->hasMVEIntegerOps())) {

  assert(ISD && "Invalid opcode");

  return Cost == 0 ? 0 : 1;

  auto IsLegalFPType = [this](EVT VT) {

           (EltVT == MVT::f64 && ST->hasFP64()) ||
           (EltVT == MVT::f16 && ST->hasFullFP16());

  if ((ST->hasMVEIntegerOps() &&
       (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
        Opcode == Instruction::SExt)) ||
      (ST->hasMVEFloatOps() &&
       (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
       IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))

    return AdjustCost(Entry->Cost);
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =

  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =

  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =

      I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {

        { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::ADD, MVT::v8i16, MVT::v8i8,  0 },

        { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::SUB, MVT::v8i16, MVT::v8i8,  0 },

        { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::MUL, MVT::v8i16, MVT::v8i8,  0 },

        { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::SHL, MVT::v8i16, MVT::v8i8,  0 },

    auto *User = cast<Instruction>(*I->user_begin());

      return AdjustCost(Entry->Cost);
  if (Src->isVectorTy() && ST->hasNEON() &&

    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return AdjustCost(LT.first * Entry->Cost);

  if (SrcTy.isVector() && ST->hasNEON()) {
      return AdjustCost(Entry->Cost);

      return AdjustCost(Entry->Cost);

  if (SrcTy.isInteger() && ST->hasNEON()) {
      return AdjustCost(Entry->Cost);

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {

    if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
    return Lanes * CallCost;

      return AdjustCost(Entry->Cost);
  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()

  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&

  if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
                        Opcode == Instruction::ExtractElement)) {
    if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())

    return std::max<InstructionCost>(

  if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
                                 Opcode == Instruction::ExtractElement)) {
    std::pair<InstructionCost, MVT> LT =

  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
    Sel = cast<Instruction>(Sel->user_back());
      IID = Intrinsic::abs;
      IID = Intrinsic::smin;
      IID = Intrinsic::smax;
      IID = Intrinsic::umin;
      IID = Intrinsic::umax;
      IID = Intrinsic::minnum;
      IID = Intrinsic::maxnum;

        { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
      (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
      cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
    FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
    if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {

    if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
      return LT.first * BaseCost +

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())

  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (ST->hasNEON()) {
    return NumVectorInstToHideOverhead;
  switch (II->getIntrinsicID()) {
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64:

  if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
    if (VecTy->getNumElements() == 2)

    if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())

  return (EltWidth == 32 && Alignment >= 4) ||
         (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);

  return ((EltWidth == 32 && Alignment >= 4) ||
          (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
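// Both legality checks above follow the same rule: 8-, 16- and 32-bit elements
// are supported provided the access is at least element-aligned (any, 2-byte
// and 4-byte alignment respectively).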
  unsigned DstAddrSpace = ~0u;
  unsigned SrcAddrSpace = ~0u;
  const Function *F = I->getParent()->getParent();

  if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());

    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = *MC->getDestAlign();
    const Align SrcAlign = *MC->getSourceAlign();

    DstAddrSpace = MC->getDestAddressSpace();
    SrcAddrSpace = MC->getSourceAddressSpace();
  } else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());

    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = *MS->getDestAlign();

    DstAddrSpace = MS->getDestAddressSpace();

  unsigned Limit, Factor = 2;
  switch (I->getIntrinsicID()) {
  case Intrinsic::memcpy:

  case Intrinsic::memmove:

  case Intrinsic::memset:

  std::vector<EVT> MemOps;
  if (getTLI()->findOptimalMemOpLowering(
          MemOps, Limit, MOp, DstAddrSpace,
          SrcAddrSpace, F->getAttributes()))
    return MemOps.size() * Factor;
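// A minimal usage sketch (assumed, not verbatim from this file): callers such
// as the memcpy cost model treat a failed lowering as a library call.
//   int NumOps = getNumMemOps(MemInst);        // MemInst: assumed memcpy/memset intrinsic call
//   InstructionCost Cost = NumOps < 0 ? LibCallCost : NumOps;  // LibCallCost: assumed constant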
  if (IsExtractSubvector)

  if (ST->hasNEON()) {

    if (const auto *Entry =
      return LT.first * Entry->Cost;

    if (const auto *Entry =
      return LT.first * Entry->Cost;

      return LT.first * Entry->Cost;

  if (ST->hasMVEIntegerOps()) {

      return LT.first * Entry->Cost *

  if (!Mask.empty()) {
    if (LT.second.isVector() &&
        Mask.size() <= LT.second.getVectorNumElements() &&

  if (IsExtractSubvector)

  int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
  switch (ISDOpcode) {

  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
    const unsigned ReciprocalDivCost = 10;

        { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
        { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
        { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
        { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
        { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
        { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
        { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
        { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
        { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
        { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
        { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
        { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},

        { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
        { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
        { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
        { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
        { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
        { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
        { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
        { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},

    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;
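// Reading the table above: NEON has no vector integer divide, so each division
// or remainder is costed as one libcall per lane (FunctionCallDivCost), except
// for the v4i16/v8i8 [US]DIV entries, which the name ReciprocalDivCost suggests
// are costed as a cheaper reciprocal-estimate expansion.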
        Opcode, Ty, CostKind, Op1Info, Op2Info);
  auto LooksLikeAFreeShift = [&]() {

    switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Xor:
    case Instruction::Or:
    case Instruction::ICmp:

  if (LooksLikeAFreeShift())

  if (ST->hasMVEIntegerOps() && Ty->isVectorTy())

    return LT.first * BaseCost;

  if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
    unsigned Num = VTy->getNumElements();
  if (ST->hasNEON() && Src->isVectorTy() &&
      (Alignment && *Alignment != Align(16)) &&
      cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
    return LT.first * 4;

  if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
      ((Opcode == Instruction::Load && I->hasOneUse() &&
        isa<FPExtInst>(*I->user_begin())) ||
       (Opcode == Instruction::Store && isa<FPTruncInst>(I->getOperand(0))))) {

        Opcode == Instruction::Load

            : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()

  if (ST->hasMVEIntegerOps()) {

    if (!isa<FixedVectorType>(Src))

    return cast<FixedVectorType>(Src)->getNumElements() * 8;
    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {
    unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();

    if (NumElts % Factor == 0 &&

    if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
      return 2 * BaseCost;

                                           UseMaskForCond, UseMaskForGaps);
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  using namespace PatternMatch;

  auto *VTy = cast<FixedVectorType>(DataTy);

  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();

      NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +

  if (EltSize < 8 || Alignment < EltSize / 8)

  unsigned ExtSize = EltSize;

  if ((I->getOpcode() == Instruction::Load ||
       match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
    const User *Us = *I->users().begin();
    if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
          cast<Instruction>(Us)->getType()->getScalarSizeInBits();
      if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
           (TypeSize == 16 && EltSize == 8)) &&

  if ((I->getOpcode() == Instruction::Store ||
       match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
      (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
    unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
    if (((EltSize == 16 && TypeSize == 32) ||

  if (ExtSize * NumElems != 128 || NumElems < 4)

  if (ExtSize != 8 && ExtSize != 16)

  if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
    Ptr = BC->getOperand(0);
  if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
    if (GEP->getNumOperands() != 2)

    if (Scale != 1 && Scale * 8 != ExtSize)

    if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
      if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
    std::optional<FastMathFlags> FMF,

       (EltSize == 64 && ST->hasFP64()) ||
       (EltSize == 16 && ST->hasFullFP16()))) {
    unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);

           NumElts * EltSize > VecLimit) {

      ExtractCost = NumElts / 2;

    return VecCost + ExtractCost +

      (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
    unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
        ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {

        NumElts * EltSize == 64) {

    return VecCost + ExtractCost +
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,

      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))

      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))

  if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {

      ExtractCost = cast<FixedVectorType>(Ty)->getNumElements() / 2;

                              {Ty->getElementType(), Ty->getElementType()},
    return VecCost + ExtractCost +

  if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
      IID == Intrinsic::umin || IID == Intrinsic::umax) {
  unsigned Opc = ICA.getID();

  case Intrinsic::get_active_lane_mask:
    if (ST->hasMVEIntegerOps())

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::uadd_sat);
    bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);

    if (auto *ITy = dyn_cast<IntegerType>(RetTy)) {
      if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
      if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))

    Type *CondTy = RetTy->getWithNewBitWidth(1);

    if (!ST->hasMVEIntegerOps())

    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8) {

             LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1
  case Intrinsic::abs:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::umin:
  case Intrinsic::umax: {
    if (!ST->hasMVEIntegerOps())

    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8)

  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    if (!ST->hasMVEFloatOps())

    if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)

  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = Opc == Intrinsic::fptosi_sat;

    if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
        (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
        (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))

    if (ST->hasMVEFloatOps() &&
        (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&

    if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
         (ST->hasFP64() && LT.second == MVT::f64) ||
         (ST->hasFullFP16() && LT.second == MVT::f16) ||
         (ST->hasMVEFloatOps() &&
          (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
                                    LT.second.getScalarSizeInBits());

                              LegalTy, {LegalTy, LegalTy});

                              LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost;
    Type *CondTy = RetTy->getWithNewBitWidth(1);

  if (!F->isIntrinsic())

  if (F->getName().starts_with("llvm.arm"))
  switch (F->getIntrinsicID()) {

  case Intrinsic::powi:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::exp:
  case Intrinsic::exp2:

  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::canonicalize:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
    if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())

  case Intrinsic::masked_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter:
    return !ST->hasMVEIntegerOps();
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
  if (auto *Call = dyn_cast<CallInst>(&I)) {
    if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::memcpy:
      case Intrinsic::memset:
      case Intrinsic::memmove:

    if (const Function *F = Call->getCalledFunction())

  switch (I.getOpcode()) {

  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  switch (I.getOpcode()) {

  case Instruction::Alloca:
  case Instruction::Load:
  case Instruction::Store:
  case Instruction::Select:
  case Instruction::PHI:

  if (I.getType()->isDoubleTy() && !ST->hasFP64())

  if (I.getType()->isHalfTy() && !ST->hasFullFP16())

  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {

  const SCEV *TripCountSCEV =

  LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
  if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
    switch (Call->getIntrinsicID()) {

    case Intrinsic::start_loop_iterations:
    case Intrinsic::test_start_loop_iterations:
    case Intrinsic::loop_decrement:
    case Intrinsic::loop_decrement_reg:

  bool IsTailPredLoop = false;
  auto ScanLoop = [&](Loop *L) {
    for (auto *BB : L->getBlocks()) {
      for (auto &I : *BB) {
            isa<InlineAsm>(I)) {

        if (auto *II = dyn_cast<IntrinsicInst>(&I))
              II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;

  for (auto *Inner : *L)
    if (!ScanLoop(Inner))
  if (isa<ICmpInst>(&I) && ++ICmpCount > 1)

  if (auto *II = dyn_cast<IntrinsicInst>(&I))
    if ((II->getIntrinsicID() == Intrinsic::smin ||
         II->getIntrinsicID() == Intrinsic::smax ||
         II->getIntrinsicID() == Intrinsic::umin ||
         II->getIntrinsicID() == Intrinsic::umax) &&

  if (isa<FCmpInst>(&I))

  if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))

  if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
    if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))

  if (isa<TruncInst>(&I))
    if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
  LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");

  bool ReductionsDisabled =

  for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
      LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
                           "live-out value\n");

    if (ReductionsDisabled) {
    for (Instruction &I : BB->instructionsWithoutDebug()) {
      if (isa<PHINode>(&I))

      if (T->getScalarSizeInBits() > 32) {

      if (isa<StoreInst>(I) || isa<LoadInst>(I)) {

        if (NextStride == 1) {

        } else if (NextStride == -1 ||

                     << "Consecutive strides of 2 found, vld2/vstr2 can't "
                        "be tail-predicated\n.");

        if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
          const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

                            "tail-predicate\n.");

  LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
  if (!ST->hasMVEIntegerOps())

  if (L->getNumBlocks() > 1) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "

  assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    return isa<IntrinsicInst>(I) &&
           cast<IntrinsicInst>(I).getIntrinsicID() ==
               Intrinsic::get_active_lane_mask;
  if (L->getHeader()->getParent()->hasOptSize())

  L->getExitingBlocks(ExitingBlocks);
             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");

  if (ExitingBlocks.size() > 2)

  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {

      if (I.getType()->isVectorTy())

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

  unsigned ExitingValues = 0;
  L->getExitBlocks(ExitBlocks);
  for (auto *Exit : ExitBlocks) {

    unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
      return PH.getNumOperands() != 1 ||
             !isa<GetElementPtrInst>(PH.getOperand(0));
    ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
  if (!ST->hasMVEIntegerOps())

  case Instruction::Add:
    return ScalarBits <= 64;

  if (!ST->hasMVEIntegerOps())

                                         bool HasBaseReg, int64_t Scale,
                                         unsigned AddrSpace) const {

  return AM.Scale < 0 ? 1 : 0;

  return ST->isThumb2() || ST->hasV8MBaselineOps();