#include "llvm/IR/IntrinsicsARM.h"
#define DEBUG_TYPE "armtti"
    cl::desc("Enable the generation of masked loads and stores"));
    cl::desc("Disable the generation of low-overhead loops"));
    cl::desc("Enable the generation of WLS loops"));
    cl::desc("Enable the widening of global strings to alignment boundaries"));
  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();
          PointerType::get(II.getType(), 0));
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
      TM.getSubtargetImpl(*Callee)->getFeatureBits();
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
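// Editor's sketch (not in the original source): the feature-bit comparison in
// areInlineCompatible() requires caller and callee to agree exactly on every
// feature outside InlineFeaturesAllowed, and the callee's remaining "allowed"
// features to be a subset of the caller's. A standalone illustration with
// plain bitmasks (the FeatureBitset type is replaced by unsigned here, and the
// function name is hypothetical):
static bool inlineCompatibleSketch(unsigned CallerBits, unsigned CalleeBits,
                                   unsigned InlineFeaturesAllowed) {
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
}
// For example, if only bit 0 is "allowed", a caller with bits {0,1} can inline
// a callee with bits {1} or {0,1}, but not one that also sets bit 2.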
  if (ST->hasMVEIntegerOps())
  if (L->getHeader()->getParent()->hasOptSize())
      L->getNumBlocks() == 1)
std::optional<Instruction *>
  using namespace PatternMatch;
  case Intrinsic::arm_neon_vld1: {
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned AlignArg = II.arg_size() - 1;
    Value *AlignArgOp = II.getArgOperand(AlignArg);
    MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
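// Editor's note (not in the original source): for these NEON load/store
// intrinsics the last argument is the alignment immediate. The surrounding
// (elided) code compares it against the alignment that can be proven with
// getKnownAlignment() and, if a larger alignment is known, rewrites the
// intrinsic's alignment operand so the backend can use it.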
  case Intrinsic::arm_neon_vld1x2:
  case Intrinsic::arm_neon_vld1x3:
  case Intrinsic::arm_neon_vld1x4:
  case Intrinsic::arm_neon_vst1x2:
  case Intrinsic::arm_neon_vst1x3:
  case Intrinsic::arm_neon_vst1x4: {
    Align OldAlign = II.getParamAlign(0).valueOrOne();
    if (NewAlign > OldAlign)
  case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II.getArgOperand(0);
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
    if (match(Arg,
              m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
      if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
        if (CI->getValue().trunc(16).isAllOnes()) {
              cast<FixedVectorType>(II.getType())->getNumElements(),
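// Editor's note (not in the original source): arm_mve_pred_i2v converts a
// 16-bit integer predicate to a vector predicate and arm_mve_pred_v2i is its
// inverse, so i2v(v2i(x)) folds straight back to x. The xor case above handles
// an inverted mask: when the 16-bit xor constant is all ones, the (elided)
// code builds an all-true splat of the predicate type and performs the
// inversion at the vector-predicate level instead.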
  case Intrinsic::arm_mve_pred_v2i: {
    Value *Arg = II.getArgOperand(0);
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
    if (II.getMetadata(LLVMContext::MD_range))
    if (auto CurrentRange = II.getRange()) {
      if (Range == CurrentRange)
    II.addRetAttr(Attribute::NoUndef);
  case Intrinsic::arm_mve_vadc:
  case Intrinsic::arm_mve_vadc_predicated: {
    unsigned CarryOp =
        (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
    assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
           "Bad type for intrinsic!");
  case Intrinsic::arm_mve_vmldava: {
    if (I->hasOneUse()) {
      auto *User = cast<Instruction>(*I->user_begin());
        Value *OpX = I->getOperand(4);
        Value *OpY = I->getOperand(5);
            {I->getOperand(0), I->getOperand(1),
             I->getOperand(2), OpZ, OpX, OpY});
                                  SimplifyAndSetOp) const {
  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
    unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
    unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
    SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
  switch (II.getIntrinsicID()) {
  case Intrinsic::arm_mve_vcvt_narrow:
    SimplifyNarrowInstrTopBottom(2);
  case Intrinsic::arm_mve_vqmovn:
    SimplifyNarrowInstrTopBottom(4);
  case Intrinsic::arm_mve_vshrn:
    SimplifyNarrowInstrTopBottom(7);
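// Editor's note (not in the original source): MVE narrowing operations such as
// vcvt_narrow, vqmovn and vshrn only write either the bottom (even) or top
// (odd) lanes of the result, selected by the "top" immediate whose operand
// index is passed to SimplifyNarrowInstrTopBottom. The helper therefore only
// demands the corresponding lanes of operand 0 before forwarding the demand
// via SimplifyAndSetOp; the untouched lanes come from the inserted-into
// operand.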
  if (Bits == 0 || Imm.getActiveBits() >= 64)
  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
    return ST->hasV6T2Ops() ? 2 : 3;
    if ((SImmVal >= 0 && SImmVal < 65536) ||
    return ST->hasV6T2Ops() ? 2 : 3;
  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
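// Editor's note (not in the original source): this is the scalar immediate
// cost model. Immediates that fit a 16-bit MOVW value or a shifter-operand
// encoding cost a single instruction; anything else costs two instructions
// with v6T2 (MOVW + MOVT) or three without it. Thumb1 only has 8-bit
// immediates, hence the separate < 256 checks.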
      C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
    auto isSSatMin = [&](Value *MinInst) {
      if (isa<SelectInst>(MinInst)) {
        Value *MinLHS, *MinRHS;
    return cast<Instruction>(Inst->getOperand(1))->getOperand(1);
  if (Imm.getBitWidth() != 64 ||
  if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
  return isa<FPToSIInst>(FP);
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
  if (Opcode == Instruction::GetElementPtr && Idx != 0)
  if (Opcode == Instruction::And) {
    if (Imm == 255 || Imm == 65535)
  if (Opcode == Instruction::Add)
  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1 << 12)
    if (ST->isThumb() && NegImm < 1 << 8)
  if (Opcode == Instruction::Xor && Imm.isAllOnes())
  if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
      (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
  if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
      (ST->hasNEON() || ST->hasMVEIntegerOps())) {
  assert(ISD && "Invalid opcode");
  return Cost == 0 ? 0 : 1;
  auto IsLegalFPType = [this](EVT VT) {
           (EltVT == MVT::f64 && ST->hasFP64()) ||
           (EltVT == MVT::f16 && ST->hasFullFP16());
  if ((ST->hasMVEIntegerOps() &&
       (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
        Opcode == Instruction::SExt)) ||
      (ST->hasMVEFloatOps() &&
       (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
       IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
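// Editor's note (not in the original source): the condition above picks out
// the casts MVE can handle directly: integer truncates and sign/zero extends
// when MVE integer ops are available, and FP extends/truncates between the
// types IsLegalFPType accepts (f32, f64 with FP64, f16 with full FP16) when
// MVE floating point is available. The (elided) body costs these cases before
// the generic conversion tables are consulted.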
      return AdjustCost(Entry->Cost);
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =
  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =
  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =
      I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
        { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::ADD, MVT::v8i16, MVT::v8i8,  0 },
        { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::SUB, MVT::v8i16, MVT::v8i8,  0 },
        { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::MUL, MVT::v8i16, MVT::v8i8,  0 },
        { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::SHL, MVT::v8i16, MVT::v8i8,  0 },
    auto *User = cast<Instruction>(*I->user_begin());
      return AdjustCost(Entry->Cost);
  if (Src->isVectorTy() && ST->hasNEON() &&
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return AdjustCost(LT.first * Entry->Cost);
  if (SrcTy.isVector() && ST->hasNEON()) {
      return AdjustCost(Entry->Cost);
      return AdjustCost(Entry->Cost);
  if (SrcTy.isInteger() && ST->hasNEON()) {
      return AdjustCost(Entry->Cost);
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
      if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
      return Lanes * CallCost;
      return AdjustCost(Entry->Cost);
  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                                           unsigned Index, Value *Op0,
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
  if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
                        Opcode == Instruction::ExtractElement)) {
    if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
    return std::max<InstructionCost>(
  if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
                                 Opcode == Instruction::ExtractElement)) {
    std::pair<InstructionCost, MVT> LT =
  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
    Sel = cast<Instruction>(Sel->user_back());
      IID = Intrinsic::abs;
      IID = Intrinsic::smin;
      IID = Intrinsic::smax;
      IID = Intrinsic::umin;
      IID = Intrinsic::umax;
      IID = Intrinsic::minnum;
      IID = Intrinsic::maxnum;
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
      (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
      cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
    FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
    if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
    if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
      return LT.first * BaseCost +
  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;
  if (ST->hasNEON()) {
    return NumVectorInstToHideOverhead;
  switch (II->getIntrinsicID()) {
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64:
  if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
    if (VecTy->getNumElements() == 2)
    if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
  return (EltWidth == 32 && Alignment >= 4) ||
         (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
  return ((EltWidth == 32 && Alignment >= 4) ||
          (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
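// Editor's note (not in the original source): the two returns above encode the
// MVE masked load/store and gather/scatter legality rule: 32-bit elements need
// at least 4-byte alignment, 16-bit elements need at least 2-byte alignment,
// and 8-bit elements are always allowed. Floating-point element types are only
// handled for 128-bit vectors (the VecWidth != 128 check).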
  unsigned DstAddrSpace = ~0u;
  unsigned SrcAddrSpace = ~0u;
  const Function *F = I->getParent()->getParent();
  if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = *MC->getDestAlign();
    const Align SrcAlign = *MC->getSourceAlign();
    DstAddrSpace = MC->getDestAddressSpace();
    SrcAddrSpace = MC->getSourceAddressSpace();
  } else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = *MS->getDestAlign();
    DstAddrSpace = MS->getDestAddressSpace();
  unsigned Limit, Factor = 2;
  switch (I->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  std::vector<EVT> MemOps;
  if (getTLI()->findOptimalMemOpLowering(
          MemOps, Limit, MOp, DstAddrSpace,
          SrcAddrSpace, F->getAttributes()))
    return MemOps.size() * Factor;
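// Editor's note (not in the original source): getNumMemOps asks the target
// lowering for the optimal sequence of load/store types for a constant-size
// mem intrinsic (findOptimalMemOpLowering) and, if that sequence fits under
// the per-intrinsic store limit, charges Factor instructions per entry: 2 for
// memcpy/memmove (a load plus a store per chunk) and 1 for memset (a store per
// chunk). For instance, a 16-byte memcpy lowered as two 8-byte operations
// would count as 2 * 2 = 4 instructions, which then feeds getMemcpyCost.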
  if (IsExtractSubvector)
  if (ST->hasNEON()) {
      if (const auto *Entry =
        return LT.first * Entry->Cost;
      if (const auto *Entry =
        return LT.first * Entry->Cost;
        return LT.first * Entry->Cost;
  if (ST->hasMVEIntegerOps()) {
        return LT.first * Entry->Cost *
  if (!Mask.empty()) {
    if (LT.second.isVector() &&
        Mask.size() <= LT.second.getVectorNumElements() &&
  if (IsExtractSubvector)
  int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
  switch (ISDOpcode) {
  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
    const unsigned ReciprocalDivCost = 10;
        {ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
        {ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
        {ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
        {ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
        {ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
        {ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
        {ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;
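// Editor's note (not in the original source): NEON has no vector integer
// divide, so the table above charges roughly one library call's worth of cost
// per lane for most types (FunctionCallDivCost per element), and the cheaper
// ReciprocalDivCost for the narrow v4i16/v8i8 divides that can be lowered with
// a reciprocal-estimate sequence.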
                                           Opcode, Ty, CostKind, Op1Info, Op2Info);
  auto LooksLikeAFreeShift = [&]() {
    switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Xor:
    case Instruction::Or:
    case Instruction::ICmp:
  if (LooksLikeAFreeShift())
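// Editor's note (not in the original source): on ARM and Thumb2, a constant
// shift whose single use feeds one of the data-processing instructions listed
// above can be folded into that instruction as a shifted operand, so
// LooksLikeAFreeShift() lets getArithmeticInstrCost treat such shifts as free.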
  if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
    return LT.first * BaseCost;
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
    unsigned Num = VTy->getNumElements();
  if (ST->hasNEON() && Src->isVectorTy() &&
      (Alignment && *Alignment != Align(16)) &&
      cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
    return LT.first * 4;
  if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
      ((Opcode == Instruction::Load && I->hasOneUse() &&
        isa<FPExtInst>(*I->user_begin())) ||
       (Opcode == Instruction::Store && isa<FPTruncInst>(I->getOperand(0))))) {
        Opcode == Instruction::Load
            : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
  if (ST->hasMVEIntegerOps()) {
  if (!isa<FixedVectorType>(Src))
  return cast<FixedVectorType>(Src)->getNumElements() * 8;
                                             bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");
  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {
    unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
    if (NumElts % Factor == 0 &&
    if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
      return 2 * BaseCost;
                                        UseMaskForCond, UseMaskForGaps);
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  using namespace PatternMatch;
  auto *VTy = cast<FixedVectorType>(DataTy);
  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();
      NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
  if (EltSize < 8 || Alignment < EltSize / 8)
  unsigned ExtSize = EltSize;
  if ((I->getOpcode() == Instruction::Load ||
       match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
    const User *Us = *I->users().begin();
    if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
          cast<Instruction>(Us)->getType()->getScalarSizeInBits();
      if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
           (TypeSize == 16 && EltSize == 8)) &&
  if ((I->getOpcode() == Instruction::Store ||
       match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
      (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
    unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
    if (((EltSize == 16 && TypeSize == 32) ||
  if (ExtSize * NumElems != 128 || NumElems < 4)
  if (ExtSize != 8 && ExtSize != 16)
  if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
    Ptr = BC->getOperand(0);
  if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
    if (GEP->getNumOperands() != 2)
    if (Scale != 1 && Scale * 8 != ExtSize)
    if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
      if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
    std::optional<FastMathFlags> FMF,
       (EltSize == 64 && ST->hasFP64()) ||
       (EltSize == 16 && ST->hasFullFP16()))) {
    unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
           NumElts * EltSize > VecLimit) {
      ExtractCost = NumElts / 2;
    return VecCost + ExtractCost +
      (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
    unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
        ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
        NumElts * EltSize == 64) {
    return VecCost + ExtractCost +
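// Editor's note (not in the original source): for FP and integer add
// reductions the (elided) loops above repeatedly halve the vector while it is
// wider than the widest legal register (128 bits with MVE, 64 with NEON),
// accumulating the cost of each extract-and-add step; the returned cost is
// that splitting cost plus the legal-width reduction and the final scalar
// extracts.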
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
  if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
      ExtractCost = cast<FixedVectorType>(Ty)->getNumElements() / 2;
                                   {Ty->getElementType(), Ty->getElementType()},
    return VecCost + ExtractCost +
  if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
      IID == Intrinsic::umin || IID == Intrinsic::umax) {
  unsigned Opc = ICA.getID();
  case Intrinsic::get_active_lane_mask:
    if (ST->hasMVEIntegerOps())
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::uadd_sat);
    bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
    if (auto *ITy = dyn_cast<IntegerType>(RetTy)) {
      if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
      if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))
    Type *CondTy = RetTy->getWithNewBitWidth(1);
    if (!ST->hasMVEIntegerOps())
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8) {
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1
  case Intrinsic::abs:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::umin:
  case Intrinsic::umax: {
    if (!ST->hasMVEIntegerOps())
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8)
  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    if (!ST->hasMVEFloatOps())
    if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = Opc == Intrinsic::fptosi_sat;
    if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
        (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
        (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
    if (ST->hasMVEFloatOps() &&
        (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
    if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
         (ST->hasFP64() && LT.second == MVT::f64) ||
         (ST->hasFullFP16() && LT.second == MVT::f16) ||
         (ST->hasMVEFloatOps() &&
          (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
                                     LT.second.getScalarSizeInBits());
                                LegalTy, {LegalTy, LegalTy});
                                LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost;
    Type *CondTy = RetTy->getWithNewBitWidth(1);
  if (!F->isIntrinsic())
  if (F->getName().starts_with("llvm.arm"))
  switch (F->getIntrinsicID()) {
  case Intrinsic::powi:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::sincos:
  case Intrinsic::pow:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::canonicalize:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
    if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
  case Intrinsic::masked_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter:
    return !ST->hasMVEIntegerOps();
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
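// Editor's note (not in the original source): isLoweredToCall() treats the
// libm-style intrinsics above as calls when the return type needs an FP unit
// the subtarget lacks (f64 without FP64, f16 without full FP16), and treats
// the masked memory intrinsics as calls unless MVE is available. The overflow
// and saturating arithmetic intrinsics are grouped for a common (elided)
// result, presumably because they expand inline rather than via a library
// routine.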
  if (auto *Call = dyn_cast<CallInst>(&I)) {
    if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::memcpy:
      case Intrinsic::memset:
      case Intrinsic::memmove:
    if (const Function *F = Call->getCalledFunction())
  switch (I.getOpcode()) {
  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  switch (I.getOpcode()) {
  case Instruction::Alloca:
  case Instruction::Load:
  case Instruction::Store:
  case Instruction::Select:
  case Instruction::PHI:
  if (I.getType()->isDoubleTy() && !ST->hasFP64())
  if (I.getType()->isHalfTy() && !ST->hasFullFP16())
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
  const SCEV *TripCountSCEV =
    LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
    if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
      switch (Call->getIntrinsicID()) {
      case Intrinsic::start_loop_iterations:
      case Intrinsic::test_start_loop_iterations:
      case Intrinsic::loop_decrement:
      case Intrinsic::loop_decrement_reg:
  bool IsTailPredLoop = false;
  auto ScanLoop = [&](Loop *L) {
    for (auto *BB : L->getBlocks()) {
      for (auto &I : *BB) {
            isa<InlineAsm>(I)) {
        if (auto *II = dyn_cast<IntrinsicInst>(&I))
              II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
  for (auto *Inner : *L)
    if (!ScanLoop(Inner))
  if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
  if (auto *II = dyn_cast<IntrinsicInst>(&I))
    if ((II->getIntrinsicID() == Intrinsic::smin ||
         II->getIntrinsicID() == Intrinsic::smax ||
         II->getIntrinsicID() == Intrinsic::umin ||
         II->getIntrinsicID() == Intrinsic::umax) &&
  if (isa<FCmpInst>(&I))
  if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))
  if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
    if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
  if (isa<TruncInst>(&I))
    if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
  LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
  bool ReductionsDisabled =
  for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
      LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
                           "live-out value\n");
    if (ReductionsDisabled) {
    for (Instruction &I : BB->instructionsWithoutDebug()) {
      if (isa<PHINode>(&I))
      if (T->getScalarSizeInBits() > 32) {
      if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
        if (NextStride == 1) {
        } else if (NextStride == -1 ||
                     << "Consecutive strides of 2 found, vld2/vstr2 can't "
                        "be tail-predicated\n.");
        if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
          const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
                     "tail-predicate\n.");
  LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
  if (!ST->hasMVEIntegerOps())
  if (L->getNumBlocks() > 1) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
  assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
    return isa<IntrinsicInst>(I) &&
           cast<IntrinsicInst>(I).getIntrinsicID() ==
               Intrinsic::get_active_lane_mask;
  if (L->getHeader()->getParent()->hasOptSize())
  L->getExitingBlocks(ExitingBlocks);
             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");
  if (ExitingBlocks.size() > 2)
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
  unsigned ExitingValues = 0;
  L->getExitBlocks(ExitBlocks);
  for (auto *Exit : ExitBlocks) {
    unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
      return PH.getNumOperands() != 1 ||
             !isa<GetElementPtrInst>(PH.getOperand(0));
    ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
  auto *Outer = L->getOutermostLoop();
  if ((L != Outer && Outer != L->getParentLoop()) ||
  if (!ST->hasMVEIntegerOps())
  case Instruction::Add:
    return ScalarBits <= 64;
  if (!ST->hasMVEIntegerOps())
                                           bool HasBaseReg, int64_t Scale,
                                           unsigned AddrSpace) const {
    return AM.Scale < 0 ? 1 : 0;
  return ST->isThumb2() || ST->hasV8MBaselineOps();
  using namespace PatternMatch;
    return Ext->getType()->getScalarSizeInBits() ==
           2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
      !areExtDoubled(cast<Instruction>(Ext1)) ||
      !areExtDoubled(cast<Instruction>(Ext2)))
  using namespace PatternMatch;
  if (!I->getType()->isVectorTy())
  if (ST->hasNEON()) {
    switch (I->getOpcode()) {
    case Instruction::Sub:
    case Instruction::Add: {
    if (!ST->hasMVEIntegerOps())
    if (!I->hasOneUse())
    auto *Sub = cast<Instruction>(*I->users().begin());
    return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Mul:
    case Instruction::FAdd:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::FMul:
      return !IsFMSMul(I);
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
      return Operand == 1;
    case Instruction::Call:
      if (auto *II = dyn_cast<IntrinsicInst>(I)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::fma:
        case Intrinsic::sadd_sat:
        case Intrinsic::uadd_sat:
        case Intrinsic::arm_mve_add_predicated:
        case Intrinsic::arm_mve_mul_predicated:
        case Intrinsic::arm_mve_qadd_predicated:
        case Intrinsic::arm_mve_vhadd:
        case Intrinsic::arm_mve_hadd_predicated:
        case Intrinsic::arm_mve_vqdmull:
        case Intrinsic::arm_mve_vqdmull_predicated:
        case Intrinsic::arm_mve_vqdmulh:
        case Intrinsic::arm_mve_qdmulh_predicated:
        case Intrinsic::arm_mve_vqrdmulh:
        case Intrinsic::arm_mve_qrdmulh_predicated:
        case Intrinsic::arm_mve_fma_predicated:
        case Intrinsic::ssub_sat:
        case Intrinsic::usub_sat:
        case Intrinsic::arm_mve_sub_predicated:
        case Intrinsic::arm_mve_qsub_predicated:
        case Intrinsic::arm_mve_hsub_predicated:
        case Intrinsic::arm_mve_vhsub:
          return Operand == 1;
  for (auto OpIdx : enumerate(I->operands())) {
    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
    if (Shuffle->getOpcode() == Instruction::BitCast)
      Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
    if (!IsSinker(I, OpIdx.index()))
    for (Use &U : Op->uses()) {
      if (!IsSinker(Insn, U.getOperandNo()))
  unsigned NumBytesToPad = 4 - (Size % 4);
  unsigned NewSize = Size + NumBytesToPad;
  if (NewSize > MaxMemIntrinsicSize)
  return NumBytesToPad;
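// Editor's sketch (not in the original source): getNumBytesToPadGlobalArray
// rounds a global array up to the next 4-byte boundary so memcpy-style
// accesses can use word operations, but refuses to pad past the inline memcpy
// threshold. A standalone illustration of the arithmetic, with a hypothetical
// function name and the threshold passed in as a parameter:
static unsigned numBytesToPadExample(unsigned Size,
                                     unsigned MaxMemIntrinsicSize) {
  if (Size % 4 == 0)
    return 0;                              // already word aligned
  unsigned NumBytesToPad = 4 - (Size % 4); // e.g. Size = 10 -> pad by 2
  unsigned NewSize = Size + NumBytesToPad; // e.g. 10 -> 12
  if (NewSize > MaxMemIntrinsicSize)       // don't grow past the threshold
    return 0;
  return NumBytesToPad;
}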