#include "llvm/IR/IntrinsicsARM.h"
// ...

#define DEBUG_TYPE "armtti"

static cl::opt<bool> EnableMaskedLoadStores(
    "enable-arm-maskedldst", cl::Hidden, cl::init(true),
    cl::desc("Enable the generation of masked loads and stores"));

static cl::opt<bool> DisableLowOverheadLoops(
    "disable-arm-loloops", cl::Hidden, cl::init(false),
    cl::desc("Disable the generation of low-overhead loops"));

static cl::opt<bool> AllowWLSLoops(
    "allow-arm-wlsloops", cl::Hidden, cl::init(true),
    cl::desc("Enable the generation of WLS loops"));

static cl::opt<bool> UseWidenGlobalArrays(
    "widen-global-strings", cl::Hidden, cl::init(true),
    cl::desc("Enable the widening of global strings to alignment boundaries"));
  // Clamp the intrinsic's alignment operand up to at least the type's natural
  // memory alignment.
  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();
  // ...
  return Builder.CreateAlignedLoad(II.getType(), II.getArgOperand(0),
                                   Align(Alignment));
  // To inline a callee, all features not in the allowed set must match
  // exactly, and for features in the allowed set the callee's features must
  // be a subset of the caller's.
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
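// Illustrative note (added, not in the original source): suppose the only bit
// in InlineFeaturesAllowed is +dsp. A caller built with {+thumb2,+dsp} may
// inline a callee built with {+thumb2} or {+thumb2,+dsp}: the bits outside
// the allowed set match exactly, and the callee's allowed bits are a subset
// of the caller's. A {+thumb2,+dsp} callee cannot be inlined into a plain
// {+thumb2} caller, since the call site cannot guarantee the DSP extension.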
TTI::AddressingModeKind
ARMTTIImpl::getPreferredAddressingMode(const Loop *L,
                                       ScalarEvolution *SE) const {
  if (ST->hasMVEIntegerOps())
    return TTI::AMK_PostIndexed;

  if (L->getHeader()->getParent()->hasOptSize())
    return TTI::AMK_None;

  if (ST->isMClass() && ST->isThumb2() &&
      L->getNumBlocks() == 1)
    return TTI::AMK_PreIndexed;

  return TTI::AMK_None;
}
std::optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  // ...
  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::arm_neon_vld1: {
    // ...
  }

  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    // ...
    // The alignment is the last argument of these NEON load/store intrinsics.
    unsigned AlignArg = II.arg_size() - 1;
    Value *AlignArgOp = II.getArgOperand(AlignArg);
  case Intrinsic::arm_neon_vld1x2:
  case Intrinsic::arm_neon_vld1x3:
  case Intrinsic::arm_neon_vld1x4:
  case Intrinsic::arm_neon_vst1x2:
  case Intrinsic::arm_neon_vst1x3:
  case Intrinsic::arm_neon_vst1x4: {
    // ...
    Align OldAlign = II.getParamAlign(0).valueOrOne();
    if (NewAlign > OldAlign)
      II.addParamAttr(0,
                      Attribute::getWithAlignment(II.getContext(), NewAlign));
    break;
  }
  case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II.getArgOperand(0);
    // ...
    if (CI->getValue().trunc(16).isAllOnes()) {
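      // Added note: an i32 whose low 16 bits are all ones (e.g. 0xFFFF)
      // encodes an all-active VPR predicate, so the i2v conversion can be
      // replaced by a splat of `i1 true` rather than a real conversion.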
  case Intrinsic::arm_mve_pred_v2i: {
    Value *Arg = II.getArgOperand(0);
    // ...
    // Only the low 16 bits of the result can be set; record that as a range
    // on the call, unless an equal or tighter range is already present.
    if (II.getMetadata(LLVMContext::MD_range))
      break;
    // ...
    if (auto CurrentRange = II.getRange()) {
      if (Range == CurrentRange)
        break;
      // ...
    }
    // ...
    II.addRetAttr(Attribute::NoUndef);
  case Intrinsic::arm_mve_vadc:
  case Intrinsic::arm_mve_vadc_predicated: {
    unsigned CarryOp =
        (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
    assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
           "Bad type for intrinsic!");
  case Intrinsic::arm_mve_vmldava: {
    Instruction *I = cast<Instruction>(&II);
    if (I->hasOneUse()) {
      // ...
      Value *OpX = I->getOperand(4);
      Value *OpY = I->getOperand(5);
      // ...
                          {I->getOperand(0), I->getOperand(1),
                           I->getOperand(2), OpZ, OpX, OpY});
std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  // Compute the demanded elements of the narrowing top/bottom instruction's
  // vector operand and forward them to the generic simplification callback.
  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
    // ...
    SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
  };

  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::arm_mve_vcvt_narrow:
    SimplifyNarrowInstrTopBottom(2);
    break;
  case Intrinsic::arm_mve_vqmovn:
    SimplifyNarrowInstrTopBottom(4);
    break;
  case Intrinsic::arm_mve_vshrn:
    SimplifyNarrowInstrTopBottom(7);
    break;
  }
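// Added note: these MVE narrowing operations write only the top or bottom
// element of each output pair, selected by the constant "top" operand whose
// index is passed in (operand 2 for vcvt_narrow, 4 for vqmovn, 7 for vshrn),
// so only the matching half of the demanded output lanes is actually
// demanded from the input vector.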
InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                          TTI::TargetCostKind CostKind) const {
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)
    return 4;

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getSOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  // Thumb1.
  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
    return 1;
  // ...
}

// ...
  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
    // ...
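// Worked example (added note): on a v6t2/Thumb-2 core, 0x12345678 fits
// neither a modified-immediate encoding nor 16 bits, so it costs 2
// (MOVW+MOVT); before v6t2 the same constant costs 3, modelling a
// constant-pool load. 0x0000ABCD costs 1 via MOVW, and 0xFFFFFF00 costs 1
// because its bitwise NOT fits a modified immediate (materialised with MVN).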
// ...
      C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
    auto isSSatMin = [&](Value *MinInst) {
      // ...
      Value *MinLHS, *MinRHS;
// ...
  if (Imm.getBitWidth() != 64 ||
  // Division by a constant can be turned into multiplication; the immediate
  // is effectively free because the alternative is worse.
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
      Idx == 1)
    return 0;

  // Leave GEP offsets to CodeGenPrepare, which splits large offsets better.
  if (Opcode == Instruction::GetElementPtr && Idx != 0)
    return 0;

  if (Opcode == Instruction::And) {
    // UXTB/UXTH
    if (Imm == 255 || Imm == 65535)
      return 0;
    // ...
  }

  if (Opcode == Instruction::Add)
    // ...

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1 << 12)
      // icmp X, #-C can become cmn X, #C for free.
      return 0;
    if (ST->isThumb() && NegImm < 1 << 8)
      // Comparison with #-C becomes an ADDS for free.
      return 0;
  }

  // xor a, -1 can always be folded into MVN.
  if (Opcode == Instruction::Xor && Imm.isAllOnes())
    return 0;

  // Ensure negative constants of min(max()) or max(min()) patterns that
  // match SSAT instructions don't get hoisted.
  if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
      Ty->getIntegerBitWidth() <= 32) {
    // ...
  }

  if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
    // ...
  }
  // ...

InstructionCost ARMTTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
  if (CostKind == TTI::TCK_RecipThroughput &&
      (ST->hasNEON() || ST->hasMVEIntegerOps())) {
    // FIXME: The vectorizer is highly sensitive to the cost of these
    // instructions, which is too big. So it is assumed to be zero for now.
    return 0;
  }
  return BaseT::getCFInstrCost(Opcode, CostKind, I);
}
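// Added note: treating branches as free (cost 0) when NEON/MVE is available
// mirrors how the vectorizer weighs control flow; charging a realistic branch
// cost here has been observed to reject otherwise-profitable vectorization.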
InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                             Type *Src,
                                             TTI::CastContextHint CCH,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) const {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  // For non-throughput cost kinds, clamp the result to 0 or 1 instruction.
  auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost {
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost == 0 ? 0 : 1;
    return Cost;
  };
  auto IsLegalFPType = [this](EVT VT) {
    EVT EltVT = VT.getScalarType();
    return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
           (EltVT == MVT::f64 && ST->hasFP64()) ||
           (EltVT == MVT::f16 && ST->hasFullFP16());
  };
  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return AdjustCost(
        BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));

  // Extending masked loads / truncating masked stores are expensive because
  // we isel them into a vldr/vstr of the wrong type.
  if ((ST->hasMVEIntegerOps() &&
       (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
        Opcode == Instruction::SExt)) ||
      (ST->hasMVEFloatOps() &&
       (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
       IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
    if (CCH == TTI::CastContextHint::Masked &&
        DstTy.getSizeInBits() > 128)
      return 2 * DstTy.getVectorNumElements() *
             ST->getMVEVectorCostFactor(CostKind);
    // The extend of a load can often be free.
    // ...
    if (const auto *Entry = ConvertCostTableLookup(
            LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
    // ...

  // MVE extending loads.
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =
            ConvertCostTableLookup(MVELoadConversionTbl, ISD,
                                   DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
  }

  static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {
      // FPExtends are similar but also require the VCVT instructions.
      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},
  };
  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =
            ConvertCostTableLookup(MVEFLoadConversionTbl, ISD,
                                   DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
  }

  // The truncate of a store is free, or cheap on MVE.
  // ...
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =
            ConvertCostTableLookup(MVEStoreConversionTbl, ISD,
                                   SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
  }
  // ...
  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =
            ConvertCostTableLookup(MVEFStoreConversionTbl, ISD,
                                   SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
  }

  // NEON vector operations that can extend their inputs for free.
  if (/* ... */ I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
    static const TypeConversionCostTblEntry NEONDoubleWidthTbl[] = {
        // vaddl
        {ISD::ADD, MVT::v4i32, MVT::v4i16, 0},
        {ISD::ADD, MVT::v8i16, MVT::v8i8, 0},
        // vsubl
        {ISD::SUB, MVT::v4i32, MVT::v4i16, 0},
        {ISD::SUB, MVT::v8i16, MVT::v8i8, 0},
        // vmull
        {ISD::MUL, MVT::v4i32, MVT::v4i16, 0},
        {ISD::MUL, MVT::v8i16, MVT::v8i8, 0},
        // vshll
        {ISD::SHL, MVT::v4i32, MVT::v4i16, 0},
        {ISD::SHL, MVT::v8i16, MVT::v8i8, 0},
    };

    auto *User = cast<Instruction>(*I->user_begin());
    int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());
    if (const auto *Entry = ConvertCostTableLookup(NEONDoubleWidthTbl, UserISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT())) {
      return AdjustCost(Entry->Cost);
    }
  }

  // Single to/from double precision conversions.
  static const CostTblEntry NEONFltDblTbl[] = {
      // Vector fptrunc/fpext conversions.
      {ISD::FP_ROUND, MVT::v2f64, 2},
      {ISD::FP_EXTEND, MVT::v2f32, 2},
      {ISD::FP_EXTEND, MVT::v4f32, 4}};

  if (Src->isVectorTy() && ST->hasNEON() &&
      ((ISD == ISD::FP_ROUND && SrcTy.getScalarType() == MVT::f64) ||
       (ISD == ISD::FP_EXTEND && SrcTy.getScalarType() == MVT::f32 &&
        DstTy.getScalarType() == MVT::f64))) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return AdjustCost(LT.first * Entry->Cost);
  }

  // ...
  if (SrcTy.isVector() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  }

  // Scalar float to integer conversions.
  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  }

  // Scalar integer to float conversions.
  if (SrcTy.isInteger() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  }

  // MVE vector conversions.
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
  }

  if (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND) {
    // As a general rule, fp converts that were not matched above are
    // scalarized and cost 1 vcvt per lane, so long as the instruction is
    // available. If not, they become a series of function calls.
    const InstructionCost CallCost =
        getCallInstrCost(nullptr, Dst, {Src}, CostKind);
    int Lanes = 1;
    if (SrcTy.isFixedLengthVector())
      Lanes = SrcTy.getVectorNumElements();

    if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
      return Lanes;
    return Lanes * CallCost;
  }

  if (ST->hasMVEIntegerOps() && ISD == ISD::TRUNCATE &&
      SrcTy.isFixedLengthVector()) {
    // Treat a truncate with a larger-than-legal source (>128 bits for MVE)
    // as expensive: two instructions per lane.
    if ((SrcTy.getScalarType() == MVT::i8 ||
         SrcTy.getScalarType() == MVT::i16 ||
         SrcTy.getScalarType() == MVT::i32) &&
        SrcTy.getSizeInBits() > 128 &&
        SrcTy.getSizeInBits() > DstTy.getSizeInBits())
      return SrcTy.getVectorNumElements() * 2;
  }

  // Scalar integer conversion costs.
  if (SrcTy.isInteger()) {
    if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  }

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                     ? ST->getMVEVectorCostFactor(CostKind)
                     : 1;
  return AdjustCost(
      BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
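// Added note: a vector cast that misses every table above is charged the
// generic BaseT cost scaled by the MVE cost factor (typically 2, reflecting
// the beat-wise execution of MVE), while scalar casts keep a base cost of 1.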
InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                               TTI::TargetCostKind CostKind,
                                               unsigned Index,
                                               const Value *Op0,
                                               const Value *Op1) const {
  // Penalize inserting into a D-subregister on cores where that partial
  // update is slow.
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
    return 3;

  if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
                        Opcode == Instruction::ExtractElement)) {
    // ...
    // Cross-class copies are expensive on many microarchitectures, so assume
    // they are expensive by default.
    if (ValTy->isVectorTy() &&
        ValTy->getScalarSizeInBits() <= 32)
      return std::max<InstructionCost>(
          BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1),
          2U);
  }

  if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
                                 Opcode == Instruction::ExtractElement)) {
    // Integer cross-lane moves are more expensive than float, which can
    // sometimes just be vmovs.
    std::pair<InstructionCost, MVT> LT =
        getTypeLegalizationCost(ValTy);
    return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
  }
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  // Thumb scalar code size cost for select.
  if (CostKind == TTI::TCK_CodeSize && Opcode == Instruction::Select &&
      ST->isThumb() && !ValTy->isVectorTy()) {
    // Assume expensive structs.
    if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
      return TTI::TCC_Expensive;
    // ...
  }

  // ...
  if (ValTy->isIntegerTy(1))
    // ...

  // If this is a vector min/max/abs, use the cost of that intrinsic directly
  // instead, which should better model the cost.
  const Instruction *Sel = I;
  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
      Sel->hasOneUse())
    Sel = cast<Instruction>(Sel->user_back());
  if (Sel && ValTy->isVectorTy() &&
      (ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) {
    const Value *LHS, *RHS;
    SelectPatternFlavor SPF = matchSelectPattern(Sel, LHS, RHS).Flavor;
    unsigned IID = 0;
    switch (SPF) {
    case SPF_ABS:
      IID = Intrinsic::abs;
      break;
    case SPF_SMIN:
      IID = Intrinsic::smin;
      break;
    case SPF_SMAX:
      IID = Intrinsic::smax;
      break;
    case SPF_UMIN:
      IID = Intrinsic::umin;
      break;
    case SPF_UMAX:
      IID = Intrinsic::umax;
      break;
    case SPF_FMINNUM:
      IID = Intrinsic::minnum;
      break;
    case SPF_FMAXNUM:
      IID = Intrinsic::maxnum;
      break;
    default:
      break;
    }
  // On NEON a vector select gets lowered to vbsl.
  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
    // Lowering of some vector selects is currently far from perfect.
    static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
        {ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * 4 + 1 * 2 + 1},
        // ...
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
      (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
      /* ... */) {
    // Without MVE-FP an FCmp must be scalarized.
    if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
      // ...
    }
    // ...
    int BaseCost = ST->getMVEVectorCostFactor(CostKind);
    // ...
    if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
      // ...
      return LT.first * BaseCost +
             // ...
    }
    // ...
  }

  // Default to cheap (throughput/size of 1 instruction), but adjust the
  // throughput for the multiple beats an MVE instruction can take.
  int BaseCost = 1;
  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
    BaseCost = ST->getMVEVectorCostFactor(CostKind);
  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (ST->hasNEON()) {
    // ...
      return NumVectorInstToHideOverhead;
    // ...
  }
bool ARMTTIImpl::isProfitableLSRChainElement(Instruction *I) const {
  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::arm_mve_vctp8:
    case Intrinsic::arm_mve_vctp16:
    case Intrinsic::arm_mve_vctp32:
    case Intrinsic::arm_mve_vctp64:
      return true;
    default:
      break;
    }
  }
  return false;
}
  // ...
    if (VecTy->getNumElements() == 2)
      return false;

    // We don't support extending fp types.
    // ...
    if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
      return false;
  }

  unsigned EltWidth = DataTy->getScalarSizeInBits();
  return (EltWidth == 32 && Alignment >= 4) ||
         (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
}

// ...
  unsigned EltWidth = Ty->getScalarSizeInBits();
  return ((EltWidth == 32 && Alignment >= 4) ||
          (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
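// Example (added note): a masked load of <4 x i32> needs at least 4-byte
// alignment to use VLDRW, <8 x i16> needs 2-byte alignment for VLDRH, and
// <16 x i8> is always legal via VLDRB; anything else falls back to
// scalarization.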
int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
  // ...
  unsigned DstAddrSpace = ~0u;
  unsigned SrcAddrSpace = ~0u;
  const Function *F = I->getParent()->getParent();

  if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
    // ...
    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = MC->getDestAlign().valueOrOne();
    const Align SrcAlign = MC->getSourceAlign().valueOrOne();
    // ...
    DstAddrSpace = MC->getDestAddressSpace();
    SrcAddrSpace = MC->getSourceAddressSpace();
  } else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
    // ...
    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = MS->getDestAlign().valueOrOne();
    // ...
    DstAddrSpace = MS->getDestAddressSpace();
  }
  // ...

  unsigned Limit, Factor = 2;
  switch (I->getIntrinsicID()) {
  case Intrinsic::memcpy:
    Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
    break;
  case Intrinsic::memmove:
    Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
    break;
  case Intrinsic::memset:
    Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
    Factor = 1;
    break;
  default:
    llvm_unreachable("Expected a memcpy/move or memset!");
  }
  // ...

  // If the memory operation can be lowered to Limit or fewer operations,
  // count each selected memory type as Factor operations (a load and a store
  // for memcpy/memmove, a single store for memset).
  std::vector<EVT> MemOps;
  if (getTLI()->findOptimalMemOpLowering(C, MemOps, Limit, MOp, DstAddrSpace,
                                         SrcAddrSpace, F->getAttributes()))
    return MemOps.size() * Factor;
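// Worked example (added note): a memcpy of 15 bytes with 4-byte-aligned
// operands can be lowered to {i32, i32, i32, i16, i8}, i.e. five loads plus
// five stores, so getNumMemOps returns 5 * 2 = 10; a memset of the same size
// only needs the five stores, hence Factor = 1 for memset.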
1246 "Expected the Mask to match the return size if given");
1248 "Expected the same scalar types");
1253 if (IsExtractSubvector)
1255 if (ST->hasNEON()) {
1272 if (
const auto *Entry =
1274 return LT.first * Entry->Cost;
1293 if (
const auto *Entry =
1295 return LT.first * Entry->Cost;
1319 return LT.first * Entry->Cost;
1322 if (ST->hasMVEIntegerOps()) {
1335 return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
1338 if (!Mask.empty()) {
1345 (LT.second.getScalarSizeInBits() == 8 ||
1346 LT.second.getScalarSizeInBits() == 16 ||
1347 LT.second.getScalarSizeInBits() == 32) &&
1348 LT.second.getSizeInBits() == 128 &&
1349 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1351 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1353 return ST->getMVEVectorCostFactor(
CostKind) *
1354 std::max<InstructionCost>(1, LT.first / 4);
1361 (LT.second.getScalarSizeInBits() == 8 ||
1362 LT.second.getScalarSizeInBits() == 16 ||
1363 LT.second.getScalarSizeInBits() == 32) &&
1364 LT.second.getSizeInBits() == 128 &&
1365 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1367 Mask, 2, SrcTy->getElementCount().getKnownMinValue() * 2)) ||
1368 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1370 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2))))
1371 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1373 if (LT.second.isVector() &&
1374 Mask.size() <= LT.second.getVectorNumElements() &&
1377 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1382 if (IsExtractSubvector)
1384 int BaseCost = ST->hasMVEIntegerOps() && SrcTy->isVectorTy()
1385 ? ST->getMVEVectorCostFactor(
CostKind)
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  // ...
  switch (ISDOpcode) {
  // ...
  }

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
    const unsigned ReciprocalDivCost = 10;
    static const CostTblEntry CostTbl[] = {
        // Division.
        // These costs are somewhat random: choose a cost of 20 to indicate
        // that the operation will go through a function call.
        {ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
        {ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
        {ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
        {ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
        {ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
        {ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
        {ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
        // Quad register types.
        {ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
        // ...
    };

    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

    InstructionCost Cost = BaseT::getArithmeticInstrCost(
        Opcode, Ty, CostKind, Op1Info, Op2Info);
  // If this operation is a shift on ARM or Thumb-2, it might well be folded
  // into the following instruction, hence being free.
  auto LooksLikeAFreeShift = [&]() {
    if (ST->isThumb1Only() || Ty->isVectorTy())
      return false;

    if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift())
      return false;
    // ...

    // Folded into an ADC/ADD/AND/BIC/CMP/EOR/ORR/RSB/SBC/SUB.
    switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Xor:
    case Instruction::Or:
    case Instruction::ICmp:
      return true;
    default:
      return false;
    }
  };
  if (LooksLikeAFreeShift())
    return 0;
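// Example (added note): in `%s = shl i32 %a, 2 ; %r = add i32 %b, %s`, the
// shift's only user is an ADD, so on ARM/Thumb-2 it folds into the shifted
// operand form `add r0, r1, r2, lsl #2` and is modelled as free (cost 0).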
  // A multiply that feeds an accumulate through matching sext/zext of i16
  // operands can become a single DSP multiply-accumulate instruction, making
  // the multiply itself effectively free.
  auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,
                                 Type *Ty) -> bool {
    if (!ST->hasDSP())
      return false;

    if (Opcode != Instruction::Mul)
      return false;

    if (Ty->isVectorTy())
      return false;

    auto ValueOpcodesEqual = [](const Value *LHS, const Value *RHS) -> bool {
      return cast<Instruction>(LHS)->getOpcode() ==
             cast<Instruction>(RHS)->getOpcode();
    };
    auto IsExtInst = [](const Value *V) -> bool {
      return isa<ZExtInst>(V) || isa<SExtInst>(V);
    };
    auto IsExtensionFromHalf = [](const Value *V) -> bool {
      return cast<Instruction>(V)->getOperand(0)->getType()->isIntegerTy(16);
    };

    // Check the mul's operands to see if they are matching extends.
    auto *BinOp = dyn_cast<BinaryOperator>(I);
    if (!BinOp)
      return false;
    Value *Op0 = BinOp->getOperand(0);
    Value *Op1 = BinOp->getOperand(1);
    if (IsExtInst(Op0) && IsExtInst(Op1) && ValueOpcodesEqual(Op0, Op1)) {
      // ...
      if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
          !IsExtensionFromHalf(Op1))
        return false;

      // All users must look like an accumulate.
      for (auto *U : I->users())
        // ...
    }
    return false;
  };

  if (MulInDSPMLALPattern(CxtI, Opcode, Ty))
    return 0;
  // ...

  if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
    BaseCost = ST->getMVEVectorCostFactor(CostKind);

  // The rest mostly follows BaseT::getArithmeticInstrCost, without treating
  // floats as more expensive than scalars.
  if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
    return LT.first * BaseCost;
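// Illustrative IR (added sketch): the pattern being matched looks like
//   %xa = sext i16 %x to i32
//   %ya = sext i16 %y to i32
//   %m  = mul i32 %xa, %ya
//   %r  = add i32 %m, %acc
// which a DSP-enabled core executes as a single SMLABB-style
// multiply-accumulate, so the mul itself is costed as free (0).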
  // ...
    unsigned Num = VTy->getNumElements();
    // ...

// ...
  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind);

  // Unaligned f64-vector accesses on NEON are extremely inefficient: vst1/
  // vld1 need 4 uops where vldr/vstr need 1.
  if (ST->hasNEON() && Src->isVectorTy() && Alignment != Align(16) &&
      cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
    return LT.first * 4;
  }

  // MVE can fold an fpext(load) or fptrunc(store) into a single widening or
  // narrowing memory operation.
  if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
      ((Opcode == Instruction::Load && I->hasOneUse() &&
        isa<FPExtInst>(*I->user_begin())) ||
       (Opcode == Instruction::Store && isa<FPTruncInst>(I->getOperand(0))))) {
    // ...
    Type *DstTy =
        Opcode == Instruction::Load
            ? (*I->user_begin())->getType()
            : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
    // ...
      return ST->getMVEVectorCostFactor(CostKind);
  }

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                     ? ST->getMVEVectorCostFactor(CostKind)
                     : 1;
  return BaseCost * BaseT::getMemoryOpCost(Opcode, Src, Alignment,
                                           AddressSpace, CostKind);
InstructionCost
ARMTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                                  TTI::TargetCostKind CostKind) const {
  unsigned IID = MICA.getID();
  // ...
  if (ST->hasMVEIntegerOps()) {
    if (IID == Intrinsic::masked_load &&
        isLegalMaskedLoad(MICA.getDataType(), MICA.getAlignment(),
                          MICA.getAddressSpace()))
      return ST->getMVEVectorCostFactor(CostKind);
    if (IID == Intrinsic::masked_store &&
        isLegalMaskedStore(MICA.getDataType(), MICA.getAlignment(),
                           MICA.getAddressSpace()))
      return ST->getMVEVectorCostFactor(CostKind);
  }
  // ...
InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");
  // ...

  // vldN/vstN don't support vector types of i64/f64 elements.
  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {
    unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
    auto *SubVecTy =
        FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // vldN/vstN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one vldN/vstN instruction.
    int BaseCost =
        ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1;
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
      return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);

    // Some smaller-than-legal interleaved patterns are cheap as we can make
    // use of one of the vmovn instructions.
    if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
        VecTy->isIntOrIntVectorTy() &&
        DL.getTypeSizeInBits(SubVecTy).getFixedValue() <= 64)
      return 2 * BaseCost;
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
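// Example (added note): an interleaved load of <16 x i32> with Factor = 4
// splits into four <4 x i32> subvectors; each VLD4 group covers one legal
// 128-bit access, so the cost is Factor * BaseCost *
// getNumInterleavedAccesses = 4 * 2 * 1 = 8 with the MVE cost factor.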
InstructionCost ARMTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind,
    const Instruction *I) const {
  // ...
  auto *VTy = cast<FixedVectorType>(DataTy);
  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();
  // ...

  // For now assume that the MVE gather loads are effectively serialised: the
  // vector cost is the scalar cost multiplied by the number of elements.
  InstructionCost VectorCost =
      NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
  // The scalarization cost should be a lot higher: the number of elements
  // plus the scalarization overhead (and mask cost, if the mask is variable).
  InstructionCost ScalarCost =
      NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
      // ...

  if (EltSize < 8 || Alignment < EltSize / 8)
    return ScalarCost;

  unsigned ExtSize = EltSize;
  // Check whether there's a single user that asks for an extended type.
  if (I != nullptr) {
    // Depending on the caller, a gather is either an Instruction::Load or a
    // call to the masked_gather intrinsic.
    if ((I->getOpcode() == Instruction::Load ||
         match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
        I->hasOneUse()) {
      const User *Us = *I->users().begin();
      if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
        unsigned TypeSize =
            cast<Instruction>(Us)->getType()->getScalarSizeInBits();
        // Only allow valid type combinations.
        if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
             (TypeSize == 16 && EltSize == 8)) &&
            TypeSize * NumElems == 128)
          ExtSize = TypeSize;
      }
    }
    // Check whether the input data needs to be truncated.
    TruncInst *T;
    if ((I->getOpcode() == Instruction::Store ||
         match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
        (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
      unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
      if (((EltSize == 16 && TypeSize == 32) ||
           // ...
           ) &&
          TypeSize * NumElems == 128)
        ExtSize = TypeSize;
    }
  }

  if (ExtSize * NumElems != 128 || NumElems < 4)
    return ScalarCost;

  // Any (aligned) i32 gather will not need to be scalarised.
  // ...
  // For smaller element sizes, ensure the gep's index is correctly extended
  // from a small enough value. Other sizes (including i64) are scalarized.
  if (ExtSize != 8 && ExtSize != 16)
    return ScalarCost;

  if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
    Ptr = BC->getOperand(0);
  if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
    if (GEP->getNumOperands() != 2)
      return ScalarCost;
    unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());
    // The scale needs to be correct (which is only relevant for i16s).
    if (Scale != 1 && Scale * 8 != ExtSize)
      return ScalarCost;
    // And the index must be zext'd from a small enough type.
    if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
      if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
        return VectorCost;
    }
  }
  return ScalarCost;
InstructionCost
ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                       std::optional<FastMathFlags> FMF,
                                       TTI::TargetCostKind CostKind) const {
  EVT ValVT = TLI->getValueType(DL, ValTy);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // ...

  if (ISD == ISD::FADD && /* ... */
      ((EltSize == 32 && ST->hasVFP2Base()) ||
       (EltSize == 64 && ST->hasFP64()) ||
       (EltSize == 16 && ST->hasFullFP16()))) {
    // An fadd reduction is modelled as a tree of pairwise additions down to
    // the widest legal vector, then element extraction and scalar adds.
    unsigned VecLimit =
        ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    InstructionCost VecCost = 0;
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
      // ...
    }
    // ...
      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
    // ...
    InstructionCost ExtractCost = NumElts / 2;
    return VecCost + ExtractCost +
           // ...
  }

  if (ISD == ISD::ADD && /* ... */
      (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
    unsigned VecLimit =
        ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    // ...
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
      // ...
    }
    // MVE has a vaddv instruction that can handle the final reduction once
    // the vector has been narrowed to 64 bits.
    if (/* ... */ NumElts * EltSize == 64) {
      // ...
      VecCost += ST->getMVEVectorCostFactor(CostKind) +
                 // ...
    }
    // ...
    return VecCost + ExtractCost +
           /* ... */ getArithmeticInstrCost(
               Opcode, ValTy->getElementType(), CostKind);
  }
  // ...
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
InstructionCost ARMTTIImpl::getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
    std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const {
  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // ...

  // The extended vector reduction adds (vaddva) can be handled for all legal
  // types as long as the result stays within 32/64 bits.
  if (ST->hasMVEIntegerOps() && /* ... */
      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
    return ST->getMVEVectorCostFactor(CostKind) * LT.first;
  // ...
}

InstructionCost
ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode,
                                   Type *ResTy, VectorType *ValTy,
                                   TTI::TargetCostKind CostKind) const {
  // The multiply-accumulate reductions (vmlava) only handle the add
  // reduction opcode.
  if (RedOpcode != Instruction::Add)
    return InstructionCost::getInvalid();
  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);
  // ...
  if (ST->hasMVEIntegerOps() && /* ... */
      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
    return ST->getMVEVectorCostFactor(CostKind) * LT.first;
  // ...
}
  EVT ValVT = TLI->getValueType(DL, Ty);
  // ...
  if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
      /* ... */) {
    // A tree of pairwise vector min/max operations, then scalar clean-up.
    unsigned VecLimit =
        ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    InstructionCost VecCost = 0;
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
      // ...
    }
    // ...
      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
    // ...
    IntrinsicCostAttributes ICA(IID, Ty->getElementType(),
                                {Ty->getElementType(), Ty->getElementType()},
                                FMF);
    return VecCost + ExtractCost +
           /* ... */ getIntrinsicInstrCost(ICA, CostKind);
  }

  if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
      IID == Intrinsic::umin || IID == Intrinsic::umax) {
    // ...
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
  }
  switch (Opc) {
  // ...
  case Intrinsic::get_active_lane_mask:
    // Assumed free when MVE is available: it is usually folded into a
    // tail-predicated loop or a VCTP.
    if (ST->hasMVEIntegerOps())
      return 0;
    break;
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::uadd_sat);
    bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
    // ...
    // Scalar saturating arithmetic on DSP-enabled cores: QADD/QSUB on i32,
    // QADD8/QADD16 and friends on i8/i16.
    if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
      return 1;
    if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))
      return 2;
    // ...
    if (!ST->hasMVEIntegerOps())
      break;
    // ...
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8) {
      // ...
      return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;
    }
    break;
  }
  case Intrinsic::abs:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::umin:
  case Intrinsic::umax: {
    if (!ST->hasMVEIntegerOps())
      break;
    // ...
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    break;
  }
  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    if (!ST->hasMVEFloatOps())
      break;
    // ...
    if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    break;
  }
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    // ...
    bool IsSigned = Opc == Intrinsic::fptosi_sat;
    // ...
    // A scalar vcvt with saturation.
    if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
        (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
        (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
      return 1;
    // Vector types can use VCVT with saturation directly.
    if (ST->hasMVEFloatOps() &&
        (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
        /* ... */)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    // Otherwise a legal convert followed by a min and a max.
    if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
         (ST->hasFP64() && LT.second == MVT::f64) ||
         (ST->hasFullFP16() && LT.second == MVT::f16) ||
         (ST->hasMVEFloatOps() &&
          (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
        /* ... */) {
      Type *LegalTy = Type::getIntNTy(RetTy->getContext(),
                                      LT.second.getScalarSizeInBits());
      InstructionCost Cost =
          LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
      IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin
                                              : Intrinsic::umin,
                                     LegalTy, {LegalTy, LegalTy});
      Cost += getIntrinsicInstrCost(Attrs1, CostKind);
      IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax
                                              : Intrinsic::umax,
                                     LegalTy, {LegalTy, LegalTy});
      Cost += getIntrinsicInstrCost(Attrs2, CostKind);
      return LT.first * Cost;
    }
    break;
  }
bool ARMTTIImpl::isLoweredToCall(const Function *F) const {
  if (!F->isIntrinsic())
    return BaseT::isLoweredToCall(F);

  // Assume all Arm-specific intrinsics map to an instruction.
  if (F->getName().starts_with("llvm.arm"))
    return false;

  switch (F->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::powi:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::sincos:
  case Intrinsic::pow:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::exp:
  case Intrinsic::exp2:
    return true;
  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::canonicalize:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
      return true;
    if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
      return true;
    // Some operations can be handled by vector instructions; assume
    // unsupported vectors will be expanded into supported scalar ones.
    return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
  case Intrinsic::masked_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter:
    return !ST->hasMVEIntegerOps();
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
    return false;
  }

  return BaseT::isLoweredToCall(F);
}
bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) const {
  unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
  EVT VT = TLI->getValueType(DL, I.getType(), true);
  // ...

  // Memory intrinsics are lowered to library calls, which a low-overhead
  // loop must not contain.
  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::memcpy:
    case Intrinsic::memset:
    case Intrinsic::memmove:
      return true;
    default:
      // ...
      break;
    }
  }

  switch (I.getOpcode()) {
  default:
    break;
  // ...
  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    return !ST->hasFPARMv8Base();
  }
  // With soft-float, any floating-point instruction becomes a call; only
  // memory and control-flow style instructions are guaranteed not to.
  if (TLI->useSoftFloat()) {
    switch (I.getOpcode()) {
    default:
      return true;
    case Instruction::Alloca:
    case Instruction::Load:
    case Instruction::Store:
    case Instruction::Select:
    case Instruction::PHI:
      return false;
    }
  }

  // We can't deal with double/half values without the matching hardware
  // support.
  if (I.getType()->isDoubleTy() && !ST->hasFP64())
    return true;

  if (I.getType()->isHalfTy() && !ST->hasFullFP16())
    return true;
  const SCEV *TripCountSCEV =
      SE.getAddExpr(BackedgeTakenCount,
                    SE.getOne(BackedgeTakenCount->getType()));

  // The trip count must fit in LR, a 32-bit register.
  if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32) {
    LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
    return false;
  }

  // Making a call will trash LR, so keep low-overhead loops free of calls,
  // except for the hardware-loop intrinsics themselves.
  auto IsHardwareLoopIntrinsic = [](Instruction &I) {
    if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
      switch (Call->getIntrinsicID()) {
      default:
        break;
      case Intrinsic::start_loop_iterations:
      case Intrinsic::test_start_loop_iterations:
      case Intrinsic::loop_decrement:
      case Intrinsic::loop_decrement_reg:
        return true;
      }
    }
    return false;
  };

  // Scan the instructions to see if any will turn into a call, and whether
  // this loop is already, or will become, a tail-predicated loop.
  bool IsTailPredLoop = false;
  auto ScanLoop = [&](Loop *L) {
    for (auto *BB : L->getBlocks()) {
      for (auto &I : *BB) {
        if (maybeLoweredToCall(I) && !IsHardwareLoopIntrinsic(I)
            /* ... */) {
          // ...
          return false;
        }
        if (auto *II = dyn_cast<IntrinsicInst>(&I))
          IsTailPredLoop |=
              II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
      }
    }
    return true;
  };

  // Visit inner loops.
  for (auto *Inner : *L)
    if (!ScanLoop(Inner))
      return false;
// ...

  if ((II->getIntrinsicID() == Intrinsic::smin ||
       II->getIntrinsicID() == Intrinsic::smax ||
       II->getIntrinsicID() == Intrinsic::umin ||
       II->getIntrinsicID() == Intrinsic::umax) &&
      // ...
  LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");

  // ...
  bool ReductionsDisabled =
      EnableTailPredication == TailPredication::EnabledNoReductions ||
      EnableTailPredication == TailPredication::ForceEnabledNoReductions;

  for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
      LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
                           "live-out value\n");
      return false;
    }
    if (ReductionsDisabled) {
      // ...
      return false;
    }
  }

  // ...
    for (Instruction &I : BB->instructionsWithoutDebug()) {
      // ...
      if (T->getScalarSizeInBits() > 32) {
        // ...
        return false;
      }
      // ...
      int64_t NextStride =
          getPtrStride(PSE, AccessTy, Ptr, L).value_or(0);
      if (NextStride == 1) {
        // TODO: for now only allow consecutive strides of 1. We could support
        // other strides as long as it is a multiple of the vector length.
        continue;
      } else if (NextStride == -1 ||
                 // ...
                 ) {
        LLVM_DEBUG(dbgs() << "Consecutive strides of 2 found, vld2/vstr2 can't "
                             "be tail-predicated\n.");
        return false;
      }
      // ...
      const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
      // ...
      LLVM_DEBUG(dbgs() << "Bad stride found, can't "
                           "tail-predicate\n.");
      return false;
    }

  LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
  return true;
  if (!ST->hasMVEIntegerOps())
    return false;

  // ...
  if (L->getNumBlocks() > 1) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
                         "loop.\n");
    return false;
  }

  assert(L->isInnermost() &&
         "preferPredicateOverEpilogue: inner-loop expected");

  HardwareLoopInfo HWLoopInfo(L);
  if (!HWLoopInfo.canAnalyze(*LI)) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
                         "analyzable.\n");
    return false;
  }

  // ...
  if (!isHardwareLoopProfitable(L, *SE, *AC, TLI, HWLoopInfo)) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
                         "profitable.\n");
    return false;
  }

  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT)) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
                         "a candidate.\n");
    return false;
  }
// ...

    return isa<IntrinsicInst>(I) &&
           cast<IntrinsicInst>(I).getIntrinsicID() ==
               Intrinsic::get_active_lane_mask;
  // Low-overhead-loop-aware unrolling is only enabled for M-class cores.
  if (!ST->isMClass())
    return BaseT::getUnrollingPreferences(L, SE, UP, ORE);
  // ...

  // Don't unroll when optimising for size.
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Only allow one exit other than the latch; this mirrors the profitability
  // calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor;
  // allowing four blocks permits if-then-else diamonds in the body.
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
    return;

  // ...
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // Don't unroll vectorised loops; MVE does not benefit from it as much
      // as scalar code does.
      if (I.getType()->isVectorTy())
        return;
      // ...
    }
  }
  // ...

  // On Thumb-1, penalise loops with many live-out values, which need extra
  // registers across the exits.
  if (ST->isThumb1Only()) {
    unsigned ExitingValues = 0;
    SmallVector<BasicBlock *, 4> ExitBlocks;
    L->getExitBlocks(ExitBlocks);
    for (auto *Exit : ExitBlocks) {
      // Count live-out values; PHIs that merely forward a GEP don't count.
      unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
        return PH.getNumOperands() != 1 ||
               !isa<GetElementPtrInst>(PH.getOperand(0));
      });
      ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
    }
    // ...
  }
  // ...

  auto *Outer = L->getOutermostLoop();
  if ((L != Outer && Outer != L->getParentLoop()) ||
      // ...
bool ARMTTIImpl::preferInLoopReduction(RecurKind Kind, Type *Ty) const {
  if (!ST->hasMVEIntegerOps())
    return false;
  // ...
  unsigned ScalarBits = Ty->getScalarSizeInBits();
  // ...
    return ScalarBits <= 64;
  // ...
}

bool ARMTTIImpl::preferPredicatedReductionSelect() const {
  if (!ST->hasMVEIntegerOps())
    return false;
  return true;
}

InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                 StackOffset BaseOffset,
                                                 bool HasBaseReg, int64_t Scale,
                                                 unsigned AddrSpace) const {
  // ...
    // A negative scale needs an extra instruction on some cores.
    return AM.Scale < 0 ? 1 : 0;
  // ...
}

bool ARMTTIImpl::hasArmWideBranch(bool Thumb) const {
  if (Thumb) {
    // B.W is available in all Thumb2-capable targets.
    return ST->isThumb2() || ST->hasV8MBaselineOps();
  }
  // B is available in all ARM-mode targets.
  return ST->hasARMOps();
}

// ...
  return Ext->getType()->getScalarSizeInBits() ==
         2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I,
                                            SmallVectorImpl<Use *> &Ops) const {
  if (!I->getType()->isVectorTy())
    return false;

  if (ST->hasNEON()) {
    switch (I->getOpcode()) {
    case Instruction::Sub:
    case Instruction::Add: {
      // ...
      Ops.push_back(&I->getOperandUse(0));
      Ops.push_back(&I->getOperandUse(1));
      return true;
    }
    default:
      return false;
    }
  }

  if (!ST->hasMVEIntegerOps())
    return false;

  auto IsFMSMul = [&](Instruction *I) {
    if (!I->hasOneUse())
      return false;
    auto *Sub = cast<Instruction>(*I->users().begin());
    return Sub->getOpcode() == Instruction::FSub &&
           Sub->getOperand(1) == I;
  };
  auto IsSinker = [&](Instruction *I, int Operand) {
    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Mul:
    case Instruction::FAdd:
    case Instruction::ICmp:
    case Instruction::FCmp:
      return true;
    case Instruction::FMul:
      return !IsFMSMul(I);
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
      return Operand == 1;
    case Instruction::Call:
      if (auto *II = dyn_cast<IntrinsicInst>(I)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::fma:
          // ...
        case Intrinsic::sadd_sat:
        case Intrinsic::uadd_sat:
        case Intrinsic::arm_mve_add_predicated:
        case Intrinsic::arm_mve_mul_predicated:
        case Intrinsic::arm_mve_qadd_predicated:
        case Intrinsic::arm_mve_vhadd:
        case Intrinsic::arm_mve_hadd_predicated:
        case Intrinsic::arm_mve_vqdmull:
        case Intrinsic::arm_mve_vqdmull_predicated:
        case Intrinsic::arm_mve_vqdmulh:
        case Intrinsic::arm_mve_qdmulh_predicated:
        case Intrinsic::arm_mve_vqrdmulh:
        case Intrinsic::arm_mve_qrdmulh_predicated:
        case Intrinsic::arm_mve_fma_predicated:
          return true;
        case Intrinsic::ssub_sat:
        case Intrinsic::usub_sat:
        case Intrinsic::arm_mve_sub_predicated:
        case Intrinsic::arm_mve_qsub_predicated:
        case Intrinsic::arm_mve_hsub_predicated:
        case Intrinsic::arm_mve_vhsub:
          return Operand == 1;
        default:
          return false;
        }
      }
      return false;
    default:
      return false;
    }
  };
  for (auto OpIdx : enumerate(I->operands())) {
    // ...
    Instruction *Shuffle = Op;
    if (Shuffle->getOpcode() == Instruction::BitCast)
      Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
    // We are looking for a splat that can be sunk.
    // ...
    if (!IsSinker(I, OpIdx.index()))
      continue;

    // All users of the shuffle must also be sinkable, to avoid duplicating
    // it across GPR and vector registers.
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!IsSinker(Insn, U.getOperandNo()))
        return false;
    }

    // ...
    Ops.push_back(&Op->getOperandUse(0));
    // ...
  }
  return true;
  // Pad to the next 4-byte boundary.
  unsigned NumBytesToPad = 4 - (Size % 4);
  unsigned NewSize = Size + NumBytesToPad;

  // ...
  // Only pad if the widened array would still be copied inline rather than
  // through a library memcpy call.
  if (NewSize > MaxMemIntrinsicSize)
    return 0;

  // ...
  return NumBytesToPad;
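// Example (added note): a 10-byte string constant gets NumBytesToPad =
// 4 - (10 % 4) = 2, widening it to 12 bytes so that a copy of it can be
// lowered to three word-sized load/store pairs instead of a byte-wise tail.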