21#include "llvm/IR/IntrinsicsAArch64.h"
31#define DEBUG_TYPE "aarch64tti"
50class TailFoldingOption {
65 bool NeedsDefault = true;
69 void setNeedsDefault(bool V) { NeedsDefault = V; }
84 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
85 "Initial bits should only include one of "
86 "(disabled|all|simple|default)");
87 Bits = NeedsDefault ? DefaultBits : InitialBits;
95 errs() << "invalid argument '" << Opt
96 << "' to -sve-tail-folding=; the option should be of the form\n"
97 " (disabled|all|default|simple)[+(reductions|recurrences"
98 "|reverse|noreductions|norecurrences|noreverse)]\n";
104 void operator=(const std::string &Val) {
113 setNeedsDefault(false);
118 unsigned StartIdx = 1;
119 if (TailFoldTypes[0] == "disabled")
120 setInitialBits(TailFoldingOpts::Disabled);
121 else if (TailFoldTypes[0] == "all")
122 setInitialBits(TailFoldingOpts::All);
123 else if (TailFoldTypes[0] == "default")
124 setNeedsDefault(true);
125 else if (TailFoldTypes[0] == "simple")
126 setInitialBits(TailFoldingOpts::Simple);
129 setInitialBits(TailFoldingOpts::Disabled);
132 for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
133 if (TailFoldTypes[I] == "reductions")
134 setEnableBit(TailFoldingOpts::Reductions);
135 else if (TailFoldTypes[I] == "recurrences")
136 setEnableBit(TailFoldingOpts::Recurrences);
137 else if (TailFoldTypes[I] == "reverse")
138 setEnableBit(TailFoldingOpts::Reverse);
139 else if (TailFoldTypes[I] == "noreductions")
140 setDisableBit(TailFoldingOpts::Reductions);
141 else if (TailFoldTypes[I] == "norecurrences")
142 setDisableBit(TailFoldingOpts::Recurrences);
143 else if (TailFoldTypes[I] == "noreverse")
144 setDisableBit(TailFoldingOpts::Reverse);
161 "Control the use of vectorisation using tail-folding for SVE where the"
162 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
163 "\ndisabled (Initial) No loop types will vectorize using "
165 "\ndefault (Initial) Uses the default tail-folding settings for "
167 "\nall (Initial) All legal loop types will vectorize using "
169 "\nsimple (Initial) Use tail-folding for simple loops (not "
170 "reductions or recurrences)"
171 "\nreductions Use tail-folding for loops containing reductions"
172 "\nnoreductions Inverse of above"
173 "\nrecurrences Use tail-folding for loops containing fixed order "
175 "\nnorecurrences Inverse of above"
176 "\nreverse Use tail-folding for loops requiring reversed "
178 "\nnoreverse Inverse of above"),
206 TM.getSubtargetImpl(*Caller)->getFeatureBits();
208 TM.getSubtargetImpl(*Callee)->getFeatureBits();
212 return (CallerBits & CalleeBits) == CalleeBits;
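// Example (illustrative): the subset test above means a caller compiled with
// {+neon,+sve} may inline a callee that only requires {+neon}, but a callee
// requiring {+sve2} is rejected, since ANDing with the caller's features would
// drop the sve2 bit and the equality check fails.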
251 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
256 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
262 return std::max<InstructionCost>(1, Cost);
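// Worked example (illustrative): the loop above prices an immediate by the
// number of move-immediate instructions needed per 64-bit chunk. A constant
// such as 0x1234567890ABCDEF expands to one MOVZ plus three MOVKs, giving a
// cost of 4, while small values like 42 hit the floor of 1 enforced by the
// std::max on source line 262.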
277 unsigned ImmIdx = ~0U;
281 case Instruction::GetElementPtr:
286 case Instruction::Store:
289 case Instruction::Add:
290 case Instruction::Sub:
291 case Instruction::Mul:
292 case Instruction::UDiv:
293 case Instruction::SDiv:
294 case Instruction::URem:
295 case Instruction::SRem:
296 case Instruction::And:
297 case Instruction::Or:
298 case Instruction::Xor:
299 case Instruction::ICmp:
303 case Instruction::Shl:
304 case Instruction::LShr:
305 case Instruction::AShr:
309 case Instruction::Trunc:
310 case Instruction::ZExt:
311 case Instruction::SExt:
312 case Instruction::IntToPtr:
313 case Instruction::PtrToInt:
314 case Instruction::BitCast:
315 case Instruction::PHI:
316 case Instruction::Call:
317 case Instruction::Select:
318 case Instruction::Ret:
319 case Instruction::Load:
324 int NumConstants = (BitSize + 63) / 64;
348 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
354 case Intrinsic::sadd_with_overflow:
355 case Intrinsic::uadd_with_overflow:
356 case Intrinsic::ssub_with_overflow:
357 case Intrinsic::usub_with_overflow:
358 case Intrinsic::smul_with_overflow:
359 case Intrinsic::umul_with_overflow:
361 int NumConstants = (BitSize + 63) / 64;
368 case Intrinsic::experimental_stackmap:
369 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
372 case Intrinsic::experimental_patchpoint_void:
373 case Intrinsic::experimental_patchpoint_i64:
374 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
377 case Intrinsic::experimental_gc_statepoint:
378 if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
388 if (TyWidth == 32 || TyWidth == 64)
398 switch (ICA.getID()) {
399 case Intrinsic::umin:
400 case Intrinsic::umax:
401 case Intrinsic::smin:
402 case Intrinsic::smax: {
403 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
404 MVT::v8i16, MVT::v2i32, MVT::v4i32,
405 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
409 if (LT.second == MVT::v2i64)
411 if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
415 case Intrinsic::sadd_sat:
416 case Intrinsic::ssub_sat:
417 case Intrinsic::uadd_sat:
418 case Intrinsic::usub_sat: {
419 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
420 MVT::v8i16, MVT::v2i32, MVT::v4i32,
426 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
427 if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
428 return LT.first * Instrs;
431 case Intrinsic::abs: {
432 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
433 MVT::v8i16, MVT::v2i32, MVT::v4i32,
436 if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
440 case Intrinsic::bswap: {
441 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
442 MVT::v4i32, MVT::v2i64};
444 if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
445 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
449 case Intrinsic::experimental_stepvector: {
458 Cost += AddCost * (LT.first - 1);
462 case Intrinsic::bitreverse: {
464 {Intrinsic::bitreverse, MVT::i32, 1},
465 {Intrinsic::bitreverse, MVT::i64, 1},
466 {Intrinsic::bitreverse, MVT::v8i8, 1},
467 {Intrinsic::bitreverse, MVT::v16i8, 1},
468 {Intrinsic::bitreverse, MVT::v4i16, 2},
469 {Intrinsic::bitreverse, MVT::v8i16, 2},
470 {Intrinsic::bitreverse, MVT::v2i32, 2},
471 {Intrinsic::bitreverse, MVT::v4i32, 2},
472 {Intrinsic::bitreverse, MVT::v1i64, 2},
473 {Intrinsic::bitreverse, MVT::v2i64, 2},
483 return LegalisationCost.first * Entry->Cost + 1;
485 return LegalisationCost.first * Entry->Cost;
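// Example (illustrative, based on the table above): a v8i8 or v16i8 bitreverse
// costs a single instruction, wider-element vectors such as v8i16 or v4i32
// cost 2, and the "+ 1" branch at source line 483 accounts for scalar types
// that are first promoted to a legal width (in the full source this is the
// i8/i16 case) before the rbit-style lowering applies.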
489 case Intrinsic::ctpop: {
490 if (!ST->hasNEON()) {
511 RetTy->getScalarSizeInBits()
514 return LT.first * Entry->Cost + ExtraCost;
518 case Intrinsic::sadd_with_overflow:
519 case Intrinsic::uadd_with_overflow:
520 case Intrinsic::ssub_with_overflow:
521 case Intrinsic::usub_with_overflow:
522 case Intrinsic::smul_with_overflow:
523 case Intrinsic::umul_with_overflow: {
525 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
526 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
527 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
528 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
529 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
530 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
531 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
532 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
533 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
534 {Intrinsic::usub_with_overflow, MVT::i8, 3},
535 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
536 {Intrinsic::usub_with_overflow, MVT::i16, 3},
537 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
538 {Intrinsic::usub_with_overflow, MVT::i32, 1},
539 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
540 {Intrinsic::usub_with_overflow, MVT::i64, 1},
541 {Intrinsic::smul_with_overflow, MVT::i8, 5},
542 {Intrinsic::umul_with_overflow, MVT::i8, 4},
543 {Intrinsic::smul_with_overflow, MVT::i16, 5},
544 {Intrinsic::umul_with_overflow, MVT::i16, 4},
545 {Intrinsic::smul_with_overflow, MVT::i32, 2},
546 {Intrinsic::umul_with_overflow, MVT::i32, 2},
547 {Intrinsic::smul_with_overflow, MVT::i64, 3},
548 {Intrinsic::umul_with_overflow, MVT::i64, 3},
557 case Intrinsic::fptosi_sat:
558 case Intrinsic::fptoui_sat: {
561 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
566 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
567 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
568 LT.second == MVT::v2f64) &&
570 (LT.second == MVT::f64 && MTy == MVT::i32) ||
571 (LT.second == MVT::f32 && MTy == MVT::i64)))
574 if (ST->hasFullFP16() &&
575 ((LT.second == MVT::f16 && MTy == MVT::i32) ||
576 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
581 if ((LT.second.getScalarType() == MVT::f32 ||
582 LT.second.getScalarType() == MVT::f64 ||
583 (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) &&
587 if (LT.second.isVector())
591 LegalTy, {LegalTy, LegalTy});
594 LegalTy, {LegalTy, LegalTy});
596 return LT.first * Cost;
600 case Intrinsic::fshl:
601 case Intrinsic::fshr: {
614 {Intrinsic::fshl, MVT::v4i32, 3},
615 {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
616 {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
617 {Intrinsic::fshl, MVT::v8i8, 4}, {Intrinsic::fshl, MVT::v4i16, 4}};
623 return LegalisationCost.first * Entry->Cost;
627 if (!RetTy->isIntegerTy())
632 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
633 RetTy->getScalarSizeInBits() < 64) ||
634 (RetTy->getScalarSizeInBits() % 64 != 0);
635 unsigned ExtraCost = HigherCost ? 1 : 0;
636 if (RetTy->getScalarSizeInBits() == 32 ||
637 RetTy->getScalarSizeInBits() == 64)
644 return TyL.first + ExtraCost;
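// Example (illustrative): for a scalar min/max on i32 or i64 the HigherCost
// test at source lines 632-634 is false, so ExtraCost stays 0 and the result
// is just the type-legalization cost returned at source line 644; an i16
// operand makes HigherCost true and pays one extra instruction for the
// implied widening.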
657 auto RequiredType = II.getType();
660 assert(PN && "Expected Phi Node!");
663 if (!PN->hasOneUse())
666 for (Value *IncValPhi : PN->incoming_values()) {
667 auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
669 Reinterpret->getIntrinsicID() !=
670 Intrinsic::aarch64_sve_convert_to_svbool ||
671 RequiredType != Reinterpret->getArgOperand(0)->getType())
680 for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
681 auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
682 NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
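// Sketch of the rewrite performed by this phi walk (illustrative IR, not part
// of the file): when every incoming value of a <vscale x 16 x i1> phi is an
// aarch64.sve.convert.to.svbool of the same narrower predicate type, e.g.
//   %a = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %x)
// the new phi NPN is built directly over the <vscale x 4 x i1> operands, so a
// later convert.from.svbool of the original phi can be folded away.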
699static std::optional<Instruction *>
701 auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
705 auto IntrinsicID = BinOp->getIntrinsicID();
706 switch (IntrinsicID) {
707 case Intrinsic::aarch64_sve_and_z:
708 case Intrinsic::aarch64_sve_bic_z:
709 case Intrinsic::aarch64_sve_eor_z:
710 case Intrinsic::aarch64_sve_nand_z:
711 case Intrinsic::aarch64_sve_nor_z:
712 case Intrinsic::aarch64_sve_orn_z:
713 case Intrinsic::aarch64_sve_orr_z:
719 auto BinOpPred = BinOp->getOperand(0);
720 auto BinOpOp1 = BinOp->getOperand(1);
721 auto BinOpOp2 = BinOp->getOperand(2);
723 auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
725 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
728 auto PredOp = PredIntr->getOperand(0);
729 auto PredOpTy = cast<VectorType>(PredOp->getType());
735 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
736 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
737 if (BinOpOp1 == BinOpOp2)
738 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
741 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
748static std::optional<Instruction *>
759 isa<TargetExtType>(II.getType()))
765 const auto *IVTy = cast<VectorType>(II.getType());
771 const auto *CursorVTy = cast<VectorType>(Cursor->getType());
772 if (CursorVTy->getElementCount().getKnownMinValue() <
773 IVTy->getElementCount().getKnownMinValue())
778 EarliestReplacement = Cursor;
780 auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
783 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
784 Intrinsic::aarch64_sve_convert_to_svbool ||
785 IntrinsicCursor->getIntrinsicID() ==
786 Intrinsic::aarch64_sve_convert_from_svbool))
789 CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
790 Cursor = IntrinsicCursor->getOperand(0);
795 if (!EarliestReplacement)
817 const auto PTruePattern =
818 cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
819 if (PTruePattern != AArch64SVEPredPattern::vl1)
826 Insert->insertBefore(&II);
827 Insert->takeName(&II);
838 Splat->takeName(&II);
848 if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
851 const auto PTruePattern =
852 cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
853 if (PTruePattern != AArch64SVEPredPattern::all)
859 if (!SplatValue || !SplatValue->isZero())
863 auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
865 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
869 if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
872 auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
873 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
878 if (!isa<UndefValue>(VecIns->getArgOperand(0)))
881 if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
884 auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
888 auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
889 auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
890 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
893 unsigned NumElts = VecTy->getNumElements();
894 unsigned PredicateBits = 0;
897 for (unsigned I = 0; I < NumElts; ++I) {
898 auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
902 PredicateBits |= 1 << (I * (16 / NumElts));
906 if (PredicateBits == 0) {
908 PFalse->takeName(&II);
914 for (unsigned I = 0; I < 16; ++I)
915 if ((PredicateBits & (1 << I)) != 0)
918 unsigned PredSize = Mask & -Mask;
923 for (unsigned I = 0; I < 16; I += PredSize)
924 if ((PredicateBits & (1 << I)) == 0)
930 {PredType}, {PTruePat});
932 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
933 auto *ConvertFromSVBool =
935 {II.getType()}, {ConvertToSVBool});
946 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
957 auto *OldBinOp = cast<BinaryOperator>(Vec);
958 auto OpC = OldBinOp->getOpcode();
964 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II);
969 auto *C = dyn_cast<Constant>(Pg);
970 if (IsAfter && C && C->isNullValue()) {
974 Extract->insertBefore(&II);
975 Extract->takeName(&II);
979 auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
983 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
986 const auto PTruePattern =
987 cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
994 unsigned Idx = MinNumElts - 1;
1003 auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
1004 if (Idx >= PgVTy->getMinNumElements())
1005 return std::nullopt;
1010 Extract->insertBefore(&II);
1011 Extract->takeName(&II);
1030 return std::nullopt;
1035 return std::nullopt;
1049 FPTy, cast<VectorType>(Vec->getType())->getElementCount());
1052 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
1072static std::optional<Instruction *>
1076 if (Pattern == AArch64SVEPredPattern::all) {
1085 return MinNumElts && NumElts >= MinNumElts
1098 if (PgVal == OpVal &&
1101 Value *Ops[] = {PgVal, OpVal};
1115 return std::nullopt;
1119 if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
1120 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
1134 if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
1135 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
1136 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
1137 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
1138 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
1139 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
1140 (OpIID == Intrinsic::aarch64_sve_and_z) ||
1141 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
1142 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
1143 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
1144 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
1145 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
1146 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
1156 return std::nullopt;
1159 template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
1160static std::optional<Instruction *>
1162 bool MergeIntoAddendOp) {
1164 Value *MulOp0, *MulOp1, *AddendOp, *Mul;
1165 if (MergeIntoAddendOp) {
1175 return std::nullopt;
1177 if (!Mul->hasOneUse())
1178 return std::nullopt;
1185 if (FAddFlags != cast<CallInst>(Mul)->getFastMathFlags())
1186 return std::nullopt;
1188 return std::nullopt;
1193 if (MergeIntoAddendOp)
1195 {P, AddendOp, MulOp0, MulOp1}, FMFSource);
1198 {P, MulOp0, MulOp1, AddendOp}, FMFSource);
1205 Value *UncastedPred;
1206 if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
1207 m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
1211 if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
1212 cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
1213 Pred = UncastedPred;
1215 return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
1216 m_ConstantInt<AArch64SVEPredPattern::all>()));
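// Example (illustrative): a predicate produced as
//   %p = ptrue.nxv4i1(all) -> convert.to.svbool -> convert.from.svbool
// is still recognised as all-active here: the reinterpret round trip is
// peeled off when it does not narrow the predicate (the element-count check
// above), and the remaining value matches ptrue with the 'all' pattern.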
1219static std::optional<Instruction *>
1227 Load->copyMetadata(II);
1238static std::optional<Instruction *>
1246 Store->copyMetadata(II);
1257 switch (Intrinsic) {
1258 case Intrinsic::aarch64_sve_fmul_u:
1259 return Instruction::BinaryOps::FMul;
1260 case Intrinsic::aarch64_sve_fadd_u:
1261 return Instruction::BinaryOps::FAdd;
1262 case Intrinsic::aarch64_sve_fsub_u:
1263 return Instruction::BinaryOps::FSub;
1265 return Instruction::BinaryOpsEnd;
1269static std::optional<Instruction *>
1273 return std::nullopt;
1277 if (BinOpCode == Instruction::BinaryOpsEnd ||
1278 !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
1279 m_ConstantInt<AArch64SVEPredPattern::all>())))
1280 return std::nullopt;
1293 if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
1294 m_ConstantInt<AArch64SVEPredPattern::all>())))
1295 return std::nullopt;
1309 Intrinsic::aarch64_sve_mla>(
1313 Intrinsic::aarch64_sve_mad>(
1316 return std::nullopt;
1319static std::optional<Instruction *>
1325 Intrinsic::aarch64_sve_fmla>(IC, II,
1330 Intrinsic::aarch64_sve_fmad>(IC, II,
1335 Intrinsic::aarch64_sve_fmla>(IC, II,
1338 return std::nullopt;
1341static std::optional<Instruction *>
1345 Intrinsic::aarch64_sve_fmla>(IC, II,
1350 Intrinsic::aarch64_sve_fmad>(IC, II,
1355 Intrinsic::aarch64_sve_fmla_u>(
1361static std::optional<Instruction *>
1367 Intrinsic::aarch64_sve_fmls>(IC, II,
1372 Intrinsic::aarch64_sve_fnmsb>(
1377 Intrinsic::aarch64_sve_fmls>(IC, II,
1380 return std::nullopt;
1383static std::optional<Instruction *>
1387 Intrinsic::aarch64_sve_fmls>(IC, II,
1392 Intrinsic::aarch64_sve_fnmsb>(
1397 Intrinsic::aarch64_sve_fmls_u>(
1408 Intrinsic::aarch64_sve_mls>(
1411 return std::nullopt;
1427 auto IsUnitSplat = [](auto *I) {
1436 auto IsUnitDup = [](auto *I) {
1437 auto *IntrI = dyn_cast<IntrinsicInst>(I);
1438 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)
1441 auto *SplatValue = IntrI->getOperand(2);
1445 if (IsUnitSplat(OpMultiplier)) {
1447 OpMultiplicand->takeName(&II);
1449 } else if (IsUnitDup(OpMultiplier)) {
1451 auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
1452 auto *DupPg = DupInst->getOperand(1);
1455 if (OpPredicate == DupPg) {
1456 OpMultiplicand->takeName(&II);
1468 bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
1482 return std::nullopt;
1492 auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
1494 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
1495 return std::nullopt;
1519 return std::nullopt;
1522static std::optional<Instruction *>
1534 if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
1541 BasePtr, IndexBase);
1549 return std::nullopt;
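// Sketch of the strength reduction above (illustrative): when the gather's
// index vector is a unit-stride series built by aarch64.sve.index(IndexBase, 1),
// the indexed gather is equivalent to a contiguous access starting at
// BasePtr + IndexBase, so it is rewritten to an ordinary masked load from the
// GEP formed at source line 1541. The scatter variant below applies the same
// idea to the store side.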
1552static std::optional<Instruction *>
1564 if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
1570 BasePtr, IndexBase);
1579 return std::nullopt;
1590 ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
1591 if (!SplatConstantInt)
1592 return std::nullopt;
1598 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
1605 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
1607 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
1611 return std::nullopt;
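// Worked example (illustrative): a predicated signed divide by a power of two,
// e.g. sve.sdiv(%pg, %x, splat 8), becomes the single rounding shift
// sve.asrd(%pg, %x, 3) created at source line 1598; for a negative divisor
// such as -8 the asrd at source line 1605 is followed by the predicated
// sve.neg at source line 1607 to restore the sign.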
1615 size_t VecSize = Vec.size();
1620 size_t HalfVecSize = VecSize / 2;
1624 if (*LHS != nullptr && *RHS != nullptr) {
1632 if (*LHS == nullptr && *RHS != nullptr)
1647 m_Intrinsic<Intrinsic::vector_insert>(
1649 !isa<FixedVectorType>(CurrentInsertElt->getType()))
1650 return std::nullopt;
1651 auto IIScalableTy = cast<ScalableVectorType>(II.getType());
1655 while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
1656 auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
1657 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
1658 CurrentInsertElt = InsertElt->getOperand(0);
1662 isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);
1664 return std::nullopt;
1668 for (size_t I = 0; I < Elts.size(); I++) {
1669 if (Elts[I] == nullptr)
1674 if (InsertEltChain == nullptr)
1675 return std::nullopt;
1681 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
1682 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
1683 IIScalableTy->getMinNumElements() /
1688 auto *WideShuffleMaskTy =
1699 auto NarrowBitcast =
1712 return std::nullopt;
1722 Value *AbsPred, *MergedValue;
1723 if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
1725 !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
1728 return std::nullopt;
1736 return std::nullopt;
1741 return std::nullopt;
1744 {II.getType()}, {Pred, Vec, Shift});
1749std::optional<Instruction *>
1756 case Intrinsic::aarch64_neon_fmaxnm:
1757 case Intrinsic::aarch64_neon_fminnm:
1759 case Intrinsic::aarch64_sve_convert_from_svbool:
1761 case Intrinsic::aarch64_sve_dup:
1763 case Intrinsic::aarch64_sve_dup_x:
1765 case Intrinsic::aarch64_sve_cmpne:
1766 case Intrinsic::aarch64_sve_cmpne_wide:
1768 case Intrinsic::aarch64_sve_rdffr:
1770 case Intrinsic::aarch64_sve_lasta:
1771 case Intrinsic::aarch64_sve_lastb:
1773 case Intrinsic::aarch64_sve_clasta_n:
1774 case Intrinsic::aarch64_sve_clastb_n:
1776 case Intrinsic::aarch64_sve_cntd:
1778 case Intrinsic::aarch64_sve_cntw:
1780 case Intrinsic::aarch64_sve_cnth:
1782 case Intrinsic::aarch64_sve_cntb:
1784 case Intrinsic::aarch64_sve_ptest_any:
1785 case Intrinsic::aarch64_sve_ptest_first:
1786 case Intrinsic::aarch64_sve_ptest_last:
1788 case Intrinsic::aarch64_sve_fabd:
1790 case Intrinsic::aarch64_sve_fadd:
1792 case Intrinsic::aarch64_sve_fadd_u:
1794 case Intrinsic::aarch64_sve_fdiv:
1796 case Intrinsic::aarch64_sve_fmax:
1798 case Intrinsic::aarch64_sve_fmaxnm:
1800 case Intrinsic::aarch64_sve_fmin:
1802 case Intrinsic::aarch64_sve_fminnm:
1804 case Intrinsic::aarch64_sve_fmla:
1806 case Intrinsic::aarch64_sve_fmls:
1808 case Intrinsic::aarch64_sve_fmul:
1809 case Intrinsic::aarch64_sve_fmul_u:
1811 case Intrinsic::aarch64_sve_fmulx:
1813 case Intrinsic::aarch64_sve_fnmla:
1815 case Intrinsic::aarch64_sve_fnmls:
1817 case Intrinsic::aarch64_sve_fsub:
1819 case Intrinsic::aarch64_sve_fsub_u:
1821 case Intrinsic::aarch64_sve_add:
1823 case Intrinsic::aarch64_sve_add_u:
1825 Intrinsic::aarch64_sve_mla_u>(
1827 case Intrinsic::aarch64_sve_mla:
1829 case Intrinsic::aarch64_sve_mls:
1831 case Intrinsic::aarch64_sve_mul:
1832 case Intrinsic::aarch64_sve_mul_u:
1834 case Intrinsic::aarch64_sve_sabd:
1836 case Intrinsic::aarch64_sve_smax:
1838 case Intrinsic::aarch64_sve_smin:
1840 case Intrinsic::aarch64_sve_smulh:
1842 case Intrinsic::aarch64_sve_sub:
1844 case Intrinsic::aarch64_sve_sub_u:
1846 Intrinsic::aarch64_sve_mls_u>(
1848 case Intrinsic::aarch64_sve_uabd:
1850 case Intrinsic::aarch64_sve_umax:
1852 case Intrinsic::aarch64_sve_umin:
1854 case Intrinsic::aarch64_sve_umulh:
1856 case Intrinsic::aarch64_sve_asr:
1858 case Intrinsic::aarch64_sve_lsl:
1860 case Intrinsic::aarch64_sve_lsr:
1862 case Intrinsic::aarch64_sve_and:
1864 case Intrinsic::aarch64_sve_bic:
1866 case Intrinsic::aarch64_sve_eor:
1868 case Intrinsic::aarch64_sve_orr:
1870 case Intrinsic::aarch64_sve_sqsub:
1872 case Intrinsic::aarch64_sve_uqsub:
1874 case Intrinsic::aarch64_sve_tbl:
1876 case Intrinsic::aarch64_sve_uunpkhi:
1877 case Intrinsic::aarch64_sve_uunpklo:
1878 case Intrinsic::aarch64_sve_sunpkhi:
1879 case Intrinsic::aarch64_sve_sunpklo:
1881 case Intrinsic::aarch64_sve_zip1:
1882 case Intrinsic::aarch64_sve_zip2:
1884 case Intrinsic::aarch64_sve_ld1_gather_index:
1886 case Intrinsic::aarch64_sve_st1_scatter_index:
1888 case Intrinsic::aarch64_sve_ld1:
1890 case Intrinsic::aarch64_sve_st1:
1892 case Intrinsic::aarch64_sve_sdiv:
1894 case Intrinsic::aarch64_sve_sel:
1896 case Intrinsic::aarch64_sve_srshl:
1898 case Intrinsic::aarch64_sve_dupq_lane:
1902 return std::nullopt;
1909 SimplifyAndSetOp) const {
1913 case Intrinsic::aarch64_neon_fcvtxn:
1914 case Intrinsic::aarch64_neon_rshrn:
1915 case Intrinsic::aarch64_neon_sqrshrn:
1916 case Intrinsic::aarch64_neon_sqrshrun:
1917 case Intrinsic::aarch64_neon_sqshrn:
1918 case Intrinsic::aarch64_neon_sqshrun:
1919 case Intrinsic::aarch64_neon_sqxtn:
1920 case Intrinsic::aarch64_neon_sqxtun:
1921 case Intrinsic::aarch64_neon_uqrshrn:
1922 case Intrinsic::aarch64_neon_uqshrn:
1923 case Intrinsic::aarch64_neon_uqxtn:
1924 SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
1928 return std::nullopt;
1954 bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
1956 Type *SrcOverrideTy) {
1959 auto toVectorTy = [&](Type *ArgTy) {
1961 cast<VectorType>(DstTy)->getElementCount());
1971 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
1981 Type *SrcTy = SrcOverrideTy;
1983 case Instruction::Add:
1984 case Instruction::Sub:
1986 if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
1989 toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());
1993 case Instruction::Mul: {
1995 if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
1996 (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
1999 toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
2000 } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
2029 assert(SrcTy && "Expected some SrcTy");
2031 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
2037 DstTyL.first * DstTyL.second.getVectorMinNumElements();
2039 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
2043 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
2056 (Src->isScalableTy() && !ST->hasSVE2()))
2065 dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
2066 if (AddUser && AddUser->getOpcode() == Instruction::Add)
2069 auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
2070 if (!Shr || Shr->getOpcode() != Instruction::LShr)
2073 auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
2074 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
2075 Src->getScalarSizeInBits() !=
2076 cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
2100 assert(ISD && "Invalid opcode");
2103 if (I && I->hasOneUser()) {
2104 auto *SingleUser = cast<Instruction>(*I->user_begin());
2106 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
2110 if (SingleUser->getOpcode() == Instruction::Add) {
2111 if (I == SingleUser->getOperand(1) ||
2112 (isa<CastInst>(SingleUser->getOperand(1)) &&
2113 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
2120 if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
2128 return Cost == 0 ? 0 : 1;
2413 EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
2417 std::pair<InstructionCost, MVT> LT =
2420 LT.second.getVectorElementType().getSizeInBits();
2432 return AdjustCost(Entry->Cost);
2459 if (ST->hasFullFP16())
2462 return AdjustCost(Entry->Cost);
2481 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
2489 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
2505 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
2511 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
2521 case Instruction::SExt:
2526 case Instruction::ZExt:
2527 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
2540 return Opcode == Instruction::PHI ? 0 : 1;
2557 if (!LT.second.isVector())
2562 if (LT.second.isFixedLengthVector()) {
2563 unsigned Width = LT.second.getVectorNumElements();
2580 if (I && dyn_cast<LoadInst>(I->getOperand(1)))
2604 Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
2605 return getVectorInstrCostHelper(nullptr, Val, Index, HasRealUse);
2612 return getVectorInstrCostHelper(&I, Val, Index, true);
2618 if (isa<ScalableVectorType>(Ty))
2623 return DemandedElts.popcount() * (Insert + Extract) *
2636 Op2Info, Args, CxtI);
2678 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
2683 Opcode, Ty, CostKind, Op1Info, Op2Info);
2688 if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
2689 ->getPrimitiveSizeInBits()
2690 .getFixedValue() < 128) {
2701 if (nullptr != Entry)
2706 if (LT.second.getScalarType() == MVT::i8)
2708 else if (LT.second.getScalarType() == MVT::i16)
2718 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
2721 return (4 + DivCost) * VTy->getNumElements();
2741 if (LT.second == MVT::v2i64 && ST->hasSVE())
2756 if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
2758 return LT.first * 14;
2777 return 2 * LT.first;
2786 return 2 * LT.first;
2801 int MaxMergeDistance = 64;
2805 return NumVectorInstToHideOverhead;
2825 if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
2827 const int AmortizationCost = 20;
2835 VecPred = CurrentPred;
2843 static const auto ValidMinMaxTys = {
2844 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
2845 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
2846 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
2849 if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
2850 (ST->hasFullFP16() &&
2851 any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
2856 VectorSelectTbl[] = {
2865 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
2866 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
2867 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
2880 if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
2883 if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
2884 return LT.first * 4;
2905 if (ST->requiresStrictAlign()) {
2910 Options.AllowOverlappingLoads = true;
2916 Options.LoadSizes = {8, 4, 2, 1};
2921 return ST->hasSVE();
2932 if (!LT.first.isValid())
2950 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
2955 auto *VT = cast<VectorType>(DataTy);
2957 if (!LT.first.isValid())
2964 if (cast<VectorType>(DataTy)->getElementCount() ==
2968 ElementCount LegalVF = LT.second.getVectorElementCount();
2971 {TTI::OK_AnyValue, TTI::OP_None}, I);
2991 if (VT == MVT::Other)
2996 if (!LT.first.isValid())
3003 if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3014 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
3015 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
3021 const int AmortizationCost = 6;
3023 return LT.first * 2 * AmortizationCost;
3034 if (VT == MVT::v4i8)
3037 return cast<FixedVectorType>(Ty)->getNumElements() * 2;
3046 bool UseMaskForCond, bool UseMaskForGaps) {
3047 assert(Factor >= 2 && "Invalid interleave factor");
3048 auto *VecVTy = cast<VectorType>(VecTy);
3050 if (VecTy->isScalableTy() && (!ST->hasSVE() || Factor != 2))
3055 if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))
3058 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
3059 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
3062 VecVTy->getElementCount().divideCoefficientBy(Factor));
3068 if (MinElts % Factor == 0 &&
3075 UseMaskForCond, UseMaskForGaps);
3082 for (auto *I : Tys) {
3083 if (!I->isVectorTy())
3085 if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
3104 enum { MaxStridedLoads = 7 };
3106 int StridedLoads = 0;
3109 for (const auto BB : L->blocks()) {
3110 for (auto &I : *BB) {
3111 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
3116 if (L->isLoopInvariant(PtrValue))
3120 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
3121 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
3130 if (StridedLoads > MaxStridedLoads / 2)
3131 return StridedLoads;
3134 return StridedLoads;
3137 int StridedLoads = countStridedLoads(L, SE);
3139 << " strided loads\n");
3160 if (L->getLoopDepth() > 1)
3173 for (auto *BB : L->getBlocks()) {
3174 for (auto &I : *BB) {
3176 if (I.getType()->isVectorTy())
3179 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
3194 !ST->getSchedModel().isOutOfOrder()) {
3211 Type *ExpectedType) {
3215 case Intrinsic::aarch64_neon_st2:
3216 case Intrinsic::aarch64_neon_st3:
3217 case Intrinsic::aarch64_neon_st4: {
3219 StructType *ST = dyn_cast<StructType>(ExpectedType);
3222 unsigned NumElts = Inst->arg_size() - 1;
3223 if (ST->getNumElements() != NumElts)
3225 for (unsigned i = 0, e = NumElts; i != e; ++i) {
3231 for (unsigned i = 0, e = NumElts; i != e; ++i) {
3233 Res = Builder.CreateInsertValue(Res, L, i);
3237 case Intrinsic::aarch64_neon_ld2:
3238 case Intrinsic::aarch64_neon_ld3:
3239 case Intrinsic::aarch64_neon_ld4:
3240 if (Inst->getType() == ExpectedType)
3251 case Intrinsic::aarch64_neon_ld2:
3252 case Intrinsic::aarch64_neon_ld3:
3253 case Intrinsic::aarch64_neon_ld4:
3254 Info.ReadMem = true;
3255 Info.WriteMem = false;
3258 case Intrinsic::aarch64_neon_st2:
3259 case Intrinsic::aarch64_neon_st3:
3260 case Intrinsic::aarch64_neon_st4:
3261 Info.ReadMem = false;
3262 Info.WriteMem = true;
3270 case Intrinsic::aarch64_neon_ld2:
3271 case Intrinsic::aarch64_neon_st2:
3272 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
3274 case Intrinsic::aarch64_neon_ld3:
3275 case Intrinsic::aarch64_neon_st3:
3276 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
3278 case Intrinsic::aarch64_neon_ld4:
3279 case Intrinsic::aarch64_neon_st4:
3280 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
3292 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
3293 bool Considerable = false;
3294 AllowPromotionWithoutCommonHeader = false;
3295 if (!isa<SExtInst>(&I))
3297 Type *ConsideredSExtType =
3299 if (I.getType() != ConsideredSExtType)
3303 for (const User *U : I.users()) {
3305 Considerable = true;
3309 if (GEPInst->getNumOperands() > 2) {
3310 AllowPromotionWithoutCommonHeader = true;
3315 return Considerable;
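// Example (illustrative): given
//   %idx = sext i32 %i to i64
//   %p = getelementptr inbounds [16 x i32], ptr %base, i64 0, i64 %idx
// the sext is Considerable because it is used by a getelementptr, and the GEP
// having more than two operands sets AllowPromotionWithoutCommonHeader, so the
// promotion does not need to find a common dominating sext first.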
3354 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
3364 return LegalizationCost + 2;
3374 LegalizationCost *= LT.first - 1;
3378 assert(ISD && "Invalid opcode");
3386 return LegalizationCost + 2;
3394 std::optional<FastMathFlags> FMF,
3397 if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
3402 return BaseCost + FixedVTy->getNumElements();
3405 if (Opcode != Instruction::FAdd)
3408 auto *VTy = cast<ScalableVectorType>(ValTy);
3415 if (isa<ScalableVectorType>(ValTy))
3419 MVT MTy = LT.second;
3421 assert(ISD && "Invalid opcode");
3465 return (LT.first - 1) + Entry->Cost;
3473 auto *ValVTy = cast<FixedVectorType>(ValTy);
3477 if (LT.first != 1) {
3483 ExtraCost *= LT.first - 1;
3486 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
3487 return Cost + ExtraCost;
3521 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
3536 if (LT.second.getScalarType() == MVT::i1) {
3545 assert(Entry && "Illegal Type for Splice");
3546 LegalizationCost += Entry->Cost;
3547 return LegalizationCost * LT.first;
3559 if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
3561 cast<FixedVectorType>(Tp)->getNumElements() >
3562 LT.second.getVectorNumElements() &&
3564 unsigned TpNumElts = cast<FixedVectorType>(Tp)->getNumElements();
3565 assert(Mask.size() == TpNumElts && "Expected Mask and Tp size to match!");
3566 unsigned LTNumElts = LT.second.getVectorNumElements();
3567 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
3571 for (unsigned N = 0; N < NumVecs; N++) {
3575 unsigned Source1, Source2;
3576 unsigned NumSources = 0;
3577 for (unsigned E = 0; E < LTNumElts; E++) {
3578 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
3587 unsigned Source = MaskElt / LTNumElts;
3588 if (NumSources == 0) {
3591 } else if (NumSources == 1 && Source != Source1) {
3594 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
3600 if (Source == Source1)
3602 else if (Source == Source2)
3603 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
3609 if (NumSources <= 2)
3612 NTp, NMask, CostKind, 0, nullptr, Args);
3614 return ME.value() % LTNumElts == ME.index();
3616 Cost += LTNumElts - 1;
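// Worked example (illustrative): a shuffle of <32 x i8> legalized as two
// v16i8 registers is priced per 16-lane chunk by the loop above; a chunk whose
// lanes come from at most two source registers is charged as one sub-shuffle
// of the legal type (source lines 3609-3612), while a chunk mixing three or
// more registers falls through to the conservative per-lane charge added at
// source line 3616.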
3633 bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
3634 if (IsLoad && LT.second.isVector() &&
3636 LT.second.getVectorElementCount()))
3644 all_of(Mask, [](int E) { return E < 8; }))
3767 return LT.first * Entry->Cost;
3776 LT.second.getSizeInBits() <= 128 && SubTp) {
3778 if (SubLT.second.isVector()) {
3779 int NumElts = LT.second.getVectorNumElements();
3780 int NumSubElts = SubLT.second.getVectorNumElements();
3781 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
3796 if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
3840 unsigned NumInsns = 0;
3842 NumInsns += BB->sizeWithoutDebug();
3851 int64_t BaseOffset, bool HasBaseReg,
3852 int64_t Scale, unsigned AddrSpace) const {