LLVM 22.0.0git
PPCTargetTransformInfo.cpp
1//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "PPCTargetTransformInfo.h"
10#include "llvm/Analysis/CodeMetrics.h"
11#include "llvm/Analysis/TargetLibraryInfo.h"
12#include "llvm/CodeGen/BasicTTIImpl.h"
13#include "llvm/CodeGen/CostTable.h"
14#include "llvm/CodeGen/TargetLowering.h"
15#include "llvm/CodeGen/TargetSchedule.h"
16#include "llvm/IR/IntrinsicsPowerPC.h"
17#include "llvm/IR/ProfDataUtils.h"
18#include "llvm/Support/CommandLine.h"
19#include "llvm/Transforms/InstCombine/InstCombiner.h"
20#include "llvm/Transforms/Utils/Local.h"
21#include <optional>
22
23using namespace llvm;
24
25#define DEBUG_TYPE "ppctti"
26
27static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
28 cl::desc("Allow vp.load and vp.store for pwr9"),
29 cl::init(false), cl::Hidden);
30
31static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
32cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
33
34static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
35cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
36
37static cl::opt<bool>
38EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
39 cl::desc("Enable using coldcc calling conv for cold "
40 "internal functions"));
41
42static cl::opt<bool>
43LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
44 cl::desc("Do not add instruction count to lsr cost model"));
45
46// The latency of mtctr is only justified if there are more than 4
47// comparisons that will be removed as a result.
48static cl::opt<unsigned>
49SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
50 cl::desc("Loops with a constant trip count smaller than "
51 "this value will not use the count register."));
52
53//===----------------------------------------------------------------------===//
54//
55// PPC cost model.
56//
57//===----------------------------------------------------------------------===//
58
59TTI::PopcntSupportKind
60PPCTTIImpl::getPopcntSupport(unsigned TyWidth) const {
61 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
62 if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
63 return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?
64 TTI::PSK_SlowHardware : TTI::PSK_FastHardware;
65 return TTI::PSK_Software;
66}
67
68std::optional<Instruction *>
69PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
70 Intrinsic::ID IID = II.getIntrinsicID();
71 switch (IID) {
72 default:
73 break;
74 case Intrinsic::ppc_altivec_lvx:
75 case Intrinsic::ppc_altivec_lvxl:
76 // Turn PPC lvx -> load if the pointer is known aligned.
77 if (getOrEnforceKnownAlignment(
78 II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
79 &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
80 Value *Ptr = II.getArgOperand(0);
81 return new LoadInst(II.getType(), Ptr, "", false, Align(16));
82 }
83 break;
84 case Intrinsic::ppc_vsx_lxvw4x:
85 case Intrinsic::ppc_vsx_lxvd2x: {
86 // Turn PPC VSX loads into normal loads.
87 Value *Ptr = II.getArgOperand(0);
88 return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
89 }
90 case Intrinsic::ppc_altivec_stvx:
91 case Intrinsic::ppc_altivec_stvxl:
92 // Turn stvx -> store if the pointer is known aligned.
93 if (getOrEnforceKnownAlignment(
94 II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
95 &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
96 Value *Ptr = II.getArgOperand(1);
97 return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
98 }
99 break;
100 case Intrinsic::ppc_vsx_stxvw4x:
101 case Intrinsic::ppc_vsx_stxvd2x: {
102 // Turn PPC VSX stores into normal stores.
103 Value *Ptr = II.getArgOperand(1);
104 return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
105 }
106 case Intrinsic::ppc_altivec_vperm:
107 // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
108 // Note that ppc_altivec_vperm has a big-endian bias, so when creating
109 // a vector shuffle for little endian, we must undo the transformation
110 // performed on vec_perm in altivec.h. That is, we must complement
111 // the permutation mask with respect to 31 and reverse the order of
112 // V1 and V2.
113 if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
114 assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
115 "Bad type for intrinsic!");
116
117 // Check that all of the elements are integer constants or undefs.
118 bool AllEltsOk = true;
119 for (unsigned I = 0; I != 16; ++I) {
120 Constant *Elt = Mask->getAggregateElement(I);
121 if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
122 AllEltsOk = false;
123 break;
124 }
125 }
126
127 if (AllEltsOk) {
128 // Cast the input vectors to byte vectors.
129 Value *Op0 =
130 IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
131 Value *Op1 =
132 IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
133 Value *Result = PoisonValue::get(Op0->getType());
134
135 // Only extract each element once.
136 Value *ExtractedElts[32];
137 memset(ExtractedElts, 0, sizeof(ExtractedElts));
138
139 for (unsigned I = 0; I != 16; ++I) {
140 if (isa<UndefValue>(Mask->getAggregateElement(I)))
141 continue;
142 unsigned Idx =
143 cast<ConstantInt>(Mask->getAggregateElement(I))->getZExtValue();
144 Idx &= 31; // Match the hardware behavior.
145 if (DL.isLittleEndian())
146 Idx = 31 - Idx;
147
148 if (!ExtractedElts[Idx]) {
149 Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
150 Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
151 ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
152 Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
153 }
154
155 // Insert this value into the result vector.
156 Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
157 IC.Builder.getInt32(I));
158 }
159 return CastInst::Create(Instruction::BitCast, Result, II.getType());
160 }
161 }
162 break;
163 }
164 return std::nullopt;
165}
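// Editorial note, not part of the LLVM source: a small worked example of the
// little-endian remapping implemented above. For a constant mask element 3,
// the code clamps it to the hardware range (3 & 31 == 3) and, on a
// little-endian target, complements it to Idx = 31 - 3 == 28. Because
// 28 >= 16, the element is extracted from Op1ToUse (which on little endian is
// the original first argument) at lane 28 & 15 == 12 and inserted at position
// I, so the emitted shufflevector reproduces vec_perm semantics from
// altivec.h on either endianness.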
166
167InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
168 TTI::TargetCostKind CostKind) const {
169 if (DisablePPCConstHoist)
170 return BaseT::getIntImmCost(Imm, Ty, CostKind);
171
172 assert(Ty->isIntegerTy());
173
174 unsigned BitSize = Ty->getPrimitiveSizeInBits();
175 if (BitSize == 0)
176 return ~0U;
177
178 if (Imm == 0)
179 return TTI::TCC_Free;
180
181 if (Imm.getBitWidth() <= 64) {
182 if (isInt<16>(Imm.getSExtValue()))
183 return TTI::TCC_Basic;
184
185 if (isInt<32>(Imm.getSExtValue())) {
186 // A constant that can be materialized using lis.
187 if ((Imm.getZExtValue() & 0xFFFF) == 0)
188 return TTI::TCC_Basic;
189
190 return 2 * TTI::TCC_Basic;
191 }
192 }
193
194 return 4 * TTI::TCC_Basic;
195}
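// Editorial note, not part of the LLVM source: concrete instances of the
// tiers above, assuming a 64-bit integer type.
//   Imm == 0          -> TCC_Free      (nothing to materialize)
//   Imm == 1000       -> TCC_Basic     (fits a signed 16-bit field, e.g. li)
//   Imm == 0x20000    -> TCC_Basic     (low 16 bits are zero, e.g. lis)
//   Imm == 0x12345678 -> 2 * TCC_Basic (32-bit value, e.g. lis + ori)
//   Imm == 0x123456789 -> 4 * TCC_Basic (wider than 32 bits)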
196
197InstructionCost
198PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
199 const APInt &Imm, Type *Ty,
200 TTI::TargetCostKind CostKind) const {
201 if (DisablePPCConstHoist)
202 return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
203
204 assert(Ty->isIntegerTy());
205
206 unsigned BitSize = Ty->getPrimitiveSizeInBits();
207 if (BitSize == 0)
208 return ~0U;
209
210 switch (IID) {
211 default:
212 return TTI::TCC_Free;
213 case Intrinsic::sadd_with_overflow:
214 case Intrinsic::uadd_with_overflow:
215 case Intrinsic::ssub_with_overflow:
216 case Intrinsic::usub_with_overflow:
217 if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
218 return TTI::TCC_Free;
219 break;
220 case Intrinsic::experimental_stackmap:
221 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
222 return TTI::TCC_Free;
223 break;
224 case Intrinsic::experimental_patchpoint_void:
225 case Intrinsic::experimental_patchpoint:
226 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
227 return TTI::TCC_Free;
228 break;
229 }
230 return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
231}
232
233InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
234 const APInt &Imm, Type *Ty,
235 TTI::TargetCostKind CostKind,
236 Instruction *Inst) const {
237 if (DisablePPCConstHoist)
238 return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
239
240 assert(Ty->isIntegerTy());
241
242 unsigned BitSize = Ty->getPrimitiveSizeInBits();
243 if (BitSize == 0)
244 return ~0U;
245
246 unsigned ImmIdx = ~0U;
247 bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
248 ZeroFree = false;
249 switch (Opcode) {
250 default:
251 return TTI::TCC_Free;
252 case Instruction::GetElementPtr:
253 // Always hoist the base address of a GetElementPtr. This prevents the
254 // creation of new constants for every base constant that gets constant
255 // folded with the offset.
256 if (Idx == 0)
257 return 2 * TTI::TCC_Basic;
258 return TTI::TCC_Free;
259 case Instruction::And:
260 RunFree = true; // (for the rotate-and-mask instructions)
261 [[fallthrough]];
262 case Instruction::Add:
263 case Instruction::Or:
264 case Instruction::Xor:
265 ShiftedFree = true;
266 [[fallthrough]];
267 case Instruction::Sub:
268 case Instruction::Mul:
269 case Instruction::Shl:
270 case Instruction::LShr:
271 case Instruction::AShr:
272 ImmIdx = 1;
273 break;
274 case Instruction::ICmp:
275 UnsignedFree = true;
276 ImmIdx = 1;
277 // Zero comparisons can use record-form instructions.
278 [[fallthrough]];
279 case Instruction::Select:
280 ZeroFree = true;
281 break;
282 case Instruction::PHI:
283 case Instruction::Call:
284 case Instruction::Ret:
285 case Instruction::Load:
286 case Instruction::Store:
287 break;
288 }
289
290 if (ZeroFree && Imm == 0)
291 return TTI::TCC_Free;
292
293 if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
294 if (isInt<16>(Imm.getSExtValue()))
295 return TTI::TCC_Free;
296
297 if (RunFree) {
298 if (Imm.getBitWidth() <= 32 &&
299 (isShiftedMask_32(Imm.getZExtValue()) ||
300 isShiftedMask_32(~Imm.getZExtValue())))
301 return TTI::TCC_Free;
302
303 if (ST->isPPC64() &&
304 (isShiftedMask_64(Imm.getZExtValue()) ||
305 isShiftedMask_64(~Imm.getZExtValue())))
306 return TTI::TCC_Free;
307 }
308
309 if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
310 return TTI::TCC_Free;
311
312 if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
313 return TTI::TCC_Free;
314 }
315
316 return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
317}
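// Editorial note, not part of the LLVM source: sample queries of the hook
// above on a 64-bit subtarget.
//   'and' with immediate 0xFF00 in operand 1  -> TCC_Free (shifted mask, so a
//                                                rotate-and-mask form fits)
//   'icmp' with immediate 40000 in operand 1  -> TCC_Free (fits unsigned 16 bits)
//   'mul' with immediate 100000 in operand 1  -> falls through to getIntImmCost
//                                                and is charged 2 * TCC_Basic.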
318
319// Check if the current Type is an MMA vector type. Valid MMA types are
320// v256i1 and v512i1.
321static bool isMMAType(Type *Ty) {
322 return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
323 (Ty->getPrimitiveSizeInBits() > 128);
324}
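// Editorial note, not part of the LLVM source: isMMAType(<512 x i1>) and
// isMMAType(<256 x i1>) are true (i1 elements, more than 128 bits in total),
// matching the __vector_quad and __vector_pair MMA types, while <128 x i1>
// and ordinary vectors such as <4 x i32> are not.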
325
326InstructionCost PPCTTIImpl::getInstructionCost(const User *U,
327 ArrayRef<const Value *> Operands,
328 TTI::TargetCostKind CostKind) const {
329 // We already implement getCastInstrCost and getMemoryOpCost where we perform
330 // the vector adjustment there.
331 if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
332 return BaseT::getInstructionCost(U, Operands, CostKind);
333
334 if (U->getType()->isVectorTy()) {
335 // Instructions that need to be split should cost more.
336 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());
337 return LT.first * BaseT::getInstructionCost(U, Operands, CostKind);
338 }
339
340 return BaseT::getInstructionCost(U, Operands, CostKind);
341}
342
343bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
344 AssumptionCache &AC,
345 TargetLibraryInfo *LibInfo,
346 HardwareLoopInfo &HWLoopInfo) const {
347 const PPCTargetMachine &TM = ST->getTargetMachine();
348 TargetSchedModel SchedModel;
349 SchedModel.init(ST);
350
351 // FIXME: Sure there is no other way to get TTI? This should be cheap though.
352 TargetTransformInfo TTI =
353 TM.getTargetTransformInfo(*L->getHeader()->getParent());
354
355 // Do not convert small short loops to CTR loop.
356 unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
357 if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
358 SmallPtrSet<const Value *, 32> EphValues;
359 CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
360 CodeMetrics Metrics;
361 for (BasicBlock *BB : L->blocks())
362 Metrics.analyzeBasicBlock(BB, TTI, EphValues);
363 // 6 is an approximate latency for the mtctr instruction.
364 if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
365 return false;
366 }
367
368 // Check that there are no hardware loop related intrinsics in the loop.
369 for (auto *BB : L->getBlocks())
370 for (auto &I : *BB)
371 if (auto *Call = dyn_cast<IntrinsicInst>(&I))
372 if (Call->getIntrinsicID() == Intrinsic::set_loop_iterations ||
373 Call->getIntrinsicID() == Intrinsic::loop_decrement)
374 return false;
375
376 SmallVector<BasicBlock*, 4> ExitingBlocks;
377 L->getExitingBlocks(ExitingBlocks);
378
379 // If there is an exit edge known to be frequently taken,
380 // we should not transform this loop.
381 for (auto &BB : ExitingBlocks) {
382 Instruction *TI = BB->getTerminator();
383 if (!TI) continue;
384
385 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
386 uint64_t TrueWeight = 0, FalseWeight = 0;
387 if (!BI->isConditional() ||
388 !extractBranchWeights(*BI, TrueWeight, FalseWeight))
389 continue;
390
391 // If the exit path is more frequent than the loop path,
392 // we return here without further analysis for this loop.
393 bool TrueIsExit = !L->contains(BI->getSuccessor(0));
394 if (( TrueIsExit && FalseWeight < TrueWeight) ||
395 (!TrueIsExit && FalseWeight > TrueWeight))
396 return false;
397 }
398 }
399
400 LLVMContext &C = L->getHeader()->getContext();
401 HWLoopInfo.CountType = TM.isPPC64() ?
402 Type::getInt64Ty(C) : Type::getInt32Ty(C);
403 HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
404 return true;
405}
406
407void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
408 TTI::UnrollingPreferences &UP,
409 OptimizationRemarkEmitter *ORE) const {
410 if (ST->getCPUDirective() == PPC::DIR_A2) {
411 // The A2 is in-order with a deep pipeline, and concatenation unrolling
412 // helps expose latency-hiding opportunities to the instruction scheduler.
413 UP.Partial = UP.Runtime = true;
414
415 // We unroll a lot on the A2 (hundreds of instructions), and the benefits
416 // often outweigh the cost of a division to compute the trip count.
417 UP.AllowExpensiveTripCount = true;
418 }
419
420 BaseT::getUnrollingPreferences(L, SE, UP, ORE);
421}
422
423void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
424 TTI::PeelingPreferences &PP) const {
425 BaseT::getPeelingPreferences(L, SE, PP);
426}
427// This function returns true to allow using coldcc calling convention.
428// Returning true results in coldcc being used for functions which are cold at
429// all call sites when the callers of the functions are not calling any other
430// non coldcc functions.
431bool PPCTTIImpl::useColdCCForColdCall(Function &F) const {
432 return EnablePPCColdCC;
433}
434
435bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) const {
436 // On the A2, always unroll aggressively.
437 if (ST->getCPUDirective() == PPC::DIR_A2)
438 return true;
439
440 return LoopHasReductions;
441}
442
443TTI::MemCmpExpansionOptions
444PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
445 TTI::MemCmpExpansionOptions Options;
446 if (getST()->hasAltivec())
447 Options.LoadSizes = {16, 8, 4, 2, 1};
448 else
449 Options.LoadSizes = {8, 4, 2, 1};
450
451 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
452 return Options;
453}
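// Editorial note, not part of the LLVM source: with Altivec enabled, a
// 31-byte memcmp can be expanded greedily as 16 + 8 + 4 + 2 + 1 byte load
// pairs, subject to the MaxNumLoads limit supplied by the target lowering.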
454
455bool PPCTTIImpl::enableInterleavedAccessVectorization() const { return true; }
456
457unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
458 assert(ClassID == GPRRC || ClassID == FPRRC ||
459 ClassID == VRRC || ClassID == VSXRC);
460 if (ST->hasVSX()) {
461 assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
462 return ClassID == VSXRC ? 64 : 32;
463 }
464 assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
465 return 32;
466}
467
468unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
469 if (Vector)
470 return ST->hasVSX() ? VSXRC : VRRC;
471 if (Ty &&
472 (Ty->getScalarType()->isFloatTy() || Ty->getScalarType()->isDoubleTy()))
473 return ST->hasVSX() ? VSXRC : FPRRC;
474 if (Ty && (Ty->getScalarType()->isFP128Ty() ||
475 Ty->getScalarType()->isPPC_FP128Ty()))
476 return VRRC;
477 if (Ty && Ty->getScalarType()->isHalfTy())
478 return VSXRC;
479 return GPRRC;
480}
481
482const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
483
484 switch (ClassID) {
485 default:
486 llvm_unreachable("unknown register class");
487 return "PPC::unknown register class";
488 case GPRRC: return "PPC::GPRRC";
489 case FPRRC: return "PPC::FPRRC";
490 case VRRC: return "PPC::VRRC";
491 case VSXRC: return "PPC::VSXRC";
492 }
493}
494
495TypeSize
496PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
497 switch (K) {
498 case TargetTransformInfo::RGK_Scalar:
499 return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
500 case TargetTransformInfo::RGK_FixedWidthVector:
501 return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
502 case TargetTransformInfo::RGK_ScalableVector:
503 return TypeSize::getScalable(0);
504 }
505
506 llvm_unreachable("Unsupported register kind");
507}
508
509unsigned PPCTTIImpl::getCacheLineSize() const {
510 // Starting with P7 we have a cache line size of 128.
511 unsigned Directive = ST->getCPUDirective();
512 // Assume that Future CPU has the same cache line size as the others.
513 if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
514 Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
515 Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)
516 return 128;
517
518 // On other processors return a default of 64 bytes.
519 return 64;
520}
521
522unsigned PPCTTIImpl::getPrefetchDistance() const {
523 return 300;
524}
525
526unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) const {
527 unsigned Directive = ST->getCPUDirective();
528 // The 440 has no SIMD support, but floating-point instructions
529 // have a 5-cycle latency, so unroll by 5x for latency hiding.
530 if (Directive == PPC::DIR_440)
531 return 5;
532
533 // The A2 has no SIMD support, but floating-point instructions
534 // have a 6-cycle latency, so unroll by 6x for latency hiding.
535 if (Directive == PPC::DIR_A2)
536 return 6;
537
538 // FIXME: For lack of any better information, do no harm...
539 if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
540 return 1;
541
542 // For P7 and P8, floating-point instructions have a 6-cycle latency and
543 // there are two execution units, so unroll by 12x for latency hiding.
544 // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
545 // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
546 // Assume that future is the same as the others.
547 if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
548 Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
549 Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)
550 return 12;
551
552 // For most things, modern systems have two execution units (and
553 // out-of-order execution).
554 return 2;
555}
556
557// Returns a cost adjustment factor to adjust the cost of vector instructions
558// on targets where there is overlap between the vector and scalar units,
559// thereby reducing the overall throughput of vector code wrt. scalar code.
560// An invalid instruction cost is returned if the type is an MMA vector type.
561InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
562 Type *Ty1,
563 Type *Ty2) const {
564 // If the vector type is of an MMA type (v256i1, v512i1), an invalid
565 // instruction cost is returned. This is to signify to other cost computing
566 // functions to return the maximum instruction cost in order to prevent any
567 // opportunities for the optimizer to produce MMA types within the IR.
568 if (isMMAType(Ty1))
569 return InstructionCost::getInvalid();
570
571 if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
572 return InstructionCost(1);
573
574 std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);
575 // If type legalization involves splitting the vector, we don't want to
576 // double the cost at every step - only the last step.
577 if (LT1.first != 1 || !LT1.second.isVector())
578 return InstructionCost(1);
579
580 int ISD = TLI->InstructionOpcodeToISD(Opcode);
581 if (TLI->isOperationExpand(ISD, LT1.second))
582 return InstructionCost(1);
583
584 if (Ty2) {
585 std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);
586 if (LT2.first != 1 || !LT2.second.isVector())
587 return InstructionCost(1);
588 }
589
590 return InstructionCost(2);
591}
592
593InstructionCost PPCTTIImpl::getArithmeticInstrCost(
594 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
595 TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
596 ArrayRef<const Value *> Args, const Instruction *CxtI) const {
597 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
598
599 InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
600 if (!CostFactor.isValid())
601 return InstructionCost::getMax();
602
603 // TODO: Handle more cost kinds.
604 if (CostKind != TTI::TCK_RecipThroughput)
605 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
606 Op2Info, Args, CxtI);
607
608 // Fallback to the default implementation.
609 InstructionCost Cost = BaseT::getArithmeticInstrCost(
610 Opcode, Ty, CostKind, Op1Info, Op2Info);
611 return Cost * CostFactor;
612}
613
614InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
615 VectorType *DstTy, VectorType *SrcTy,
616 ArrayRef<int> Mask,
617 TTI::TargetCostKind CostKind,
618 int Index, VectorType *SubTp,
619 ArrayRef<const Value *> Args,
620 const Instruction *CxtI) const {
621
622 InstructionCost CostFactor =
623 vectorCostAdjustmentFactor(Instruction::ShuffleVector, SrcTy, nullptr);
624 if (!CostFactor.isValid())
625 return InstructionCost::getMax();
626
627 // Legalize the type.
628 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcTy);
629
630 // PPC, for both Altivec/VSX, supports cheap arbitrary permutations
631 // (at least in the sense that there need only be one non-loop-invariant
632 // instruction). We need one such shuffle instruction for each actual
633 // register (this is not true for arbitrary shuffles, but is true for the
634 // structured types of shuffles covered by TTI::ShuffleKind).
635 return LT.first * CostFactor;
636}
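// Editorial note, not part of the LLVM source: for example, shuffling an
// <8 x i32> on a 128-bit Altivec/VSX register file legalizes to two vector
// registers, so LT.first is 2 and the estimate is two permute-class
// instructions (scaled by the adjustment factor computed above).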
637
638InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,
639 TTI::TargetCostKind CostKind,
640 const Instruction *I) const {
641 if (CostKind != TTI::TCK_RecipThroughput)
642 return Opcode == Instruction::PHI ? 0 : 1;
643 // Branches are assumed to be predicted.
644 return 0;
645}
646
647InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
648 Type *Src,
649 TTI::CastContextHint CCH,
650 TTI::TargetCostKind CostKind,
651 const Instruction *I) const {
652 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
653
654 InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
655 if (!CostFactor.isValid())
656 return InstructionCost::getMax();
657
658 InstructionCost Cost =
659 BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
660 Cost *= CostFactor;
661 // TODO: Allow non-throughput costs that aren't binary.
662 if (CostKind != TTI::TCK_RecipThroughput)
663 return Cost == 0 ? 0 : 1;
664 return Cost;
665}
666
667InstructionCost PPCTTIImpl::getCmpSelInstrCost(
668 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
669 TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
670 TTI::OperandValueInfo Op2Info, const Instruction *I) const {
671 InstructionCost CostFactor =
672 vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
673 if (!CostFactor.isValid())
674 return InstructionCost::getMax();
675
676 InstructionCost Cost = BaseT::getCmpSelInstrCost(
677 Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I);
678 // TODO: Handle other cost kinds.
679 if (CostKind != TTI::TCK_RecipThroughput)
680 return Cost;
681 return Cost * CostFactor;
682}
683
684InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
685 TTI::TargetCostKind CostKind,
686 unsigned Index, const Value *Op0,
687 const Value *Op1) const {
688 assert(Val->isVectorTy() && "This must be a vector type");
689
690 int ISD = TLI->InstructionOpcodeToISD(Opcode);
691 assert(ISD && "Invalid opcode");
692
693 InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
694 if (!CostFactor.isValid())
695 return InstructionCost::getMax();
696
697 InstructionCost Cost =
698 BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
699 Cost *= CostFactor;
700
701 if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
702 // Double-precision scalars are already located in index #0 (or #1 if LE).
703 if (ISD == ISD::INSERT_VECTOR_ELT &&
704 Index == (ST->isLittleEndian() ? 1 : 0))
705 return 0;
706
707 return Cost;
708 }
709 if (Val->getScalarType()->isIntegerTy()) {
710 unsigned EltSize = Val->getScalarSizeInBits();
711 // Computing on 1 bit values requires extra mask or compare operations.
712 unsigned MaskCostForOneBitSize = (VecMaskCost && EltSize == 1) ? 1 : 0;
713 // Computing on non const index requires extra mask or compare operations.
714 unsigned MaskCostForIdx = (Index != -1U) ? 0 : 1;
715 if (ST->hasP9Altivec()) {
716 // P10 has vxform insert which can handle non const index. The
717 // MaskCostForIdx is for masking the index.
718 // P9 has insert for const index. A move-to VSR and a permute/insert.
719 // Assume vector operation cost for both (cost will be 2x on P9).
720 if (ISD == ISD::INSERT_VECTOR_ELT) {
721 if (ST->hasP10Vector())
722 return CostFactor + MaskCostForIdx;
723 if (Index != -1U)
724 return 2 * CostFactor;
725 } else if (ISD == ISD::EXTRACT_VECTOR_ELT) {
726 // It's an extract. Maybe we can do a cheap move-from VSR.
727 unsigned EltSize = Val->getScalarSizeInBits();
728 // P9 has both mfvsrd and mfvsrld for 64 bit integer.
729 if (EltSize == 64 && Index != -1U)
730 return 1;
731 if (EltSize == 32) {
732 unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
733 if (Index == MfvsrwzIndex)
734 return 1;
735
736 // For other indexes, like a non-constant index, P9 has vxform extract. The
737 // MaskCostForIdx is for masking the index.
738 return CostFactor + MaskCostForIdx;
739 }
740
741 // We need a vector extract (or mfvsrld). Assume vector operation cost.
742 // The cost of the load constant for a vector extract is disregarded
743 // (invariant, easily schedulable).
744 return CostFactor + MaskCostForOneBitSize + MaskCostForIdx;
745 }
746 } else if (ST->hasDirectMove() && Index != -1U) {
747 // Assume permute has standard cost.
748 // Assume move-to/move-from VSR have 2x standard cost.
749 if (ISD == ISD::INSERT_VECTOR_ELT)
750 return 3;
751 return 3 + MaskCostForOneBitSize;
752 }
753 }
754
755 // Estimated cost of a load-hit-store delay. This was obtained
756 // experimentally as a minimum needed to prevent unprofitable
757 // vectorization for the paq8p benchmark. It may need to be
758 // raised further if other unprofitable cases remain.
759 unsigned LHSPenalty = 2;
760 if (ISD == ISD::INSERT_VECTOR_ELT)
761 LHSPenalty += 7;
762
763 // Vector element insert/extract with Altivec is very expensive,
764 // because they require store and reload with the attendant
765 // processor stall for load-hit-store. Until VSX is available,
766 // these need to be estimated as very costly.
767 if (ISD == ISD::EXTRACT_VECTOR_ELT ||
768 ISD == ISD::INSERT_VECTOR_ELT)
769 return LHSPenalty + Cost;
770
771 return Cost;
772}
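// Editorial note, not part of the LLVM source: two sample evaluations of the
// hook above. Extracting element 1 of a <2 x i64> on a Power9 subtarget takes
// the EltSize == 64 path and costs 1 (a single move-from-VSR). On an
// Altivec-only subtarget without VSX the same extract instead pays
// LHSPenalty (2) on top of the base cost, modelling the store/reload
// round trip through memory.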
773
774InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
775 Align Alignment,
776 unsigned AddressSpace,
777 TTI::TargetCostKind CostKind,
778 TTI::OperandValueInfo OpInfo,
779 const Instruction *I) const {
780
781 InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
782 if (!CostFactor.isValid())
783 return InstructionCost::getMax();
784
785 if (TLI->getValueType(DL, Src, true) == MVT::Other)
786 return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
787 CostKind);
788 // Legalize the type.
789 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
790 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
791 "Invalid Opcode");
792
793 InstructionCost Cost =
794 BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
795 // TODO: Handle other cost kinds.
796 if (CostKind != TTI::TCK_RecipThroughput)
797 return Cost;
798
799 Cost *= CostFactor;
800
801 bool IsAltivecType = ST->hasAltivec() &&
802 (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
803 LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
804 bool IsVSXType = ST->hasVSX() &&
805 (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
806
807 // VSX has 32b/64b load instructions. Legalization can handle loading of
808 // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
809 // PPCTargetLowering can't compute the cost appropriately. So here we
810 // explicitly check this case. There are also corresponding store
811 // instructions.
812 unsigned MemBits = Src->getPrimitiveSizeInBits();
813 unsigned SrcBytes = LT.second.getStoreSize();
814 if (ST->hasVSX() && IsAltivecType) {
815 if (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32))
816 return 1;
817
818 // Use lfiwax/xxspltw
819 if (Opcode == Instruction::Load && MemBits == 32 && Alignment < SrcBytes)
820 return 2;
821 }
822
823 // Aligned loads and stores are easy.
824 if (!SrcBytes || Alignment >= SrcBytes)
825 return Cost;
826
827 // If we can use the permutation-based load sequence, then this is also
828 // relatively cheap (not counting loop-invariant instructions): one load plus
829 // one permute (the last load in a series has extra cost, but we're
830 // neglecting that here). Note that on the P7, we could do unaligned loads
831 // for Altivec types using the VSX instructions, but that's more expensive
832 // than using the permutation-based load sequence. On the P8, that's no
833 // longer true.
834 if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
835 Alignment >= LT.second.getScalarType().getStoreSize())
836 return Cost + LT.first; // Add the cost of the permutations.
837
838 // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
839 // P7, unaligned vector loads are more expensive than the permutation-based
840 // load sequence, so that might be used instead, but regardless, the net cost
841 // is about the same (not counting loop-invariant instructions).
842 if (IsVSXType || (ST->hasVSX() && IsAltivecType))
843 return Cost;
844
845 // Newer PPC supports unaligned memory access.
846 if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
847 return Cost;
848
849 // PPC in general does not support unaligned loads and stores. They'll need
850 // to be decomposed based on the alignment factor.
851
852 // Add the cost of each scalar load or store.
853 Cost += LT.first * ((SrcBytes / Alignment.value()) - 1);
854
855 // For a vector type, there is also scalarization overhead (only for
856 // stores, loads are expanded using the vector-load + permutation sequence,
857 // which is much less expensive).
858 if (Src->isVectorTy() && Opcode == Instruction::Store)
859 for (int I = 0, E = cast<FixedVectorType>(Src)->getNumElements(); I < E;
860 ++I)
861 Cost += getVectorInstrCost(Instruction::ExtractElement, Src, CostKind, I,
862 nullptr, nullptr);
863
864 return Cost;
865}
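// Editorial note, not part of the LLVM source: a sample of the unaligned path
// above. A store of <4 x i32> with 4-byte alignment on an Altivec-only
// subtarget (no VSX) matches none of the cheap cases, so it is charged
// LT.first * ((16 / 4) - 1) == 3 additional scalar accesses plus one
// extractelement per lane from the scalarization loop at the end.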
866
867InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
868 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
869 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
870 bool UseMaskForCond, bool UseMaskForGaps) const {
871 InstructionCost CostFactor =
872 vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
873 if (!CostFactor.isValid())
874 return InstructionCost::getMax();
875
876 if (UseMaskForCond || UseMaskForGaps)
877 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
878 Alignment, AddressSpace, CostKind,
879 UseMaskForCond, UseMaskForGaps);
880
881 assert(isa<VectorType>(VecTy) &&
882 "Expect a vector type for interleaved memory op");
883
884 // Legalize the type.
885 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);
886
887 // Firstly, the cost of load/store operation.
888 InstructionCost Cost =
889 getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
890
891 // PPC, for both Altivec/VSX, supports cheap arbitrary permutations
892 // (at least in the sense that there need only be one non-loop-invariant
893 // instruction). For each result vector, we need one shuffle per incoming
894 // vector (except that the first shuffle can take two incoming vectors
895 // because it does not need to take itself).
896 Cost += Factor*(LT.first-1);
897
898 return Cost;
899}
900
901InstructionCost
902PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
903 TTI::TargetCostKind CostKind) const {
904 return BaseT::getIntrinsicInstrCost(ICA, CostKind);
905}
906
907bool PPCTTIImpl::areInlineCompatible(const Function *Caller,
908 const Function *Callee) const {
909 const TargetMachine &TM = getTLI()->getTargetMachine();
910
911 const FeatureBitset &CallerBits =
912 TM.getSubtargetImpl(*Caller)->getFeatureBits();
913 const FeatureBitset &CalleeBits =
914 TM.getSubtargetImpl(*Callee)->getFeatureBits();
915
916 // Check that the target features are exactly the same. We can revisit to see if
917 // we can improve this.
918 return CallerBits == CalleeBits;
919}
920
921bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
922 const Function *Callee,
923 ArrayRef<Type *> Types) const {
924
925 // We need to ensure that argument promotion does not
926 // attempt to promote pointers to MMA types (__vector_pair
927 // and __vector_quad) since these types explicitly cannot be
928 // passed as arguments. Both of these types are larger than
929 // the 128-bit Altivec vectors and have a scalar size of 1 bit.
930 if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
931 return false;
932
933 return llvm::none_of(Types, [](Type *Ty) {
934 if (Ty->isSized())
935 return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
936 return false;
937 });
938}
939
940bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
941 LoopInfo *LI, DominatorTree *DT,
942 AssumptionCache *AC,
943 TargetLibraryInfo *LibInfo) const {
944 // Process nested loops first.
945 for (Loop *I : *L)
946 if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))
947 return false; // Stop search.
948
949 HardwareLoopInfo HWLoopInfo(L);
950
951 if (!HWLoopInfo.canAnalyze(*LI))
952 return false;
953
954 if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
955 return false;
956
957 if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
958 return false;
959
960 *BI = HWLoopInfo.ExitBranch;
961 return true;
962}
963
964bool PPCTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
965 const TargetTransformInfo::LSRCost &C2) const {
966 // PowerPC default behaviour here is "instruction number 1st priority".
967 // If LsrNoInsnsCost is set, call default implementation.
968 if (!LsrNoInsnsCost)
969 return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
970 C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
971 std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
972 C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
973 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
974}
975
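// Editorial note, not part of the LLVM source: std::tie builds tuples of
// references that compare lexicographically, so the expression above means
// "fewer instructions wins, with ties broken by register count, AddRec cost,
// and the remaining fields in the order listed".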
976bool PPCTTIImpl::isNumRegsMajorCostOfLSR() const { return false; }
977
978bool PPCTTIImpl::shouldBuildRelLookupTables() const {
979 const PPCTargetMachine &TM = ST->getTargetMachine();
980 // XCOFF hasn't implemented lowerRelativeReference, disable non-ELF for now.
981 if (!TM.isELFv2ABI())
982 return false;
983 return BaseT::shouldBuildRelLookupTables();
984}
985
986bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
987 MemIntrinsicInfo &Info) const {
988 switch (Inst->getIntrinsicID()) {
989 case Intrinsic::ppc_altivec_lvx:
990 case Intrinsic::ppc_altivec_lvxl:
991 case Intrinsic::ppc_altivec_lvebx:
992 case Intrinsic::ppc_altivec_lvehx:
993 case Intrinsic::ppc_altivec_lvewx:
994 case Intrinsic::ppc_vsx_lxvd2x:
995 case Intrinsic::ppc_vsx_lxvw4x:
996 case Intrinsic::ppc_vsx_lxvd2x_be:
997 case Intrinsic::ppc_vsx_lxvw4x_be:
998 case Intrinsic::ppc_vsx_lxvl:
999 case Intrinsic::ppc_vsx_lxvll:
1000 case Intrinsic::ppc_vsx_lxvp: {
1001 Info.PtrVal = Inst->getArgOperand(0);
1002 Info.ReadMem = true;
1003 Info.WriteMem = false;
1004 return true;
1005 }
1006 case Intrinsic::ppc_altivec_stvx:
1007 case Intrinsic::ppc_altivec_stvxl:
1008 case Intrinsic::ppc_altivec_stvebx:
1009 case Intrinsic::ppc_altivec_stvehx:
1010 case Intrinsic::ppc_altivec_stvewx:
1011 case Intrinsic::ppc_vsx_stxvd2x:
1012 case Intrinsic::ppc_vsx_stxvw4x:
1013 case Intrinsic::ppc_vsx_stxvd2x_be:
1014 case Intrinsic::ppc_vsx_stxvw4x_be:
1015 case Intrinsic::ppc_vsx_stxvl:
1016 case Intrinsic::ppc_vsx_stxvll:
1017 case Intrinsic::ppc_vsx_stxvp: {
1018 Info.PtrVal = Inst->getArgOperand(1);
1019 Info.ReadMem = false;
1020 Info.WriteMem = true;
1021 return true;
1022 }
1023 case Intrinsic::ppc_stbcx:
1024 case Intrinsic::ppc_sthcx:
1025 case Intrinsic::ppc_stdcx:
1026 case Intrinsic::ppc_stwcx: {
1027 Info.PtrVal = Inst->getArgOperand(0);
1028 Info.ReadMem = false;
1029 Info.WriteMem = true;
1030 return true;
1031 }
1032 default:
1033 break;
1034 }
1035
1036 return false;
1037}
1038
1039bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
1040 return TLI->supportsTailCallFor(CB);
1041}
1042
1043// Target hook used by CodeGen to decide whether to expand vector predication
1044// intrinsics into scalar operations or to use special ISD nodes to represent
1045// them. The Target will not see the intrinsics.
1046TargetTransformInfo::VPLegalization
1047PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
1048 using VPLegalization = TargetTransformInfo::VPLegalization;
1049 unsigned Directive = ST->getCPUDirective();
1050 VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI);
1053 return DefaultLegalization;
1054
1055 if (!ST->isPPC64())
1056 return DefaultLegalization;
1057
1058 unsigned IID = PI.getIntrinsicID();
1059 if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store)
1060 return DefaultLegalization;
1061
1062 bool IsLoad = IID == Intrinsic::vp_load;
1063 Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType();
1064 EVT VT = TLI->getValueType(DL, VecTy, true);
1065 if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
1066 VT != MVT::v16i8)
1067 return DefaultLegalization;
1068
1069 auto IsAllTrueMask = [](Value *MaskVal) {
1070 if (Value *SplattedVal = getSplatValue(MaskVal))
1071 if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
1072 return ConstValue->isAllOnesValue();
1073 return false;
1074 };
1075 unsigned MaskIx = IsLoad ? 1 : 2;
1076 if (!IsAllTrueMask(PI.getOperand(MaskIx)))
1077 return DefaultLegalization;
1078
1079 return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
1080}