1//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Analysis/TargetTransformInfo.h"
10#include "llvm/Analysis/CFG.h"
11#include "llvm/Analysis/LoopIterator.h"
12#include "llvm/Analysis/TargetTransformInfoImpl.h"
13#include "llvm/IR/CFG.h"
14#include "llvm/IR/Dominators.h"
15#include "llvm/IR/Instruction.h"
16#include "llvm/IR/Instructions.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/Module.h"
19#include "llvm/IR/Operator.h"
20#include "llvm/IR/PatternMatch.h"
21#include "llvm/InitializePasses.h"
22#include "llvm/Support/CommandLine.h"
23#include <optional>
24#include <utility>
25
26using namespace llvm;
27using namespace PatternMatch;
28
29#define DEBUG_TYPE "tti"
30
31static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
32 cl::Hidden,
33 cl::desc("Recognize reduction patterns."));
34
35static cl::opt<unsigned> CacheLineSize(
36 "cache-line-size", cl::init(0), cl::Hidden,
37 cl::desc("Use this to override the target cache line size when "
38 "specified by the user."));
39
40static cl::opt<unsigned> PredictableBranchThreshold(
41 "predictable-branch-threshold", cl::init(99), cl::Hidden,
42 cl::desc(
43 "Use this to override the target's predictable branch threshold (%)."));
44
45namespace {
46/// No-op implementation of the TTI interface using the utility base
47/// classes.
48///
49/// This is used when no target specific information is available.
50struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
51 explicit NoTTIImpl(const DataLayout &DL)
52 : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
53};
54} // namespace
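// Illustrative sketch: constructing a TargetTransformInfo straight from a
// DataLayout, as the constructor further down in this file does, wraps this
// NoTTIImpl and answers every query with the conservative defaults from
// TargetTransformInfoImplCRTPBase. Assuming a hypothetical llvm::Module `M`
// is in scope:
//
//   TargetTransformInfo DefaultTTI(M.getDataLayout());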
55
56bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
57 // If the loop has irreducible control flow, it cannot be converted to a
58 // hardware loop.
59 LoopBlocksRPO RPOT(L);
60 RPOT.perform(&LI);
61 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
62 return false;
63 return true;
64}
65
66IntrinsicCostAttributes::IntrinsicCostAttributes(
67 Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost,
68 bool TypeBasedOnly)
69 : II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
70 ScalarizationCost(ScalarizationCost) {
71
72 if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
73 FMF = FPMO->getFastMathFlags();
74
75 if (!TypeBasedOnly)
76 Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
77 FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
78 ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
79}
80
81IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
82 ArrayRef<Type *> Tys,
83 FastMathFlags Flags,
84 const IntrinsicInst *I,
85 InstructionCost ScalarCost)
86 : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
87 ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
88}
89
90IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
91 ArrayRef<const Value *> Args)
92 : RetTy(Ty), IID(Id) {
93
94 Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
95 ParamTys.reserve(Arguments.size());
96 for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
97 ParamTys.push_back(Arguments[Idx]->getType());
98}
99
100IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
101 ArrayRef<const Value *> Args,
102 ArrayRef<Type *> Tys,
103 FastMathFlags Flags,
104 const IntrinsicInst *I,
105 InstructionCost ScalarCost)
106 : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
107 ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
108 Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
109}
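// Illustrative sketch: assuming the default arguments declared for these
// constructors in TargetTransformInfo.h, a purely type-based query for a
// hypothetical <4 x i32> vector type `VecTy` could be phrased as
//
//   IntrinsicCostAttributes Attrs(Intrinsic::ctpop, VecTy, {VecTy});
//   InstructionCost C =
//       TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_RecipThroughput);
//
// where `TTI` is an already-constructed TargetTransformInfo.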
110
111bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
112 LoopInfo &LI, DominatorTree &DT,
113 bool ForceNestedLoop,
114 bool ForceHardwareLoopPHI) {
115 SmallVector<BasicBlock *, 4> ExitingBlocks;
116 L->getExitingBlocks(ExitingBlocks);
117
118 for (BasicBlock *BB : ExitingBlocks) {
119 // If we pass the updated counter back through a phi, we need to know
120 // which latch the updated value will be coming from.
121 if (!L->isLoopLatch(BB)) {
122 if (ForceHardwareLoopPHI || CounterInReg)
123 continue;
124 }
125
126 const SCEV *EC = SE.getExitCount(L, BB);
127 if (isa<SCEVCouldNotCompute>(EC))
128 continue;
129 if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
130 if (ConstEC->getValue()->isZero())
131 continue;
132 } else if (!SE.isLoopInvariant(EC, L))
133 continue;
134
135 if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
136 continue;
137
138 // If this exiting block is contained in a nested loop, it is not eligible
139 // for insertion of the branch-and-decrement since the inner loop would
140 // end up messing up the value in the CTR.
141 if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
142 continue;
143
144 // We now have a loop-invariant count of loop iterations (which is not the
145 // constant zero) for which we know that this loop will not exit via this
146 // exiting block.
147
148 // We need to make sure that this block will run on every loop iteration.
149 // For this to be true, we must dominate all blocks with backedges. Such
150 // blocks are in-loop predecessors to the header block.
151 bool NotAlways = false;
152 for (BasicBlock *Pred : predecessors(L->getHeader())) {
153 if (!L->contains(Pred))
154 continue;
155
156 if (!DT.dominates(BB, Pred)) {
157 NotAlways = true;
158 break;
159 }
160 }
161
162 if (NotAlways)
163 continue;
164
165 // Make sure this block ends with a conditional branch.
166 Instruction *TI = BB->getTerminator();
167 if (!TI)
168 continue;
169
170 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
171 if (!BI->isConditional())
172 continue;
173
174 ExitBranch = BI;
175 } else
176 continue;
177
178 // Note that this block may not be the loop latch block, even if the loop
179 // has a latch block.
180 ExitBlock = BB;
181 ExitCount = EC;
182 break;
183 }
184
185 if (!ExitBlock)
186 return false;
187 return true;
188}
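// Illustrative sketch: for a simple counted loop such as
//
//   for (unsigned i = 0; i != n; ++i)
//     body();
//
// whose only exiting block is the latch and whose exit count `n` is loop
// invariant and fits in CountType, the walk above records that latch in
// ExitBlock, its conditional branch in ExitBranch, and the SCEV for `n` in
// ExitCount, and the function returns true.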
189
190TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
191 : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
192
193TargetTransformInfo::~TargetTransformInfo() = default;
194
195TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
196 : TTIImpl(std::move(Arg.TTIImpl)) {}
197
198TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
199 TTIImpl = std::move(RHS.TTIImpl);
200 return *this;
201}
202
204 return TTIImpl->getInliningThresholdMultiplier();
205}
206
207unsigned
209 return TTIImpl->adjustInliningThreshold(CB);
210}
211
213 return TTIImpl->getInlinerVectorBonusPercent();
214}
215
220 return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
221}
222
226 assert((Base || !Info.isSameBase()) &&
227 "If pointers have same base address it has to be provided.");
228 return TTIImpl->getPointersChainCost(Ptrs, Base, Info, CostKind);
229}
230
232 const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
233 BlockFrequencyInfo *BFI) const {
234 return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
235}
236
237InstructionCost
238TargetTransformInfo::getInstructionCost(const User *U,
239 ArrayRef<const Value *> Operands,
240 enum TargetCostKind CostKind) const {
241 InstructionCost Cost = TTIImpl->getInstructionCost(U, Operands, CostKind);
242 assert(Cost >= 0 &&
243 "TTI should not produce negative costs!");
244 return Cost;
245}
246
247BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
248 return PredictableBranchThreshold.getNumOccurrences() > 0
249 ? BranchProbability(PredictableBranchThreshold, 100)
250 : TTIImpl->getPredictableBranchThreshold();
251}
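// Illustrative sketch: running a tool such as opt with
// -predictable-branch-threshold=90 makes the override above win, so this
// returns BranchProbability(90, 100); without the flag, the value comes from
// TTIImpl->getPredictableBranchThreshold().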
252
254 return TTIImpl->hasBranchDivergence();
255}
256
258 return TTIImpl->useGPUDivergenceAnalysis();
259}
260
262 return TTIImpl->isSourceOfDivergence(V);
263}
264
266 return TTIImpl->isAlwaysUniform(V);
267}
268
270 return TTIImpl->getFlatAddressSpace();
271}
272
274 SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
275 return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
276}
277
279 unsigned ToAS) const {
280 return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
281}
282
284 unsigned AS) const {
285 return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);
286}
287
289 return TTIImpl->getAssumedAddrSpace(V);
290}
291
293 return TTIImpl->isSingleThreaded();
294}
295
296std::pair<const Value *, unsigned>
298 return TTIImpl->getPredicatedAddrSpace(V);
299}
300
302 IntrinsicInst *II, Value *OldV, Value *NewV) const {
303 return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
304}
305
307 return TTIImpl->isLoweredToCall(F);
308}
309
312 TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
313 return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
314}
315
319 InterleavedAccessInfo *IAI) const {
320 return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
321}
322
324 bool IVUpdateMayOverflow) const {
325 return TTIImpl->getPreferredTailFoldingStyle(IVUpdateMayOverflow);
326}
327
328std::optional<Instruction *>
330 IntrinsicInst &II) const {
331 return TTIImpl->instCombineIntrinsic(IC, II);
332}
333
335 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
336 bool &KnownBitsComputed) const {
337 return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
338 KnownBitsComputed);
339}
340
342 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
343 APInt &UndefElts2, APInt &UndefElts3,
344 std::function<void(Instruction *, unsigned, APInt, APInt &)>
345 SimplifyAndSetOp) const {
346 return TTIImpl->simplifyDemandedVectorEltsIntrinsic(
347 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
348 SimplifyAndSetOp);
349}
350
353 OptimizationRemarkEmitter *ORE) const {
354 return TTIImpl->getUnrollingPreferences(L, SE, UP, ORE);
355}
356
358 PeelingPreferences &PP) const {
359 return TTIImpl->getPeelingPreferences(L, SE, PP);
360}
361
363 return TTIImpl->isLegalAddImmediate(Imm);
364}
365
367 return TTIImpl->isLegalICmpImmediate(Imm);
368}
369
371 int64_t BaseOffset,
372 bool HasBaseReg, int64_t Scale,
373 unsigned AddrSpace,
374 Instruction *I) const {
375 return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
376 Scale, AddrSpace, I);
377}
378
380 const LSRCost &C2) const {
381 return TTIImpl->isLSRCostLess(C1, C2);
382}
383
385 return TTIImpl->isNumRegsMajorCostOfLSR();
386}
387
389 return TTIImpl->isProfitableLSRChainElement(I);
390}
391
393 return TTIImpl->canMacroFuseCmp();
394}
395
397 ScalarEvolution *SE, LoopInfo *LI,
399 TargetLibraryInfo *LibInfo) const {
400 return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
401}
402
405 ScalarEvolution *SE) const {
406 return TTIImpl->getPreferredAddressingMode(L, SE);
407}
408
410 Align Alignment) const {
411 return TTIImpl->isLegalMaskedStore(DataType, Alignment);
412}
413
415 Align Alignment) const {
416 return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
417}
418
420 Align Alignment) const {
421 return TTIImpl->isLegalNTStore(DataType, Alignment);
422}
423
424bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
425 return TTIImpl->isLegalNTLoad(DataType, Alignment);
426}
427
429 ElementCount NumElements) const {
430 return TTIImpl->isLegalBroadcastLoad(ElementTy, NumElements);
431}
432
434 Align Alignment) const {
435 return TTIImpl->isLegalMaskedGather(DataType, Alignment);
436}
437
439 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
440 const SmallBitVector &OpcodeMask) const {
441 return TTIImpl->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
442}
443
445 Align Alignment) const {
446 return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
447}
448
450 Align Alignment) const {
451 return TTIImpl->forceScalarizeMaskedGather(DataType, Alignment);
452}
453
455 Align Alignment) const {
456 return TTIImpl->forceScalarizeMaskedScatter(DataType, Alignment);
457}
458
460 return TTIImpl->isLegalMaskedCompressStore(DataType);
461}
462
464 return TTIImpl->isLegalMaskedExpandLoad(DataType);
465}
466
468 return TTIImpl->enableOrderedReductions();
469}
470
471bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
472 return TTIImpl->hasDivRemOp(DataType, IsSigned);
473}
474
476 unsigned AddrSpace) const {
477 return TTIImpl->hasVolatileVariant(I, AddrSpace);
478}
479
481 return TTIImpl->prefersVectorizedAddressing();
482}
483
485 Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
486 int64_t Scale, unsigned AddrSpace) const {
487 InstructionCost Cost = TTIImpl->getScalingFactorCost(
488 Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace);
489 assert(Cost >= 0 && "TTI should not produce negative costs!");
490 return Cost;
491}
492
494 return TTIImpl->LSRWithInstrQueries();
495}
496
498 return TTIImpl->isTruncateFree(Ty1, Ty2);
499}
500
502 return TTIImpl->isProfitableToHoist(I);
503}
504
505bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
506
508 return TTIImpl->isTypeLegal(Ty);
509}
510
512 return TTIImpl->getRegUsageForType(Ty);
513}
514
516 return TTIImpl->shouldBuildLookupTables();
517}
518
520 Constant *C) const {
521 return TTIImpl->shouldBuildLookupTablesForConstant(C);
522}
523
525 return TTIImpl->shouldBuildRelLookupTables();
526}
527
529 return TTIImpl->useColdCCForColdCall(F);
530}
531
533 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
535 return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
536 CostKind);
537}
538
542 return TTIImpl->getOperandsScalarizationOverhead(Args, Tys, CostKind);
543}
544
546 return TTIImpl->supportsEfficientVectorElementLoadStore();
547}
548
550 return TTIImpl->supportsTailCalls();
551}
552
554 return TTIImpl->supportsTailCallFor(CB);
555}
556
558 bool LoopHasReductions) const {
559 return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
560}
561
563TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
564 return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
565}
566
568 return TTIImpl->enableSelectOptimize();
569}
570
572 return TTIImpl->enableInterleavedAccessVectorization();
573}
574
576 return TTIImpl->enableMaskedInterleavedAccessVectorization();
577}
578
580 return TTIImpl->isFPVectorizationPotentiallyUnsafe();
581}
582
583bool
585 unsigned BitWidth,
586 unsigned AddressSpace,
587 Align Alignment,
588 unsigned *Fast) const {
589 return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
590 AddressSpace, Alignment, Fast);
591}
592
594TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
595 return TTIImpl->getPopcntSupport(IntTyWidthInBit);
596}
597
599 return TTIImpl->haveFastSqrt(Ty);
600}
601
603 const Instruction *I) const {
604 return TTIImpl->isExpensiveToSpeculativelyExecute(I);
605}
606
608 return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
609}
610
612 InstructionCost Cost = TTIImpl->getFPOpCost(Ty);
613 assert(Cost >= 0 && "TTI should not produce negative costs!");
614 return Cost;
615}
616
618 unsigned Idx,
619 const APInt &Imm,
620 Type *Ty) const {
621 InstructionCost Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
622 assert(Cost >= 0 && "TTI should not produce negative costs!");
623 return Cost;
624}
625
629 InstructionCost Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
630 assert(Cost >= 0 && "TTI should not produce negative costs!");
631 return Cost;
632}
633
635 unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
638 TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
639 assert(Cost >= 0 && "TTI should not produce negative costs!");
640 return Cost;
641}
642
645 const APInt &Imm, Type *Ty,
648 TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
649 assert(Cost >= 0 && "TTI should not produce negative costs!");
650 return Cost;
651}
652
653unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
654 return TTIImpl->getNumberOfRegisters(ClassID);
655}
656
658 Type *Ty) const {
659 return TTIImpl->getRegisterClassForType(Vector, Ty);
660}
661
662const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
663 return TTIImpl->getRegisterClassName(ClassID);
664}
665
668 return TTIImpl->getRegisterBitWidth(K);
669}
670
672 return TTIImpl->getMinVectorRegisterBitWidth();
673}
674
675std::optional<unsigned> TargetTransformInfo::getMaxVScale() const {
676 return TTIImpl->getMaxVScale();
677}
678
679std::optional<unsigned> TargetTransformInfo::getVScaleForTuning() const {
680 return TTIImpl->getVScaleForTuning();
681}
682
684 return TTIImpl->isVScaleKnownToBeAPowerOfTwo();
685}
686
689 return TTIImpl->shouldMaximizeVectorBandwidth(K);
690}
691
693 bool IsScalable) const {
694 return TTIImpl->getMinimumVF(ElemWidth, IsScalable);
695}
696
697unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth,
698 unsigned Opcode) const {
699 return TTIImpl->getMaximumVF(ElemWidth, Opcode);
700}
701
702unsigned TargetTransformInfo::getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
703 Type *ScalarValTy) const {
704 return TTIImpl->getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
705}
706
708 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
709 return TTIImpl->shouldConsiderAddressTypePromotion(
710 I, AllowPromotionWithoutCommonHeader);
711}
712
713unsigned TargetTransformInfo::getCacheLineSize() const {
714 return CacheLineSize.getNumOccurrences() > 0 ? CacheLineSize
715 : TTIImpl->getCacheLineSize();
716}
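// Illustrative sketch: -cache-line-size=64 forces this query to report 64 for
// any target; with no occurrence of the flag, the result comes from
// TTIImpl->getCacheLineSize().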
717
718std::optional<unsigned>
720 return TTIImpl->getCacheSize(Level);
721}
722
723std::optional<unsigned>
725 return TTIImpl->getCacheAssociativity(Level);
726}
727
729 return TTIImpl->getPrefetchDistance();
730}
731
733 unsigned NumMemAccesses, unsigned NumStridedMemAccesses,
734 unsigned NumPrefetches, bool HasCall) const {
735 return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
736 NumPrefetches, HasCall);
737}
738
740 return TTIImpl->getMaxPrefetchIterationsAhead();
741}
742
744 return TTIImpl->enableWritePrefetching();
745}
746
748 return TTIImpl->shouldPrefetchAddressSpace(AS);
749}
750
752 return TTIImpl->getMaxInterleaveFactor(VF);
753}
754
755TargetTransformInfo::OperandValueInfo
756TargetTransformInfo::getOperandInfo(const Value *V) {
757 OperandValueKind OpInfo = OK_AnyValue;
758 OperandValueProperties OpProps = OP_None;
759
760 if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
761 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
762 if (CI->getValue().isPowerOf2())
763 OpProps = OP_PowerOf2;
764 else if (CI->getValue().isNegatedPowerOf2())
765 OpProps = OP_NegatedPowerOf2;
766 }
767 return {OK_UniformConstantValue, OpProps};
768 }
769
770 // A broadcast shuffle creates a uniform value.
771 // TODO: Add support for non-zero index broadcasts.
772 // TODO: Add support for different source vector width.
773 if (const auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
774 if (ShuffleInst->isZeroEltSplat())
775 OpInfo = OK_UniformValue;
776
777 const Value *Splat = getSplatValue(V);
778
779 // Check for a splat of a constant or for a non uniform vector of constants
780 // and check if the constant(s) are all powers of two.
781 if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
782 OpInfo = OK_NonUniformConstantValue;
783 if (Splat) {
784 OpInfo = OK_UniformConstantValue;
785 if (auto *CI = dyn_cast<ConstantInt>(Splat)) {
786 if (CI->getValue().isPowerOf2())
787 OpProps = OP_PowerOf2;
788 else if (CI->getValue().isNegatedPowerOf2())
789 OpProps = OP_NegatedPowerOf2;
790 }
791 } else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
792 bool AllPow2 = true, AllNegPow2 = true;
793 for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
794 if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) {
795 AllPow2 &= CI->getValue().isPowerOf2();
796 AllNegPow2 &= CI->getValue().isNegatedPowerOf2();
797 if (AllPow2 || AllNegPow2)
798 continue;
799 }
800 AllPow2 = AllNegPow2 = false;
801 break;
802 }
803 OpProps = AllPow2 ? OP_PowerOf2 : OpProps;
804 OpProps = AllNegPow2 ? OP_NegatedPowerOf2 : OpProps;
805 }
806 }
807
808 // Check for a splat of a uniform value. This is not loop aware, so return
809 // true only for the obviously uniform cases (argument, globalvalue)
810 if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
811 OpInfo = OK_UniformValue;
812
813 return {OpInfo, OpProps};
814}
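// Illustrative examples for assumed IR values: a splat vector of the constant
// 8 yields {OK_UniformConstantValue, OP_PowerOf2}; a vector of distinct,
// non-power-of-two constants yields {OK_NonUniformConstantValue, OP_None};
// a broadcast shuffle of a function argument yields {OK_UniformValue, OP_None}.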
815
816InstructionCost TargetTransformInfo::getArithmeticInstrCost(
817 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
818 OperandValueInfo Op1Info, OperandValueInfo Op2Info,
819 ArrayRef<const Value *> Args, const Instruction *CxtI) const {
820 InstructionCost Cost =
821 TTIImpl->getArithmeticInstrCost(Opcode, Ty, CostKind,
822 Op1Info, Op2Info,
823 Args, CxtI);
824 assert(Cost >= 0 && "TTI should not produce negative costs!");
825 return Cost;
826}
827
828InstructionCost TargetTransformInfo::getShuffleCost(
829 ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
830 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
831 ArrayRef<const Value *> Args) const {
832 InstructionCost Cost =
833 TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args);
834 assert(Cost >= 0 && "TTI should not produce negative costs!");
835 return Cost;
836}
837
838TTI::CastContextHint
839TargetTransformInfo::getCastContextHint(const Instruction *I) {
840 if (!I)
841 return CastContextHint::None;
842
843 auto getLoadStoreKind = [](const Value *V, unsigned LdStOp, unsigned MaskedOp,
844 unsigned GatScatOp) {
845 const Instruction *I = dyn_cast<Instruction>(V);
846 if (!I)
847 return CastContextHint::None;
848
849 if (I->getOpcode() == LdStOp)
850 return CastContextHint::Normal;
851
852 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
853 if (II->getIntrinsicID() == MaskedOp)
854 return TTI::CastContextHint::Masked;
855 if (II->getIntrinsicID() == GatScatOp)
856 return TTI::CastContextHint::GatherScatter;
857 }
858
859 return TTI::CastContextHint::None;
860 };
861
862 switch (I->getOpcode()) {
863 case Instruction::ZExt:
864 case Instruction::SExt:
865 case Instruction::FPExt:
866 return getLoadStoreKind(I->getOperand(0), Instruction::Load,
867 Intrinsic::masked_load, Intrinsic::masked_gather);
868 case Instruction::Trunc:
869 case Instruction::FPTrunc:
870 if (I->hasOneUse())
871 return getLoadStoreKind(*I->user_begin(), Instruction::Store,
872 Intrinsic::masked_store,
873 Intrinsic::masked_scatter);
874 break;
875 default:
876 return CastContextHint::None;
877 }
878
879 return CastContextHint::None;
880}
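// Illustrative examples: a zext fed by a plain load maps to
// CastContextHint::Normal, a zext fed by an llvm.masked.load result maps to
// CastContextHint::Masked, and a trunc whose single use is an ordinary store
// maps to CastContextHint::Normal; everything else falls back to
// CastContextHint::None.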
881
883 unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH,
885 assert((I == nullptr || I->getOpcode() == Opcode) &&
886 "Opcode should reflect passed instruction.");
888 TTIImpl->getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
889 assert(Cost >= 0 && "TTI should not produce negative costs!");
890 return Cost;
891}
892
894 unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const {
896 TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
897 assert(Cost >= 0 && "TTI should not produce negative costs!");
898 return Cost;
899}
900
902 unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I) const {
903 assert((I == nullptr || I->getOpcode() == Opcode) &&
904 "Opcode should reflect passed instruction.");
905 InstructionCost Cost = TTIImpl->getCFInstrCost(Opcode, CostKind, I);
906 assert(Cost >= 0 && "TTI should not produce negative costs!");
907 return Cost;
908}
909
911 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
913 assert((I == nullptr || I->getOpcode() == Opcode) &&
914 "Opcode should reflect passed instruction.");
916 TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
917 assert(Cost >= 0 && "TTI should not produce negative costs!");
918 return Cost;
919}
920
922 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
923 Value *Op0, Value *Op1) const {
924 // FIXME: Assert that Opcode is either InsertElement or ExtractElement.
925 // This is mentioned in the interface description and respected by all
926 // callers, but never asserted upon.
928 TTIImpl->getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
929 assert(Cost >= 0 && "TTI should not produce negative costs!");
930 return Cost;
931}
932
936 unsigned Index) const {
937 // FIXME: Assert that Opcode is either InsertElement or ExtractElement.
938 // This is mentioned in the interface description and respected by all
939 // callers, but never asserted upon.
940 InstructionCost Cost = TTIImpl->getVectorInstrCost(I, Val, CostKind, Index);
941 assert(Cost >= 0 && "TTI should not produce negative costs!");
942 return Cost;
943}
944
946 Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts,
948 InstructionCost Cost = TTIImpl->getReplicationShuffleCost(
949 EltTy, ReplicationFactor, VF, DemandedDstElts, CostKind);
950 assert(Cost >= 0 && "TTI should not produce negative costs!");
951 return Cost;
952}
953
955 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
957 const Instruction *I) const {
958 assert((I == nullptr || I->getOpcode() == Opcode) &&
959 "Opcode should reflect passed instruction.");
960 InstructionCost Cost = TTIImpl->getMemoryOpCost(
961 Opcode, Src, Alignment, AddressSpace, CostKind, OpInfo, I);
962 assert(Cost >= 0 && "TTI should not produce negative costs!");
963 return Cost;
964}
965
967 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
969 InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment,
971 assert(Cost >= 0 && "TTI should not produce negative costs!");
972 return Cost;
973}
974
976 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
977 Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
978 InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
979 Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
980 assert(Cost >= 0 && "TTI should not produce negative costs!");
981 return Cost;
982}
983
985 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
986 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
987 bool UseMaskForCond, bool UseMaskForGaps) const {
988 InstructionCost Cost = TTIImpl->getInterleavedMemoryOpCost(
989 Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
990 UseMaskForCond, UseMaskForGaps);
991 assert(Cost >= 0 && "TTI should not produce negative costs!");
992 return Cost;
993}
994
998 InstructionCost Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind);
999 assert(Cost >= 0 && "TTI should not produce negative costs!");
1000 return Cost;
1001}
1002
1005 ArrayRef<Type *> Tys,
1007 InstructionCost Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
1008 assert(Cost >= 0 && "TTI should not produce negative costs!");
1009 return Cost;
1010}
1011
1013 return TTIImpl->getNumberOfParts(Tp);
1014}
1015
1018 const SCEV *Ptr) const {
1019 InstructionCost Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
1020 assert(Cost >= 0 && "TTI should not produce negative costs!");
1021 return Cost;
1022}
1023
1025 InstructionCost Cost = TTIImpl->getMemcpyCost(I);
1026 assert(Cost >= 0 && "TTI should not produce negative costs!");
1027 return Cost;
1028}
1029
1031 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1034 TTIImpl->getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
1035 assert(Cost >= 0 && "TTI should not produce negative costs!");
1036 return Cost;
1037}
1038
1040 VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1043 TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
1044 assert(Cost >= 0 && "TTI should not produce negative costs!");
1045 return Cost;
1046}
1047
1049 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1050 std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const {
1051 return TTIImpl->getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
1052 CostKind);
1053}
1054
1056 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1058 return TTIImpl->getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
1059}
1060
1063 return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
1064}
1065
1067 MemIntrinsicInfo &Info) const {
1068 return TTIImpl->getTgtMemIntrinsic(Inst, Info);
1069}
1070
1072 return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
1073}
1074
1076 IntrinsicInst *Inst, Type *ExpectedType) const {
1077 return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1078}
1079
1081 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1082 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1083 std::optional<uint32_t> AtomicElementSize) const {
1084 return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
1085 DestAddrSpace, SrcAlign, DestAlign,
1086 AtomicElementSize);
1087}
1088
1090 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1091 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1092 unsigned SrcAlign, unsigned DestAlign,
1093 std::optional<uint32_t> AtomicCpySize) const {
1094 TTIImpl->getMemcpyLoopResidualLoweringType(
1095 OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
1096 DestAlign, AtomicCpySize);
1097}
1098
1100 const Function *Callee) const {
1101 return TTIImpl->areInlineCompatible(Caller, Callee);
1102}
1103
1105 const Function *Caller, const Function *Callee,
1106 const ArrayRef<Type *> &Types) const {
1107 return TTIImpl->areTypesABICompatible(Caller, Callee, Types);
1108}
1109
1111 Type *Ty) const {
1112 return TTIImpl->isIndexedLoadLegal(Mode, Ty);
1113}
1114
1116 Type *Ty) const {
1117 return TTIImpl->isIndexedStoreLegal(Mode, Ty);
1118}
1119
1121 return TTIImpl->getLoadStoreVecRegBitWidth(AS);
1122}
1123
1125 return TTIImpl->isLegalToVectorizeLoad(LI);
1126}
1127
1129 return TTIImpl->isLegalToVectorizeStore(SI);
1130}
1131
1133 unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
1134 return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1135 AddrSpace);
1136}
1137
1139 unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
1140 return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1141 AddrSpace);
1142}
1143
1145 const RecurrenceDescriptor &RdxDesc, ElementCount VF) const {
1146 return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
1147}
1148
1150 return TTIImpl->isElementTypeLegalForScalableVector(Ty);
1151}
1152
1154 unsigned LoadSize,
1155 unsigned ChainSizeInBytes,
1156 VectorType *VecTy) const {
1157 return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1158}
1159
1161 unsigned StoreSize,
1162 unsigned ChainSizeInBytes,
1163 VectorType *VecTy) const {
1164 return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1165}
1166
1168 ReductionFlags Flags) const {
1169 return TTIImpl->preferInLoopReduction(Opcode, Ty, Flags);
1170}
1171
1173 unsigned Opcode, Type *Ty, ReductionFlags Flags) const {
1174 return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags);
1175}
1176
1178 return TTIImpl->preferEpilogueVectorization();
1179}
1180
1183 return TTIImpl->getVPLegalizationStrategy(VPI);
1184}
1185
1187 return TTIImpl->hasArmWideBranch(Thumb);
1188}
1189
1191 return TTIImpl->shouldExpandReduction(II);
1192}
1193
1195 return TTIImpl->getGISelRematGlobalCost();
1196}
1197
1199 return TTIImpl->getMinTripCountTailFoldingThreshold();
1200}
1201
1203 return TTIImpl->supportsScalableVectors();
1204}
1205
1207 return TTIImpl->enableScalableVectorization();
1208}
1209
1210bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
1211 Align Alignment) const {
1212 return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);
1213}
1214
1215TargetTransformInfo::Concept::~Concept() = default;
1216
1217TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
1218
1219TargetIRAnalysis::TargetIRAnalysis(
1220 std::function<Result(const Function &)> TTICallback)
1221 : TTICallback(std::move(TTICallback)) {}
1222
1223TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
1224 FunctionAnalysisManager &) {
1225 return TTICallback(F);
1226}
1227
1228AnalysisKey TargetIRAnalysis::Key;
1229
1230TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
1231 return Result(F.getParent()->getDataLayout());
1232}
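// Illustrative usage sketch under the new pass manager, assuming a
// FunctionAnalysisManager `FAM` and a Function `F` are available:
//
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//
// Legacy passes reach the same object through
// TargetTransformInfoWrapperPass::getTTI(F), defined below.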
1233
1234// Register the basic pass.
1236 "Target Transform Information", false, true)
1238
1239void TargetTransformInfoWrapperPass::anchor() {}
1240
1242 : ImmutablePass(ID) {
1245}
1246
1248 TargetIRAnalysis TIRA)
1249 : ImmutablePass(ID), TIRA(std::move(TIRA)) {
1252}
1253
1255 FunctionAnalysisManager DummyFAM;
1256 TTI = TIRA.run(F, DummyFAM);
1257 return *TTI;
1258}
1259
1260ImmutablePass *
1261llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
1262 return new TargetTransformInfoWrapperPass(std::move(TIRA));
1263}