TargetTransformInfo.cpp (LLVM 18.0.0git)
1//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Analysis/TargetTransformInfo.h"
10#include "llvm/Analysis/CFG.h"
11#include "llvm/Analysis/LoopIterator.h"
12#include "llvm/Analysis/TargetTransformInfoImpl.h"
13#include "llvm/IR/CFG.h"
14#include "llvm/IR/Dominators.h"
15#include "llvm/IR/Instruction.h"
16#include "llvm/IR/Instructions.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/Module.h"
19#include "llvm/IR/Operator.h"
20#include "llvm/IR/PatternMatch.h"
21#include "llvm/InitializePasses.h"
22#include "llvm/Support/CommandLine.h"
23#include <optional>
24#include <utility>
25
26using namespace llvm;
27using namespace PatternMatch;
28
29#define DEBUG_TYPE "tti"
30
31static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
32 cl::Hidden,
33 cl::desc("Recognize reduction patterns."));
34
35static cl::opt<unsigned> CacheLineSize(
36 "cache-line-size", cl::init(0), cl::Hidden,
37 cl::desc("Use this to override the target cache line size when "
38 "specified by the user."));
39
40static cl::opt<unsigned> PredictableBranchThreshold(
41 "predictable-branch-threshold", cl::init(99), cl::Hidden,
42 cl::desc(
43 "Use this to override the target's predictable branch threshold (%)."));
44
45namespace {
46/// No-op implementation of the TTI interface using the utility base
47/// classes.
48///
49/// This is used when no target specific information is available.
50struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
51 explicit NoTTIImpl(const DataLayout &DL)
52 : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
53};
54} // namespace
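// Illustrative usage sketch (assumes a Function F is in scope): when no target
// callback is installed, TargetIRAnalysis falls back to this no-op
// implementation via getDefaultTTI (defined at the end of this file):
//
//   TargetIRAnalysis TIRA;                      // default-constructed
//   FunctionAnalysisManager FAM;
//   TargetTransformInfo TTI = TIRA.run(F, FAM); // wraps NoTTIImpl(DL)
//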
55
56bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
57 // If the loop has irreducible control flow, it cannot be converted to a
58 // hardware loop.
59 LoopBlocksRPO RPOT(L);
60 RPOT.perform(&LI);
61 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
62 return false;
63 return true;
64}
65
66IntrinsicCostAttributes::IntrinsicCostAttributes(
67 Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost,
68 bool TypeBasedOnly)
69 : II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
70 ScalarizationCost(ScalarizationCost) {
71
72 if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
73 FMF = FPMO->getFastMathFlags();
74
75 if (!TypeBasedOnly)
76 Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
77 FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
78 ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
79}
80
81IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
82 ArrayRef<Type *> Tys,
83 FastMathFlags Flags,
84 const IntrinsicInst *I,
85 InstructionCost ScalarCost)
86 : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
87 ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
88}
89
90IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
91 ArrayRef<const Value *> Args)
92 : RetTy(Ty), IID(Id) {
93
94 Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
95 ParamTys.reserve(Arguments.size());
96 for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
97 ParamTys.push_back(Arguments[Idx]->getType());
98}
99
100IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
101 ArrayRef<const Value *> Args,
102 ArrayRef<Type *> Tys,
103 FastMathFlags Flags,
104 const IntrinsicInst *I,
105 InstructionCost ScalarCost)
106 : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
107 ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
108 Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
109}
110
111HardwareLoopInfo::HardwareLoopInfo(Loop *L) : L(L) {
112 // Match default options:
113 // - hardware-loop-counter-bitwidth = 32
114 // - hardware-loop-decrement = 1
115 CountType = Type::getInt32Ty(L->getHeader()->getContext());
116 LoopDecrement = ConstantInt::get(CountType, 1);
117}
118
119bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
120 LoopInfo &LI, DominatorTree &DT,
121 bool ForceNestedLoop,
122 bool ForceHardwareLoopPHI) {
123 SmallVector<BasicBlock *, 4> ExitingBlocks;
124 L->getExitingBlocks(ExitingBlocks);
125
126 for (BasicBlock *BB : ExitingBlocks) {
127 // If we pass the updated counter back through a phi, we need to know
128 // which latch the updated value will be coming from.
129 if (!L->isLoopLatch(BB)) {
130 if (ForceHardwareLoopPHI || CounterInReg)
131 continue;
132 }
133
134 const SCEV *EC = SE.getExitCount(L, BB);
135 if (isa<SCEVCouldNotCompute>(EC))
136 continue;
137 if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
138 if (ConstEC->getValue()->isZero())
139 continue;
140 } else if (!SE.isLoopInvariant(EC, L))
141 continue;
142
143 if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
144 continue;
145
146 // If this exiting block is contained in a nested loop, it is not eligible
147 // for insertion of the branch-and-decrement since the inner loop would
148 // end up messing up the value in the CTR.
149 if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
150 continue;
151
152 // We now have a loop-invariant count of loop iterations (which is not the
153 // constant zero) for which we know that this loop will not exit via this
154 // exiting block.
155
156 // We need to make sure that this block will run on every loop iteration.
157 // For this to be true, we must dominate all blocks with backedges. Such
158 // blocks are in-loop predecessors to the header block.
159 bool NotAlways = false;
160 for (BasicBlock *Pred : predecessors(L->getHeader())) {
161 if (!L->contains(Pred))
162 continue;
163
164 if (!DT.dominates(BB, Pred)) {
165 NotAlways = true;
166 break;
167 }
168 }
169
170 if (NotAlways)
171 continue;
172
173 // Make sure this block ends with a conditional branch.
174 Instruction *TI = BB->getTerminator();
175 if (!TI)
176 continue;
177
178 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
179 if (!BI->isConditional())
180 continue;
181
182 ExitBranch = BI;
183 } else
184 continue;
185
186 // Note that this block may not be the loop latch block, even if the loop
187 // has a latch block.
188 ExitBlock = BB;
189 ExitCount = EC;
190 break;
191 }
192
193 if (!ExitBlock)
194 return false;
195 return true;
196}
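// Illustrative sketch of how a client (for example a hardware-loops transform)
// typically drives the two helpers above; the analysis objects are placeholders
// for whatever the caller already has:
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (HWLoopInfo.canAnalyze(LI) &&
//       TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
//     ... // safe to form a counter-based hardware loop
//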
197
198TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
199 : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
200
201TargetTransformInfo::~TargetTransformInfo() = default;
202
203TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
204 : TTIImpl(std::move(Arg.TTIImpl)) {}
205
206TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
207 TTIImpl = std::move(RHS.TTIImpl);
208 return *this;
209}
210
211unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
212 return TTIImpl->getInliningThresholdMultiplier();
213}
214
215unsigned
216TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
217 return TTIImpl->adjustInliningThreshold(CB);
218}
219
220unsigned TargetTransformInfo::getCallerAllocaCost(const CallBase *CB,
221 const AllocaInst *AI) const {
222 return TTIImpl->getCallerAllocaCost(CB, AI);
223}
224
225int TargetTransformInfo::getInlinerVectorBonusPercent() const {
226 return TTIImpl->getInlinerVectorBonusPercent();
227}
228
229InstructionCost TargetTransformInfo::getGEPCost(
230 Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands,
231 Type *AccessType, TTI::TargetCostKind CostKind) const {
232 return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
233}
234
235InstructionCost TargetTransformInfo::getPointersChainCost(
236 ArrayRef<const Value *> Ptrs, const Value *Base,
237 const TTI::PointersChainInfo &Info, Type *AccessTy,
238 TTI::TargetCostKind CostKind) const {
239 assert((Base || !Info.isSameBase()) &&
240 "If pointers have same base address it has to be provided.");
241 return TTIImpl->getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
242}
243
244unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
245 const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
246 BlockFrequencyInfo *BFI) const {
247 return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
248}
249
250InstructionCost
251TargetTransformInfo::getInstructionCost(const User *U,
252 ArrayRef<const Value *> Operands,
253 enum TargetCostKind CostKind) const {
254 InstructionCost Cost = TTIImpl->getInstructionCost(U, Operands, CostKind);
255 assert(Cost >= 0 &&
256 "TTI should not produce negative costs!");
257 return Cost;
258}
259
260BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
261 return PredictableBranchThreshold.getNumOccurrences() > 0
262 ? BranchProbability(PredictableBranchThreshold, 100)
263 : TTIImpl->getPredictableBranchThreshold();
264}
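// For reference: with -predictable-branch-threshold=N on the command line the
// override above yields BranchProbability(N, 100), i.e. N percent, instead of
// the target's own value.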
265
267 return TTIImpl->hasBranchDivergence(F);
268}
269
271 return TTIImpl->isSourceOfDivergence(V);
272}
273
275 return TTIImpl->isAlwaysUniform(V);
276}
277
279 unsigned ToAS) const {
280 return TTIImpl->isValidAddrSpaceCast(FromAS, ToAS);
281}
282
284 unsigned ToAS) const {
285 return TTIImpl->addrspacesMayAlias(FromAS, ToAS);
286}
287
289 return TTIImpl->getFlatAddressSpace();
290}
291
293 SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
294 return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
295}
296
298 unsigned ToAS) const {
299 return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
300}
301
303 unsigned AS) const {
304 return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);
305}
306
308 return TTIImpl->getAssumedAddrSpace(V);
309}
310
312 return TTIImpl->isSingleThreaded();
313}
314
315std::pair<const Value *, unsigned>
317 return TTIImpl->getPredicatedAddrSpace(V);
318}
319
321 IntrinsicInst *II, Value *OldV, Value *NewV) const {
322 return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
323}
324
326 return TTIImpl->isLoweredToCall(F);
327}
328
329bool TargetTransformInfo::isHardwareLoopProfitable(
330 Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
331 TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
332 return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
333}
334
335bool TargetTransformInfo::preferPredicateOverEpilogue(
336 TailFoldingInfo *TFI) const {
337 return TTIImpl->preferPredicateOverEpilogue(TFI);
338}
339
340TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle(
341 bool IVUpdateMayOverflow) const {
342 return TTIImpl->getPreferredTailFoldingStyle(IVUpdateMayOverflow);
343}
344
345std::optional<Instruction *>
346TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC,
347 IntrinsicInst &II) const {
348 return TTIImpl->instCombineIntrinsic(IC, II);
349}
350
351std::optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic(
352 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
353 bool &KnownBitsComputed) const {
354 return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
355 KnownBitsComputed);
356}
357
358std::optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
359 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
360 APInt &UndefElts2, APInt &UndefElts3,
361 std::function<void(Instruction *, unsigned, APInt, APInt &)>
362 SimplifyAndSetOp) const {
363 return TTIImpl->simplifyDemandedVectorEltsIntrinsic(
364 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
365 SimplifyAndSetOp);
366}
367
368void TargetTransformInfo::getUnrollingPreferences(
369 Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP,
370 OptimizationRemarkEmitter *ORE) const {
371 return TTIImpl->getUnrollingPreferences(L, SE, UP, ORE);
372}
373
374void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
375 PeelingPreferences &PP) const {
376 return TTIImpl->getPeelingPreferences(L, SE, PP);
377}
378
380 return TTIImpl->isLegalAddImmediate(Imm);
381}
382
384 return TTIImpl->isLegalICmpImmediate(Imm);
385}
386
388 int64_t BaseOffset,
389 bool HasBaseReg, int64_t Scale,
390 unsigned AddrSpace,
391 Instruction *I) const {
392 return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
393 Scale, AddrSpace, I);
394}
395
397 const LSRCost &C2) const {
398 return TTIImpl->isLSRCostLess(C1, C2);
399}
400
402 return TTIImpl->isNumRegsMajorCostOfLSR();
403}
404
406 return TTIImpl->isProfitableLSRChainElement(I);
407}
408
410 return TTIImpl->canMacroFuseCmp();
411}
412
413bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
414 ScalarEvolution *SE, LoopInfo *LI,
415 DominatorTree *DT, AssumptionCache *AC,
416 TargetLibraryInfo *LibInfo) const {
417 return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
418}
419
420TTI::AddressingModeKind
421TargetTransformInfo::getPreferredAddressingMode(const Loop *L,
422 ScalarEvolution *SE) const {
423 return TTIImpl->getPreferredAddressingMode(L, SE);
424}
425
427 Align Alignment) const {
428 return TTIImpl->isLegalMaskedStore(DataType, Alignment);
429}
430
432 Align Alignment) const {
433 return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
434}
435
437 Align Alignment) const {
438 return TTIImpl->isLegalNTStore(DataType, Alignment);
439}
440
441bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
442 return TTIImpl->isLegalNTLoad(DataType, Alignment);
443}
444
446 ElementCount NumElements) const {
447 return TTIImpl->isLegalBroadcastLoad(ElementTy, NumElements);
448}
449
451 Align Alignment) const {
452 return TTIImpl->isLegalMaskedGather(DataType, Alignment);
453}
454
456 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
457 const SmallBitVector &OpcodeMask) const {
458 return TTIImpl->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
459}
460
462 Align Alignment) const {
463 return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
464}
465
467 Align Alignment) const {
468 return TTIImpl->forceScalarizeMaskedGather(DataType, Alignment);
469}
470
472 Align Alignment) const {
473 return TTIImpl->forceScalarizeMaskedScatter(DataType, Alignment);
474}
475
477 return TTIImpl->isLegalMaskedCompressStore(DataType);
478}
479
481 return TTIImpl->isLegalMaskedExpandLoad(DataType);
482}
483
485 return TTIImpl->enableOrderedReductions();
486}
487
488bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
489 return TTIImpl->hasDivRemOp(DataType, IsSigned);
490}
491
493 unsigned AddrSpace) const {
494 return TTIImpl->hasVolatileVariant(I, AddrSpace);
495}
496
498 return TTIImpl->prefersVectorizedAddressing();
499}
500
502 Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
503 int64_t Scale, unsigned AddrSpace) const {
504 InstructionCost Cost = TTIImpl->getScalingFactorCost(
505 Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace);
506 assert(Cost >= 0 && "TTI should not produce negative costs!");
507 return Cost;
508}
509
511 return TTIImpl->LSRWithInstrQueries();
512}
513
515 return TTIImpl->isTruncateFree(Ty1, Ty2);
516}
517
519 return TTIImpl->isProfitableToHoist(I);
520}
521
522bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
523
525 return TTIImpl->isTypeLegal(Ty);
526}
527
529 return TTIImpl->getRegUsageForType(Ty);
530}
531
533 return TTIImpl->shouldBuildLookupTables();
534}
535
537 Constant *C) const {
538 return TTIImpl->shouldBuildLookupTablesForConstant(C);
539}
540
542 return TTIImpl->shouldBuildRelLookupTables();
543}
544
546 return TTIImpl->useColdCCForColdCall(F);
547}
548
549InstructionCost TargetTransformInfo::getScalarizationOverhead(
550 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
551 TTI::TargetCostKind CostKind) const {
552 return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
553 CostKind);
554}
555
556InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead(
557 ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
558 TTI::TargetCostKind CostKind) const {
559 return TTIImpl->getOperandsScalarizationOverhead(Args, Tys, CostKind);
560}
561
563 return TTIImpl->supportsEfficientVectorElementLoadStore();
564}
565
567 return TTIImpl->supportsTailCalls();
568}
569
571 return TTIImpl->supportsTailCallFor(CB);
572}
573
575 bool LoopHasReductions) const {
576 return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
577}
578
579TargetTransformInfo::MemCmpExpansionOptions
580TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
581 return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
582}
583
585 return TTIImpl->enableSelectOptimize();
586}
587
589 return TTIImpl->enableInterleavedAccessVectorization();
590}
591
593 return TTIImpl->enableMaskedInterleavedAccessVectorization();
594}
595
597 return TTIImpl->isFPVectorizationPotentiallyUnsafe();
598}
599
600bool
601TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
602 unsigned BitWidth,
603 unsigned AddressSpace,
604 Align Alignment,
605 unsigned *Fast) const {
606 return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
607 AddressSpace, Alignment, Fast);
608}
609
610TargetTransformInfo::PopcntSupportKind
611TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
612 return TTIImpl->getPopcntSupport(IntTyWidthInBit);
613}
614
616 return TTIImpl->haveFastSqrt(Ty);
617}
618
620 const Instruction *I) const {
621 return TTIImpl->isExpensiveToSpeculativelyExecute(I);
622}
623
625 return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
626}
627
629 InstructionCost Cost = TTIImpl->getFPOpCost(Ty);
630 assert(Cost >= 0 && "TTI should not produce negative costs!");
631 return Cost;
632}
633
635 unsigned Idx,
636 const APInt &Imm,
637 Type *Ty) const {
638 InstructionCost Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
639 assert(Cost >= 0 && "TTI should not produce negative costs!");
640 return Cost;
641}
642
643InstructionCost
644TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
645 TTI::TargetCostKind CostKind) const {
646 InstructionCost Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
647 assert(Cost >= 0 && "TTI should not produce negative costs!");
648 return Cost;
649}
650
651InstructionCost TargetTransformInfo::getIntImmCostInst(
652 unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
653 TTI::TargetCostKind CostKind, Instruction *Inst) const {
654 InstructionCost Cost =
655 TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
656 assert(Cost >= 0 && "TTI should not produce negative costs!");
657 return Cost;
658}
659
660InstructionCost
661TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
662 const APInt &Imm, Type *Ty,
663 TTI::TargetCostKind CostKind) const {
664 InstructionCost Cost =
665 TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
666 assert(Cost >= 0 && "TTI should not produce negative costs!");
667 return Cost;
668}
669
670unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
671 return TTIImpl->getNumberOfRegisters(ClassID);
672}
673
674unsigned TargetTransformInfo::getRegisterClassForType(bool Vector,
675 Type *Ty) const {
676 return TTIImpl->getRegisterClassForType(Vector, Ty);
677}
678
679const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
680 return TTIImpl->getRegisterClassName(ClassID);
681}
682
683TypeSize TargetTransformInfo::getRegisterBitWidth(
684 TargetTransformInfo::RegisterKind K) const {
685 return TTIImpl->getRegisterBitWidth(K);
686}
687
689 return TTIImpl->getMinVectorRegisterBitWidth();
690}
691
692std::optional<unsigned> TargetTransformInfo::getMaxVScale() const {
693 return TTIImpl->getMaxVScale();
694}
695
696std::optional<unsigned> TargetTransformInfo::getVScaleForTuning() const {
697 return TTIImpl->getVScaleForTuning();
698}
699
701 return TTIImpl->isVScaleKnownToBeAPowerOfTwo();
702}
703
704bool TargetTransformInfo::shouldMaximizeVectorBandwidth(
705 TargetTransformInfo::RegisterKind K) const {
706 return TTIImpl->shouldMaximizeVectorBandwidth(K);
707}
708
710 bool IsScalable) const {
711 return TTIImpl->getMinimumVF(ElemWidth, IsScalable);
712}
713
714unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth,
715 unsigned Opcode) const {
716 return TTIImpl->getMaximumVF(ElemWidth, Opcode);
717}
718
719unsigned TargetTransformInfo::getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
720 Type *ScalarValTy) const {
721 return TTIImpl->getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
722}
723
725 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
726 return TTIImpl->shouldConsiderAddressTypePromotion(
727 I, AllowPromotionWithoutCommonHeader);
728}
729
730unsigned TargetTransformInfo::getCacheLineSize() const {
731 return CacheLineSize.getNumOccurrences() > 0 ? CacheLineSize
732 : TTIImpl->getCacheLineSize();
733}
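// For reference: -cache-line-size=<bytes> takes precedence here; with the
// default of 0 the query falls through to the target implementation.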
734
735std::optional<unsigned>
737 return TTIImpl->getCacheSize(Level);
738}
739
740std::optional<unsigned>
742 return TTIImpl->getCacheAssociativity(Level);
743}
744
746 return TTIImpl->getPrefetchDistance();
747}
748
750 unsigned NumMemAccesses, unsigned NumStridedMemAccesses,
751 unsigned NumPrefetches, bool HasCall) const {
752 return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
753 NumPrefetches, HasCall);
754}
755
757 return TTIImpl->getMaxPrefetchIterationsAhead();
758}
759
761 return TTIImpl->enableWritePrefetching();
762}
763
765 return TTIImpl->shouldPrefetchAddressSpace(AS);
766}
767
769 return TTIImpl->getMaxInterleaveFactor(VF);
770}
771
772TargetTransformInfo::OperandValueInfo
773TargetTransformInfo::getOperandInfo(const Value *V) {
774 OperandValueKind OpInfo = OK_AnyValue;
775 OperandValueProperties OpProps = OP_None;
776
777 if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
778 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
779 if (CI->getValue().isPowerOf2())
780 OpProps = OP_PowerOf2;
781 else if (CI->getValue().isNegatedPowerOf2())
782 OpProps = OP_NegatedPowerOf2;
783 }
784 return {OK_UniformConstantValue, OpProps};
785 }
786
787 // A broadcast shuffle creates a uniform value.
788 // TODO: Add support for non-zero index broadcasts.
789 // TODO: Add support for different source vector width.
790 if (const auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
791 if (ShuffleInst->isZeroEltSplat())
792 OpInfo = OK_UniformValue;
793
794 const Value *Splat = getSplatValue(V);
795
796 // Check for a splat of a constant or for a non uniform vector of constants
797 // and check if the constant(s) are all powers of two.
798 if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
799 OpInfo = OK_NonUniformConstantValue;
800 if (Splat) {
801 OpInfo = OK_UniformConstantValue;
802 if (auto *CI = dyn_cast<ConstantInt>(Splat)) {
803 if (CI->getValue().isPowerOf2())
804 OpProps = OP_PowerOf2;
805 else if (CI->getValue().isNegatedPowerOf2())
806 OpProps = OP_NegatedPowerOf2;
807 }
808 } else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
809 bool AllPow2 = true, AllNegPow2 = true;
810 for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
811 if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) {
812 AllPow2 &= CI->getValue().isPowerOf2();
813 AllNegPow2 &= CI->getValue().isNegatedPowerOf2();
814 if (AllPow2 || AllNegPow2)
815 continue;
816 }
817 AllPow2 = AllNegPow2 = false;
818 break;
819 }
820 OpProps = AllPow2 ? OP_PowerOf2 : OpProps;
821 OpProps = AllNegPow2 ? OP_NegatedPowerOf2 : OpProps;
822 }
823 }
824
825 // Check for a splat of a uniform value. This is not loop aware, so return
826 // true only for the obviously uniform cases (argument, globalvalue)
827 if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
828 OpInfo = OK_UniformValue;
829
830 return {OpInfo, OpProps};
831}
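// Illustrative examples of the classification above (assuming integer operands):
//   - a scalar constant 8              -> {OK_UniformConstantValue, OP_PowerOf2}
//   - a splat vector of the constant -16 -> {OK_UniformConstantValue, OP_NegatedPowerOf2}
//   - <i32 1, i32 2, i32 3, i32 5>     -> {OK_NonUniformConstantValue, OP_None}
//   - a zero-index broadcast shuffle of a loaded value -> {OK_UniformValue, OP_None}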
832
833InstructionCost TargetTransformInfo::getArithmeticInstrCost(
834 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
835 OperandValueInfo Op1Info, OperandValueInfo Op2Info,
836 ArrayRef<const Value *> Args, const Instruction *CxtI) const {
837 InstructionCost Cost =
838 TTIImpl->getArithmeticInstrCost(Opcode, Ty, CostKind,
839 Op1Info, Op2Info,
840 Args, CxtI);
841 assert(Cost >= 0 && "TTI should not produce negative costs!");
842 return Cost;
843}
844
845InstructionCost TargetTransformInfo::getShuffleCost(
846 ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
847 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
848 ArrayRef<const Value *> Args) const {
849 InstructionCost Cost =
850 TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args);
851 assert(Cost >= 0 && "TTI should not produce negative costs!");
852 return Cost;
853}
854
855TTI::CastContextHint
856TargetTransformInfo::getCastContextHint(const Instruction *I) {
857 if (!I)
858 return CastContextHint::None;
859
860 auto getLoadStoreKind = [](const Value *V, unsigned LdStOp, unsigned MaskedOp,
861 unsigned GatScatOp) {
862 const Instruction *I = dyn_cast<Instruction>(V);
863 if (!I)
864 return CastContextHint::None;
865
866 if (I->getOpcode() == LdStOp)
867 return CastContextHint::Normal;
868
869 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
870 if (II->getIntrinsicID() == MaskedOp)
871 return TTI::CastContextHint::Masked;
872 if (II->getIntrinsicID() == GatScatOp)
873 return TTI::CastContextHint::GatherScatter;
874 }
875
876 return TTI::CastContextHint::None;
877 };
878
879 switch (I->getOpcode()) {
880 case Instruction::ZExt:
881 case Instruction::SExt:
882 case Instruction::FPExt:
883 return getLoadStoreKind(I->getOperand(0), Instruction::Load,
884 Intrinsic::masked_load, Intrinsic::masked_gather);
885 case Instruction::Trunc:
886 case Instruction::FPTrunc:
887 if (I->hasOneUse())
888 return getLoadStoreKind(*I->user_begin(), Instruction::Store,
889 Intrinsic::masked_store,
890 Intrinsic::masked_scatter);
891 break;
892 default:
893 return CastContextHint::None;
894 }
895
896 return TTI::CastContextHint::None;
897}
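// Illustrative example of the hint computed above: for
//   %v = call <4 x i16> @llvm.masked.load.v4i16.p0(...)
//   %z = zext <4 x i16> %v to <4 x i32>
// the zext is classified as CastContextHint::Masked, while a zext fed by a
// plain load would be CastContextHint::Normal.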
898
899InstructionCost TargetTransformInfo::getCastInstrCost(
900 unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH,
901 TTI::TargetCostKind CostKind, const Instruction *I) const {
902 assert((I == nullptr || I->getOpcode() == Opcode) &&
903 "Opcode should reflect passed instruction.");
904 InstructionCost Cost =
905 TTIImpl->getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
906 assert(Cost >= 0 && "TTI should not produce negative costs!");
907 return Cost;
908}
909
910InstructionCost TargetTransformInfo::getExtractWithExtendCost(
911 unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const {
912 InstructionCost Cost =
913 TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
914 assert(Cost >= 0 && "TTI should not produce negative costs!");
915 return Cost;
916}
917
918InstructionCost TargetTransformInfo::getCFInstrCost(
919 unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I) const {
920 assert((I == nullptr || I->getOpcode() == Opcode) &&
921 "Opcode should reflect passed instruction.");
922 InstructionCost Cost = TTIImpl->getCFInstrCost(Opcode, CostKind, I);
923 assert(Cost >= 0 && "TTI should not produce negative costs!");
924 return Cost;
925}
926
927InstructionCost TargetTransformInfo::getCmpSelInstrCost(
928 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
929 TTI::TargetCostKind CostKind, const Instruction *I) const {
930 assert((I == nullptr || I->getOpcode() == Opcode) &&
931 "Opcode should reflect passed instruction.");
932 InstructionCost Cost =
933 TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
934 assert(Cost >= 0 && "TTI should not produce negative costs!");
935 return Cost;
936}
937
938InstructionCost TargetTransformInfo::getVectorInstrCost(
939 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
940 Value *Op0, Value *Op1) const {
941 // FIXME: Assert that Opcode is either InsertElement or ExtractElement.
942 // This is mentioned in the interface description and respected by all
943 // callers, but never asserted upon.
944 InstructionCost Cost =
945 TTIImpl->getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
946 assert(Cost >= 0 && "TTI should not produce negative costs!");
947 return Cost;
948}
949
950InstructionCost
951TargetTransformInfo::getVectorInstrCost(const Instruction &I, Type *Val,
952 TTI::TargetCostKind CostKind,
953 unsigned Index) const {
954 // FIXME: Assert that Opcode is either InsertElement or ExtractElement.
955 // This is mentioned in the interface description and respected by all
956 // callers, but never asserted upon.
957 InstructionCost Cost = TTIImpl->getVectorInstrCost(I, Val, CostKind, Index);
958 assert(Cost >= 0 && "TTI should not produce negative costs!");
959 return Cost;
960}
961
962InstructionCost TargetTransformInfo::getReplicationShuffleCost(
963 Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts,
964 TTI::TargetCostKind CostKind) const {
965 InstructionCost Cost = TTIImpl->getReplicationShuffleCost(
966 EltTy, ReplicationFactor, VF, DemandedDstElts, CostKind);
967 assert(Cost >= 0 && "TTI should not produce negative costs!");
968 return Cost;
969}
970
971InstructionCost TargetTransformInfo::getMemoryOpCost(
972 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
973 TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
974 const Instruction *I) const {
975 assert((I == nullptr || I->getOpcode() == Opcode) &&
976 "Opcode should reflect passed instruction.");
977 InstructionCost Cost = TTIImpl->getMemoryOpCost(
978 Opcode, Src, Alignment, AddressSpace, CostKind, OpInfo, I);
979 assert(Cost >= 0 && "TTI should not produce negative costs!");
980 return Cost;
981}
982
983InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
984 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
985 TTI::TargetCostKind CostKind) const {
986 InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment,
987 AddressSpace, CostKind);
988 assert(Cost >= 0 && "TTI should not produce negative costs!");
989 return Cost;
990}
991
992InstructionCost TargetTransformInfo::getGatherScatterOpCost(
993 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
994 Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
995 InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
996 Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
997 assert(Cost >= 0 && "TTI should not produce negative costs!");
998 return Cost;
999}
1000
1001InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
1002 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1003 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1004 bool UseMaskForCond, bool UseMaskForGaps) const {
1005 InstructionCost Cost = TTIImpl->getInterleavedMemoryOpCost(
1006 Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
1007 UseMaskForCond, UseMaskForGaps);
1008 assert(Cost >= 0 && "TTI should not produce negative costs!");
1009 return Cost;
1010}
1011
1012InstructionCost
1013TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1014 TTI::TargetCostKind CostKind) const {
1015 InstructionCost Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind);
1016 assert(Cost >= 0 && "TTI should not produce negative costs!");
1017 return Cost;
1018}
1019
1020InstructionCost
1021TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
1022 ArrayRef<Type *> Tys,
1023 TTI::TargetCostKind CostKind) const {
1024 InstructionCost Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
1025 assert(Cost >= 0 && "TTI should not produce negative costs!");
1026 return Cost;
1027}
1028
1030 return TTIImpl->getNumberOfParts(Tp);
1031}
1032
1033InstructionCost
1034TargetTransformInfo::getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
1035 const SCEV *Ptr) const {
1036 InstructionCost Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
1037 assert(Cost >= 0 && "TTI should not produce negative costs!");
1038 return Cost;
1039}
1040
1041InstructionCost TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
1042 InstructionCost Cost = TTIImpl->getMemcpyCost(I);
1043 assert(Cost >= 0 && "TTI should not produce negative costs!");
1044 return Cost;
1045}
1046
1047uint64_t TargetTransformInfo::getMaxMemIntrinsicInlineSizeThreshold() const {
1048 return TTIImpl->getMaxMemIntrinsicInlineSizeThreshold();
1049}
1050
1051InstructionCost TargetTransformInfo::getArithmeticReductionCost(
1052 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1053 TTI::TargetCostKind CostKind) const {
1054 InstructionCost Cost =
1055 TTIImpl->getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
1056 assert(Cost >= 0 && "TTI should not produce negative costs!");
1057 return Cost;
1058}
1059
1060InstructionCost TargetTransformInfo::getMinMaxReductionCost(
1061 Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
1062 TTI::TargetCostKind CostKind) const {
1063 InstructionCost Cost =
1064 TTIImpl->getMinMaxReductionCost(IID, Ty, FMF, CostKind);
1065 assert(Cost >= 0 && "TTI should not produce negative costs!");
1066 return Cost;
1067}
1068
1069InstructionCost TargetTransformInfo::getExtendedReductionCost(
1070 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1071 FastMathFlags FMF, TTI::TargetCostKind CostKind) const {
1072 return TTIImpl->getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
1073 CostKind);
1074}
1075
1076InstructionCost TargetTransformInfo::getMulAccReductionCost(
1077 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1078 TTI::TargetCostKind CostKind) const {
1079 return TTIImpl->getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
1080}
1081
1082InstructionCost
1083TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
1084 return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
1085}
1086
1088 MemIntrinsicInfo &Info) const {
1089 return TTIImpl->getTgtMemIntrinsic(Inst, Info);
1090}
1091
1093 return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
1094}
1095
1097 IntrinsicInst *Inst, Type *ExpectedType) const {
1098 return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1099}
1100
1102 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1103 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1104 std::optional<uint32_t> AtomicElementSize) const {
1105 return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
1106 DestAddrSpace, SrcAlign, DestAlign,
1107 AtomicElementSize);
1108}
1109
1111 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1112 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1113 unsigned SrcAlign, unsigned DestAlign,
1114 std::optional<uint32_t> AtomicCpySize) const {
1115 TTIImpl->getMemcpyLoopResidualLoweringType(
1116 OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
1117 DestAlign, AtomicCpySize);
1118}
1119
1121 const Function *Callee) const {
1122 return TTIImpl->areInlineCompatible(Caller, Callee);
1123}
1124
1126 const Function *Caller, const Function *Callee,
1127 const ArrayRef<Type *> &Types) const {
1128 return TTIImpl->areTypesABICompatible(Caller, Callee, Types);
1129}
1130
1132 Type *Ty) const {
1133 return TTIImpl->isIndexedLoadLegal(Mode, Ty);
1134}
1135
1137 Type *Ty) const {
1138 return TTIImpl->isIndexedStoreLegal(Mode, Ty);
1139}
1140
1142 return TTIImpl->getLoadStoreVecRegBitWidth(AS);
1143}
1144
1146 return TTIImpl->isLegalToVectorizeLoad(LI);
1147}
1148
1150 return TTIImpl->isLegalToVectorizeStore(SI);
1151}
1152
1154 unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
1155 return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1156 AddrSpace);
1157}
1158
1160 unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
1161 return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1162 AddrSpace);
1163}
1164
1166 const RecurrenceDescriptor &RdxDesc, ElementCount VF) const {
1167 return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
1168}
1169
1171 return TTIImpl->isElementTypeLegalForScalableVector(Ty);
1172}
1173
1175 unsigned LoadSize,
1176 unsigned ChainSizeInBytes,
1177 VectorType *VecTy) const {
1178 return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1179}
1180
1182 unsigned StoreSize,
1183 unsigned ChainSizeInBytes,
1184 VectorType *VecTy) const {
1185 return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1186}
1187
1189 ReductionFlags Flags) const {
1190 return TTIImpl->preferInLoopReduction(Opcode, Ty, Flags);
1191}
1192
1194 unsigned Opcode, Type *Ty, ReductionFlags Flags) const {
1195 return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags);
1196}
1197
1199 return TTIImpl->preferEpilogueVectorization();
1200}
1201
1202TargetTransformInfo::VPLegalization
1203TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
1204 return TTIImpl->getVPLegalizationStrategy(VPI);
1205}
1206
1208 return TTIImpl->hasArmWideBranch(Thumb);
1209}
1210
1212 return TTIImpl->getMaxNumArgs();
1213}
1214
1216 return TTIImpl->shouldExpandReduction(II);
1217}
1218
1220 return TTIImpl->getGISelRematGlobalCost();
1221}
1222
1224 return TTIImpl->getMinTripCountTailFoldingThreshold();
1225}
1226
1228 return TTIImpl->supportsScalableVectors();
1229}
1230
1232 return TTIImpl->enableScalableVectorization();
1233}
1234
1235bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
1236 Align Alignment) const {
1237 return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);
1238}
1239
1240TargetTransformInfo::Concept::~Concept() = default;
1241
1242TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
1243
1244TargetIRAnalysis::TargetIRAnalysis(
1245 std::function<Result(const Function &)> TTICallback)
1246 : TTICallback(std::move(TTICallback)) {}
1247
1248TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
1249 FunctionAnalysisManager &) {
1250 return TTICallback(F);
1251}
1252
1253AnalysisKey TargetIRAnalysis::Key;
1254
1255TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
1256 return Result(F.getParent()->getDataLayout());
1257}
1258
1259// Register the basic pass.
1261 "Target Transform Information", false, true)
1263
1264void TargetTransformInfoWrapperPass::anchor() {}
1265
1266TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
1267 : ImmutablePass(ID) {
1268 initializeTargetTransformInfoWrapperPassPass(
1269 *PassRegistry::getPassRegistry());
1270}
1271
1272TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
1273 TargetIRAnalysis TIRA)
1274 : ImmutablePass(ID), TIRA(std::move(TIRA)) {
1275 initializeTargetTransformInfoWrapperPassPass(
1276 *PassRegistry::getPassRegistry());
1277}
1278
1279TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
1280 FunctionAnalysisManager DummyFAM;
1281 TTI = TIRA.run(F, DummyFAM);
1282 return *TTI;
1283}
1284
1285ImmutablePass *
1286llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
1287 return new TargetTransformInfoWrapperPass(std::move(TIRA));
1288}
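// Illustrative sketch: a legacy-PM pass consumes this analysis roughly as
//
//   AU.addRequired<TargetTransformInfoWrapperPass>();            // getAnalysisUsage
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); // runOnFunction
//
// while new-PM passes obtain it via FAM.getResult<TargetIRAnalysis>(F) instead.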