1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>
27
28namespace llvm {
29
30class Function;
31
/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {

protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  virtual ~TargetTransformInfoImplBase();

  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

50 virtual const DataLayout &getDataLayout() const { return DL; }
51
52 // FIXME: It looks like this implementation is dead. All clients appear to
53 // use the (non-const) version from `TargetTransformInfoImplCRTPBase`.
  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                                     ArrayRef<const Value *> Operands,
                                     Type *AccessType,
                                     TTI::TargetCostKind CostKind) const {
58 // In the basic model, we just assume that all-constant GEPs will be folded
59 // into their uses via addressing modes.
60 for (const Value *Operand : Operands)
61 if (!isa<Constant>(Operand))
62 return TTI::TCC_Basic;
63
64 return TTI::TCC_Free;
65 }
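  // Illustrative sketch (not part of the original header): under this default
  // model, a GEP whose indices are all constants is assumed to fold into the
  // addressing modes of its users, while any variable index makes it a real
  // instruction, e.g.
  //   %p = getelementptr i32, ptr %base, i64 4    ; all-constant  -> TCC_Free
  //   %q = getelementptr i32, ptr %base, i64 %i   ; variable idx  -> TCC_Basic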
66
  virtual InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const {
71 llvm_unreachable("Not implemented");
72 }
73
  virtual unsigned
  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
                                   ProfileSummaryInfo *PSI,
                                   BlockFrequencyInfo *BFI) const {
78 (void)PSI;
79 (void)BFI;
80 JTSize = 0;
81 return SI.getNumCases();
82 }
83
  virtual InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const {
87 llvm_unreachable("Not implemented");
88 }
89
  virtual unsigned getInliningThresholdMultiplier() const { return 1; }
  virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
    return 8;
  }
  virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    return 8;
  }
  virtual int getInliningLastCallToStaticBonus() const {
    // This is the value of InlineConstants::LastCallToStaticBonus before it
    // was removed along with the introduction of this function.
    return 15000;
  }
102 virtual unsigned adjustInliningThreshold(const CallBase *CB) const {
103 return 0;
104 }
105 virtual unsigned getCallerAllocaCost(const CallBase *CB,
106 const AllocaInst *AI) const {
107 return 0;
108 };
109
110 virtual int getInlinerVectorBonusPercent() const { return 150; }
111
  virtual InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }
115
116 virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { return 64; }
117
118 // Although this default value is arbitrary, it is not random. It is assumed
119 // that a condition that evaluates the same way by a higher percentage than
120 // this is best represented as control flow. Therefore, the default value N
121 // should be set such that the win from N% correct executions is greater than
122 // the loss from (100 - N)% mispredicted executions for the majority of
123 // intended targets.
  virtual BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }
127
128 virtual InstructionCost getBranchMispredictPenalty() const { return 0; }
129
130 virtual bool hasBranchDivergence(const Function *F = nullptr) const {
131 return false;
132 }
133
134 virtual bool isSourceOfDivergence(const Value *V) const { return false; }
135
136 virtual bool isAlwaysUniform(const Value *V) const { return false; }
137
138 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
139 return false;
140 }
141
142 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
143 return true;
144 }
145
146 virtual unsigned getFlatAddressSpace() const { return -1; }
147
  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const {
150 return false;
151 }
152
153 virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  virtual bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
157 };
158
159 virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
160
161 virtual bool isSingleThreaded() const { return false; }
162
  virtual std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
166 }
167
  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                  Value *OldV,
170 Value *NewV) const {
171 return nullptr;
172 }
173
174 virtual bool isLoweredToCall(const Function *F) const {
175 assert(F && "A concrete function must be provided to this routine.");
176
177 // FIXME: These should almost certainly not be handled here, and instead
178 // handled with the help of TLI or the target itself. This was largely
179 // ported from existing analysis heuristics here so that such refactorings
180 // can take place in the future.
181
182 if (F->isIntrinsic())
183 return false;
184
185 if (F->hasLocalLinkage() || !F->hasName())
186 return true;
187
188 StringRef Name = F->getName();
189
190 // These will all likely lower to a single selection DAG node.
191 // clang-format off
192 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
193 Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
194 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
195 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
196 Name == "sin" || Name == "sinf" || Name == "sinl" ||
197 Name == "cos" || Name == "cosf" || Name == "cosl" ||
198 Name == "tan" || Name == "tanf" || Name == "tanl" ||
199 Name == "asin" || Name == "asinf" || Name == "asinl" ||
200 Name == "acos" || Name == "acosf" || Name == "acosl" ||
201 Name == "atan" || Name == "atanf" || Name == "atanl" ||
202 Name == "atan2" || Name == "atan2f" || Name == "atan2l"||
203 Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
204 Name == "cosh" || Name == "coshf" || Name == "coshl" ||
205 Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
206 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ||
207 Name == "exp10" || Name == "exp10l" || Name == "exp10f")
208 return false;
209 // clang-format on
210 // These are all likely to be optimized into something smaller.
211 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
212 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
213 Name == "floorf" || Name == "ceil" || Name == "round" ||
214 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
215 Name == "llabs")
216 return false;
217
218 return true;
219 }
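  // Illustrative sketch (not part of the original header): with this default,
  //   %r = call double @sqrt(double %x)   ; assumed to become a single node
  // is not costed as a real call, whereas a call to an arbitrary external
  // function such as
  //   %s = call i32 @bar(i32 %x)
  // still is.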
220
  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
223 TargetLibraryInfo *LibInfo,
224 HardwareLoopInfo &HWLoopInfo) const {
225 return false;
226 }
227
228 virtual unsigned getEpilogueVectorizationMinVF() const { return 16; }
229
  virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
    return false;
232 }
233
234 virtual TailFoldingStyle
235 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }
238
  virtual std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
241 return std::nullopt;
242 }
243
  virtual std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
246 APInt DemandedMask, KnownBits &Known,
247 bool &KnownBitsComputed) const {
248 return std::nullopt;
249 }
250
251 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
252 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
253 APInt &UndefElts2, APInt &UndefElts3,
254 std::function<void(Instruction *, unsigned, APInt, APInt &)>
255 SimplifyAndSetOp) const {
256 return std::nullopt;
257 }
258
  virtual void getUnrollingPreferences(Loop *, ScalarEvolution &,
                                       TTI::UnrollingPreferences &,
                                       OptimizationRemarkEmitter *) const {}

  virtual void getPeelingPreferences(Loop *, ScalarEvolution &,
                                     TTI::PeelingPreferences &) const {}

266 virtual bool isLegalAddImmediate(int64_t Imm) const { return false; }
267
268 virtual bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
269
270 virtual bool isLegalICmpImmediate(int64_t Imm) const { return false; }
271
272 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
273 int64_t BaseOffset, bool HasBaseReg,
274 int64_t Scale, unsigned AddrSpace,
275 Instruction *I = nullptr,
276 int64_t ScalableOffset = 0) const {
277 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
278 // taken from the implementation of LSR.
279 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
280 }
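  // Illustrative examples (not part of the original header) of what the
  // conservative default above accepts and rejects:
  //   isLegalAddressingMode(Ty, /*BaseGV=*/nullptr, /*BaseOffset=*/0,
  //                         /*HasBaseReg=*/true, /*Scale=*/1, AS)  -> true
  //   isLegalAddressingMode(Ty, /*BaseGV=*/nullptr, /*BaseOffset=*/8,
  //                         /*HasBaseReg=*/true, /*Scale=*/0, AS)  -> false
  // i.e. only reg and reg+reg forms pass; anything with a global base, a
  // nonzero constant offset, or a scale larger than one must be enabled by
  // the target.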
281
282 virtual bool isLSRCostLess(const TTI::LSRCost &C1,
283 const TTI::LSRCost &C2) const {
284 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
285 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
286 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
287 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
288 }
289
290 virtual bool isNumRegsMajorCostOfLSR() const { return true; }
291
292 virtual bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
293
  virtual bool isProfitableLSRChainElement(Instruction *I) const {
    return false;
296 }
297
298 virtual bool canMacroFuseCmp() const { return false; }
299
  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) const {
303 return false;
304 }
305
  virtual TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
309 }
310
311 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment,
312 unsigned AddressSpace) const {
313 return false;
314 }
315
316 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment,
317 unsigned AddressSpace) const {
318 return false;
319 }
320
321 virtual bool isLegalNTStore(Type *DataType, Align Alignment) const {
322 // By default, assume nontemporal memory stores are available for stores
323 // that are aligned and have a size that is a power of 2.
324 unsigned DataSize = DL.getTypeStoreSize(DataType);
325 return Alignment >= DataSize && isPowerOf2_32(DataSize);
326 }
327
328 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const {
329 // By default, assume nontemporal memory loads are available for loads that
330 // are aligned and have a size that is a power of 2.
331 unsigned DataSize = DL.getTypeStoreSize(DataType);
332 return Alignment >= DataSize && isPowerOf2_32(DataSize);
333 }
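  // Illustrative examples (not part of the original header) of the default
  // nontemporal legality checks above, given a 16-byte <4 x float>:
  //   isLegalNTStore(<4 x float>, Align(16)) -> true   (aligned, pow-2 size)
  //   isLegalNTStore(<4 x float>, Align(4))  -> false  (under-aligned)
  //   isLegalNTStore(<3 x i32>,   Align(16)) -> false  (12 bytes, not pow-2)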
334
335 virtual bool isLegalBroadcastLoad(Type *ElementTy,
336 ElementCount NumElements) const {
337 return false;
338 }
339
340 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
341 return false;
342 }
343
344 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
345 return false;
346 }
347
  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
                                          Align Alignment) const {
350 return false;
351 }
352
  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
                                           Align Alignment) const {
355 return false;
356 }
357
358 virtual bool isLegalMaskedCompressStore(Type *DataType,
359 Align Alignment) const {
360 return false;
361 }
362
363 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
364 unsigned Opcode1,
365 const SmallBitVector &OpcodeMask) const {
366 return false;
367 }
368
369 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
370 return false;
371 }
372
373 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
374 return false;
375 }
376
377 virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
378 Align Alignment,
379 unsigned AddrSpace) const {
380 return false;
381 }
382
383 virtual bool isLegalMaskedVectorHistogram(Type *AddrType,
384 Type *DataType) const {
385 return false;
386 }
387
388 virtual bool enableOrderedReductions() const { return false; }
389
390 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) const {
391 return false;
392 }
393
394 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
395 return false;
396 }
397
398 virtual bool prefersVectorizedAddressing() const { return true; }
399
  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                               StackOffset BaseOffset,
                                               bool HasBaseReg, int64_t Scale,
                                               unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, /*I=*/nullptr,
                              BaseOffset.getScalable()))
      return 0;
    return InstructionCost::getInvalid();
  }
411
412 virtual bool LSRWithInstrQueries() const { return false; }
413
414 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
415
416 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
417
418 virtual bool useAA() const { return false; }
419
420 virtual bool isTypeLegal(Type *Ty) const { return false; }
421
422 virtual unsigned getRegUsageForType(Type *Ty) const { return 1; }
423
424 virtual bool shouldBuildLookupTables() const { return true; }
425
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) const {
    return true;
428 }
429
430 virtual bool shouldBuildRelLookupTables() const { return false; }
431
432 virtual bool useColdCCForColdCall(Function &F) const { return false; }
433
434 virtual bool useFastCCForInternalCall(Function &F) const { return true; }
435
  virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
    return false;
438 }
439
  virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                                  unsigned ScalarOpdIdx) const {
442 return false;
443 }
444
  virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                      int OpdIdx) const {
447 return OpdIdx == -1;
448 }
449
  virtual bool
  isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                   int RetIdx) const {
453 return RetIdx == 0;
454 }
455
  virtual InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
458 TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
459 ArrayRef<Value *> VL = {}) const {
460 return 0;
461 }
462
  virtual InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

469 virtual bool supportsEfficientVectorElementLoadStore() const { return false; }
470
471 virtual bool supportsTailCalls() const { return true; }
472
473 virtual bool supportsTailCallFor(const CallBase *CB) const {
474 llvm_unreachable("Not implemented");
475 }
476
477 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) const {
478 return false;
479 }
480
  virtual TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
483 return {};
484 }
485
486 virtual bool enableSelectOptimize() const { return true; }
487
488 virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const {
489 // A select with two constant operands will usually be better left as a
490 // select.
    using namespace llvm::PatternMatch;
    if (match(I, m_Select(m_Value(), m_Constant(), m_Constant())))
      return false;
    // If the select is a logical-and/logical-or then it is better treated as
    // an and/or by the backend.
    return isa<SelectInst>(I) &&
           !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                 m_LogicalOr(m_Value(), m_Value())));
  }
500
501 virtual bool enableInterleavedAccessVectorization() const { return false; }
502
  virtual bool enableMaskedInterleavedAccessVectorization() const {
    return false;
505 }
506
507 virtual bool isFPVectorizationPotentiallyUnsafe() const { return false; }
508
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
511 unsigned AddressSpace,
512 Align Alignment,
513 unsigned *Fast) const {
514 return false;
515 }
516
  virtual TTI::PopcntSupportKind
  getPopcntSupport(unsigned IntTyWidthInBit) const {
519 return TTI::PSK_Software;
520 }
521
522 virtual bool haveFastSqrt(Type *Ty) const { return false; }
523
525 return true;
526 }
527
528 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
529
530 virtual InstructionCost getFPOpCost(Type *Ty) const {
    return TTI::TCC_Basic;
  }
533
534 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
535 const APInt &Imm,
536 Type *Ty) const {
537 return 0;
538 }
539
  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                        TTI::TargetCostKind CostKind) const {
542 return TTI::TCC_Basic;
543 }
544
545 virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
546 const APInt &Imm, Type *Ty,
                                            TTI::TargetCostKind CostKind,
                                            Instruction *Inst = nullptr) const {
549 return TTI::TCC_Free;
550 }
551
552 virtual InstructionCost
553 getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
554 Type *Ty, TTI::TargetCostKind CostKind) const {
555 return TTI::TCC_Free;
556 }
557
  virtual bool preferToKeepConstantsAttached(const Instruction &Inst,
                                             const Function &Fn) const {
560 return false;
561 }
562
563 virtual unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
564 virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const {
565 return false;
566 }
567
568 virtual unsigned getRegisterClassForType(bool Vector,
569 Type *Ty = nullptr) const {
570 return Vector ? 1 : 0;
571 }
572
573 virtual const char *getRegisterClassName(unsigned ClassID) const {
574 switch (ClassID) {
575 default:
576 return "Generic::Unknown Register Class";
577 case 0:
578 return "Generic::ScalarRC";
579 case 1:
580 return "Generic::VectorRC";
581 }
582 }
583
  virtual TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

589 virtual unsigned getMinVectorRegisterBitWidth() const { return 128; }
590
591 virtual std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
592 virtual std::optional<unsigned> getVScaleForTuning() const {
593 return std::nullopt;
594 }
595 virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
596
  virtual bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

602 virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
603 return ElementCount::get(0, IsScalable);
604 }
605
606 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
607 return 0;
608 }
609 virtual unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const {
610 return VF;
611 }
612
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
615 AllowPromotionWithoutCommonHeader = false;
616 return false;
617 }
618
619 virtual unsigned getCacheLineSize() const { return 0; }
  virtual std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
628 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
629 }
630
  virtual std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
639
640 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
641 }
642
643 virtual std::optional<unsigned> getMinPageSize() const { return {}; }
644
645 virtual unsigned getPrefetchDistance() const { return 0; }
646 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
647 unsigned NumStridedMemAccesses,
648 unsigned NumPrefetches,
649 bool HasCall) const {
650 return 1;
651 }
652 virtual unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
653 virtual bool enableWritePrefetching() const { return false; }
654 virtual bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
655
  virtual InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }
663
664 virtual unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
665
  virtual InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) const {
670 // Widenable conditions will eventually lower into constants, so some
671 // operations with them will be trivially optimized away.
672 auto IsWidenableCondition = [](const Value *V) {
673 if (auto *II = dyn_cast<IntrinsicInst>(V))
674 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
675 return true;
676 return false;
677 };
    // FIXME: A number of transformation tests seem to require these values,
    // which seems a little odd given how arbitrary they are.
680 switch (Opcode) {
681 default:
682 break;
683 case Instruction::FDiv:
684 case Instruction::FRem:
685 case Instruction::SDiv:
686 case Instruction::SRem:
687 case Instruction::UDiv:
688 case Instruction::URem:
689 // FIXME: Unlikely to be true for CodeSize.
690 return TTI::TCC_Expensive;
691 case Instruction::And:
692 case Instruction::Or:
693 if (any_of(Args, IsWidenableCondition))
694 return TTI::TCC_Free;
695 break;
696 }
697
    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency &&
        Ty->getScalarType()->isFloatingPointTy())
701 return 3;
702
703 return 1;
704 }
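  // Illustrative examples (not part of the original header) of the defaults
  // above: 'sdiv i32' -> TCC_Expensive; an 'and'/'or' fed by
  // llvm.experimental.widenable.condition -> TCC_Free; 'fadd float' -> 3 when
  // costing latency; everything else -> 1.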
705
706 virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
707 unsigned Opcode1,
                                          const SmallBitVector &OpcodeMask,
                                          TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }
712
  virtual InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
717 const Instruction *CxtI = nullptr) const {
718 return 1;
719 }
720
721 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
722 Type *Src, TTI::CastContextHint CCH,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
725 switch (Opcode) {
726 default:
727 break;
728 case Instruction::IntToPtr: {
729 unsigned SrcSize = Src->getScalarSizeInBits();
730 if (DL.isLegalInteger(SrcSize) &&
731 SrcSize <= DL.getPointerTypeSizeInBits(Dst))
732 return 0;
733 break;
734 }
735 case Instruction::PtrToAddr: {
736 unsigned DstSize = Dst->getScalarSizeInBits();
737 assert(DstSize == DL.getAddressSizeInBits(Src));
738 if (DL.isLegalInteger(DstSize))
739 return 0;
740 break;
741 }
742 case Instruction::PtrToInt: {
743 unsigned DstSize = Dst->getScalarSizeInBits();
744 if (DL.isLegalInteger(DstSize) &&
745 DstSize >= DL.getPointerTypeSizeInBits(Src))
746 return 0;
747 break;
748 }
749 case Instruction::BitCast:
750 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
751 // Identity and pointer-to-pointer casts are free.
752 return 0;
753 break;
754 case Instruction::Trunc: {
755 // trunc to a native type is free (assuming the target has compare and
756 // shift-right of the same width).
757 TypeSize DstSize = DL.getTypeSizeInBits(Dst);
758 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
759 return 0;
760 break;
761 }
762 }
763 return 1;
764 }
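  // Illustrative examples (not part of the original header), assuming a target
  // with 64-bit pointers and legal i32/i64:
  //   ptrtoint ptr %p to i64   -> 0 (legal integer, not smaller than a pointer)
  //   trunc i64 %x to i32      -> 0 (trunc to a legal integer type)
  // and identity or pointer-to-pointer bitcasts are likewise free, whereas
  // e.g. 'fptoui double %d to i32' falls through and costs 1.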
765
766 virtual InstructionCost
767 getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
768 unsigned Index, TTI::TargetCostKind CostKind) const {
769 return 1;
770 }
771
772 virtual InstructionCost getCFInstrCost(unsigned Opcode,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
775 // A phi would be free, unless we're costing the throughput because it
776 // will require a register.
777 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
778 return 0;
779 return 1;
780 }
781
  virtual InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
      TTI::OperandValueInfo Op2Info, const Instruction *I) const {
786 return 1;
787 }
788
789 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                             TTI::TargetCostKind CostKind,
                                             unsigned Index, const Value *Op0,
792 const Value *Op1) const {
793 return 1;
794 }
795
796 /// \param ScalarUserAndIdx encodes the information about extracts from a
797 /// vector with 'Scalar' being the value being extracted,'User' being the user
798 /// of the extract(nullptr if user is not known before vectorization) and
799 /// 'Idx' being the extract lane.
  virtual InstructionCost getVectorInstrCost(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
802 Value *Scalar,
803 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
804 return 1;
805 }
806
  virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                             TTI::TargetCostKind CostKind,
                                             unsigned Index) const {
810 return 1;
811 }
812
813 virtual InstructionCost
816 unsigned Index) const {
817 return 1;
818 }
819
820 virtual InstructionCost
821 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
822 const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) const {
    return 1;
825 }
826
  virtual InstructionCost
  getInsertExtractValueCost(unsigned Opcode,
                            TTI::TargetCostKind CostKind) const {
830 // Note: The `insertvalue` cost here is chosen to match the default case of
831 // getInstructionCost() -- as prior to adding this helper `insertvalue` was
832 // not handled.
    if (Opcode == Instruction::InsertValue &&
        CostKind != TTI::TCK_RecipThroughput)
      return TTI::TCC_Basic;
836 return TTI::TCC_Free;
837 }
838
839 virtual InstructionCost
840 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo, const Instruction *I) const {
843 return 1;
844 }
845
  virtual InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

852 virtual InstructionCost
853 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
854 bool VariableMask, Align Alignment,
856 const Instruction *I = nullptr) const {
857 return 1;
858 }
859
  virtual InstructionCost getExpandCompressMemoryOpCost(
      unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
862 TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const {
863 return 1;
864 }
865
866 virtual InstructionCost
867 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
868 bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }
873
  virtual InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
876 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
877 bool UseMaskForCond, bool UseMaskForGaps) const {
878 return 1;
879 }
880
  virtual InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const {
884 switch (ICA.getID()) {
885 default:
886 break;
887 case Intrinsic::allow_runtime_check:
888 case Intrinsic::allow_ubsan_check:
889 case Intrinsic::annotation:
890 case Intrinsic::assume:
891 case Intrinsic::sideeffect:
892 case Intrinsic::pseudoprobe:
893 case Intrinsic::arithmetic_fence:
894 case Intrinsic::dbg_assign:
895 case Intrinsic::dbg_declare:
896 case Intrinsic::dbg_value:
897 case Intrinsic::dbg_label:
898 case Intrinsic::invariant_start:
899 case Intrinsic::invariant_end:
900 case Intrinsic::launder_invariant_group:
901 case Intrinsic::strip_invariant_group:
902 case Intrinsic::is_constant:
903 case Intrinsic::lifetime_start:
904 case Intrinsic::lifetime_end:
905 case Intrinsic::experimental_noalias_scope_decl:
906 case Intrinsic::objectsize:
907 case Intrinsic::ptr_annotation:
908 case Intrinsic::var_annotation:
909 case Intrinsic::experimental_gc_result:
910 case Intrinsic::experimental_gc_relocate:
911 case Intrinsic::coro_alloc:
912 case Intrinsic::coro_begin:
913 case Intrinsic::coro_begin_custom_abi:
914 case Intrinsic::coro_free:
915 case Intrinsic::coro_end:
916 case Intrinsic::coro_frame:
917 case Intrinsic::coro_size:
918 case Intrinsic::coro_align:
919 case Intrinsic::coro_suspend:
920 case Intrinsic::coro_subfn_addr:
921 case Intrinsic::threadlocal_address:
922 case Intrinsic::experimental_widenable_condition:
923 case Intrinsic::ssa_copy:
924 // These intrinsics don't actually represent code after lowering.
925 return 0;
926 }
927 return 1;
928 }
929
  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                           ArrayRef<Type *> Tys,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
934 }
935
936 // Assume that we have a register of the right size for the type.
937 virtual unsigned getNumberOfParts(Type *Tp) const { return 1; }
938
  virtual InstructionCost getAddressComputationCost(Type *PtrTy,
                                                    ScalarEvolution *,
                                                    const SCEV *,
942 TTI::TargetCostKind) const {
943 return 0;
944 }
945
  virtual InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
948 std::optional<FastMathFlags> FMF,
949 TTI::TargetCostKind) const {
950 return 1;
951 }
952
  virtual InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

959 virtual InstructionCost
960 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
961 VectorType *Ty, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const {
    return 1;
964 }
965
966 virtual InstructionCost
967 getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy,
                         VectorType *Ty, TTI::TargetCostKind CostKind) const {
    return 1;
970 }
971
  virtual InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
974 return 0;
975 }
976
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) const {
979 return false;
980 }
981
982 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const {
983 // Note for overrides: You must ensure for all element unordered-atomic
984 // memory intrinsics that all power-of-2 element sizes up to, and
985 // including, the return value of this method have a corresponding
986 // runtime lib call. These runtime lib call definitions can be found
987 // in RuntimeLibcalls.h
988 return 0;
989 }
990
  virtual Value *
  getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
                                    bool CanCreate = true) const {
994 return nullptr;
995 }
996
  virtual Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
999 unsigned SrcAddrSpace, unsigned DestAddrSpace,
1000 Align SrcAlign, Align DestAlign,
1001 std::optional<uint32_t> AtomicElementSize) const {
1002 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
1003 : Type::getInt8Ty(Context);
1004 }
1005
  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1008 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1009 Align SrcAlign, Align DestAlign,
1010 std::optional<uint32_t> AtomicCpySize) const {
1011 unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
1012 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
1013 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
1014 OpsOut.push_back(OpType);
1015 }
1016
1017 virtual bool areInlineCompatible(const Function *Caller,
1018 const Function *Callee) const {
1019 return (Caller->getFnAttribute("target-cpu") ==
1020 Callee->getFnAttribute("target-cpu")) &&
1021 (Caller->getFnAttribute("target-features") ==
1022 Callee->getFnAttribute("target-features"));
1023 }
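  // A minimal sketch (hypothetical target, not part of LLVM) of the kind of
  // override a backend might use instead of the strict string equality above,
  // requiring the callee's features to be a subset of the caller's:
  //
  //   bool MyTargetTTIImpl::areInlineCompatible(const Function *Caller,
  //                                             const Function *Callee) const {
  //     SmallVector<StringRef, 8> CallerFeats, CalleeFeats;
  //     Caller->getFnAttribute("target-features").getValueAsString()
  //         .split(CallerFeats, ',');
  //     Callee->getFnAttribute("target-features").getValueAsString()
  //         .split(CalleeFeats, ',');
  //     return llvm::all_of(CalleeFeats, [&](StringRef Feat) {
  //       return llvm::is_contained(CallerFeats, Feat);
  //     });
  //   }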
1024
1025 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1026 unsigned DefaultCallPenalty) const {
1027 return DefaultCallPenalty;
1028 }
1029
1030 virtual bool areTypesABICompatible(const Function *Caller,
1031 const Function *Callee,
1032 ArrayRef<Type *> Types) const {
1033 return (Caller->getFnAttribute("target-cpu") ==
1034 Callee->getFnAttribute("target-cpu")) &&
1035 (Caller->getFnAttribute("target-features") ==
1036 Callee->getFnAttribute("target-features"));
1037 }
1038
  virtual bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
    return false;
1041 }
1042
  virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
    return false;
1045 }
1046
1047 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
1048 return 128;
1049 }
1050
1051 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
1052
1053 virtual bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
1054
1055 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1056 Align Alignment,
1057 unsigned AddrSpace) const {
1058 return true;
1059 }
1060
1061 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1062 Align Alignment,
1063 unsigned AddrSpace) const {
1064 return true;
1065 }
1066
  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                           ElementCount VF) const {
1069 return true;
1070 }
1071
  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const {
    return true;
1074 }
1075
1076 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1077 unsigned ChainSizeInBytes,
1078 VectorType *VecTy) const {
1079 return VF;
1080 }
1081
1082 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1083 unsigned ChainSizeInBytes,
1084 VectorType *VecTy) const {
1085 return VF;
1086 }
1087
1088 virtual bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const {
1089 return false;
1090 }
1091
1092 virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const {
1093 return false;
1094 }
1095 virtual bool preferAlternateOpcodeVectorization() const { return true; }
1096
1097 virtual bool preferPredicatedReductionSelect() const { return false; }
1098
1099 virtual bool preferEpilogueVectorization() const { return true; }
1100
1101 virtual bool shouldConsiderVectorizationRegPressure() const { return false; }
1102
1103 virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
1104 return true;
1105 }
1106
  virtual TTI::ReductionShuffle
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
    return TTI::ReductionShuffle::SplitHalf;
  }

1112 virtual unsigned getGISelRematGlobalCost() const { return 1; }
1113
1114 virtual unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
1115
1116 virtual bool supportsScalableVectors() const { return false; }
1117
1118 virtual bool enableScalableVectorization() const { return false; }
1119
1120 virtual bool hasActiveVectorLength() const { return false; }
1121
  virtual bool isProfitableToSinkOperands(Instruction *I,
                                          SmallVectorImpl<Use *> &Ops) const {
1124 return false;
1125 }
1126
1127 virtual bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }
1128
  virtual TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

1136 virtual bool hasArmWideBranch(bool) const { return false; }
1137
1138 virtual APInt getFeatureMask(const Function &F) const {
1139 return APInt::getZero(32);
1140 }
1141
1142 virtual bool isMultiversionedFunction(const Function &F) const {
1143 return false;
1144 }
1145
1146 virtual unsigned getMaxNumArgs() const { return UINT_MAX; }
1147
1148 virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
1149 Type *ArrayType) const {
1150 return 0;
1151 }
1152
  virtual void collectKernelLaunchBounds(
      const Function &F,
1155 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
1156
1157 virtual bool allowVectorElementIndexingUsingGEP() const { return true; }
1158
1159protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In the case of a vector, it returns the minimum required size for one
  // element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);
1165
      // In the case of a vector we need to pick the max of the min required
      // sizes of the elements.
1168 auto *VT = cast<FixedVectorType>(Val->getType());
1169
1170 // Assume unsigned elements
1171 isSigned = false;
1172
1173 // The max required size is the size of the vector element type
1174 unsigned MaxRequiredSize =
1175 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
1176
1177 unsigned MinRequiredSize = 0;
1178 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
1179 if (auto *IntElement =
1180 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
1181 bool signedElement = IntElement->getValue().isNegative();
1182 // Get the element min required size.
1183 unsigned ElementMinRequiredSize =
1184 IntElement->getValue().getSignificantBits() - 1;
1185 // In case one element is signed then all the vector is signed.
1186 isSigned |= signedElement;
1187 // Save the max required bit size between all the elements.
1188 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
1189 } else {
1190 // not an int constant element
1191 return MaxRequiredSize;
1192 }
1193 }
1194 return MinRequiredSize;
1195 }
1196
1197 if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
1198 isSigned = CI->getValue().isNegative();
1199 return CI->getValue().getSignificantBits() - 1;
1200 }
1201
1202 if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
1203 isSigned = true;
1204 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
1205 }
1206
1207 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
1208 isSigned = false;
1209 return Cast->getSrcTy()->getScalarSizeInBits();
1210 }
1211
1212 isSigned = false;
1213 return Val->getType()->getScalarSizeInBits();
1214 }
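  // Illustrative example (not part of the original header): for the constant
  // vector <4 x i16> <i16 1, i16 2, i16 -3, i16 4> the helper above sets
  // isSigned = true (one element is negative) and returns 3, the largest
  // per-element "getSignificantBits() - 1" value (1, 2, 2 and 3 respectively).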
1215
1216 bool isStridedAccess(const SCEV *Ptr) const {
1217 return Ptr && isa<SCEVAddRecExpr>(Ptr);
1218 }
1219
  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
1222 if (!isStridedAccess(Ptr))
1223 return nullptr;
1224 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
1225 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
1226 }
1227
  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
1230 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
1231 if (!Step)
1232 return false;
1233 APInt StrideVal = Step->getAPInt();
1234 if (StrideVal.getBitWidth() > 64)
1235 return false;
1236 // FIXME: Need to take absolute value for negative stride case.
1237 return StrideVal.getSExtValue() < MergeDistance;
1238 }
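  // Illustrative example (not part of the original header): for a pointer
  // whose SCEV is the add recurrence {%base,+,4}, getConstantStrideStep
  // returns the SCEVConstant 4, and isConstantStridedAccessLessThan(SE, Ptr,
  // /*MergeDistance=*/16) is true because the stride 4 is below 16.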
1239};
1240
1241/// CRTP base class for use as a mix-in that aids implementing
1242/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
1246 typedef TargetTransformInfoImplBase BaseT;
1247
1248protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

1251public:
1252 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1253 ArrayRef<const Value *> Operands, Type *AccessType,
1254 TTI::TargetCostKind CostKind) const override {
1255 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
1256 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
1257 bool HasBaseReg = (BaseGV == nullptr);
1258
1259 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
1260 APInt BaseOffset(PtrSizeBits, 0);
1261 int64_t Scale = 0;
1262
1263 auto GTI = gep_type_begin(PointeeType, Operands);
1264 Type *TargetType = nullptr;
1265
1266 // Handle the case where the GEP instruction has a single operand,
1267 // the basis, therefore TargetType is a nullptr.
1268 if (Operands.empty())
1269 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
1270
1271 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
1272 TargetType = GTI.getIndexedType();
1273 // We assume that the cost of Scalar GEP with constant index and the
1274 // cost of Vector GEP with splat constant index are the same.
1275 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
1276 if (!ConstIdx)
1277 if (auto Splat = getSplatValue(*I))
1278 ConstIdx = dyn_cast<ConstantInt>(Splat);
1279 if (StructType *STy = GTI.getStructTypeOrNull()) {
1280 // For structures the index is always splat or scalar constant
1281 assert(ConstIdx && "Unexpected GEP index");
1282 uint64_t Field = ConstIdx->getZExtValue();
1283 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
1284 } else {
1285 // If this operand is a scalable type, bail out early.
1286 // TODO: Make isLegalAddressingMode TypeSize aware.
1287 if (TargetType->isScalableTy())
1288 return TTI::TCC_Basic;
1289 int64_t ElementSize =
1290 GTI.getSequentialElementStride(DL).getFixedValue();
1291 if (ConstIdx) {
1292 BaseOffset +=
1293 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
1294 } else {
1295 // Needs scale register.
1296 if (Scale != 0)
1297 // No addressing mode takes two scale registers.
1298 return TTI::TCC_Basic;
1299 Scale = ElementSize;
1300 }
1301 }
1302 }
1303
1304 // If we haven't been provided a hint, use the target type for now.
1305 //
1306 // TODO: Take a look at potentially removing this: This is *slightly* wrong
1307 // as it's possible to have a GEP with a foldable target type but a memory
1308 // access that isn't foldable. For example, this load isn't foldable on
1309 // RISC-V:
1310 //
1311 // %p = getelementptr i32, ptr %base, i32 42
1312 // %x = load <2 x i32>, ptr %p
1313 if (!AccessType)
1314 AccessType = TargetType;
1315
1316 // If the final address of the GEP is a legal addressing mode for the given
1317 // access type, then we can fold it into its users.
1318 if (static_cast<const T *>(this)->isLegalAddressingMode(
1319 AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
1323
1324 // TODO: Instead of returning TCC_Basic here, we should use
1325 // getArithmeticInstrCost. Or better yet, provide a hook to let the target
1326 // model it.
1327 return TTI::TCC_Basic;
1328 }
1329
  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
1333 TTI::TargetCostKind CostKind) const override {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we take into account GEP instructions only. (Although
    // a pointer can also come from an alloca, a plain value, constants and/or
    // constant expressions, PHIs, bitcasts, or whatever else is allowed to be
    // used as a pointer.) Typically, if Base is not a GEP instruction and all
    // the pointers are relative to the same base address, the rest are either
    // GEP instructions, PHIs, bitcasts or constants. When we have the same
    // base, we just calculate the cost of each non-Base GEP as an ADD
    // operation if any of its indices is a non-constant.
    // If there are no known dependencies between the pointers, the cost is
    // calculated as a sum of the costs of the GEP instructions.
1345 for (const Value *V : Ptrs) {
1346 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
1347 if (!GEP)
1348 continue;
1349 if (Info.isSameBase() && V != Base) {
1350 if (GEP->hasAllConstantIndices())
1351 continue;
1352 Cost += static_cast<const T *>(this)->getArithmeticInstrCost(
1353 Instruction::Add, GEP->getType(), CostKind,
1354 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
1355 {});
1356 } else {
1357 SmallVector<const Value *> Indices(GEP->indices());
1358 Cost += static_cast<const T *>(this)->getGEPCost(
1359 GEP->getSourceElementType(), GEP->getPointerOperand(), Indices,
1360 AccessTy, CostKind);
1361 }
1362 }
1363 return Cost;
1364 }
1365
  InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const override {
1369 using namespace llvm::PatternMatch;
1370
1371 auto *TargetTTI = static_cast<const T *>(this);
1372 // Handle non-intrinsic calls, invokes, and callbr.
1373 // FIXME: Unlikely to be true for anything but CodeSize.
1374 auto *CB = dyn_cast<CallBase>(U);
1375 if (CB && !isa<IntrinsicInst>(U)) {
1376 if (const Function *F = CB->getCalledFunction()) {
1377 if (!TargetTTI->isLoweredToCall(F))
1378 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1379
1380 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1381 }
1382 // For indirect or other calls, scale cost by number of arguments.
1383 return TTI::TCC_Basic * (CB->arg_size() + 1);
1384 }
1385
1386 Type *Ty = U->getType();
1387 unsigned Opcode = Operator::getOpcode(U);
1388 auto *I = dyn_cast<Instruction>(U);
1389 switch (Opcode) {
1390 default:
1391 break;
1392 case Instruction::Call: {
1393 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1394 auto *Intrinsic = cast<IntrinsicInst>(U);
1395 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1396 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1397 }
1398 case Instruction::Br:
1399 case Instruction::Ret:
1400 case Instruction::PHI:
1401 case Instruction::Switch:
1402 return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1403 case Instruction::Freeze:
1404 return TTI::TCC_Free;
1405 case Instruction::ExtractValue:
1406 case Instruction::InsertValue:
1407 return TargetTTI->getInsertExtractValueCost(Opcode, CostKind);
1408 case Instruction::Alloca:
1409 if (cast<AllocaInst>(U)->isStaticAlloca())
1410 return TTI::TCC_Free;
1411 break;
1412 case Instruction::GetElementPtr: {
1413 const auto *GEP = cast<GEPOperator>(U);
1414 Type *AccessType = nullptr;
1415 // For now, only provide the AccessType in the simple case where the GEP
1416 // only has one user.
1417 if (GEP->hasOneUser() && I)
1418 AccessType = I->user_back()->getAccessType();
1419
1420 return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1421 Operands.front(), Operands.drop_front(),
1422 AccessType, CostKind);
1423 }
1424 case Instruction::Add:
1425 case Instruction::FAdd:
1426 case Instruction::Sub:
1427 case Instruction::FSub:
1428 case Instruction::Mul:
1429 case Instruction::FMul:
1430 case Instruction::UDiv:
1431 case Instruction::SDiv:
1432 case Instruction::FDiv:
1433 case Instruction::URem:
1434 case Instruction::SRem:
1435 case Instruction::FRem:
1436 case Instruction::Shl:
1437 case Instruction::LShr:
1438 case Instruction::AShr:
1439 case Instruction::And:
1440 case Instruction::Or:
1441 case Instruction::Xor:
1442 case Instruction::FNeg: {
1443 const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
1444 TTI::OperandValueInfo Op2Info;
1445 if (Opcode != Instruction::FNeg)
1446 Op2Info = TTI::getOperandInfo(Operands[1]);
1447 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1448 Op2Info, Operands, I);
1449 }
1450 case Instruction::IntToPtr:
1451 case Instruction::PtrToAddr:
1452 case Instruction::PtrToInt:
1453 case Instruction::SIToFP:
1454 case Instruction::UIToFP:
1455 case Instruction::FPToUI:
1456 case Instruction::FPToSI:
1457 case Instruction::Trunc:
1458 case Instruction::FPTrunc:
1459 case Instruction::BitCast:
1460 case Instruction::FPExt:
1461 case Instruction::SExt:
1462 case Instruction::ZExt:
1463 case Instruction::AddrSpaceCast: {
1464 Type *OpTy = Operands[0]->getType();
1465 return TargetTTI->getCastInstrCost(
1466 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1467 }
1468 case Instruction::Store: {
1469 auto *SI = cast<StoreInst>(U);
1470 Type *ValTy = Operands[0]->getType();
1471 TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
1472 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1473 SI->getPointerAddressSpace(), CostKind,
1474 OpInfo, I);
1475 }
1476 case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
1480 auto *LI = cast<LoadInst>(U);
1481 Type *LoadType = U->getType();
1482 // If there is a non-register sized type, the cost estimation may expand
1483 // it to be several instructions to load into multiple registers on the
1484 // target. But, if the only use of the load is a trunc instruction to a
1485 // register sized type, the instruction selector can combine these
1486 // instructions to be a single load. So, in this case, we use the
1487 // destination type of the trunc instruction rather than the load to
1488 // accurately estimate the cost of this load instruction.
1489 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1490 !LoadType->isVectorTy()) {
1491 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1492 LoadType = TI->getDestTy();
1493 }
1494 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
1497 }
1498 case Instruction::Select: {
1499 const Value *Op0, *Op1;
1500 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
1501 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
1502 // select x, y, false --> x & y
1503 // select x, true, y --> x | y
1504 const auto Op1Info = TTI::getOperandInfo(Op0);
1505 const auto Op2Info = TTI::getOperandInfo(Op1);
1506 assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1507 Op1->getType()->getScalarSizeInBits() == 1);
1508
1509 SmallVector<const Value *, 2> Operands{Op0, Op1};
1510 return TargetTTI->getArithmeticInstrCost(
1511 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1512 CostKind, Op1Info, Op2Info, Operands, I);
1513 }
1514 const auto Op1Info = TTI::getOperandInfo(Operands[1]);
1515 const auto Op2Info = TTI::getOperandInfo(Operands[2]);
1516 Type *CondTy = Operands[0]->getType();
1517 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
1520 }
1521 case Instruction::ICmp:
1522 case Instruction::FCmp: {
1523 const auto Op1Info = TTI::getOperandInfo(Operands[0]);
1524 const auto Op2Info = TTI::getOperandInfo(Operands[1]);
1525 Type *ValTy = Operands[0]->getType();
1526 // TODO: Also handle ICmp/FCmp constant expressions.
1527 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1528 I ? cast<CmpInst>(I)->getPredicate()
                                               : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
1531 }
1532 case Instruction::InsertElement: {
1533 auto *IE = dyn_cast<InsertElementInst>(U);
1534 if (!IE)
1535 return TTI::TCC_Basic; // FIXME
1536 unsigned Idx = -1;
1537 if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
1538 if (CI->getValue().getActiveBits() <= 32)
1539 Idx = CI->getZExtValue();
1540 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
1541 }
1542 case Instruction::ShuffleVector: {
1543 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1544 if (!Shuffle)
1545 return TTI::TCC_Basic; // FIXME
1546
1547 auto *VecTy = cast<VectorType>(U->getType());
1548 auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
1549 ArrayRef<int> Mask = Shuffle->getShuffleMask();
1550 int NumSubElts, SubIndex;
1551
1552 // Treat undef/poison mask as free (no matter the length).
1553 if (all_of(Mask, [](int M) { return M < 0; }))
1554 return TTI::TCC_Free;
1555
1556 // TODO: move more of this inside improveShuffleKindFromMask.
1557 if (Shuffle->changesLength()) {
1558 // Treat a 'subvector widening' as a free shuffle.
1559 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1560 return TTI::TCC_Free;
1561
1562 if (Shuffle->isExtractSubvectorMask(SubIndex))
1563 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
1564 VecSrcTy, Mask, CostKind, SubIndex,
1565 VecTy, Operands, Shuffle);
1566
1567 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1568 return TargetTTI->getShuffleCost(
1569 TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
1570 SubIndex,
1571 FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1572 Operands, Shuffle);
1573
1574 int ReplicationFactor, VF;
1575 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1576 APInt DemandedDstElts = APInt::getZero(Mask.size());
1577 for (auto I : enumerate(Mask)) {
1578 if (I.value() != PoisonMaskElem)
1579 DemandedDstElts.setBit(I.index());
1580 }
1581 return TargetTTI->getReplicationShuffleCost(
1582 VecSrcTy->getElementType(), ReplicationFactor, VF,
1583 DemandedDstElts, CostKind);
1584 }
1585
1586 bool IsUnary = isa<UndefValue>(Operands[1]);
1587 NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
1588 SmallVector<int, 16> AdjustMask(Mask);
1589
1590 // Widening shuffle - widening the source(s) to the new length
1591 // (treated as free - see above), and then perform the adjusted
1592 // shuffle at that width.
1593 if (Shuffle->increasesLength()) {
1594 for (int &M : AdjustMask)
1595 M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
1596
1597 return TargetTTI->getShuffleCost(
            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
            VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1600 }
1601
1602 // Narrowing shuffle - perform shuffle at original wider width and
1603 // then extract the lower elements.
1604 // FIXME: This can assume widening, which is not true of all vector
1605 // architectures (and is not even the default).
1606 AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
1607
1608 InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
            VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
1611 Shuffle);
1612
1613 SmallVector<int, 16> ExtractMask(Mask.size());
1614 std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
1615 return ShuffleCost + TargetTTI->getShuffleCost(
1616 TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
1617 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
1618 }
1619
1620 if (Shuffle->isIdentity())
1621 return TTI::TCC_Free;
1622
1623 if (Shuffle->isReverse())
1624 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
1625 CostKind, 0, nullptr, Operands,
1626 Shuffle);
1627
1628 if (Shuffle->isTranspose())
1629 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
1630 Mask, CostKind, 0, nullptr, Operands,
1631 Shuffle);
1632
1633 if (Shuffle->isZeroEltSplat())
1634 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
1635 Mask, CostKind, 0, nullptr, Operands,
1636 Shuffle);
1637
1638 if (Shuffle->isSingleSource())
1639 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1640 VecSrcTy, Mask, CostKind, 0, nullptr,
1641 Operands, Shuffle);
1642
1643 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1644 return TargetTTI->getShuffleCost(
1645 TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
1646 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
1647 Shuffle);
1648
1649 if (Shuffle->isSelect())
1650 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
1651 CostKind, 0, nullptr, Operands,
1652 Shuffle);
1653
1654 if (Shuffle->isSplice(SubIndex))
1655 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
1656 CostKind, SubIndex, nullptr, Operands,
1657 Shuffle);
1658
1659 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
1660 Mask, CostKind, 0, nullptr, Operands,
1661 Shuffle);
1662 }
1663 case Instruction::ExtractElement: {
1664 auto *EEI = dyn_cast<ExtractElementInst>(U);
1665 if (!EEI)
1666 return TTI::TCC_Basic; // FIXME
1667 unsigned Idx = -1;
1668 if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
1669 if (CI->getValue().getActiveBits() <= 32)
1670 Idx = CI->getZExtValue();
1671 Type *DstTy = Operands[0]->getType();
1672 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1673 }
1674 }
1675
1676 // By default, just classify everything remaining as 'basic'.
1677 return TTI::TCC_Basic;
1678 }
1679
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override {
    auto *TargetTTI = static_cast<const T *>(this);
1682 SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TTI::TCK_SizeAndLatency);
    return Cost >= TTI::TCC_Expensive;
  }
1687
1688 bool supportsTailCallFor(const CallBase *CB) const override {
1689 return static_cast<const T *>(this)->supportsTailCalls();
1690 }
1691};
1692} // namespace llvm
1693
1694#endif
virtual bool isLegalICmpImmediate(int64_t Imm) const
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, const Instruction *I) const
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
virtual bool haveFastSqrt(Type *Ty) const
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
virtual InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
virtual std::optional< unsigned > getVScaleForTuning() const
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
virtual unsigned getNumberOfParts(Type *Tp) const
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
virtual void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
virtual bool useColdCCForColdCall(Function &F) const
virtual InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual unsigned getNumberOfRegisters(unsigned ClassID) const
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
virtual bool isLegalAddScalableImmediate(int64_t Imm) const
virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg)
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) const
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
virtual unsigned getMinVectorRegisterBitWidth() const
unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const
virtual bool shouldBuildLookupTablesForConstant(Constant *C) const
virtual bool isFPVectorizationPotentiallyUnsafe() const
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
virtual InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
virtual std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
virtual unsigned getEpilogueVectorizationMinVF() const
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const
virtual TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
virtual TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const
virtual unsigned getMaxPrefetchIterationsAhead() const
virtual bool allowVectorElementIndexingUsingGEP() const
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const
virtual TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
const SCEVConstant * getConstantStrideStep(ScalarEvolution *SE, const SCEV *Ptr) const
virtual bool hasBranchDivergence(const Function *F=nullptr) const
virtual InstructionCost getArithmeticReductionCost(unsigned, VectorType *, std::optional< FastMathFlags > FMF, TTI::TargetCostKind) const
virtual bool isProfitableToHoist(Instruction *I) const
virtual const char * getRegisterClassName(unsigned ClassID) const
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
virtual bool isLegalNTStore(Type *DataType, Align Alignment) const
virtual InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual APInt getFeatureMask(const Function &F) const
virtual std::optional< unsigned > getMinPageSize() const
virtual unsigned getRegUsageForType(Type *Ty) const
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const
virtual bool isLoweredToCall(const Function *F) const
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const
virtual BranchProbability getPredictableBranchThreshold() const
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
virtual bool isLegalToVectorizeStore(StoreInst *SI) const
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const
virtual bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const
virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const
virtual bool isMultiversionedFunction(const Function &F) const
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
virtual bool isLegalAddImmediate(int64_t Imm) const
virtual InstructionCost getInsertExtractValueCost(unsigned Opcode, TTI::TargetCostKind CostKind) const
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const
virtual InstructionCost getBranchMispredictPenalty() const
virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const
virtual bool enableMaskedInterleavedAccessVectorization() const
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const
virtual unsigned getInliningThresholdMultiplier() const
TargetTransformInfoImplBase(const DataLayout &DL)
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
virtual bool isAlwaysUniform(const Value *V) const
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, const Instruction *I) const
virtual bool shouldExpandReduction(const IntrinsicInst *II) const
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
virtual unsigned getGISelRematGlobalCost() const
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const
virtual bool isTypeLegal(Type *Ty) const
virtual unsigned getAssumedAddrSpace(const Value *V) const
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx) const
virtual unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
virtual InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace) const
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
virtual bool supportsTailCallFor(const CallBase *CB) const
virtual std::optional< unsigned > getMaxVScale() const
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const
virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
virtual bool shouldConsiderVectorizationRegPressure() const
virtual InstructionCost getMemcpyCost(const Instruction *I) const
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
virtual bool isSourceOfDivergence(const Value *V) const
virtual bool useFastCCForInternalCall(Function &F) const
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
virtual void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &, OptimizationRemarkEmitter *) const
TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)=default
virtual bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
virtual bool supportsEfficientVectorElementLoadStore() const
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
virtual unsigned getMinTripCountTailFoldingThreshold() const
virtual TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
virtual void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
bool supportsTailCallFor(const CallBase *CB) const override
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
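For orientation alongside the hook listing above, a minimal, purely illustrative sketch of a hypothetical class overriding two of the virtual hooks on this page. In-tree targets normally derive from BasicTTIImplBase rather than subclassing TargetTransformInfoImplBase directly; the class name and policies below are invented for the example.

#include "llvm/Analysis/TargetTransformInfoImpl.h"
using namespace llvm;

// Hypothetical target hook implementation, for illustration only.
class MyTargetTTIImpl : public TargetTransformInfoImplBase {
public:
  explicit MyTargetTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplBase(DL) {}

  // Pretend 12-bit signed immediates fold into add instructions.
  bool isLegalAddImmediate(int64_t Imm) const override {
    return Imm >= -2048 && Imm < 2048;
  }

  // Pretend inlining is twice as attractive on this target.
  unsigned getInliningThresholdMultiplier() const override { return 2; }
};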
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of the instruction.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
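For orientation, a minimal, illustrative sketch of how the cost kinds and cost constants above are typically used; the helper names and the opcode policy are assumptions for the example, not code from this header.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Illustrative policy: casts are assumed to fold away, divisions are treated
// as expensive, and everything else gets the cost of a typical add.
InstructionCost roughOpcodeCost(unsigned Opcode) {
  switch (Opcode) {
  case Instruction::BitCast:
    return TargetTransformInfo::TCC_Free;
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
    return TargetTransformInfo::TCC_Expensive;
  default:
    return TargetTransformInfo::TCC_Basic;
  }
}

// Querying a full cost model instead: the same instruction can be costed for
// throughput or for code size, and the result may be invalid.
bool isCheapForSize(const TargetTransformInfo &TTI, const Instruction &I) {
  InstructionCost C =
      TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
  return C.isValid() && C <= TargetTransformInfo::TCC_Basic;
}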
MemIndexedMode
The type of load/store indexing.
AddressingModeKind
Which addressing mode Loop Strength Reduction will try to generate.
@ AMK_None
Don't prefer any addressing mode.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
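A minimal sketch of costing two of the shuffle kinds above on <4 x i32>. It assumes the public TargetTransformInfo::getShuffleCost wrapper mirrors the hook signature listed earlier on this page, with the trailing parameters defaulted; the masks follow the usual shufflevector conventions.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Illustrative only: cost an SK_Broadcast and an SK_Reverse shuffle.
void costShuffles(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  SmallVector<int, 4> BroadcastMask = {0, 0, 0, 0}; // every lane reads lane 0
  SmallVector<int, 4> ReverseMask = {3, 2, 1, 0};   // lanes in reverse order

  InstructionCost BroadcastCost = TTI.getShuffleCost(
      TargetTransformInfo::SK_Broadcast, VecTy, VecTy, BroadcastMask,
      TargetTransformInfo::TCK_RecipThroughput);
  InstructionCost ReverseCost = TTI.getShuffleCost(
      TargetTransformInfo::SK_Reverse, VecTy, VecTy, ReverseMask,
      TargetTransformInfo::TCK_RecipThroughput);
  (void)BroadcastCost;
  (void)ReverseCost;
}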
CastContextHint
Represents a hint about the context in which a cast is used.
CacheLevel
The possible cache levels.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
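A small illustrative sketch (not from this header) of the Type queries listed above; note that getScalarSizeInBits on a vector type reports the element width.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Illustrative only.
void inspectTypes(LLVMContext &Ctx) {
  Type *I16 = Type::getIntNTy(Ctx, 16);        // i16
  Type *V8I16 = FixedVectorType::get(I16, 8);  // <8 x i16>
  unsigned ElemBits = V8I16->getScalarSizeInBits(); // 16, the element width
  bool IsVec = V8I16->isVectorTy();                 // true
  (void)ElemBits;
  (void)IsVec;
}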
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
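A minimal sketch of the usual pattern around scalable quantities: check isScalable() before reading a size as a plain integer. It assumes a DataLayout size query that returns a TypeSize; the helper name is illustrative.

#include <cstdint>
#include <optional>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Illustrative only: a scalable size has no single compile-time value.
std::optional<uint64_t> fixedSizeInBits(const DataLayout &DL, Type *Ty) {
  TypeSize TS = DL.getTypeSizeInBits(Ty);
  if (TS.isScalable())
    return std::nullopt; // size is a runtime multiple of vscale
  return TS.getFixedValue();
}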
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool match(Val *V, const Pattern &P)
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
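A minimal illustrative sketch of the pattern-matching combinators listed above; the predicate and its name are assumptions made for the example.

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Illustrative only.
bool looksSelectLike(Value *V) {
  // A select whose condition is any Constant and whose arms are arbitrary.
  if (match(V, m_Select(m_Constant(), m_Value(), m_Value())))
    return true;
  // Either a logical-and or a logical-or, accepted via m_CombineOr.
  return match(V, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
}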
This is an optimization pass for GlobalISel generic memory operations.
@ Length
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
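A one-line sketch of the range-based all_of wrapper; the helper name is illustrative.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Illustrative only: true when every type in the list is a vector type.
bool allVectorTypes(ArrayRef<Type *> Tys) {
  return all_of(Tys, [](const Type *T) { return T->isVectorTy(); });
}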
InstructionCost Cost
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), where A is the 0-based index of the item and B, C, ... are the corresponding items from each input range.
Definition STLExtras.h:2472
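A short sketch of enumerate; the helper name is illustrative.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Illustrative only: walk a container with its 0-based index attached.
int firstNegativeIndex(const SmallVectorImpl<int> &Vals) {
  for (const auto &En : enumerate(Vals))
    if (En.value() < 0)
      return static_cast<int>(En.index());
  return -1;
}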
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
constexpr int PoisonMaskElem
RecurKind
These are the kinds of recurrences that we support.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
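A small sketch contrasting the casting utilities listed above (isa, dyn_cast, cast); the helper functions are illustrative.

#include <cstdint>
#include "llvm/IR/Constants.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Illustrative only.
uint64_t zextValueOrZero(const Value *V) {
  // dyn_cast: a checked downcast that yields nullptr on mismatch.
  if (const auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getZExtValue();
  return 0;
}

bool isAnyConstant(const Value *V) {
  // isa: a pure type query; cast (not shown) asserts instead of failing.
  return isa<Constant>(V);
}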
gep_type_iterator gep_type_begin(const User *GEP)
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Parameters that control the generic loop unrolling transformation.