1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Analysis/VectorUtils.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/GetElementPtrTypeIterator.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/Operator.h"
24#include "llvm/IR/PatternMatch.h"
25#include <optional>
26#include <utility>
27
28namespace llvm {
29
30class Function;
31
32/// Base class for use as a mix-in that aids implementing
33/// a TargetTransformInfo-compatible class.
34class TargetTransformInfoImplBase {
35
36protected:
37 typedef TargetTransformInfo TTI;
38
39 const DataLayout &DL;
40
41 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
42
43public:
44 virtual ~TargetTransformInfoImplBase();
45
46 // Provide value semantics. MSVC requires that we spell all of these out.
47 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
48 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
49
50 virtual const DataLayout &getDataLayout() const { return DL; }
51
52 // FIXME: It looks like this implementation is dead. All clients appear to
53 // use the (non-const) version from `TargetTransformInfoImplCRTPBase`.
54 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
55 ArrayRef<const Value *> Operands,
56 Type *AccessType,
57 TTI::TargetCostKind CostKind) const {
58 // In the basic model, we just assume that all-constant GEPs will be folded
59 // into their uses via addressing modes.
60 for (const Value *Operand : Operands)
61 if (!isa<Constant>(Operand))
62 return TTI::TCC_Basic;
63
64 return TTI::TCC_Free;
65 }
66
67 virtual InstructionCost
68 getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
69 const TTI::PointersChainInfo &Info, Type *AccessTy,
70 TTI::TargetCostKind CostKind) const {
71 llvm_unreachable("Not implemented");
72 }
73
74 virtual unsigned
75 getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
76 ProfileSummaryInfo *PSI,
77 BlockFrequencyInfo *BFI) const {
78 (void)PSI;
79 (void)BFI;
80 JTSize = 0;
81 return SI.getNumCases();
82 }
83
84 virtual InstructionCost
85 getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
86 TTI::TargetCostKind CostKind) const {
87 llvm_unreachable("Not implemented");
88 }
89
90 virtual unsigned getInliningThresholdMultiplier() const { return 1; }
91 virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
92 return 8;
93 }
94 virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
95 return 8;
96 }
97 virtual int getInliningLastCallToStaticBonus() const {
98 // This is the value of InlineConstants::LastCallToStaticBonus before it was
99 // removed along with the introduction of this function.
100 return 15000;
101 }
102 virtual unsigned adjustInliningThreshold(const CallBase *CB) const {
103 return 0;
104 }
105 virtual unsigned getCallerAllocaCost(const CallBase *CB,
106 const AllocaInst *AI) const {
107 return 0;
108 };
109
110 virtual int getInlinerVectorBonusPercent() const { return 150; }
111
112 virtual InstructionCost getMemcpyCost(const Instruction *I) const {
113 return TTI::TCC_Expensive;
114 }
115
116 virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { return 64; }
117
118 // Although this default value is arbitrary, it is not random. It is assumed
119 // that a condition that evaluates the same way by a higher percentage than
120 // this is best represented as control flow. Therefore, the default value N
121 // should be set such that the win from N% correct executions is greater than
122 // the loss from (100 - N)% mispredicted executions for the majority of
123 // intended targets.
124 virtual BranchProbability getPredictableBranchThreshold() const {
125 return BranchProbability(99, 100);
126 }
127
128 virtual InstructionCost getBranchMispredictPenalty() const { return 0; }
129
130 virtual bool hasBranchDivergence(const Function *F = nullptr) const {
131 return false;
132 }
133
134 virtual bool isSourceOfDivergence(const Value *V) const { return false; }
135
136 virtual bool isAlwaysUniform(const Value *V) const { return false; }
137
138 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
139 return false;
140 }
141
142 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
143 return true;
144 }
145
146 virtual unsigned getFlatAddressSpace() const { return -1; }
147
148 virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
149 Intrinsic::ID IID) const {
150 return false;
151 }
152
153 virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
154 virtual bool
155 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
156 return AS == 0;
157 };
158
159 virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
160
161 virtual bool isSingleThreaded() const { return false; }
162
163 virtual std::pair<const Value *, unsigned>
164 getPredicatedAddrSpace(const Value *V) const {
165 return std::make_pair(nullptr, -1);
166 }
167
168 virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
169 Value *OldV,
170 Value *NewV) const {
171 return nullptr;
172 }
173
174 virtual bool isLoweredToCall(const Function *F) const {
175 assert(F && "A concrete function must be provided to this routine.");
176
177 // FIXME: These should almost certainly not be handled here, and instead
178 // handled with the help of TLI or the target itself. This was largely
179 // ported from existing analysis heuristics here so that such refactorings
180 // can take place in the future.
181
182 if (F->isIntrinsic())
183 return false;
184
185 if (F->hasLocalLinkage() || !F->hasName())
186 return true;
187
188 StringRef Name = F->getName();
189
190 // These will all likely lower to a single selection DAG node.
191 // clang-format off
192 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
193 Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
194 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
195 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
196 Name == "sin" || Name == "sinf" || Name == "sinl" ||
197 Name == "cos" || Name == "cosf" || Name == "cosl" ||
198 Name == "tan" || Name == "tanf" || Name == "tanl" ||
199 Name == "asin" || Name == "asinf" || Name == "asinl" ||
200 Name == "acos" || Name == "acosf" || Name == "acosl" ||
201 Name == "atan" || Name == "atanf" || Name == "atanl" ||
202 Name == "atan2" || Name == "atan2f" || Name == "atan2l"||
203 Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
204 Name == "cosh" || Name == "coshf" || Name == "coshl" ||
205 Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
206 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ||
207 Name == "exp10" || Name == "exp10l" || Name == "exp10f")
208 return false;
209 // clang-format on
210 // These are all likely to be optimized into something smaller.
211 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
212 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
213 Name == "floorf" || Name == "ceil" || Name == "round" ||
214 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
215 Name == "llabs")
216 return false;
217
218 return true;
219 }
220
221 virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
222 AssumptionCache &AC,
223 TargetLibraryInfo *LibInfo,
224 HardwareLoopInfo &HWLoopInfo) const {
225 return false;
226 }
227
228 virtual unsigned getEpilogueVectorizationMinVF() const { return 16; }
229
230 virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
231 return false;
232 }
233
234 virtual TailFoldingStyle
235 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
236 return TailFoldingStyle::DataWithoutLaneMask;
237 }
238
239 virtual std::optional<Instruction *>
240 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
241 return std::nullopt;
242 }
243
244 virtual std::optional<Value *>
245 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
246 APInt DemandedMask, KnownBits &Known,
247 bool &KnownBitsComputed) const {
248 return std::nullopt;
249 }
250
251 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
252 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
253 APInt &UndefElts2, APInt &UndefElts3,
254 std::function<void(Instruction *, unsigned, APInt, APInt &)>
255 SimplifyAndSetOp) const {
256 return std::nullopt;
257 }
258
259 virtual void getUnrollingPreferences(Loop *, ScalarEvolution &,
260 TTI::UnrollingPreferences &,
261 OptimizationRemarkEmitter *) const {}
262
263 virtual void getPeelingPreferences(Loop *, ScalarEvolution &,
264 TTI::PeelingPreferences &) const {}
265
266 virtual bool isLegalAddImmediate(int64_t Imm) const { return false; }
267
268 virtual bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
269
270 virtual bool isLegalICmpImmediate(int64_t Imm) const { return false; }
271
272 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
273 int64_t BaseOffset, bool HasBaseReg,
274 int64_t Scale, unsigned AddrSpace,
275 Instruction *I = nullptr,
276 int64_t ScalableOffset = 0) const {
277 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
278 // taken from the implementation of LSR.
279 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
280 }
281
282 virtual bool isLSRCostLess(const TTI::LSRCost &C1,
283 const TTI::LSRCost &C2) const {
284 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
285 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
286 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
287 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
288 }
289
290 virtual bool isNumRegsMajorCostOfLSR() const { return true; }
291
292 virtual bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
293
294 virtual bool isProfitableLSRChainElement(Instruction *I) const {
295 return false;
296 }
297
298 virtual bool canMacroFuseCmp() const { return false; }
299
300 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
301 LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
302 TargetLibraryInfo *LibInfo) const {
303 return false;
304 }
305
306 virtual TTI::AddressingModeKind
307 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
308 return TTI::AMK_None;
309 }
310
311 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment,
312 unsigned AddressSpace,
313 TTI::MaskKind MaskKind) const {
314 return false;
315 }
316
317 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment,
318 unsigned AddressSpace,
319 TTI::MaskKind MaskKind) const {
320 return false;
321 }
322
323 virtual bool isLegalNTStore(Type *DataType, Align Alignment) const {
324 // By default, assume nontemporal memory stores are available for stores
325 // that are aligned and have a size that is a power of 2.
326 unsigned DataSize = DL.getTypeStoreSize(DataType);
327 return Alignment >= DataSize && isPowerOf2_32(DataSize);
328 }
329
330 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const {
331 // By default, assume nontemporal memory loads are available for loads that
332 // are aligned and have a size that is a power of 2.
333 unsigned DataSize = DL.getTypeStoreSize(DataType);
334 return Alignment >= DataSize && isPowerOf2_32(DataSize);
335 }
336
337 virtual bool isLegalBroadcastLoad(Type *ElementTy,
338 ElementCount NumElements) const {
339 return false;
340 }
341
342 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
343 return false;
344 }
345
346 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
347 return false;
348 }
349
350 virtual bool forceScalarizeMaskedGather(VectorType *DataType,
351 Align Alignment) const {
352 return false;
353 }
354
355 virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
356 Align Alignment) const {
357 return false;
358 }
359
360 virtual bool isLegalMaskedCompressStore(Type *DataType,
361 Align Alignment) const {
362 return false;
363 }
364
365 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
366 unsigned Opcode1,
367 const SmallBitVector &OpcodeMask) const {
368 return false;
369 }
370
371 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
372 return false;
373 }
374
375 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
376 return false;
377 }
378
379 virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
380 Align Alignment,
381 unsigned AddrSpace) const {
382 return false;
383 }
384
385 virtual bool isLegalMaskedVectorHistogram(Type *AddrType,
386 Type *DataType) const {
387 return false;
388 }
389
390 virtual bool enableOrderedReductions() const { return false; }
391
392 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) const {
393 return false;
394 }
395
396 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
397 return false;
398 }
399
400 virtual bool prefersVectorizedAddressing() const { return true; }
401
402 virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
403 StackOffset BaseOffset,
404 bool HasBaseReg, int64_t Scale,
405 unsigned AddrSpace) const {
406 // Guess that all legal addressing mode are free.
407 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
408 Scale, AddrSpace, /*I=*/nullptr,
409 BaseOffset.getScalable()))
410 return 0;
411 return InstructionCost::getInvalid();
412 }
413
414 virtual bool LSRWithInstrQueries() const { return false; }
415
416 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
417
418 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
419
420 virtual bool useAA() const { return false; }
421
422 virtual bool isTypeLegal(Type *Ty) const { return false; }
423
424 virtual unsigned getRegUsageForType(Type *Ty) const { return 1; }
425
426 virtual bool shouldBuildLookupTables() const { return true; }
427
428 virtual bool shouldBuildLookupTablesForConstant(Constant *C) const {
429 return true;
430 }
431
432 virtual bool shouldBuildRelLookupTables() const { return false; }
433
434 virtual bool useColdCCForColdCall(Function &F) const { return false; }
435
436 virtual bool useFastCCForInternalCall(Function &F) const { return true; }
437
438 virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
439 return false;
440 }
441
442 virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
443 unsigned ScalarOpdIdx) const {
444 return false;
445 }
446
447 virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
448 int OpdIdx) const {
449 return OpdIdx == -1;
450 }
451
452 virtual bool
453 isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
454 int RetIdx) const {
455 return RetIdx == 0;
456 }
457
458 virtual InstructionCost getScalarizationOverhead(
459 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
460 TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
461 ArrayRef<Value *> VL = {}) const {
462 return 0;
463 }
464
465 virtual InstructionCost
466 getOperandsScalarizationOverhead(ArrayRef<Type *> Tys,
467 TTI::TargetCostKind CostKind) const {
468 return 0;
469 }
470
471 virtual bool supportsEfficientVectorElementLoadStore() const { return false; }
472
473 virtual bool supportsTailCalls() const { return true; }
474
475 virtual bool supportsTailCallFor(const CallBase *CB) const {
476 llvm_unreachable("Not implemented");
477 }
478
479 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) const {
480 return false;
481 }
482
483 virtual TTI::MemCmpExpansionOptions
484 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
485 return {};
486 }
487
488 virtual bool enableSelectOptimize() const { return true; }
489
490 virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const {
491 // A select with two constant operands will usually be better left as a
492 // select.
493 using namespace llvm::PatternMatch;
494 if (match(I, m_Select(m_Value(), m_Constant(), m_Constant())))
495 return false;
496 // If the select is a logical-and/logical-or then it is better treated as a
497 // and/or by the backend.
498 return isa<SelectInst>(I) &&
499 (match(I, m_LogicalAnd(m_Value(), m_Value())) ||
500 match(I, m_LogicalOr(m_Value(), m_Value())));
501 }
502
503 virtual bool enableInterleavedAccessVectorization() const { return false; }
504
505 virtual bool enableMaskedInterleavedAccessVectorization() const {
506 return false;
507 }
508
509 virtual bool isFPVectorizationPotentiallyUnsafe() const { return false; }
510
511 virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
512 unsigned BitWidth,
513 unsigned AddressSpace,
514 Align Alignment,
515 unsigned *Fast) const {
516 return false;
517 }
518
519 virtual TTI::PopcntSupportKind
520 getPopcntSupport(unsigned IntTyWidthInBit) const {
521 return TTI::PSK_Software;
522 }
523
524 virtual bool haveFastSqrt(Type *Ty) const { return false; }
525
526 virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) const {
527 return true;
528 }
529
530 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
531
532 virtual InstructionCost getFPOpCost(Type *Ty) const {
533 return TTI::TCC_Basic;
534 }
535
536 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
537 const APInt &Imm,
538 Type *Ty) const {
539 return 0;
540 }
541
542 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
543 TTI::TargetCostKind CostKind) const {
544 return TTI::TCC_Basic;
545 }
546
547 virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
548 const APInt &Imm, Type *Ty,
549 TTI::TargetCostKind CostKind,
550 Instruction *Inst = nullptr) const {
551 return TTI::TCC_Free;
552 }
553
554 virtual InstructionCost
555 getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
556 Type *Ty, TTI::TargetCostKind CostKind) const {
557 return TTI::TCC_Free;
558 }
559
560 virtual bool preferToKeepConstantsAttached(const Instruction &Inst,
561 const Function &Fn) const {
562 return false;
563 }
564
565 virtual unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
566 virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const {
567 return false;
568 }
569
570 virtual unsigned getRegisterClassForType(bool Vector,
571 Type *Ty = nullptr) const {
572 return Vector ? 1 : 0;
573 }
574
575 virtual const char *getRegisterClassName(unsigned ClassID) const {
576 switch (ClassID) {
577 default:
578 return "Generic::Unknown Register Class";
579 case 0:
580 return "Generic::ScalarRC";
581 case 1:
582 return "Generic::VectorRC";
583 }
584 }
585
586 virtual TypeSize
587 getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
588 return TypeSize::getFixed(32);
589 }
590
591 virtual unsigned getMinVectorRegisterBitWidth() const { return 128; }
592
593 virtual std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
594 virtual std::optional<unsigned> getVScaleForTuning() const {
595 return std::nullopt;
596 }
597 virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
598
599 virtual bool
600 shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
601 return false;
602 }
603
604 virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
605 return ElementCount::get(0, IsScalable);
606 }
607
608 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
609 return 0;
610 }
611 virtual unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const {
612 return VF;
613 }
614
615 virtual bool shouldConsiderAddressTypePromotion(
616 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
617 AllowPromotionWithoutCommonHeader = false;
618 return false;
619 }
620
621 virtual unsigned getCacheLineSize() const { return 0; }
622 virtual std::optional<unsigned>
623 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
624 switch (Level) {
625 case TargetTransformInfo::CacheLevel::L1D:
626 [[fallthrough]];
627 case TargetTransformInfo::CacheLevel::L2D:
628 return std::nullopt;
629 }
630 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
631 }
632
633 virtual std::optional<unsigned>
634 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
635 switch (Level) {
636 case TargetTransformInfo::CacheLevel::L1D:
637 [[fallthrough]];
638 case TargetTransformInfo::CacheLevel::L2D:
639 return std::nullopt;
640 }
641
642 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
643 }
644
645 virtual std::optional<unsigned> getMinPageSize() const { return {}; }
646
647 virtual unsigned getPrefetchDistance() const { return 0; }
648 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
649 unsigned NumStridedMemAccesses,
650 unsigned NumPrefetches,
651 bool HasCall) const {
652 return 1;
653 }
654 virtual unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
655 virtual bool enableWritePrefetching() const { return false; }
656 virtual bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
657
658 virtual InstructionCost getPartialReductionCost(
659 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
660 ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
661 TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
662 TTI::TargetCostKind CostKind) const {
663 return InstructionCost::getInvalid();
664 }
665
666 virtual unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
667
668 virtual InstructionCost getArithmeticInstrCost(
669 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
670 TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
671 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) const {
672 // Widenable conditions will eventually lower into constants, so some
673 // operations with them will be trivially optimized away.
674 auto IsWidenableCondition = [](const Value *V) {
675 if (auto *II = dyn_cast<IntrinsicInst>(V))
676 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
677 return true;
678 return false;
679 };
680 // FIXME: A number of transformation tests seem to require these values
681 // which seems a little odd for how arbitrary they are.
682 switch (Opcode) {
683 default:
684 break;
685 case Instruction::FDiv:
686 case Instruction::FRem:
687 case Instruction::SDiv:
688 case Instruction::SRem:
689 case Instruction::UDiv:
690 case Instruction::URem:
691 // FIXME: Unlikely to be true for CodeSize.
692 return TTI::TCC_Expensive;
693 case Instruction::And:
694 case Instruction::Or:
695 if (any_of(Args, IsWidenableCondition))
696 return TTI::TCC_Free;
697 break;
698 }
699
700 // Assume a 3cy latency for fp arithmetic ops.
701 if (CostKind == TTI::TCK_Latency)
702 if (Ty->getScalarType()->isFloatingPointTy())
703 return 3;
704
705 return 1;
706 }
707
708 virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
709 unsigned Opcode1,
710 const SmallBitVector &OpcodeMask,
711 TTI::TargetCostKind CostKind) const {
712 return InstructionCost::getInvalid();
713 }
714
715 virtual InstructionCost
716 getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
717 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
718 VectorType *SubTp, ArrayRef<const Value *> Args = {},
719 const Instruction *CxtI = nullptr) const {
720 return 1;
721 }
722
723 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
724 Type *Src, TTI::CastContextHint CCH,
725 TTI::TargetCostKind CostKind,
726 const Instruction *I) const {
727 switch (Opcode) {
728 default:
729 break;
730 case Instruction::IntToPtr: {
731 unsigned SrcSize = Src->getScalarSizeInBits();
732 if (DL.isLegalInteger(SrcSize) &&
733 SrcSize <= DL.getPointerTypeSizeInBits(Dst))
734 return 0;
735 break;
736 }
737 case Instruction::PtrToAddr: {
738 unsigned DstSize = Dst->getScalarSizeInBits();
739 assert(DstSize == DL.getAddressSizeInBits(Src));
740 if (DL.isLegalInteger(DstSize))
741 return 0;
742 break;
743 }
744 case Instruction::PtrToInt: {
745 unsigned DstSize = Dst->getScalarSizeInBits();
746 if (DL.isLegalInteger(DstSize) &&
747 DstSize >= DL.getPointerTypeSizeInBits(Src))
748 return 0;
749 break;
750 }
751 case Instruction::BitCast:
752 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
753 // Identity and pointer-to-pointer casts are free.
754 return 0;
755 break;
756 case Instruction::Trunc: {
757 // trunc to a native type is free (assuming the target has compare and
758 // shift-right of the same width).
759 TypeSize DstSize = DL.getTypeSizeInBits(Dst);
760 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
761 return 0;
762 break;
763 }
764 }
765 return 1;
766 }
767
768 virtual InstructionCost
769 getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
770 unsigned Index, TTI::TargetCostKind CostKind) const {
771 return 1;
772 }
773
774 virtual InstructionCost getCFInstrCost(unsigned Opcode,
775 TTI::TargetCostKind CostKind,
776 const Instruction *I = nullptr) const {
777 // A phi would be free, unless we're costing the throughput because it
778 // will require a register.
779 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
780 return 0;
781 return 1;
782 }
783
784 virtual InstructionCost getCmpSelInstrCost(
785 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
786 TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
787 TTI::OperandValueInfo Op2Info, const Instruction *I) const {
788 return 1;
789 }
790
791 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
792 TTI::TargetCostKind CostKind,
793 unsigned Index, const Value *Op0,
794 const Value *Op1) const {
795 return 1;
796 }
797
798 /// \param ScalarUserAndIdx encodes the information about extracts from a
799 /// vector with 'Scalar' being the value being extracted,'User' being the user
800 /// of the extract(nullptr if user is not known before vectorization) and
801 /// 'Idx' being the extract lane.
802 virtual InstructionCost getVectorInstrCost(
803 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
804 Value *Scalar,
805 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
806 return 1;
807 }
808
809 virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
810 TTI::TargetCostKind CostKind,
811 unsigned Index) const {
812 return 1;
813 }
814
815 virtual InstructionCost
818 unsigned Index) const {
819 return 1;
820 }
821
822 virtual InstructionCost
823 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
824 const APInt &DemandedDstElts,
825 TTI::TargetCostKind CostKind) const {
826 return 1;
827 }
828
829 virtual InstructionCost
830 getInsertExtractValueCost(unsigned Opcode,
831 TTI::TargetCostKind CostKind) const {
832 // Note: The `insertvalue` cost here is chosen to match the default case of
833 // getInstructionCost() -- as prior to adding this helper `insertvalue` was
834 // not handled.
835 if (Opcode == Instruction::InsertValue &&
837 return TTI::TCC_Basic;
838 return TTI::TCC_Free;
839 }
840
841 virtual InstructionCost
842 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
843 unsigned AddressSpace, TTI::TargetCostKind CostKind,
844 TTI::OperandValueInfo OpInfo, const Instruction *I) const {
845 return 1;
846 }
847
848 virtual InstructionCost getInterleavedMemoryOpCost(
849 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
850 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
851 bool UseMaskForCond, bool UseMaskForGaps) const {
852 return 1;
853 }
854
855 virtual InstructionCost
856 getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
857 TTI::TargetCostKind CostKind) const {
858 switch (ICA.getID()) {
859 default:
860 break;
861 case Intrinsic::allow_runtime_check:
862 case Intrinsic::allow_ubsan_check:
863 case Intrinsic::annotation:
864 case Intrinsic::assume:
865 case Intrinsic::sideeffect:
866 case Intrinsic::pseudoprobe:
867 case Intrinsic::arithmetic_fence:
868 case Intrinsic::dbg_assign:
869 case Intrinsic::dbg_declare:
870 case Intrinsic::dbg_value:
871 case Intrinsic::dbg_label:
872 case Intrinsic::invariant_start:
873 case Intrinsic::invariant_end:
874 case Intrinsic::launder_invariant_group:
875 case Intrinsic::strip_invariant_group:
876 case Intrinsic::is_constant:
877 case Intrinsic::lifetime_start:
878 case Intrinsic::lifetime_end:
879 case Intrinsic::experimental_noalias_scope_decl:
880 case Intrinsic::objectsize:
881 case Intrinsic::ptr_annotation:
882 case Intrinsic::var_annotation:
883 case Intrinsic::experimental_gc_result:
884 case Intrinsic::experimental_gc_relocate:
885 case Intrinsic::coro_alloc:
886 case Intrinsic::coro_begin:
887 case Intrinsic::coro_begin_custom_abi:
888 case Intrinsic::coro_free:
889 case Intrinsic::coro_end:
890 case Intrinsic::coro_frame:
891 case Intrinsic::coro_size:
892 case Intrinsic::coro_align:
893 case Intrinsic::coro_suspend:
894 case Intrinsic::coro_subfn_addr:
895 case Intrinsic::threadlocal_address:
896 case Intrinsic::experimental_widenable_condition:
897 case Intrinsic::ssa_copy:
898 // These intrinsics don't actually represent code after lowering.
899 return 0;
900 }
901 return 1;
902 }
903
904 virtual InstructionCost
905 getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
906 TTI::TargetCostKind CostKind) const {
907 switch (MICA.getID()) {
908 case Intrinsic::masked_scatter:
909 case Intrinsic::masked_gather:
910 case Intrinsic::masked_load:
911 case Intrinsic::masked_store:
912 case Intrinsic::vp_scatter:
913 case Intrinsic::vp_gather:
914 case Intrinsic::masked_compressstore:
915 case Intrinsic::masked_expandload:
916 return 1;
917 }
919 }
920
921 virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
922 ArrayRef<Type *> Tys,
923 TTI::TargetCostKind CostKind) const {
924 return 1;
925 }
926
927 // Assume that we have a register of the right size for the type.
928 virtual unsigned getNumberOfParts(Type *Tp) const { return 1; }
929
930 virtual InstructionCost getAddressComputationCost(Type *PtrTy,
931 ScalarEvolution *,
932 const SCEV *,
933 TTI::TargetCostKind) const {
934 return 0;
935 }
936
937 virtual InstructionCost
938 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
939 std::optional<FastMathFlags> FMF,
940 TTI::TargetCostKind) const {
941 return 1;
942 }
943
944 virtual InstructionCost
945 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
946 TTI::TargetCostKind CostKind) const {
947 return 1;
948 }
949
950 virtual InstructionCost
951 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
952 VectorType *Ty, std::optional<FastMathFlags> FMF,
953 TTI::TargetCostKind CostKind) const {
954 return 1;
955 }
956
957 virtual InstructionCost
958 getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy,
959 VectorType *Ty, TTI::TargetCostKind CostKind) const {
960 return 1;
961 }
962
963 virtual InstructionCost
964 getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
965 return 0;
966 }
967
968 virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
969 MemIntrinsicInfo &Info) const {
970 return false;
971 }
972
973 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const {
974 // Note for overrides: You must ensure for all element unordered-atomic
975 // memory intrinsics that all power-of-2 element sizes up to, and
976 // including, the return value of this method have a corresponding
977 // runtime lib call. These runtime lib call definitions can be found
978 // in RuntimeLibcalls.h
979 return 0;
980 }
981
982 virtual Value *
983 getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
984 bool CanCreate = true) const {
985 return nullptr;
986 }
987
988 virtual Type *
989 getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
990 unsigned SrcAddrSpace, unsigned DestAddrSpace,
991 Align SrcAlign, Align DestAlign,
992 std::optional<uint32_t> AtomicElementSize) const {
993 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
994 : Type::getInt8Ty(Context);
995 }
996
997 virtual void getMemcpyLoopResidualLoweringType(
998 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
999 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1000 Align SrcAlign, Align DestAlign,
1001 std::optional<uint32_t> AtomicCpySize) const {
1002 unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
1003 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
1004 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
1005 OpsOut.push_back(OpType);
1006 }
1007
1008 virtual bool areInlineCompatible(const Function *Caller,
1009 const Function *Callee) const {
1010 return (Caller->getFnAttribute("target-cpu") ==
1011 Callee->getFnAttribute("target-cpu")) &&
1012 (Caller->getFnAttribute("target-features") ==
1013 Callee->getFnAttribute("target-features"));
1014 }
1015
1016 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1017 unsigned DefaultCallPenalty) const {
1018 return DefaultCallPenalty;
1019 }
1020
1021 virtual bool areTypesABICompatible(const Function *Caller,
1022 const Function *Callee,
1023 ArrayRef<Type *> Types) const {
1024 return (Caller->getFnAttribute("target-cpu") ==
1025 Callee->getFnAttribute("target-cpu")) &&
1026 (Caller->getFnAttribute("target-features") ==
1027 Callee->getFnAttribute("target-features"));
1028 }
1029
1030 virtual bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
1031 return false;
1032 }
1033
1034 virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
1035 return false;
1036 }
1037
1038 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
1039 return 128;
1040 }
1041
1042 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
1043
1044 virtual bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
1045
1046 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1047 Align Alignment,
1048 unsigned AddrSpace) const {
1049 return true;
1050 }
1051
1052 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1053 Align Alignment,
1054 unsigned AddrSpace) const {
1055 return true;
1056 }
1057
1058 virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1059 ElementCount VF) const {
1060 return true;
1061 }
1062
1063 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const {
1064 return true;
1065 }
1066
1067 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1068 unsigned ChainSizeInBytes,
1069 VectorType *VecTy) const {
1070 return VF;
1071 }
1072
1073 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1074 unsigned ChainSizeInBytes,
1075 VectorType *VecTy) const {
1076 return VF;
1077 }
1078
1079 virtual bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const {
1080 return false;
1081 }
1082
1083 virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const {
1084 return false;
1085 }
1086 virtual bool preferAlternateOpcodeVectorization() const { return true; }
1087
1088 virtual bool preferPredicatedReductionSelect() const { return false; }
1089
1090 virtual bool preferEpilogueVectorization() const { return true; }
1091
1092 virtual bool shouldConsiderVectorizationRegPressure() const { return false; }
1093
1094 virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
1095 return true;
1096 }
1097
1098 virtual TTI::ReductionShuffle
1099 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
1100 return TTI::ReductionShuffle::SplitHalf;
1101 }
1102
1103 virtual unsigned getGISelRematGlobalCost() const { return 1; }
1104
1105 virtual unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
1106
1107 virtual bool supportsScalableVectors() const { return false; }
1108
1109 virtual bool enableScalableVectorization() const { return false; }
1110
1111 virtual bool hasActiveVectorLength() const { return false; }
1112
1113 virtual bool isProfitableToSinkOperands(Instruction *I,
1114 SmallVectorImpl<Use *> &Ops) const {
1115 return false;
1116 }
1117
1118 virtual bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }
1119
1120 virtual TargetTransformInfo::VPLegalization
1121 getVPLegalizationStrategy(const VPIntrinsic &PI) const {
1122 return TargetTransformInfo::VPLegalization(
1123 /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
1124 /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
1125 }
1126
1127 virtual bool hasArmWideBranch(bool) const { return false; }
1128
1129 virtual APInt getFeatureMask(const Function &F) const {
1130 return APInt::getZero(32);
1131 }
1132
1133 virtual bool isMultiversionedFunction(const Function &F) const {
1134 return false;
1135 }
1136
1137 virtual unsigned getMaxNumArgs() const { return UINT_MAX; }
1138
1139 virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
1140 Type *ArrayType) const {
1141 return 0;
1142 }
1143
1144 virtual void collectKernelLaunchBounds(
1145 const Function &F,
1146 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
1147
1148 virtual bool allowVectorElementIndexingUsingGEP() const { return true; }
1149
1150protected:
1151 // Obtain the minimum required size to hold the value (without the sign)
1152 // In case of a vector it returns the min required size for one element.
1153 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
1154 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
1155 const auto *VectorValue = cast<Constant>(Val);
1156
1157 // In case of a vector need to pick the max between the min
1158 // required size for each element
1159 auto *VT = cast<FixedVectorType>(Val->getType());
1160
1161 // Assume unsigned elements
1162 isSigned = false;
1163
1164 // The max required size is the size of the vector element type
1165 unsigned MaxRequiredSize =
1166 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
1167
1168 unsigned MinRequiredSize = 0;
1169 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
1170 if (auto *IntElement =
1171 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
1172 bool signedElement = IntElement->getValue().isNegative();
1173 // Get the element min required size.
1174 unsigned ElementMinRequiredSize =
1175 IntElement->getValue().getSignificantBits() - 1;
1176 // In case one element is signed then all the vector is signed.
1177 isSigned |= signedElement;
1178 // Save the max required bit size between all the elements.
1179 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
1180 } else {
1181 // not an int constant element
1182 return MaxRequiredSize;
1183 }
1184 }
1185 return MinRequiredSize;
1186 }
1187
1188 if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
1189 isSigned = CI->getValue().isNegative();
1190 return CI->getValue().getSignificantBits() - 1;
1191 }
1192
1193 if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
1194 isSigned = true;
1195 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
1196 }
1197
1198 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
1199 isSigned = false;
1200 return Cast->getSrcTy()->getScalarSizeInBits();
1201 }
1202
1203 isSigned = false;
1204 return Val->getType()->getScalarSizeInBits();
1205 }
1206
1207 bool isStridedAccess(const SCEV *Ptr) const {
1208 return Ptr && isa<SCEVAddRecExpr>(Ptr);
1209 }
1210
1211 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
1212 const SCEV *Ptr) const {
1213 if (!isStridedAccess(Ptr))
1214 return nullptr;
1215 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
1216 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
1217 }
1218
1219 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
1220 int64_t MergeDistance) const {
1221 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
1222 if (!Step)
1223 return false;
1224 APInt StrideVal = Step->getAPInt();
1225 if (StrideVal.getBitWidth() > 64)
1226 return false;
1227 // FIXME: Need to take absolute value for negative stride case.
1228 return StrideVal.getSExtValue() < MergeDistance;
1229 }
1230};
1231
1232/// CRTP base class for use as a mix-in that aids implementing
1233/// a TargetTransformInfo-compatible class.
1234template <typename T>
1235class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
1236private:
1237 typedef TargetTransformInfoImplBase BaseT;
1238
1239protected:
1240 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
1241
1242public:
1243 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1244 ArrayRef<const Value *> Operands, Type *AccessType,
1245 TTI::TargetCostKind CostKind) const override {
1246 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
1247 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
1248 bool HasBaseReg = (BaseGV == nullptr);
1249
1250 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
1251 APInt BaseOffset(PtrSizeBits, 0);
1252 int64_t Scale = 0;
1253
1254 auto GTI = gep_type_begin(PointeeType, Operands);
1255 Type *TargetType = nullptr;
1256
1257 // Handle the case where the GEP instruction has a single operand,
1258 // the basis, therefore TargetType is a nullptr.
1259 if (Operands.empty())
1260 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
1261
1262 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
1263 TargetType = GTI.getIndexedType();
1264 // We assume that the cost of Scalar GEP with constant index and the
1265 // cost of Vector GEP with splat constant index are the same.
1266 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
1267 if (!ConstIdx)
1268 if (auto Splat = getSplatValue(*I))
1269 ConstIdx = dyn_cast<ConstantInt>(Splat);
1270 if (StructType *STy = GTI.getStructTypeOrNull()) {
1271 // For structures the index is always splat or scalar constant
1272 assert(ConstIdx && "Unexpected GEP index");
1273 uint64_t Field = ConstIdx->getZExtValue();
1274 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
1275 } else {
1276 // If this operand is a scalable type, bail out early.
1277 // TODO: Make isLegalAddressingMode TypeSize aware.
1278 if (TargetType->isScalableTy())
1279 return TTI::TCC_Basic;
1280 int64_t ElementSize =
1281 GTI.getSequentialElementStride(DL).getFixedValue();
1282 if (ConstIdx) {
1283 BaseOffset +=
1284 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
1285 } else {
1286 // Needs scale register.
1287 if (Scale != 0)
1288 // No addressing mode takes two scale registers.
1289 return TTI::TCC_Basic;
1290 Scale = ElementSize;
1291 }
1292 }
1293 }
1294
1295 // If we haven't been provided a hint, use the target type for now.
1296 //
1297 // TODO: Take a look at potentially removing this: This is *slightly* wrong
1298 // as it's possible to have a GEP with a foldable target type but a memory
1299 // access that isn't foldable. For example, this load isn't foldable on
1300 // RISC-V:
1301 //
1302 // %p = getelementptr i32, ptr %base, i32 42
1303 // %x = load <2 x i32>, ptr %p
1304 if (!AccessType)
1305 AccessType = TargetType;
1306
1307 // If the final address of the GEP is a legal addressing mode for the given
1308 // access type, then we can fold it into its users.
1309 if (static_cast<const T *>(this)->isLegalAddressingMode(
1310 AccessType, const_cast<GlobalValue *>(BaseGV),
1311 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
1312 Ptr->getType()->getPointerAddressSpace()))
1313 return TTI::TCC_Free;
1314
1315 // TODO: Instead of returning TCC_Basic here, we should use
1316 // getArithmeticInstrCost. Or better yet, provide a hook to let the target
1317 // model it.
1318 return TTI::TCC_Basic;
1319 }
1320
1321 InstructionCost
1322 getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
1323 const TTI::PointersChainInfo &Info, Type *AccessTy,
1324 TTI::TargetCostKind CostKind) const override {
1325 InstructionCost Cost = TTI::TCC_Free;
1326 // In the basic model we take into account GEP instructions only
1327 // (although here can come alloca instruction, a value, constants and/or
1328 // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a
1329 // pointer). Typically, if Base is not a GEP-instruction and all the
1330 // pointers are relative to the same base address, all the rest are
1331 // either GEP instructions, PHIs, bitcasts or constants. When we have same
1332 // base, we just calculate cost of each non-Base GEP as an ADD operation if
1333 // any their index is a non-const.
1334 // If no known dependencies between the pointers, the cost is calculated as a sum
1335 // of costs of GEP instructions.
1336 for (const Value *V : Ptrs) {
1337 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
1338 if (!GEP)
1339 continue;
1340 if (Info.isSameBase() && V != Base) {
1341 if (GEP->hasAllConstantIndices())
1342 continue;
1343 Cost += static_cast<const T *>(this)->getArithmeticInstrCost(
1344 Instruction::Add, GEP->getType(), CostKind,
1345 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
1346 {});
1347 } else {
1348 SmallVector<const Value *> Indices(GEP->indices());
1349 Cost += static_cast<const T *>(this)->getGEPCost(
1350 GEP->getSourceElementType(), GEP->getPointerOperand(), Indices,
1351 AccessTy, CostKind);
1352 }
1353 }
1354 return Cost;
1355 }
1356
1357 InstructionCost
1358 getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
1359 TTI::TargetCostKind CostKind) const override {
1360 using namespace llvm::PatternMatch;
1361
1362 auto *TargetTTI = static_cast<const T *>(this);
1363 // Handle non-intrinsic calls, invokes, and callbr.
1364 // FIXME: Unlikely to be true for anything but CodeSize.
1365 auto *CB = dyn_cast<CallBase>(U);
1366 if (CB && !isa<IntrinsicInst>(U)) {
1367 if (const Function *F = CB->getCalledFunction()) {
1368 if (!TargetTTI->isLoweredToCall(F))
1369 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1370
1371 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1372 }
1373 // For indirect or other calls, scale cost by number of arguments.
1374 return TTI::TCC_Basic * (CB->arg_size() + 1);
1375 }
1376
1377 Type *Ty = U->getType();
1378 unsigned Opcode = Operator::getOpcode(U);
1379 auto *I = dyn_cast<Instruction>(U);
1380 switch (Opcode) {
1381 default:
1382 break;
1383 case Instruction::Call: {
1384 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1385 auto *Intrinsic = cast<IntrinsicInst>(U);
1386 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1387 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1388 }
1389 case Instruction::Br:
1390 case Instruction::Ret:
1391 case Instruction::PHI:
1392 case Instruction::Switch:
1393 return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1394 case Instruction::Freeze:
1395 return TTI::TCC_Free;
1396 case Instruction::ExtractValue:
1397 case Instruction::InsertValue:
1398 return TargetTTI->getInsertExtractValueCost(Opcode, CostKind);
1399 case Instruction::Alloca:
1400 if (cast<AllocaInst>(U)->isStaticAlloca())
1401 return TTI::TCC_Free;
1402 break;
1403 case Instruction::GetElementPtr: {
1404 const auto *GEP = cast<GEPOperator>(U);
1405 Type *AccessType = nullptr;
1406 // For now, only provide the AccessType in the simple case where the GEP
1407 // only has one user.
1408 if (GEP->hasOneUser() && I)
1409 AccessType = I->user_back()->getAccessType();
1410
1411 return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1412 Operands.front(), Operands.drop_front(),
1413 AccessType, CostKind);
1414 }
1415 case Instruction::Add:
1416 case Instruction::FAdd:
1417 case Instruction::Sub:
1418 case Instruction::FSub:
1419 case Instruction::Mul:
1420 case Instruction::FMul:
1421 case Instruction::UDiv:
1422 case Instruction::SDiv:
1423 case Instruction::FDiv:
1424 case Instruction::URem:
1425 case Instruction::SRem:
1426 case Instruction::FRem:
1427 case Instruction::Shl:
1428 case Instruction::LShr:
1429 case Instruction::AShr:
1430 case Instruction::And:
1431 case Instruction::Or:
1432 case Instruction::Xor:
1433 case Instruction::FNeg: {
1434 const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
1435 TTI::OperandValueInfo Op2Info;
1436 if (Opcode != Instruction::FNeg)
1437 Op2Info = TTI::getOperandInfo(Operands[1]);
1438 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1439 Op2Info, Operands, I);
1440 }
1441 case Instruction::IntToPtr:
1442 case Instruction::PtrToAddr:
1443 case Instruction::PtrToInt:
1444 case Instruction::SIToFP:
1445 case Instruction::UIToFP:
1446 case Instruction::FPToUI:
1447 case Instruction::FPToSI:
1448 case Instruction::Trunc:
1449 case Instruction::FPTrunc:
1450 case Instruction::BitCast:
1451 case Instruction::FPExt:
1452 case Instruction::SExt:
1453 case Instruction::ZExt:
1454 case Instruction::AddrSpaceCast: {
1455 Type *OpTy = Operands[0]->getType();
1456 return TargetTTI->getCastInstrCost(
1457 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1458 }
1459 case Instruction::Store: {
1460 auto *SI = cast<StoreInst>(U);
1461 Type *ValTy = Operands[0]->getType();
1462 TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
1463 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1464 SI->getPointerAddressSpace(), CostKind,
1465 OpInfo, I);
1466 }
1467 case Instruction::Load: {
1468 // FIXME: Arbitrary cost which could come from the backend.
1469 if (CostKind == TTI::TCK_Latency)
1470 return 4;
1471 auto *LI = cast<LoadInst>(U);
1472 Type *LoadType = U->getType();
1473 // If there is a non-register sized type, the cost estimation may expand
1474 // it to be several instructions to load into multiple registers on the
1475 // target. But, if the only use of the load is a trunc instruction to a
1476 // register sized type, the instruction selector can combine these
1477 // instructions to be a single load. So, in this case, we use the
1478 // destination type of the trunc instruction rather than the load to
1479 // accurately estimate the cost of this load instruction.
1480 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1481 !LoadType->isVectorTy()) {
1482 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1483 LoadType = TI->getDestTy();
1484 }
1485 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1486 LI->getPointerAddressSpace(), CostKind,
1487 {TTI::OK_AnyValue, TTI::OP_None}, I);
1488 }
1489 case Instruction::Select: {
1490 const Value *Op0, *Op1;
1491 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
1492 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
1493 // select x, y, false --> x & y
1494 // select x, true, y --> x | y
1495 const auto Op1Info = TTI::getOperandInfo(Op0);
1496 const auto Op2Info = TTI::getOperandInfo(Op1);
1497 assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1498 Op1->getType()->getScalarSizeInBits() == 1);
1499
1500 SmallVector<const Value *, 2> Operands{Op0, Op1};
1501 return TargetTTI->getArithmeticInstrCost(
1502 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1503 CostKind, Op1Info, Op2Info, Operands, I);
1504 }
1505 const auto Op1Info = TTI::getOperandInfo(Operands[1]);
1506 const auto Op2Info = TTI::getOperandInfo(Operands[2]);
1507 Type *CondTy = Operands[0]->getType();
1508 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1509 CmpInst::BAD_ICMP_PREDICATE,
1510 CostKind, Op1Info, Op2Info, I);
1511 }
1512 case Instruction::ICmp:
1513 case Instruction::FCmp: {
1514 const auto Op1Info = TTI::getOperandInfo(Operands[0]);
1515 const auto Op2Info = TTI::getOperandInfo(Operands[1]);
1516 Type *ValTy = Operands[0]->getType();
1517 // TODO: Also handle ICmp/FCmp constant expressions.
1518 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1519 I ? cast<CmpInst>(I)->getPredicate()
1520 : CmpInst::BAD_ICMP_PREDICATE,
1521 CostKind, Op1Info, Op2Info, I);
1522 }
1523 case Instruction::InsertElement: {
1524 auto *IE = dyn_cast<InsertElementInst>(U);
1525 if (!IE)
1526 return TTI::TCC_Basic; // FIXME
1527 unsigned Idx = -1;
1528 if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
1529 if (CI->getValue().getActiveBits() <= 32)
1530 Idx = CI->getZExtValue();
1531 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
1532 }
1533 case Instruction::ShuffleVector: {
1534 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1535 if (!Shuffle)
1536 return TTI::TCC_Basic; // FIXME
1537
1538 auto *VecTy = cast<VectorType>(U->getType());
1539 auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
1540 ArrayRef<int> Mask = Shuffle->getShuffleMask();
1541 int NumSubElts, SubIndex;
1542
1543 // Treat undef/poison mask as free (no matter the length).
1544 if (all_of(Mask, [](int M) { return M < 0; }))
1545 return TTI::TCC_Free;
1546
1547 // TODO: move more of this inside improveShuffleKindFromMask.
1548 if (Shuffle->changesLength()) {
1549 // Treat a 'subvector widening' as a free shuffle.
1550 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1551 return TTI::TCC_Free;
1552
1553 if (Shuffle->isExtractSubvectorMask(SubIndex))
1554 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
1555 VecSrcTy, Mask, CostKind, SubIndex,
1556 VecTy, Operands, Shuffle);
1557
1558 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1559 return TargetTTI->getShuffleCost(
1560 TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
1561 SubIndex,
1562 FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1563 Operands, Shuffle);
1564
1565 int ReplicationFactor, VF;
1566 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1567 APInt DemandedDstElts = APInt::getZero(Mask.size());
1568 for (auto I : enumerate(Mask)) {
1569 if (I.value() != PoisonMaskElem)
1570 DemandedDstElts.setBit(I.index());
1571 }
1572 return TargetTTI->getReplicationShuffleCost(
1573 VecSrcTy->getElementType(), ReplicationFactor, VF,
1574 DemandedDstElts, CostKind);
1575 }
1576
1577 bool IsUnary = isa<UndefValue>(Operands[1]);
1578 NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
1579 SmallVector<int, 16> AdjustMask(Mask);
1580
1581 // Widening shuffle - widening the source(s) to the new length
1582 // (treated as free - see above), and then perform the adjusted
1583 // shuffle at that width.
1584 if (Shuffle->increasesLength()) {
1585 for (int &M : AdjustMask)
1586 M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
1587
1588 return TargetTTI->getShuffleCost(
1589 IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
1590 VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1591 }
1592
1593 // Narrowing shuffle - perform shuffle at original wider width and
1594 // then extract the lower elements.
1595 // FIXME: This can assume widening, which is not true of all vector
1596 // architectures (and is not even the default).
1597 AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
1598
1599 InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
1600 IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
1601 VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
1602 Shuffle);
1603
1604 SmallVector<int, 16> ExtractMask(Mask.size());
1605 std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
1606 return ShuffleCost + TargetTTI->getShuffleCost(
1607 TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
1608 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
1609 }
1610
1611 if (Shuffle->isIdentity())
1612 return TTI::TCC_Free;
1613
1614 if (Shuffle->isReverse())
1615 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
1616 CostKind, 0, nullptr, Operands,
1617 Shuffle);
1618
1619 if (Shuffle->isTranspose())
1620 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
1621 Mask, CostKind, 0, nullptr, Operands,
1622 Shuffle);
1623
1624 if (Shuffle->isZeroEltSplat())
1625 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
1626 Mask, CostKind, 0, nullptr, Operands,
1627 Shuffle);
1628
1629 if (Shuffle->isSingleSource())
1630 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1631 VecSrcTy, Mask, CostKind, 0, nullptr,
1632 Operands, Shuffle);
1633
1634 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1635 return TargetTTI->getShuffleCost(
1636 TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
1637 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
1638 Shuffle);
1639
1640 if (Shuffle->isSelect())
1641 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
1642 CostKind, 0, nullptr, Operands,
1643 Shuffle);
1644
1645 if (Shuffle->isSplice(SubIndex))
1646 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
1647 CostKind, SubIndex, nullptr, Operands,
1648 Shuffle);
1649
1650 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
1651 Mask, CostKind, 0, nullptr, Operands,
1652 Shuffle);
1653 }
1654 case Instruction::ExtractElement: {
1655 auto *EEI = dyn_cast<ExtractElementInst>(U);
1656 if (!EEI)
1657 return TTI::TCC_Basic; // FIXME
1658 unsigned Idx = -1;
1659 if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
1660 if (CI->getValue().getActiveBits() <= 32)
1661 Idx = CI->getZExtValue();
1662 Type *DstTy = Operands[0]->getType();
1663 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1664 }
1665 }
1666
1667 // By default, just classify everything remaining as 'basic'.
1668 return TTI::TCC_Basic;
1669 }
1670
1671 bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override {
1672 auto *TargetTTI = static_cast<const T *>(this);
1673 SmallVector<const Value *, 4> Ops(I->operand_values());
1674 InstructionCost Cost = TargetTTI->getInstructionCost(
1675 I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
1676 return Cost >= TargetTransformInfo::TCC_Expensive;
1677 }
1678
1679 bool supportsTailCallFor(const CallBase *CB) const override {
1680 return static_cast<const T *>(this)->supportsTailCalls();
1681 }
1682};
1683} // namespace llvm
1684
1685#endif
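A minimal usage sketch (an illustration added here, not part of the LLVM header): a hypothetical target cost model could mix in TargetTransformInfoImplCRTPBase and override a few of the virtual hooks defined above. The class name MyTargetTTIImpl and the particular override values are assumptions for demonstration only.

// Illustrative sketch only -- not LLVM code. Shows how a hypothetical target
// could derive from the CRTP mix-in above; names and values are assumed.
namespace llvm {
class MyTargetTTIImpl final
    : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
  using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;

public:
  explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}

  // Pretend this target has 256-bit vector registers instead of the 128-bit
  // default supplied by TargetTransformInfoImplBase.
  unsigned getMinVectorRegisterBitWidth() const override { return 256; }

  // Model a small, nonzero branch-mispredict penalty.
  InstructionCost getBranchMispredictPenalty() const override { return 4; }
};
} // namespace llvm

Any hook left untouched keeps the conservative default from the base class, so a target only needs to override the queries where its costs genuinely differ.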
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
virtual bool haveFastSqrt(Type *Ty) const
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
virtual std::optional< unsigned > getVScaleForTuning() const
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
virtual unsigned getNumberOfParts(Type *Tp) const
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
virtual void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
virtual bool useColdCCForColdCall(Function &F) const
virtual unsigned getNumberOfRegisters(unsigned ClassID) const
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
virtual bool isLegalAddScalableImmediate(int64_t Imm) const
virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg)
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) const
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
virtual unsigned getMinVectorRegisterBitWidth() const
unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const
virtual bool shouldBuildLookupTablesForConstant(Constant *C) const
virtual bool isFPVectorizationPotentiallyUnsafe() const
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
virtual InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
virtual std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
virtual unsigned getEpilogueVectorizationMinVF() const
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const
virtual TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
virtual TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const
virtual unsigned getMaxPrefetchIterationsAhead() const
virtual bool allowVectorElementIndexingUsingGEP() const
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const
virtual TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
const SCEVConstant * getConstantStrideStep(ScalarEvolution *SE, const SCEV *Ptr) const
virtual bool hasBranchDivergence(const Function *F=nullptr) const
virtual InstructionCost getArithmeticReductionCost(unsigned, VectorType *, std::optional< FastMathFlags > FMF, TTI::TargetCostKind) const
virtual bool isProfitableToHoist(Instruction *I) const
virtual const char * getRegisterClassName(unsigned ClassID) const
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
virtual bool isLegalNTStore(Type *DataType, Align Alignment) const
virtual APInt getFeatureMask(const Function &F) const
virtual InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
virtual std::optional< unsigned > getMinPageSize() const
virtual unsigned getRegUsageForType(Type *Ty) const
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const
virtual bool isLoweredToCall(const Function *F) const
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const
virtual BranchProbability getPredictableBranchThreshold() const
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
virtual bool isLegalToVectorizeStore(StoreInst *SI) const
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const
virtual bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const
virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const
virtual bool isMultiversionedFunction(const Function &F) const
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
virtual bool isLegalAddImmediate(int64_t Imm) const
virtual InstructionCost getInsertExtractValueCost(unsigned Opcode, TTI::TargetCostKind CostKind) const
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const
virtual InstructionCost getBranchMispredictPenalty() const
virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const
virtual bool enableMaskedInterleavedAccessVectorization() const
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const
virtual unsigned getInliningThresholdMultiplier() const
TargetTransformInfoImplBase(const DataLayout &DL)
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
virtual bool isAlwaysUniform(const Value *V) const
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, const Instruction *I) const
virtual bool shouldExpandReduction(const IntrinsicInst *II) const
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
virtual unsigned getGISelRematGlobalCost() const
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const
virtual bool isTypeLegal(Type *Ty) const
virtual unsigned getAssumedAddrSpace(const Value *V) const
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx) const
virtual unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
virtual InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
virtual bool supportsTailCallFor(const CallBase *CB) const
virtual std::optional< unsigned > getMaxVScale() const
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const
virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
virtual bool shouldConsiderVectorizationRegPressure() const
virtual InstructionCost getMemcpyCost(const Instruction *I) const
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
virtual bool isSourceOfDivergence(const Value *V) const
virtual bool useFastCCForInternalCall(Function &F) const
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
virtual void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &, OptimizationRemarkEmitter *) const
TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)=default
virtual bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
virtual bool supportsEfficientVectorElementLoadStore() const
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
virtual unsigned getMinTripCountTailFoldingThreshold() const
virtual TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
virtual void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
bool supportsTailCallFor(const CallBase *CB) const override
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
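A hypothetical, stripped-down sketch of how a backend-specific implementation overrides one of the virtual hooks listed above. Real targets normally derive from BasicTTIImplBase instead; the class name and the immediate range used here are invented for illustration.
  #include "llvm/Analysis/TargetTransformInfoImpl.h"
  using namespace llvm;

  class MyTargetTTIImpl : public TargetTransformInfoImplBase {
  public:
    explicit MyTargetTTIImpl(const DataLayout &DL)
        : TargetTransformInfoImplBase(DL) {}

    // Pretend this target folds 12-bit signed immediates into integer
    // compares, instead of inheriting the conservative default answer.
    bool isLegalICmpImmediate(int64_t Imm) const override {
      return Imm >= -2048 && Imm < 2048;
    }
  };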
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
MaskKind
Some targets only support masked load/store with a constant mask.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of an instruction.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
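Illustrative sketch combining the cost kinds and cost constants above: a hypothetical helper that asks the public TargetTransformInfo wrapper whether an instruction is expected to be free under the code-size model.
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  static bool isFreeForCodeSize(const TargetTransformInfo &TTI,
                                const Instruction &I) {
    InstructionCost C =
        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
    return C == TargetTransformInfo::TCC_Free; // expected to fold away
  }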
MemIndexedMode
The type of load/store indexing.
AddressingModeKind
Which addressing mode Loop Strength Reduction will try to generate.
@ AMK_None
Don't prefer any addressing mode.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
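Illustrative sketch of querying the cost model for one of the shuffle kinds above, assuming the two-vector-type getShuffleCost overload documented in this header; the <4 x i32> type and the <0,0,0,0> broadcast mask are example values.
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  static InstructionCost broadcastCost(const TargetTransformInfo &TTI,
                                       LLVMContext &Ctx) {
    auto *V4I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
    SmallVector<int, 4> Mask(4, 0); // <0,0,0,0> replicates element 0
    return TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
                              /*DstTy=*/V4I32, /*SrcTy=*/V4I32, Mask,
                              TargetTransformInfo::TCK_RecipThroughput,
                              /*Index=*/0, /*SubTp=*/nullptr);
  }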
CastContextHint
Represents a hint about the context in which a cast is used.
CacheLevel
The possible cache levels.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
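Sketch of the Value and TypeSize helpers above: a hypothetical helper that strips pointer casts from a value and returns its fixed size in bits, or 0 when the size is scaled by vscale.
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Value.h"
  using namespace llvm;

  static uint64_t fixedBitsOrZero(const DataLayout &DL, const Value *V) {
    const Value *Stripped = V->stripPointerCasts(); // peel casts and zero GEPs
    TypeSize TS = DL.getTypeSizeInBits(Stripped->getType());
    return TS.isScalable() ? 0 : TS.getFixedValue();
  }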
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool match(Val *V, const Pattern &P)
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
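Minimal sketch of the PatternMatch helpers listed above: recognise either the bitwise or the select-based (short-circuit) form of a logical and/or.
  #include "llvm/IR/PatternMatch.h"
  using namespace llvm;
  using namespace llvm::PatternMatch;

  static bool isLogicalAndOr(Value *V) {
    Value *X, *Y;
    // m_CombineOr tries the logical-and pattern first, then the logical-or.
    return match(V, m_CombineOr(m_LogicalAnd(m_Value(X), m_Value(Y)),
                                m_LogicalOr(m_Value(X), m_Value(Y))));
  }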
This is an optimization pass for GlobalISel generic memory operations.
@ Length
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
InstructionCost Cost
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2484
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr int PoisonMaskElem
RecurKind
These are the kinds of recurrences that we support.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Parameters that control the generic loop unrolling transformation.