//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }
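  // For example, a GEP such as `getelementptr i32, ptr %p, i64 4` (all indices
  // constant) is treated as TCC_Free on the assumption that it folds into a
  // [reg + imm] addressing mode, whereas `getelementptr i32, ptr %p, i64 %i`
  // has a non-constant index and is charged TCC_Basic.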

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }
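  // For example, with the default N = 99, a condition is only treated as
  // predictable once it is expected to evaluate the same way at least 99% of
  // the time; the gain on the ~99% correctly predicted executions is assumed
  // to outweigh the misprediction penalty paid on the remaining ~1%.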

  bool hasBranchDivergence() const { return false; }

  bool useGPUDivergenceAnalysis() const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  };

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  bool isSingleThreaded() const { return false; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }
  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI) const {
    return false;
  }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const {
    return std::nullopt;
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }
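  // For example, under this default an access like [reg] or [reg + 1*reg] is
  // accepted, while [reg + 8] (a base offset), [reg + 4*reg] (a scaled index),
  // or a global-relative address is not; targets with richer addressing modes
  // are expected to override this hook.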

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }
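  // For example, a <4 x float> store (16 bytes, a power of two) is assumed to
  // have a nontemporal variant when its alignment is at least 16, while a
  // 12-byte store is rejected because its size is not a power of two.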

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool supportsTailCallFor(const CallBase *CB) const {
    return supportsTailCalls();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableSelectOptimize() const { return true; }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  };

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // Widenable conditions will eventually lower into constants, so some
    // operations with them will be trivially optimized away.
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
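    // For example, an i1 guard built as
    //   %wc = call i1 @llvm.experimental.widenable.condition()
    //   %guard = and i1 %cond, %wc
    // is expected to fold away once the widenable condition is lowered to a
    // constant, so the 'and'/'or' below is treated as free.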
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency &&
        Ty->getScalarType()->isFloatingPointTy())
      return 3;

    return 1;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }
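  // For example, with a typical 64-bit data layout that declares i64 a native
  // integer type, `ptrtoint ptr %p to i64` and `trunc i64 %x to i32` are both
  // costed as free here, while an fp cast such as `fptrunc double %d to float`
  // falls through to the default cost of 1.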

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueInfo OpInfo,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           std::optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }
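  // For example, a target returning 4 here is promising that lowering can fall
  // back to runtime helpers for 1-, 2-, and 4-byte elements (such as
  // __llvm_memcpy_element_unordered_atomic_4) when an
  // @llvm.memcpy.element.unordered.atomic call cannot be expanded inline.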

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }
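  // For example, with no atomic element size and RemainingBytes == 3, this
  // default emits three i8 copy operations; with AtomicCpySize == 4 and
  // RemainingBytes == 8, it emits two i32 operations instead.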

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferEpilogueVectorization() const {
    return true;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

  bool hasArmWideBranch(bool) const { return false; }

protected:
  // Obtain the minimum required size to hold the value (without the sign)
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector need to pick the max between the min
      // required size for each element
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements
      isSigned = false;

      // The max required size is the size of the vector element type
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // not an int constant element
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
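  // For example, a ConstantInt of 255 in an i32 reports 8 bits with isSigned
  // false, while `zext i8 %x to i32` reports 8 and `sext i8 %x to i32`
  // reports 7 with isSigned true.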

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    assert(cast<PointerType>(Ptr->getType()->getScalarType())
               ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const TTI::PointersChainInfo &Info,
                                       TTI::TargetCostKind CostKind) {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we take into account GEP instructions only
    // (although here can come alloca instruction, a value, constants and/or
    // constant expressions, PHIs, bitcasts ... whatever is allowed to be used
    // as a pointer). Typically, if Base is not a GEP-instruction and all the
    // pointers are relative to the same base address, all the rest are
    // either GEP instructions, PHIs, bitcasts or constants. When we have the
    // same base, we just calculate the cost of each non-Base GEP as an ADD
    // operation if any of their indices is a non-const.
    // If there are no known dependencies between the pointers, the cost is
    // calculated as a sum of the costs of the GEP instructions.
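    // For example, for a chain of pointers p, p+4, p+8 that share the same
    // base p, GEPs with all-constant indices add nothing, a GEP with a
    // variable index is charged as a single Add, and unrelated pointers are
    // each costed individually via getGEPCost.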
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            std::nullopt);
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                   GEP->getPointerOperand(),
                                                   Indices, CostKind);
      }
    }
    return Cost;
  }

  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front(), CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(U->getOperand(0));
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(U->getOperand(1));
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = U->getOperand(0)->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(U->getOperand(0));
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
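      // For example, when the only user is `%t = trunc i64 %v to i32`, an
      // `%v = load i64` is costed as if it were an i32 load for the code-size
      // cost kind, since codegen can fold the pair into one narrower load.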
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Shuffle->getShuffleMask(), CostKind,
                                           SubIndex, VecTy, Operands);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
              CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts =
              APInt::getZero(Shuffle->getShuffleMask().size());
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
            if (I.value() != UndefMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
            SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
            Operands);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
                                         Shuffle->getShuffleMask(), CostKind,
                                         SubIndex, nullptr, Operands);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), CostKind, 0,
                                       nullptr, Operands);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = U->getOperand(0)->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything as 'basic' or -1 to represent that
    // we don't know the throughput cost.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TTI::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
};
} // namespace llvm

#endif
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
LLVMContext & Context
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This pass exposes codegen information to IR-level passes.
@ Flags
Definition: TextStubV5.cpp:93
Class for arbitrary precision integers.
Definition: APInt.h:75
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1002
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1516
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1186
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:145
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:136
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:698
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
Definition: DataLayout.cpp:741
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:500
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:674
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:468
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:297
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:704
The core instruction combiner logic.
Definition: InstCombiner.h:45
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:781
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:177
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:41
The optimization diagnostic interface.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:653
Class to represent struct types.
Definition: DerivedTypes.h:213
Multiway switch.
Provides information about what library functions are available for the current target.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
const DataLayout & getDataLayout() const
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicCpySize) const
bool isLegalToVectorizeStore(StoreInst *SI) const
bool isLegalToVectorizeLoad(LoadInst *LI) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
std::optional< unsigned > getVScaleForTuning() const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
bool isLegalICmpImmediate(int64_t Imm) const
unsigned getRegUsageForType(Type *Ty) const
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const
bool isAlwaysUniform(const Value *V) const
bool isProfitableToHoist(Instruction *I) const
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
bool isTruncateFree(Type *Ty1, Type *Ty2) const
bool isStridedAccess(const SCEV *Ptr) const
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned, VectorType *, std::optional< FastMathFlags > FMF, TTI::TargetCostKind) const
InstructionCost getFPOpCost(Type *Ty) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool isLegalMaskedExpandLoad(Type *DataType) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
std::optional< unsigned > getMaxVScale() const
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) const
bool isProfitableLSRChainElement(Instruction *I) const
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const
bool isNoopAddrSpaceCast(unsigned, unsigned) const
unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg)
TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &, OptimizationRemarkEmitter *) const
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicElementSize) const
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
unsigned getAssumedAddrSpace(const Value *V) const
bool supportsTailCallFor(const CallBase *CB) const
bool isLegalNTStore(Type *DataType, Align Alignment) const
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
unsigned adjustInliningThreshold(const CallBase *CB) const
BranchProbability getPredictableBranchThreshold() const
InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool, TTI::TargetCostKind) const
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
const SCEVConstant * getConstantStrideStep(ScalarEvolution *SE, const SCEV *Ptr) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, const DataLayout &DL) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
bool isSourceOfDivergence(const Value *V) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
unsigned getMaxInterleaveFactor(ElementCount VF) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const
bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, const DataLayout &DL) const
bool hasDivRemOp(Type *DataType, bool IsSigned) const
InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, const SCEV *) const
bool isLegalMaskedCompressStore(Type *DataType) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt) const
bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr) const
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const
bool isLoweredToCall(const Function *F) const
TargetTransformInfoImplBase(const DataLayout &DL)
const char * getRegisterClassName(unsigned ClassID) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, const Instruction *I) const
bool useColdCCForColdCall(Function &F) const
bool shouldExpandReduction(const IntrinsicInst *II) const
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)=default
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
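A minimal sketch (hypothetical target, not code from this header) of how the CRTP mix-in is typically used: a concrete implementation derives from TargetTransformInfoImplCRTPBase over itself, forwards the DataLayout to the base, and shadows only the hooks it wants to change, with every other query falling through to the conservative defaults listed above. MyTTIImpl is an invented name for illustration.

#include "llvm/Analysis/TargetTransformInfoImpl.h"

using namespace llvm;

// Hypothetical example target; MyTTIImpl is not part of LLVM.
class MyTTIImpl : public TargetTransformInfoImplCRTPBase<MyTTIImpl> {
  using BaseT = TargetTransformInfoImplCRTPBase<MyTTIImpl>;

public:
  explicit MyTTIImpl(const DataLayout &DL) : BaseT(DL) {}

  // Shadow one default: pretend this target benefits from 2-way interleaving.
  unsigned getMaxInterleaveFactor(ElementCount) const { return 2; }
};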
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
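As a rough illustration of how these static helpers feed the cost queries above (costOfCast is a hypothetical name, not part of this header):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Derive the cast context from the instruction itself, then ask the target
// for a reciprocal-throughput cost of that cast.
static InstructionCost costOfCast(const TargetTransformInfo &TTI,
                                  const CastInst &CI) {
  TargetTransformInfo::CastContextHint CCH =
      TargetTransformInfo::getCastContextHint(&CI);
  return TTI.getCastInstrCost(CI.getOpcode(), CI.getDestTy(), CI.getSrcTy(),
                              CCH, TargetTransformInfo::TCK_RecipThroughput,
                              &CI);
}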
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of the instruction.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
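To make the cost-kind distinction concrete, a small sketch (reportCosts is a hypothetical name) that rates the same instruction under the throughput and code-size models; the TCC_* constants above are the canonical values the default hooks hand back for such queries.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Query one instruction under two different cost kinds.
static void reportCosts(const TargetTransformInfo &TTI, const Instruction &I) {
  InstructionCost RThroughput =
      TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
  InstructionCost CodeSize =
      TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
  errs() << "rthroughput=" << RThroughput << " codesize=" << CodeSize << "\n";
}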
MemIndexedMode
The type of load/store indexing.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
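A short sketch of how these shuffle kinds are used when querying the target; shuffleCosts and the <4 x i32> vector type are illustrative choices, not taken from this header.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include <optional>

using namespace llvm;

// Ask the target what a broadcast and a reversal of a <4 x i32> vector cost.
static void shuffleCosts(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  InstructionCost Bcast = TTI.getShuffleCost(
      TargetTransformInfo::SK_Broadcast, VecTy, /*Mask=*/std::nullopt,
      TargetTransformInfo::TCK_RecipThroughput, /*Index=*/0, /*SubTp=*/nullptr);
  InstructionCost Rev = TTI.getShuffleCost(
      TargetTransformInfo::SK_Reverse, VecTy, /*Mask=*/std::nullopt,
      TargetTransformInfo::TCK_RecipThroughput, /*Index=*/0, /*SubTp=*/nullptr);
  (void)Bcast;
  (void)Rev;
}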
CastContextHint
Represents a hint about the context in which a cast is used.
CacheLevel
The possible cache levels.
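For the cache levels, a sketch along these lines (printL1Info is a hypothetical name) shows the intended use; both queries return std::nullopt when the target does not model its cache hierarchy.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

// Print first-level data-cache parameters if the target reports them.
static void printL1Info(const TargetTransformInfo &TTI) {
  using CL = TargetTransformInfo::CacheLevel;
  if (std::optional<unsigned> Size = TTI.getCacheSize(CL::L1D))
    errs() << "L1D size: " << *Size << " bytes\n";
  if (std::optional<unsigned> Assoc = TTI.getCacheAssociativity(CL::L1D))
    errs() << "L1D associativity: " << *Assoc << "\n";
}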
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:322
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:267
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:350
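The Type queries above are the building blocks of most legality hooks; a tiny sketch (isNarrowFPElement is a hypothetical name):

#include "llvm/IR/Type.h"

using namespace llvm;

// True if the scalar element of Ty (Ty itself for non-vectors) is a
// floating-point type narrower than 64 bits.
static bool isNarrowFPElement(Type *Ty) {
  Type *EltTy = Ty->getScalarType();
  return EltTy->isFloatingPointTy() && EltTy->getScalarSizeInBits() < 64;
}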
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
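TypeSize is how fixed and vscale-scaled sizes coexist; a minimal sketch (fitsInRegister is a hypothetical name) of the check pattern callers are expected to follow:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Only fixed-size types can be compared against a known register width;
// scalable sizes are a multiple of the runtime vscale.
static bool fitsInRegister(const DataLayout &DL, Type *Ty, unsigned RegBits) {
  TypeSize TS = DL.getTypeSizeInBits(Ty);
  if (TS.isScalable())
    return false;
  return TS.getFixedValue() <= RegBits;
}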
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
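A sketch of the matcher idiom these entries refer to (isLogicalAndOr is a hypothetical name); the two-operand forms of m_LogicalAnd/m_LogicalOr bind the matched operands:

#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Recognise a bitwise or select-based logical and/or and capture its operands.
static bool isLogicalAndOr(const Value *V, const Value *&L, const Value *&R) {
  return match(V, m_LogicalAnd(m_Value(L), m_Value(R))) ||
         match(V, m_LogicalOr(m_Value(L), m_Value(R)));
}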
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:406
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), where A is the 0-based index of the item in the sequence, and B, C, ... are the values from the original input ranges.
Definition: STLExtras.h:2430
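A small sketch of enumerate() over a shuffle mask (firstNegativeIndex is a hypothetical name):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

// Return the position of the first sentinel (negative) mask element, or -1.
static int firstNegativeIndex(ArrayRef<int> Mask) {
  for (const auto &En : enumerate(Mask))
    if (En.value() < 0)
      return static_cast<int>(En.index());
  return -1;
}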
AddressSpace
Definition: NVPTXBaseInfo.h:21
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr int UndefMaskElem
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
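These utilities often appear together in operand analysis; a sketch under that assumption (both helper names are hypothetical):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"

using namespace llvm;

// True if V is a splat whose scalar is a power-of-two constant integer.
static bool isPowerOfTwoSplat(const Value *V) {
  const auto *CI = dyn_cast_or_null<ConstantInt>(getSplatValue(V));
  return CI && CI->getValue().isPowerOf2();
}

// True if any element of a shuffle mask is a sentinel (negative) value.
static bool hasNegativeMaskElt(ArrayRef<int> Mask) {
  return any_of(Mask, [](int M) { return M < 0; });
}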
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
gep_type_iterator gep_type_begin(const User *GEP)
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead implements it with a splat/stepvector/cmp.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Flags describing the kind of vector reduction.
Parameters that control the generic loop unrolling transformation.