LLVM 20.0.0git
TargetTransformInfoImpl.h
Go to the documentation of this file.
1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
20#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/Operator.h"
25#include <optional>
26#include <utility>
27
28namespace llvm {
29
30class Function;
31
32/// Base class for use as a mix-in that aids implementing
33/// a TargetTransformInfo-compatible class.
35
36protected:
38
39 const DataLayout &DL;
40
42
43public:
44 // Provide value semantics. MSVC requires that we spell all of these out.
47
48 const DataLayout &getDataLayout() const { return DL; }
49
50 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
53 // In the basic model, we just assume that all-constant GEPs will be folded
54 // into their uses via addressing modes.
55 for (const Value *Operand : Operands)
56 if (!isa<Constant>(Operand))
57 return TTI::TCC_Basic;
58
59 return TTI::TCC_Free;
60 }
61
63 unsigned &JTSize,
65 BlockFrequencyInfo *BFI) const {
66 (void)PSI;
67 (void)BFI;
68 JTSize = 0;
69 return SI.getNumCases();
70 }
71
72 unsigned getInliningThresholdMultiplier() const { return 1; }
75 return 8;
76 }
77 unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
78 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
79 return 0;
80 };
81
82 int getInlinerVectorBonusPercent() const { return 150; }
83
85 return TTI::TCC_Expensive;
86 }
87
89 return 64;
90 }
91
92 // Although this default value is arbitrary, it is not random. It is assumed
93 // that a condition that evaluates the same way by a higher percentage than
94 // this is best represented as control flow. Therefore, the default value N
95 // should be set such that the win from N% correct executions is greater than
96 // the loss from (100 - N)% mispredicted executions for the majority of
97 // intended targets.
99 return BranchProbability(99, 100);
100 }
101
103
104 bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
105
106 bool isSourceOfDivergence(const Value *V) const { return false; }
107
108 bool isAlwaysUniform(const Value *V) const { return false; }
109
110 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
111 return false;
112 }
113
114 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
115 return true;
116 }
117
118 unsigned getFlatAddressSpace() const { return -1; }
119
121 Intrinsic::ID IID) const {
122 return false;
123 }
124
125 bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
127 return AS == 0;
128 };
129
130 unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
131
132 bool isSingleThreaded() const { return false; }
133
134 std::pair<const Value *, unsigned>
136 return std::make_pair(nullptr, -1);
137 }
138
140 Value *NewV) const {
141 return nullptr;
142 }
143
144 bool isLoweredToCall(const Function *F) const {
145 assert(F && "A concrete function must be provided to this routine.");
146
147 // FIXME: These should almost certainly not be handled here, and instead
148 // handled with the help of TLI or the target itself. This was largely
149 // ported from existing analysis heuristics here so that such refactorings
150 // can take place in the future.
151
152 if (F->isIntrinsic())
153 return false;
154
155 if (F->hasLocalLinkage() || !F->hasName())
156 return true;
157
158 StringRef Name = F->getName();
159
160 // These will all likely lower to a single selection DAG node.
161 // clang-format off
162 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
163 Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
164 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
165 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
166 Name == "sin" || Name == "sinf" || Name == "sinl" ||
167 Name == "cos" || Name == "cosf" || Name == "cosl" ||
168 Name == "tan" || Name == "tanf" || Name == "tanl" ||
169 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
170 return false;
171 // clang-format on
172 // These are all likely to be optimized into something smaller.
173 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
174 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
175 Name == "floorf" || Name == "ceil" || Name == "round" ||
176 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
177 Name == "llabs")
178 return false;
179
180 return true;
181 }
182
185 HardwareLoopInfo &HWLoopInfo) const {
186 return false;
187 }
188
189 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }
190
192 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
194 }
195
196 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
197 IntrinsicInst &II) const {
198 return std::nullopt;
199 }
200
201 std::optional<Value *>
203 APInt DemandedMask, KnownBits &Known,
204 bool &KnownBitsComputed) const {
205 return std::nullopt;
206 }
207
209 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
210 APInt &UndefElts2, APInt &UndefElts3,
211 std::function<void(Instruction *, unsigned, APInt, APInt &)>
212 SimplifyAndSetOp) const {
213 return std::nullopt;
214 }
215
218 OptimizationRemarkEmitter *) const {}
219
221 TTI::PeelingPreferences &) const {}
222
223 bool isLegalAddImmediate(int64_t Imm) const { return false; }
224
225 bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
226
227 bool isLegalICmpImmediate(int64_t Imm) const { return false; }
228
229 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
230 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
231 Instruction *I = nullptr,
232 int64_t ScalableOffset = 0) const {
233 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
234 // taken from the implementation of LSR.
235 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
236 }
237
238 bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
239 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
240 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
241 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
242 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
243 }
244
245 bool isNumRegsMajorCostOfLSR() const { return true; }
246
247 bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
248
249 bool isProfitableLSRChainElement(Instruction *I) const { return false; }
250
251 bool canMacroFuseCmp() const { return false; }
252
255 TargetLibraryInfo *LibInfo) const {
256 return false;
257 }
258
261 return TTI::AMK_None;
262 }
263
264 bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
265 return false;
266 }
267
268 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
269 return false;
270 }
271
272 bool isLegalNTStore(Type *DataType, Align Alignment) const {
273 // By default, assume nontemporal memory stores are available for stores
274 // that are aligned and have a size that is a power of 2.
275 unsigned DataSize = DL.getTypeStoreSize(DataType);
276 return Alignment >= DataSize && isPowerOf2_32(DataSize);
277 }
278
279 bool isLegalNTLoad(Type *DataType, Align Alignment) const {
280 // By default, assume nontemporal memory loads are available for loads that
281 // are aligned and have a size that is a power of 2.
282 unsigned DataSize = DL.getTypeStoreSize(DataType);
283 return Alignment >= DataSize && isPowerOf2_32(DataSize);
284 }
285
286 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
287 return false;
288 }
289
290 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
291 return false;
292 }
293
294 bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
295 return false;
296 }
297
298 bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
299 return false;
300 }
301
303 Align Alignment) const {
304 return false;
305 }
306
307 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const {
308 return false;
309 }
310
311 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
312 const SmallBitVector &OpcodeMask) const {
313 return false;
314 }
315
316 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
317 return false;
318 }
319
320 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
321 return false;
322 }
323
324 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const {
325 return false;
326 }
327
328 bool enableOrderedReductions() const { return false; }
329
330 bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
331
332 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
333 return false;
334 }
335
336 bool prefersVectorizedAddressing() const { return true; }
337
339 StackOffset BaseOffset, bool HasBaseReg,
340 int64_t Scale,
341 unsigned AddrSpace) const {
342 // Guess that all legal addressing modes are free.
343 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
344 Scale, AddrSpace, /*I=*/nullptr,
345 BaseOffset.getScalable()))
346 return 0;
347 return -1;
348 }
349
350 bool LSRWithInstrQueries() const { return false; }
351
352 bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
353
354 bool isProfitableToHoist(Instruction *I) const { return true; }
355
356 bool useAA() const { return false; }
357
358 bool isTypeLegal(Type *Ty) const { return false; }
359
360 unsigned getRegUsageForType(Type *Ty) const { return 1; }
361
362 bool shouldBuildLookupTables() const { return true; }
363
364 bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
365
366 bool shouldBuildRelLookupTables() const { return false; }
367
368 bool useColdCCForColdCall(Function &F) const { return false; }
369
371 const APInt &DemandedElts,
372 bool Insert, bool Extract,
374 return 0;
375 }
376
381 return 0;
382 }
383
384 bool supportsEfficientVectorElementLoadStore() const { return false; }
385
386 bool supportsTailCalls() const { return true; }
387
388 bool enableAggressiveInterleaving(bool LoopHasReductions) const {
389 return false;
390 }
391
393 bool IsZeroCmp) const {
394 return {};
395 }
396
397 bool enableSelectOptimize() const { return true; }
398
400 // If the select is a logical-and/logical-or then it is better treated as a
401 // and/or by the backend.
402 using namespace llvm::PatternMatch;
403 return isa<SelectInst>(I) &&
406 }
407
408 bool enableInterleavedAccessVectorization() const { return false; }
409
410 bool enableMaskedInterleavedAccessVectorization() const { return false; }
411
412 bool isFPVectorizationPotentiallyUnsafe() const { return false; }
413
415 unsigned AddressSpace, Align Alignment,
416 unsigned *Fast) const {
417 return false;
418 }
419
420 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
421 return TTI::PSK_Software;
422 }
423
424 bool haveFastSqrt(Type *Ty) const { return false; }
425
426 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }
427
428 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
429
432 }
433
434 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
435 const APInt &Imm, Type *Ty) const {
436 return 0;
437 }
438
441 return TTI::TCC_Basic;
442 }
443
444 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
445 const APInt &Imm, Type *Ty,
447 Instruction *Inst = nullptr) const {
448 return TTI::TCC_Free;
449 }
450
452 const APInt &Imm, Type *Ty,
454 return TTI::TCC_Free;
455 }
456
458 const Function &Fn) const {
459 return false;
460 }
461
462 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
463 bool hasConditionalLoadStoreForType(Type *Ty) const { return false; }
464
465 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
466 return Vector ? 1 : 0;
467 };
468
469 const char *getRegisterClassName(unsigned ClassID) const {
470 switch (ClassID) {
471 default:
472 return "Generic::Unknown Register Class";
473 case 0:
474 return "Generic::ScalarRC";
475 case 1:
476 return "Generic::VectorRC";
477 }
478 }
479
481 return TypeSize::getFixed(32);
482 }
483
484 unsigned getMinVectorRegisterBitWidth() const { return 128; }
485
486 std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
487 std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
488 bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
489
490 bool
492 return false;
493 }
494
495 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
496 return ElementCount::get(0, IsScalable);
497 }
498
499 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
500 unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
501
503 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
504 AllowPromotionWithoutCommonHeader = false;
505 return false;
506 }
507
508 unsigned getCacheLineSize() const { return 0; }
509 std::optional<unsigned>
511 switch (Level) {
513 [[fallthrough]];
515 return std::nullopt;
516 }
517 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
518 }
519
520 std::optional<unsigned>
522 switch (Level) {
524 [[fallthrough]];
526 return std::nullopt;
527 }
528
529 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
530 }
531
532 std::optional<unsigned> getMinPageSize() const { return {}; }
533
534 unsigned getPrefetchDistance() const { return 0; }
535 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
536 unsigned NumStridedMemAccesses,
537 unsigned NumPrefetches, bool HasCall) const {
538 return 1;
539 }
540 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
541 bool enableWritePrefetching() const { return false; }
542 bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
543
544 unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
545
547 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
550 const Instruction *CxtI = nullptr) const {
551 // Widenable conditions will eventually lower into constants, so some
552 // operations with them will be trivially optimized away.
553 auto IsWidenableCondition = [](const Value *V) {
554 if (auto *II = dyn_cast<IntrinsicInst>(V))
555 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
556 return true;
557 return false;
558 };
559 // FIXME: A number of transformation tests seem to require these values,
560 // which seems a little odd given how arbitrary they are.
561 switch (Opcode) {
562 default:
563 break;
564 case Instruction::FDiv:
565 case Instruction::FRem:
566 case Instruction::SDiv:
567 case Instruction::SRem:
568 case Instruction::UDiv:
569 case Instruction::URem:
570 // FIXME: Unlikely to be true for CodeSize.
571 return TTI::TCC_Expensive;
572 case Instruction::And:
573 case Instruction::Or:
574 if (any_of(Args, IsWidenableCondition))
575 return TTI::TCC_Free;
576 break;
577 }
578
579 // Assume a 3cy latency for fp arithmetic ops.
581 if (Ty->getScalarType()->isFloatingPointTy())
582 return 3;
583
584 return 1;
585 }
586
588 unsigned Opcode1,
589 const SmallBitVector &OpcodeMask,
592 }
593
595 ArrayRef<int> Mask,
597 VectorType *SubTp,
598 ArrayRef<const Value *> Args = std::nullopt,
599 const Instruction *CxtI = nullptr) const {
600 return 1;
601 }
602
603 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
606 const Instruction *I) const {
607 switch (Opcode) {
608 default:
609 break;
610 case Instruction::IntToPtr: {
611 unsigned SrcSize = Src->getScalarSizeInBits();
612 if (DL.isLegalInteger(SrcSize) &&
613 SrcSize <= DL.getPointerTypeSizeInBits(Dst))
614 return 0;
615 break;
616 }
617 case Instruction::PtrToInt: {
618 unsigned DstSize = Dst->getScalarSizeInBits();
619 if (DL.isLegalInteger(DstSize) &&
620 DstSize >= DL.getPointerTypeSizeInBits(Src))
621 return 0;
622 break;
623 }
624 case Instruction::BitCast:
625 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
626 // Identity and pointer-to-pointer casts are free.
627 return 0;
628 break;
629 case Instruction::Trunc: {
630 // trunc to a native type is free (assuming the target has compare and
631 // shift-right of the same width).
632 TypeSize DstSize = DL.getTypeSizeInBits(Dst);
633 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
634 return 0;
635 break;
636 }
637 }
638 return 1;
639 }
640
642 VectorType *VecTy,
643 unsigned Index) const {
644 return 1;
645 }
646
648 const Instruction *I = nullptr) const {
649 // A phi would be free, unless we're costing the throughput because it
650 // will require a register.
651 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
652 return 0;
653 return 1;
654 }
655
656 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
657 CmpInst::Predicate VecPred,
659 const Instruction *I) const {
660 return 1;
661 }
662
665 unsigned Index, Value *Op0,
666 Value *Op1) const {
667 return 1;
668 }
669
672 unsigned Index) const {
673 return 1;
674 }
675
676 unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
677 const APInt &DemandedDstElts,
679 return 1;
680 }
681
682 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
683 unsigned AddressSpace,
686 const Instruction *I) const {
687 return 1;
688 }
689
690 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
691 unsigned AddressSpace,
693 const Instruction *I) const {
694 return 1;
695 }
696
698 Align Alignment, unsigned AddressSpace,
700 return 1;
701 }
702
704 const Value *Ptr, bool VariableMask,
705 Align Alignment,
707 const Instruction *I = nullptr) const {
708 return 1;
709 }
710
712 const Value *Ptr, bool VariableMask,
713 Align Alignment,
715 const Instruction *I = nullptr) const {
717 }
718
720 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
721 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
722 bool UseMaskForCond, bool UseMaskForGaps) const {
723 return 1;
724 }
725
728 switch (ICA.getID()) {
729 default:
730 break;
731 case Intrinsic::experimental_vector_histogram_add:
732 // For now, we want explicit support from the target for histograms.
734 case Intrinsic::allow_runtime_check:
735 case Intrinsic::allow_ubsan_check:
736 case Intrinsic::annotation:
737 case Intrinsic::assume:
738 case Intrinsic::sideeffect:
739 case Intrinsic::pseudoprobe:
740 case Intrinsic::arithmetic_fence:
741 case Intrinsic::dbg_assign:
742 case Intrinsic::dbg_declare:
743 case Intrinsic::dbg_value:
744 case Intrinsic::dbg_label:
745 case Intrinsic::invariant_start:
746 case Intrinsic::invariant_end:
747 case Intrinsic::launder_invariant_group:
748 case Intrinsic::strip_invariant_group:
749 case Intrinsic::is_constant:
750 case Intrinsic::lifetime_start:
751 case Intrinsic::lifetime_end:
752 case Intrinsic::experimental_noalias_scope_decl:
753 case Intrinsic::objectsize:
754 case Intrinsic::ptr_annotation:
755 case Intrinsic::var_annotation:
756 case Intrinsic::experimental_gc_result:
757 case Intrinsic::experimental_gc_relocate:
758 case Intrinsic::coro_alloc:
759 case Intrinsic::coro_begin:
760 case Intrinsic::coro_free:
761 case Intrinsic::coro_end:
762 case Intrinsic::coro_frame:
763 case Intrinsic::coro_size:
764 case Intrinsic::coro_align:
765 case Intrinsic::coro_suspend:
766 case Intrinsic::coro_subfn_addr:
767 case Intrinsic::threadlocal_address:
768 case Intrinsic::experimental_widenable_condition:
769 case Intrinsic::ssa_copy:
770 // These intrinsics don't actually represent code after lowering.
771 return 0;
772 }
773 return 1;
774 }
775
779 return 1;
780 }
781
782 // Assume that we have a register of the right size for the type.
783 unsigned getNumberOfParts(Type *Tp) const { return 1; }
784
786 const SCEV *) const {
787 return 0;
788 }
789
791 std::optional<FastMathFlags> FMF,
792 TTI::TargetCostKind) const {
793 return 1;
794 }
795
798 TTI::TargetCostKind) const {
799 return 1;
800 }
801
802 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
803 Type *ResTy, VectorType *Ty,
804 FastMathFlags FMF,
806 return 1;
807 }
808
810 VectorType *Ty,
812 return 1;
813 }
814
816 return 0;
817 }
818
820 return false;
821 }
822
824 // Note for overrides: You must ensure for all element unordered-atomic
825 // memory intrinsics that all power-of-2 element sizes up to, and
826 // including, the return value of this method have a corresponding
827 // runtime lib call. These runtime lib call definitions can be found
828 // in RuntimeLibcalls.h
829 return 0;
830 }
831
833 Type *ExpectedType) const {
834 return nullptr;
835 }
836
837 Type *
839 unsigned SrcAddrSpace, unsigned DestAddrSpace,
840 Align SrcAlign, Align DestAlign,
841 std::optional<uint32_t> AtomicElementSize) const {
842 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
843 : Type::getInt8Ty(Context);
844 }
845
847 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
848 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
849 Align SrcAlign, Align DestAlign,
850 std::optional<uint32_t> AtomicCpySize) const {
851 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
852 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
853 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
854 OpsOut.push_back(OpType);
855 }
856
857 bool areInlineCompatible(const Function *Caller,
858 const Function *Callee) const {
859 return (Caller->getFnAttribute("target-cpu") ==
860 Callee->getFnAttribute("target-cpu")) &&
861 (Caller->getFnAttribute("target-features") ==
862 Callee->getFnAttribute("target-features"));
863 }
864
865 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
866 unsigned DefaultCallPenalty) const {
867 return DefaultCallPenalty;
868 }
869
870 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
871 const ArrayRef<Type *> &Types) const {
872 return (Caller->getFnAttribute("target-cpu") ==
873 Callee->getFnAttribute("target-cpu")) &&
874 (Caller->getFnAttribute("target-features") ==
875 Callee->getFnAttribute("target-features"));
876 }
877
879 const DataLayout &DL) const {
880 return false;
881 }
882
884 const DataLayout &DL) const {
885 return false;
886 }
887
888 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
889
890 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
891
892 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
893
894 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
895 unsigned AddrSpace) const {
896 return true;
897 }
898
899 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
900 unsigned AddrSpace) const {
901 return true;
902 }
903
905 ElementCount VF) const {
906 return true;
907 }
908
909 bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
910
911 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
912 unsigned ChainSizeInBytes,
913 VectorType *VecTy) const {
914 return VF;
915 }
916
917 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
918 unsigned ChainSizeInBytes,
919 VectorType *VecTy) const {
920 return VF;
921 }
922
923 bool preferFixedOverScalableIfEqualCost() const { return false; }
924
925 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
926 TTI::ReductionFlags Flags) const {
927 return false;
928 }
929
930 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
931 TTI::ReductionFlags Flags) const {
932 return false;
933 }
934
936 return true;
937 }
938
939 bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
940
944 }
945
946 unsigned getGISelRematGlobalCost() const { return 1; }
947
948 unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
949
950 bool supportsScalableVectors() const { return false; }
951
952 bool enableScalableVectorization() const { return false; }
953
954 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
955 Align Alignment) const {
956 return false;
957 }
958
963 /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
964 }
965
966 bool hasArmWideBranch(bool) const { return false; }
967
968 unsigned getMaxNumArgs() const { return UINT_MAX; }
969
970protected:
971 // Obtain the minimum required size to hold the value (without the sign)
972 // In case of a vector it returns the min required size for one element.
973 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
974 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
975 const auto *VectorValue = cast<Constant>(Val);
976
977 // In case of a vector need to pick the max between the min
978 // required size for each element
979 auto *VT = cast<FixedVectorType>(Val->getType());
980
981 // Assume unsigned elements
982 isSigned = false;
983
984 // The max required size is the size of the vector element type
985 unsigned MaxRequiredSize =
986 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
987
988 unsigned MinRequiredSize = 0;
989 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
990 if (auto *IntElement =
991 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
992 bool signedElement = IntElement->getValue().isNegative();
993 // Get the element min required size.
994 unsigned ElementMinRequiredSize =
995 IntElement->getValue().getSignificantBits() - 1;
996 // In case one element is signed then all the vector is signed.
997 isSigned |= signedElement;
998 // Save the max required bit size between all the elements.
999 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
1000 } else {
1001 // not an int constant element
1002 return MaxRequiredSize;
1003 }
1004 }
1005 return MinRequiredSize;
1006 }
1007
1008 if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
1009 isSigned = CI->getValue().isNegative();
1010 return CI->getValue().getSignificantBits() - 1;
1011 }
1012
1013 if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
1014 isSigned = true;
1015 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
1016 }
1017
1018 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
1019 isSigned = false;
1020 return Cast->getSrcTy()->getScalarSizeInBits();
1021 }
1022
1023 isSigned = false;
1024 return Val->getType()->getScalarSizeInBits();
1025 }
1026
1027 bool isStridedAccess(const SCEV *Ptr) const {
1028 return Ptr && isa<SCEVAddRecExpr>(Ptr);
1029 }
1030
1032 const SCEV *Ptr) const {
1033 if (!isStridedAccess(Ptr))
1034 return nullptr;
1035 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
1036 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
1037 }
1038
1040 int64_t MergeDistance) const {
1041 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
1042 if (!Step)
1043 return false;
1044 APInt StrideVal = Step->getAPInt();
1045 if (StrideVal.getBitWidth() > 64)
1046 return false;
1047 // FIXME: Need to take absolute value for negative stride case.
1048 return StrideVal.getSExtValue() < MergeDistance;
1049 }
1050};
1051
1052/// CRTP base class for use as a mix-in that aids implementing
1053/// a TargetTransformInfo-compatible class.
1054template <typename T>
1056private:
1058
1059protected:
1061
1062public:
1064
1068 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
1069 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
1070 bool HasBaseReg = (BaseGV == nullptr);
1071
1072 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
1073 APInt BaseOffset(PtrSizeBits, 0);
1074 int64_t Scale = 0;
1075
1076 auto GTI = gep_type_begin(PointeeType, Operands);
1077 Type *TargetType = nullptr;
1078
1079 // Handle the case where the GEP instruction has a single operand,
1080 // the basis, therefore TargetType is a nullptr.
1081 if (Operands.empty())
1082 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
1083
1084 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
1085 TargetType = GTI.getIndexedType();
1086 // We assume that the cost of Scalar GEP with constant index and the
1087 // cost of Vector GEP with splat constant index are the same.
1088 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
1089 if (!ConstIdx)
1090 if (auto Splat = getSplatValue(*I))
1091 ConstIdx = dyn_cast<ConstantInt>(Splat);
1092 if (StructType *STy = GTI.getStructTypeOrNull()) {
1093 // For structures the index is always splat or scalar constant
1094 assert(ConstIdx && "Unexpected GEP index");
1095 uint64_t Field = ConstIdx->getZExtValue();
1096 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
1097 } else {
1098 // If this operand is a scalable type, bail out early.
1099 // TODO: Make isLegalAddressingMode TypeSize aware.
1100 if (TargetType->isScalableTy())
1101 return TTI::TCC_Basic;
1102 int64_t ElementSize =
1103 GTI.getSequentialElementStride(DL).getFixedValue();
1104 if (ConstIdx) {
1105 BaseOffset +=
1106 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
1107 } else {
1108 // Needs scale register.
1109 if (Scale != 0)
1110 // No addressing mode takes two scale registers.
1111 return TTI::TCC_Basic;
1112 Scale = ElementSize;
1113 }
1114 }
1115 }
1116
1117 // If we haven't been provided a hint, use the target type for now.
1118 //
1119 // TODO: Take a look at potentially removing this: This is *slightly* wrong
1120 // as it's possible to have a GEP with a foldable target type but a memory
1121 // access that isn't foldable. For example, this load isn't foldable on
1122 // RISC-V:
1123 //
1124 // %p = getelementptr i32, ptr %base, i32 42
1125 // %x = load <2 x i32>, ptr %p
1126 if (!AccessType)
1127 AccessType = TargetType;
1128
1129 // If the final address of the GEP is a legal addressing mode for the given
1130 // access type, then we can fold it into its users.
1131 if (static_cast<T *>(this)->isLegalAddressingMode(
1132 AccessType, const_cast<GlobalValue *>(BaseGV),
1133 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
1134 Ptr->getType()->getPointerAddressSpace()))
1135 return TTI::TCC_Free;
1136
1137 // TODO: Instead of returning TCC_Basic here, we should use
1138 // getArithmeticInstrCost. Or better yet, provide a hook to let the target
1139 // model it.
1140 return TTI::TCC_Basic;
1141 }
1142
1144 const Value *Base,
1146 Type *AccessTy,
1149 // In the basic model we take into account GEP instructions only
1150 // (although an alloca instruction, a value, constants and/or constant
1151 // expressions, PHIs, bitcasts ... whatever is allowed to be used as a
1152 // pointer may also appear here). Typically, if Base is not a GEP
1153 // instruction and all the pointers are relative to the same base address,
1154 // all the rest are either GEP instructions, PHIs, bitcasts or constants.
1155 // When we have the same base, we just calculate the cost of each non-Base
1156 // GEP as an ADD operation if any of its indices is non-constant.
1157 // If there are no known dependencies between the pointers, the cost is
1158 // calculated as a sum of the costs of the GEP instructions.
1159 for (const Value *V : Ptrs) {
1160 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
1161 if (!GEP)
1162 continue;
1163 if (Info.isSameBase() && V != Base) {
1164 if (GEP->hasAllConstantIndices())
1165 continue;
1166 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1167 Instruction::Add, GEP->getType(), CostKind,
1169 std::nullopt);
1170 } else {
1171 SmallVector<const Value *> Indices(GEP->indices());
1172 Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
1173 GEP->getPointerOperand(),
1174 Indices, AccessTy, CostKind);
1175 }
1176 }
1177 return Cost;
1178 }
1179
1183 using namespace llvm::PatternMatch;
1184
1185 auto *TargetTTI = static_cast<T *>(this);
1186 // Handle non-intrinsic calls, invokes, and callbr.
1187 // FIXME: Unlikely to be true for anything but CodeSize.
1188 auto *CB = dyn_cast<CallBase>(U);
1189 if (CB && !isa<IntrinsicInst>(U)) {
1190 if (const Function *F = CB->getCalledFunction()) {
1191 if (!TargetTTI->isLoweredToCall(F))
1192 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1193
1194 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1195 }
1196 // For indirect or other calls, scale cost by number of arguments.
1197 return TTI::TCC_Basic * (CB->arg_size() + 1);
1198 }
1199
1200 Type *Ty = U->getType();
1201 unsigned Opcode = Operator::getOpcode(U);
1202 auto *I = dyn_cast<Instruction>(U);
1203 switch (Opcode) {
1204 default:
1205 break;
1206 case Instruction::Call: {
1207 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1208 auto *Intrinsic = cast<IntrinsicInst>(U);
1209 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1210 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1211 }
1212 case Instruction::Br:
1213 case Instruction::Ret:
1214 case Instruction::PHI:
1215 case Instruction::Switch:
1216 return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1217 case Instruction::ExtractValue:
1218 case Instruction::Freeze:
1219 return TTI::TCC_Free;
1220 case Instruction::Alloca:
1221 if (cast<AllocaInst>(U)->isStaticAlloca())
1222 return TTI::TCC_Free;
1223 break;
1224 case Instruction::GetElementPtr: {
1225 const auto *GEP = cast<GEPOperator>(U);
1226 Type *AccessType = nullptr;
1227 // For now, only provide the AccessType in the simple case where the GEP
1228 // only has one user.
1229 if (GEP->hasOneUser() && I)
1230 AccessType = I->user_back()->getAccessType();
1231
1232 return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1233 Operands.front(), Operands.drop_front(),
1234 AccessType, CostKind);
1235 }
1236 case Instruction::Add:
1237 case Instruction::FAdd:
1238 case Instruction::Sub:
1239 case Instruction::FSub:
1240 case Instruction::Mul:
1241 case Instruction::FMul:
1242 case Instruction::UDiv:
1243 case Instruction::SDiv:
1244 case Instruction::FDiv:
1245 case Instruction::URem:
1246 case Instruction::SRem:
1247 case Instruction::FRem:
1248 case Instruction::Shl:
1249 case Instruction::LShr:
1250 case Instruction::AShr:
1251 case Instruction::And:
1252 case Instruction::Or:
1253 case Instruction::Xor:
1254 case Instruction::FNeg: {
1256 TTI::OperandValueInfo Op2Info;
1257 if (Opcode != Instruction::FNeg)
1258 Op2Info = TTI::getOperandInfo(Operands[1]);
1259 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1260 Op2Info, Operands, I);
1261 }
1262 case Instruction::IntToPtr:
1263 case Instruction::PtrToInt:
1264 case Instruction::SIToFP:
1265 case Instruction::UIToFP:
1266 case Instruction::FPToUI:
1267 case Instruction::FPToSI:
1268 case Instruction::Trunc:
1269 case Instruction::FPTrunc:
1270 case Instruction::BitCast:
1271 case Instruction::FPExt:
1272 case Instruction::SExt:
1273 case Instruction::ZExt:
1274 case Instruction::AddrSpaceCast: {
1275 Type *OpTy = Operands[0]->getType();
1276 return TargetTTI->getCastInstrCost(
1277 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1278 }
1279 case Instruction::Store: {
1280 auto *SI = cast<StoreInst>(U);
1281 Type *ValTy = Operands[0]->getType();
1283 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1284 SI->getPointerAddressSpace(), CostKind,
1285 OpInfo, I);
1286 }
1287 case Instruction::Load: {
1288 // FIXME: Arbitrary cost which could come from the backend.
1290 return 4;
1291 auto *LI = cast<LoadInst>(U);
1292 Type *LoadType = U->getType();
1293 // If there is a non-register sized type, the cost estimation may expand
1294 // it to be several instructions to load into multiple registers on the
1295 // target. But, if the only use of the load is a trunc instruction to a
1296 // register sized type, the instruction selector can combine these
1297 // instructions to be a single load. So, in this case, we use the
1298 // destination type of the trunc instruction rather than the load to
1299 // accurately estimate the cost of this load instruction.
1300 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1301 !LoadType->isVectorTy()) {
1302 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1303 LoadType = TI->getDestTy();
1304 }
1305 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1307 {TTI::OK_AnyValue, TTI::OP_None}, I);
1308 }
1309 case Instruction::Select: {
1310 const Value *Op0, *Op1;
1311 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
1312 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
1313 // select x, y, false --> x & y
1314 // select x, true, y --> x | y
1315 const auto Op1Info = TTI::getOperandInfo(Op0);
1316 const auto Op2Info = TTI::getOperandInfo(Op1);
1317 assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1318 Op1->getType()->getScalarSizeInBits() == 1);
1319
1321 return TargetTTI->getArithmeticInstrCost(
1322 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1323 CostKind, Op1Info, Op2Info, Operands, I);
1324 }
1325 Type *CondTy = Operands[0]->getType();
1326 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1328 CostKind, I);
1329 }
1330 case Instruction::ICmp:
1331 case Instruction::FCmp: {
1332 Type *ValTy = Operands[0]->getType();
1333 // TODO: Also handle ICmp/FCmp constant expressions.
1334 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1335 I ? cast<CmpInst>(I)->getPredicate()
1337 CostKind, I);
1338 }
1339 case Instruction::InsertElement: {
1340 auto *IE = dyn_cast<InsertElementInst>(U);
1341 if (!IE)
1342 return TTI::TCC_Basic; // FIXME
1343 unsigned Idx = -1;
1344 if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
1345 if (CI->getValue().getActiveBits() <= 32)
1346 Idx = CI->getZExtValue();
1347 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
1348 }
1349 case Instruction::ShuffleVector: {
1350 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1351 if (!Shuffle)
1352 return TTI::TCC_Basic; // FIXME
1353
1354 auto *VecTy = cast<VectorType>(U->getType());
1355 auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
1356 ArrayRef<int> Mask = Shuffle->getShuffleMask();
1357 int NumSubElts, SubIndex;
1358
1359 // TODO: move more of this inside improveShuffleKindFromMask.
1360 if (Shuffle->changesLength()) {
1361 // Treat a 'subvector widening' as a free shuffle.
1362 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1363 return 0;
1364
1365 if (Shuffle->isExtractSubvectorMask(SubIndex))
1366 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1367 Mask, CostKind, SubIndex, VecTy,
1368 Operands, Shuffle);
1369
1370 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1371 return TargetTTI->getShuffleCost(
1372 TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1373 FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1374 Operands, Shuffle);
1375
1376 int ReplicationFactor, VF;
1377 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1378 APInt DemandedDstElts = APInt::getZero(Mask.size());
1379 for (auto I : enumerate(Mask)) {
1380 if (I.value() != PoisonMaskElem)
1381 DemandedDstElts.setBit(I.index());
1382 }
1383 return TargetTTI->getReplicationShuffleCost(
1384 VecSrcTy->getElementType(), ReplicationFactor, VF,
1385 DemandedDstElts, CostKind);
1386 }
1387
1388 bool IsUnary = isa<UndefValue>(Operands[1]);
1389 NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
1390 SmallVector<int, 16> AdjustMask(Mask);
1391
1392 // Widening shuffle - widening the source(s) to the new length
1393 // (treated as free - see above), and then perform the adjusted
1394 // shuffle at that width.
1395 if (Shuffle->increasesLength()) {
1396 for (int &M : AdjustMask)
1397 M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
1398
1399 return TargetTTI->getShuffleCost(
1401 AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1402 }
1403
1404 // Narrowing shuffle - perform shuffle at original wider width and
1405 // then extract the lower elements.
1406 AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
1407
1408 InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
1410 VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1411
1412 SmallVector<int, 16> ExtractMask(Mask.size());
1413 std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
1414 return ShuffleCost + TargetTTI->getShuffleCost(
1415 TTI::SK_ExtractSubvector, VecSrcTy,
1416 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
1417 }
1418
1419 if (Shuffle->isIdentity())
1420 return 0;
1421
1422 if (Shuffle->isReverse())
1423 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
1424 0, nullptr, Operands, Shuffle);
1425
1426 if (Shuffle->isSelect())
1427 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
1428 0, nullptr, Operands, Shuffle);
1429
1430 if (Shuffle->isTranspose())
1431 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
1432 CostKind, 0, nullptr, Operands,
1433 Shuffle);
1434
1435 if (Shuffle->isZeroEltSplat())
1436 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
1437 CostKind, 0, nullptr, Operands,
1438 Shuffle);
1439
1440 if (Shuffle->isSingleSource())
1441 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
1442 CostKind, 0, nullptr, Operands,
1443 Shuffle);
1444
1445 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1446 return TargetTTI->getShuffleCost(
1447 TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1448 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
1449 Shuffle);
1450
1451 if (Shuffle->isSplice(SubIndex))
1452 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
1453 SubIndex, nullptr, Operands, Shuffle);
1454
1455 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
1456 CostKind, 0, nullptr, Operands, Shuffle);
1457 }
1458 case Instruction::ExtractElement: {
1459 auto *EEI = dyn_cast<ExtractElementInst>(U);
1460 if (!EEI)
1461 return TTI::TCC_Basic; // FIXME
1462 unsigned Idx = -1;
1463 if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
1464 if (CI->getValue().getActiveBits() <= 32)
1465 Idx = CI->getZExtValue();
1466 Type *DstTy = Operands[0]->getType();
1467 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1468 }
1469 }
1470
1471 // By default, just classify everything as 'basic', or as -1 to represent
1472 // that we don't know the throughput cost.
1474 }
1475
1477 auto *TargetTTI = static_cast<T *>(this);
1478 SmallVector<const Value *, 4> Ops(I->operand_values());
1479 InstructionCost Cost = TargetTTI->getInstructionCost(
1482 }
1483
/// Reports whether a tail call is supported for the call site \p CB.
/// This default implementation does not inspect \p CB; it casts `this` to the
/// derived type T and returns the result of T::supportsTailCalls().
1484 bool supportsTailCallFor(const CallBase *CB) const {
1485 return static_cast<const T *>(this)->supportsTailCalls();
1486 }
1487};
1488} // namespace llvm
1489
1490#endif
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
std::string Name
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:77
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1307
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1445
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1519
an instruction to allocate memory on the stack
Definition: Instructions.h:61
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:217
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:695
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
Definition: DataLayout.cpp:738
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:621
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:429
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
The core instruction combiner logic.
Definition: InstCombiner.h:47
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:42
The optimization diagnostic interface.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:70
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
An instruction for storing to memory.
Definition: Instructions.h:290
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:600
Class to represent struct types.
Definition: DerivedTypes.h:216
Multiway switch.
Provides information about what library functions are available for the current target.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
const DataLayout & getDataLayout() const
bool isLegalToVectorizeStore(StoreInst *SI) const
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
bool shouldTreatInstructionLikeSelect(const Instruction *I)
bool isLegalToVectorizeLoad(LoadInst *LI) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
std::optional< unsigned > getVScaleForTuning() const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
bool isLegalICmpImmediate(int64_t Imm) const
bool hasConditionalLoadStoreForType(Type *Ty) const
unsigned getRegUsageForType(Type *Ty) const
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const
bool isAlwaysUniform(const Value *V) const
bool isProfitableToHoist(Instruction *I) const
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
bool isTruncateFree(Type *Ty1, Type *Ty2) const
bool isStridedAccess(const SCEV *Ptr) const
InstructionCost getBranchMispredictPenalty() const
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned, VectorType *, std::optional< FastMathFlags > FMF, TTI::TargetCostKind) const
InstructionCost getFPOpCost(Type *Ty) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
std::optional< unsigned > getMaxVScale() const
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
bool isProfitableLSRChainElement(Instruction *I) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr) const
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
bool isNoopAddrSpaceCast(unsigned, unsigned) const
unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg)
TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &, OptimizationRemarkEmitter *) const
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
unsigned getAssumedAddrSpace(const Value *V) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
bool isLegalNTStore(Type *DataType, Align Alignment) const
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
unsigned adjustInliningThreshold(const CallBase *CB) const
BranchProbability getPredictableBranchThreshold() const
std::optional< unsigned > getMinPageSize() const
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const
const SCEVConstant * getConstantStrideStep(ScalarEvolution *SE, const SCEV *Ptr) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, const DataLayout &DL) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
bool isSourceOfDivergence(const Value *V) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
unsigned getMaxInterleaveFactor(ElementCount VF) const
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const
bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, const DataLayout &DL) const
bool hasDivRemOp(Type *DataType, bool IsSigned) const
InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, const SCEV *) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const
bool isLoweredToCall(const Function *F) const
bool hasBranchDivergence(const Function *F=nullptr) const
TargetTransformInfoImplBase(const DataLayout &DL)
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
const char * getRegisterClassName(unsigned ClassID) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, const Instruction *I) const
bool useColdCCForColdCall(Function &F) const
bool shouldExpandReduction(const IntrinsicInst *II) const
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool isLegalAddScalableImmediate(int64_t Imm) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)=default
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
bool supportsTailCallFor(const CallBase *CB) const
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
MemIndexedMode
The type of load/store indexing.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
CacheLevel
The possible cache levels.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:480
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
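Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ..., are the values from the original input ranges.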
Definition: STLExtras.h:2431
AddressSpace
Definition: NVPTXBaseInfo.h:21
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
constexpr int PoisonMaskElem
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
gep_type_iterator gep_type_begin(const User *GEP)
InstructionCost Cost
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead implements this with a splat/stepvector/cmp.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Flags describing the kind of vector reduction.
Parameters that control the generic loop unrolling transformation.