LLVM 23.0.0git
NVPTXTargetTransformInfo.h
Go to the documentation of this file.
1//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
10/// NVPTX target machine. It uses the target's detailed information to
11/// provide more precise answers to certain TTI queries, while letting the
12/// target independent and default TTI implementations handle the rest.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
17#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
18
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>
26
27namespace llvm {
28
29class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
31 typedef TargetTransformInfo TTI;
32 friend BaseT;
33
34 const NVPTXSubtarget *ST;
35 const NVPTXTargetLowering *TLI;
36
37 const NVPTXSubtarget *getST() const { return ST; };
38 const NVPTXTargetLowering *getTLI() const { return TLI; };
39
40 /// \returns true if the result of the value could potentially be
41 /// different across threads in a warp.
42 bool isSourceOfDivergence(const Value *V) const;
43
44public:
45 explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
46 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
47 TLI(ST->getTargetLowering()) {}
48
49 bool hasBranchDivergence(const Function *F = nullptr) const override {
50 return true;
51 }
52
53 unsigned getFlatAddressSpace() const override {
54 return AddressSpace::ADDRESS_SPACE_GENERIC;
55 }
56
57 bool
59 return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
60 AS != AddressSpace::ADDRESS_SPACE_LOCAL &&
61 AS != AddressSpace::ADDRESS_SPACE_ENTRY_PARAM;
62 }
63
64 std::optional<Instruction *>
66
67 // Loads and stores can be vectorized if the alignment is at least as big as
68 // the load/store we want to vectorize.
69 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
70 unsigned AddrSpace) const override {
71 return Alignment >= ChainSizeInBytes;
72 }
73 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
74 unsigned AddrSpace) const override {
75 return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
76 }
77
78 // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
79 // We conservatively return 1 here which is just enough to enable the
80 // vectorizers but disables heuristics based on the number of registers.
81 // FIXME: Return a more reasonable number, while keeping an eye on
82 // LoopVectorizer's unrolling heuristics.
83 unsigned getNumberOfRegisters(unsigned ClassID) const override { return 1; }
84
85 // Only <2 x half> should be vectorized, so always return 32 for the vector
86 // register size.
91 unsigned getMinVectorRegisterBitWidth() const override { return 32; }
92
93 bool shouldExpandReduction(const IntrinsicInst *II) const override {
94 // Turn off ExpandReductions pass for NVPTX, which doesn't have advanced
95 // swizzling operations. Our backend/Selection DAG can expand these
96 // reductions with less movs.
97 return false;
98 }
99
100 // We don't want to prevent inlining because of target-cpu and -features
101 // attributes that were added to newer versions of LLVM/Clang: There are
102 // no incompatible functions in PTX, ptxas will throw errors in such cases.
103 bool areInlineCompatible(const Function *Caller,
104 const Function *Callee) const override {
105 return true;
106 }
107
108 // Increase the inlining cost threshold by a factor of 11, reflecting that
109 // calls are particularly expensive in NVPTX.
110 unsigned getInliningThresholdMultiplier() const override { return 11; }
111
114 TTI::TargetCostKind CostKind) const override;
115
117 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
121 const Instruction *CxtI = nullptr) const override;
122
124 getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts,
125 bool Insert, bool Extract,
127 bool ForPoisonSrc = true, ArrayRef<Value *> VL = {},
129 TTI::VectorInstrContext::None) const override {
130 if (!InTy->getElementCount().isFixed())
132
133 auto VT = getTLI()->getValueType(DL, InTy);
134 auto NumElements = InTy->getElementCount().getFixedValue();
136 if (Insert && !VL.empty()) {
137 bool AllConstant = all_of(seq(NumElements), [&](int Idx) {
138 return !DemandedElts[Idx] || isa<Constant>(VL[Idx]);
139 });
140 if (AllConstant) {
141 Cost += TTI::TCC_Free;
142 Insert = false;
143 }
144 }
145 if (Insert && NVPTX::isPackedVectorTy(VT) && VT.is32BitVector()) {
146 // Can be built in a single 32-bit mov (64-bit regs are emulated in SASS
147 // with 2x 32-bit regs)
148 Cost += 1;
149 Insert = false;
150 }
151 if (Insert && VT == MVT::v4i8) {
152 InstructionCost Cost = 3; // 3 x PRMT
153 for (auto Idx : seq(NumElements))
154 if (DemandedElts[Idx])
155 Cost += 1; // zext operand to i32
156 Insert = false;
157 }
158 return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert,
159 Extract, CostKind,
160 ForPoisonSrc, VL);
161 }
162
163 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
165 OptimizationRemarkEmitter *ORE) const override;
166
167 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
168 TTI::PeelingPreferences &PP) const override;
169
170 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override {
171 // Volatile loads/stores are only supported for shared and global address
172 // spaces, or for generic AS that maps to them.
173 if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
174 AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
175 AddrSpace == llvm::ADDRESS_SPACE_SHARED))
176 return false;
177
178 switch(I->getOpcode()){
179 default:
180 return false;
181 case Instruction::Load:
182 case Instruction::Store:
183 return true;
184 }
185 }
186
188 unsigned DstAS) const override {
189 if (SrcAS != llvm::ADDRESS_SPACE_GENERIC)
190 return BaseT::getAddrSpaceCastPreservedPtrMask(SrcAS, DstAS);
191 if (DstAS != llvm::ADDRESS_SPACE_GLOBAL &&
193 return BaseT::getAddrSpaceCastPreservedPtrMask(SrcAS, DstAS);
194
195 // Address change within 4K size does not change the original address space
196 // and is safe to perform address cast form SrcAS to DstAS.
197 APInt PtrMask(DL.getPointerSizeInBits(llvm::ADDRESS_SPACE_GENERIC), 0xfff);
198 return PtrMask;
199 }
200
202 Intrinsic::ID IID) const override;
203
204 bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddrSpace,
205 TTI::MaskKind MaskKind) const override;
206
207 bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddrSpace,
208 TTI::MaskKind MaskKind) const override;
209
210 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;
211
213 Value *NewV) const override;
214 unsigned getAssumedAddrSpace(const Value *V) const override;
215
217 const Function &F,
218 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
219
220 bool shouldBuildRelLookupTables() const override {
221 // Self-referential globals are not supported.
222 return false;
223 }
224
226};
227
228} // end namespace llvm
229
230#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
unsigned getNumberOfRegisters(unsigned ClassID) const override
InstructionUniformity getInstructionUniformity(const Value *V) const override
unsigned getFlatAddressSpace() const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddrSpace, TTI::MaskKind MaskKind) const override
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Estimate the overhead of scalarizing an instruction.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
bool shouldBuildRelLookupTables() const override
unsigned getInliningThresholdMultiplier() const override
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
APInt getAddrSpaceCastPreservedPtrMask(unsigned SrcAS, unsigned DstAS) const override
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override
unsigned getAssumedAddrSpace(const Value *V) const override
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddrSpace, TTI::MaskKind MaskKind) const override
unsigned getMinVectorRegisterBitWidth() const override
bool hasBranchDivergence(const Function *F=nullptr) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const DataLayout & getDataLayout() const
virtual APInt getAddrSpaceCastPreservedPtrMask(unsigned SrcAS, unsigned DstAS) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
@ None
The insert/extract is not used with a load/store.
MaskKind
Some targets only support masked load/store with a constant mask.
TargetCostKind
The kind of cost model.
@ TCC_Free
Expected to fold away in lowering.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isPackedVectorTy(EVT VT)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
InstructionCost Cost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
Definition Uniformity.h:18
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Parameters that control the generic loop unrolling transformation.