//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
    AMDGPU::FeatureDebuggerEmitPrologue,
    AMDGPU::FeatureDebuggerInsertNops,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };
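
  // A sketch of how this mask is intended to be consumed (the definition of
  // areInlineCompatible(), declared below, lives in
  // AMDGPUTargetTransformInfo.cpp):
  //
  //   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
  //   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
  //   return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
  //
  // That is, features on the ignore list never block inlining a callee into
  // a caller that was compiled with different codegen options.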

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
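
  // Illustrative arithmetic, not additional API: with TCC_Basic == 1, a
  // 64-bit operation is costed at 2 on subtargets with HalfRate64Ops and at
  // 3 otherwise, versus 1 for a full-rate 32-bit operation. The mapping from
  // a particular opcode to one of these rates is done in
  // getArithmeticInstrCost() in AMDGPUTargetTransformInfo.cpp.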

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

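  // Note: the return type is unsigned, so the -1 returned below for graphics
  // shaders becomes an all-ones sentinel meaning "no flat address space";
  // callers such as the InferAddressSpaces pass then have nothing to rewrite.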
  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
  }

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

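  // Calls are particularly expensive on AMDGPU, so bias the generic inlining
  // threshold upward; the factor of 9 is a tuning choice.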
  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H