LLVM  8.0.0svn
AMDGPUTargetTransformInfo.h
Go to the documentation of this file.
1 //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file describes a TargetTransformInfo::Concept conforming object specific to the
12 /// AMDGPU target machine. It uses the target's detailed information to
13 /// provide more precise answers to certain TTI queries, while letting the
14 /// target independent and default TTI implementations handle the rest.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
19 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
20 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
33 
34 namespace llvm {
35 
36 class AMDGPUTargetLowering;
37 class Loop;
38 class ScalarEvolution;
39 class Type;
40 class Value;
41 
42 class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
44  using TTI = TargetTransformInfo;
45 
46  friend BaseT;
47 
48  Triple TargetTriple;
49 
50 public:
51  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
52  : BaseT(TM, F.getParent()->getDataLayout()),
53  TargetTriple(TM->getTargetTriple()) {}
54 
57 };
58 
59 class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
61  using TTI = TargetTransformInfo;
62 
63  friend BaseT;
64 
65  const GCNSubtarget *ST;
66  const AMDGPUTargetLowering *TLI;
67  AMDGPUTTIImpl CommonTTI;
68  bool IsGraphicsShader;
69 
70  const FeatureBitset InlineFeatureIgnoreList = {
71  // Codegen control options which don't matter.
72  AMDGPU::FeatureEnableLoadStoreOpt,
73  AMDGPU::FeatureEnableSIScheduler,
74  AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
75  AMDGPU::FeatureFlatForGlobal,
76  AMDGPU::FeaturePromoteAlloca,
77  AMDGPU::FeatureUnalignedBufferAccess,
78  AMDGPU::FeatureUnalignedScratchAccess,
79 
80  AMDGPU::FeatureAutoWaitcntBeforeBarrier,
81  AMDGPU::FeatureDebuggerEmitPrologue,
82  AMDGPU::FeatureDebuggerInsertNops,
83 
84  // Property of the kernel/environment which can't actually differ.
85  AMDGPU::FeatureSGPRInitBug,
86  AMDGPU::FeatureXNACK,
87  AMDGPU::FeatureTrapHandler,
88 
89  // Perf-tuning features
90  AMDGPU::FeatureFastFMAF32,
91  AMDGPU::HalfRate64Ops
92  };
93 
94  const GCNSubtarget *getST() const { return ST; }
95  const AMDGPUTargetLowering *getTLI() const { return TLI; }
96 
97  static inline int getFullRateInstrCost() {
99  }
100 
101  static inline int getHalfRateInstrCost() {
103  }
104 
105  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
106  // should be 2 or 4.
107  static inline int getQuarterRateInstrCost() {
109  }
110 
111  // On some parts, normal fp64 operations are half rate, and others
112  // quarter. This also applies to some integer operations.
113  inline int get64BitInstrCost() const {
114  return ST->hasHalfRate64Ops() ?
115  getHalfRateInstrCost() : getQuarterRateInstrCost();
116  }
117 
118 public:
119  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
120  : BaseT(TM, F.getParent()->getDataLayout()),
121  ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
122  TLI(ST->getTargetLowering()),
123  CommonTTI(TM, F),
124  IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
125 
126  bool hasBranchDivergence() { return true; }
127 
130 
132  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
133  return TTI::PSK_FastHardware;
134  }
135 
136  unsigned getHardwareNumberOfRegisters(bool Vector) const;
137  unsigned getNumberOfRegisters(bool Vector) const;
138  unsigned getRegisterBitWidth(bool Vector) const;
139  unsigned getMinVectorRegisterBitWidth() const;
140  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
141  unsigned ChainSizeInBytes,
142  VectorType *VecTy) const;
143  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
144  unsigned ChainSizeInBytes,
145  VectorType *VecTy) const;
146  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
147 
148  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
149  unsigned Alignment,
150  unsigned AddrSpace) const;
151  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
152  unsigned Alignment,
153  unsigned AddrSpace) const;
154  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
155  unsigned Alignment,
156  unsigned AddrSpace) const;
157 
158  unsigned getMaxInterleaveFactor(unsigned VF);
159 
160  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
161 
163  unsigned Opcode, Type *Ty,
169 
170  unsigned getCFInstrCost(unsigned Opcode);
171 
172  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
173  bool isSourceOfDivergence(const Value *V) const;
174  bool isAlwaysUniform(const Value *V) const;
175 
176  unsigned getFlatAddressSpace() const {
177  // Don't bother running InferAddressSpaces pass on graphics shaders which
178  // don't use flat addressing.
179  if (IsGraphicsShader)
180  return -1;
181  return ST->hasFlatAddressSpace() ?
183  }
184 
185  unsigned getVectorSplitCost() { return 0; }
186 
187  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
188  Type *SubTp);
189 
190  bool areInlineCompatible(const Function *Caller,
191  const Function *Callee) const;
192 
193  unsigned getInliningThresholdMultiplier() { return 9; }
194 
195  int getArithmeticReductionCost(unsigned Opcode,
196  Type *Ty,
197  bool IsPairwise);
198  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
199  bool IsPairwiseForm,
200  bool IsUnsigned);
201 };
202 
203 class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
205  using TTI = TargetTransformInfo;
206 
207  friend BaseT;
208 
209  const R600Subtarget *ST;
210  const AMDGPUTargetLowering *TLI;
211  AMDGPUTTIImpl CommonTTI;
212 
213 public:
214  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
215  : BaseT(TM, F.getParent()->getDataLayout()),
216  ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
217  TLI(ST->getTargetLowering()),
218  CommonTTI(TM, F) {}
219 
220  const R600Subtarget *getST() const { return ST; }
221  const AMDGPUTargetLowering *getTLI() const { return TLI; }
222 
225  unsigned getHardwareNumberOfRegisters(bool Vec) const;
226  unsigned getNumberOfRegisters(bool Vec) const;
227  unsigned getRegisterBitWidth(bool Vector) const;
228  unsigned getMinVectorRegisterBitWidth() const;
229  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
230  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
231  unsigned AddrSpace) const;
232  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
233  unsigned Alignment,
234  unsigned AddrSpace) const;
235  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
236  unsigned Alignment,
237  unsigned AddrSpace) const;
238  unsigned getMaxInterleaveFactor(unsigned VF);
239  unsigned getCFInstrCost(unsigned Opcode);
240  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
241 };
242 
243 } // end namespace llvm
244 
245 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
Address space for flat memory.
Definition: AMDGPU.h:233
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
Definition: BasicTTIImpl.h:507
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:49
AMDGPU specific subclass of TargetSubtarget.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
The main scalar evolution driver.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
F(f)
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:78
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:505
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
unsigned getFlatAddressSpace() const
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
unsigned getRegisterBitWidth(bool Vector) const
Definition: BasicTTIImpl.h:442
bool hasHalfRate64Ops() const
PopcntSupportKind
Flags indicating the kind of support for population count.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
amdgpu Simplify well known AMD library false Value * Callee
const R600Subtarget * getST() const
Container class for subtarget features.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, bool)
Try to calculate op costs for min/max reduction operations.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
const AMDGPUTargetLowering * getTLI() const
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
The AMDGPU TargetMachine interface definition for hw codegen targets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
OperandValueProperties
Additional properties of an operand's values.
bool hasFlatAddressSpace() const
unsigned getCFInstrCost(unsigned Opcode)
Definition: BasicTTIImpl.h:700
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Definition: BasicTTIImpl.h:554
unsigned getNumberOfRegisters(bool Vector)
Definition: BasicTTIImpl.h:440
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Class to represent vector types.
Definition: DerivedTypes.h:393
bool isShader(CallingConv::ID cc)
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
Provides AMDGPU specific target descriptions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
Parameters that control the generic loop unrolling transformation.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:745
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:148
aarch64 promote const
LLVM Value Representation.
Definition: Value.h:73
static const Function * getParent(const Value *V)
unsigned getInliningThresholdMultiplier()
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
const DataLayout & getDataLayout() const
OperandValueKind
Additional information about an operand's possible values.
This pass exposes codegen information to IR-level passes.
Information about a load/store intrinsic defined by the target.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
ShuffleKind
The various kinds of shuffle patterns for vector queries.