//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//
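//
// For orientation: target-independent passes do not use these classes
// directly. They reach them through the TargetTransformInfo analysis, roughly
// as in the sketch below (new pass manager; "MyPass" and its body are
// illustrative only, not part of LLVM).
//
//   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
//     const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//     if (TTI.hasBranchDivergence()) {
//       // On amdgcn this resolves to GCNTTIImpl below, which reports branch
//       // divergence, so divergence-aware transforms are worthwhile here.
//     }
//     return PreservedAnalyses::all();
//   }
//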

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no directly
    // exposed operations depend on it, so it can be safely inlined.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };
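
  // How an ignore list like this is typically consumed (a sketch only; the
  // actual comparison is implemented by areInlineCompatible(), declared below
  // and defined in AMDGPUTargetTransformInfo.cpp): the caller's and callee's
  // subtarget feature bits are compared after clearing the ignored bits, so
  // inlining is rejected only when the callee relies on a feature the caller
  // does not provide.
  //
  //   FeatureBitset CallerBits = /* caller subtarget features */;
  //   FeatureBitset CalleeBits = /* callee subtarget features */;
  //   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
  //   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
  //   bool Compatible = (RealCallerBits & RealCalleeBits) == RealCalleeBits;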

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
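
  // Illustrative example: on subtargets with HalfRate64Ops, get64BitInstrCost()
  // returns getHalfRateInstrCost() (2 * TCC_Basic); on other parts it returns
  // getQuarterRateInstrCost() (3 * TCC_Basic). These helpers feed the
  // getArithmeticInstrCost() override declared below when it prices 64-bit
  // operations.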

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
  }
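
  // For example (illustrative IR, not from this file): given a known flat
  // address space, InferAddressSpaces can rewrite a flat access whose pointer
  // provably comes from LDS,
  //
  //   %p = addrspacecast i32 addrspace(3)* %lds to i32*
  //   %v = load i32, i32* %p
  //
  // into a direct local-memory access:
  //
  //   %v = load i32, i32 addrspace(3)* %lds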

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H