LLVM  9.0.0svn
AMDGPUTargetTransformInfo.h
Go to the documentation of this file.
1 //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file declares a TargetTransformInfo::Concept conforming object specific to the
11 /// AMDGPU target machine. It uses the target's detailed information to
12 /// provide more precise answers to certain TTI queries, while letting the
13 /// target independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
18 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
19 
20 #include "AMDGPU.h"
21 #include "AMDGPUSubtarget.h"
22 #include "AMDGPUTargetMachine.h"
24 #include "Utils/AMDGPUBaseInfo.h"
25 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/IR/Function.h"
31 #include <cassert>
32 
33 namespace llvm {
34 
35 class AMDGPUTargetLowering;
36 class Loop;
37 class ScalarEvolution;
38 class Type;
39 class Value;
40 
41 class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
43  using TTI = TargetTransformInfo;
44 
45  friend BaseT;
46 
47  Triple TargetTriple;
48 
49 public:
50  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
51  : BaseT(TM, F.getParent()->getDataLayout()),
52  TargetTriple(TM->getTargetTriple()) {}
53 
56 };
57 
58 class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
60  using TTI = TargetTransformInfo;
61 
62  friend BaseT;
63 
64  const GCNSubtarget *ST;
65  const AMDGPUTargetLowering *TLI;
66  AMDGPUTTIImpl CommonTTI;
67  bool IsGraphicsShader;
68 
69  const FeatureBitset InlineFeatureIgnoreList = {
70  // Codegen control options which don't matter.
71  AMDGPU::FeatureEnableLoadStoreOpt,
72  AMDGPU::FeatureEnableSIScheduler,
73  AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
74  AMDGPU::FeatureFlatForGlobal,
75  AMDGPU::FeaturePromoteAlloca,
76  AMDGPU::FeatureUnalignedBufferAccess,
77  AMDGPU::FeatureUnalignedScratchAccess,
78 
79  AMDGPU::FeatureAutoWaitcntBeforeBarrier,
80  AMDGPU::FeatureDebuggerEmitPrologue,
81  AMDGPU::FeatureDebuggerInsertNops,
82 
83  // Property of the kernel/environment which can't actually differ.
84  AMDGPU::FeatureSGPRInitBug,
85  AMDGPU::FeatureXNACK,
86  AMDGPU::FeatureTrapHandler,
87  AMDGPU::FeatureCodeObjectV3,
88 
89  // Perf-tuning features
90  AMDGPU::FeatureFastFMAF32,
91  AMDGPU::HalfRate64Ops
92  };
93 
94  const GCNSubtarget *getST() const { return ST; }
95  const AMDGPUTargetLowering *getTLI() const { return TLI; }
96 
97  static inline int getFullRateInstrCost() {
99  }
100 
101  static inline int getHalfRateInstrCost() {
103  }
104 
105  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
106  // should be 2 or 4.
107  static inline int getQuarterRateInstrCost() {
109  }
110 
111  // On some parts, normal fp64 operations are half rate, and others
112  // quarter. This also applies to some integer operations.
113  inline int get64BitInstrCost() const {
114  return ST->hasHalfRate64Ops() ?
115  getHalfRateInstrCost() : getQuarterRateInstrCost();
116  }
117 
118 public:
119  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
120  : BaseT(TM, F.getParent()->getDataLayout()),
121  ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
122  TLI(ST->getTargetLowering()),
123  CommonTTI(TM, F),
124  IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
125 
126  bool hasBranchDivergence() { return true; }
127 
130 
132  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
133  return TTI::PSK_FastHardware;
134  }
135 
136  unsigned getHardwareNumberOfRegisters(bool Vector) const;
137  unsigned getNumberOfRegisters(bool Vector) const;
138  unsigned getRegisterBitWidth(bool Vector) const;
139  unsigned getMinVectorRegisterBitWidth() const;
140  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
141  unsigned ChainSizeInBytes,
142  VectorType *VecTy) const;
143  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
144  unsigned ChainSizeInBytes,
145  VectorType *VecTy) const;
146  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
147 
148  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
149  unsigned Alignment,
150  unsigned AddrSpace) const;
151  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
152  unsigned Alignment,
153  unsigned AddrSpace) const;
154  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
155  unsigned Alignment,
156  unsigned AddrSpace) const;
157 
158  unsigned getMaxInterleaveFactor(unsigned VF);
159 
161 
163  unsigned Opcode, Type *Ty,
169 
170  unsigned getCFInstrCost(unsigned Opcode);
171 
172  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
173  bool isSourceOfDivergence(const Value *V) const;
174  bool isAlwaysUniform(const Value *V) const;
175 
176  unsigned getFlatAddressSpace() const {
177  // Don't bother running InferAddressSpaces pass on graphics shaders which
178  // don't use flat addressing.
179  if (IsGraphicsShader)
180  return -1;
181  return ST->hasFlatAddressSpace() ?
183  }
184 
185  unsigned getVectorSplitCost() { return 0; }
186 
187  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
188  Type *SubTp);
189 
190  bool areInlineCompatible(const Function *Caller,
191  const Function *Callee) const;
192 
193  unsigned getInliningThresholdMultiplier() { return 9; }
194 
195  int getArithmeticReductionCost(unsigned Opcode,
196  Type *Ty,
197  bool IsPairwise);
198  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
199  bool IsPairwiseForm,
200  bool IsUnsigned);
201 };
202 
203 class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
205  using TTI = TargetTransformInfo;
206 
207  friend BaseT;
208 
209  const R600Subtarget *ST;
210  const AMDGPUTargetLowering *TLI;
211  AMDGPUTTIImpl CommonTTI;
212 
213 public:
214  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
215  : BaseT(TM, F.getParent()->getDataLayout()),
216  ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
217  TLI(ST->getTargetLowering()),
218  CommonTTI(TM, F) {}
219 
220  const R600Subtarget *getST() const { return ST; }
221  const AMDGPUTargetLowering *getTLI() const { return TLI; }
222 
225  unsigned getHardwareNumberOfRegisters(bool Vec) const;
226  unsigned getNumberOfRegisters(bool Vec) const;
227  unsigned getRegisterBitWidth(bool Vector) const;
228  unsigned getMinVectorRegisterBitWidth() const;
229  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
230  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
231  unsigned AddrSpace) const;
232  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
233  unsigned Alignment,
234  unsigned AddrSpace) const;
235  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
236  unsigned Alignment,
237  unsigned AddrSpace) const;
238  unsigned getMaxInterleaveFactor(unsigned VF);
239  unsigned getCFInstrCost(unsigned Opcode);
240  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
241 };
242 
243 } // end namespace llvm
244 
245 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
Definition: BasicTTIImpl.h:567
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:48
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
The main scalar evolution driver.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
F(f)
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:77
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:565
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
unsigned getFlatAddressSpace() const
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
unsigned getRegisterBitWidth(bool Vector) const
Definition: BasicTTIImpl.h:502
bool hasHalfRate64Ops() const
PopcntSupportKind
Flags indicating the kind of support for population count.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
const R600Subtarget * getST() const
Container class for subtarget features.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, bool)
Try to calculate op costs for min/max reduction operations.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
const AMDGPUTargetLowering * getTLI() const
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
The AMDGPU TargetMachine interface definition for hw codegen targets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
OperandValueProperties
Additional properties of an operand's values.
bool hasFlatAddressSpace() const
unsigned getCFInstrCost(unsigned Opcode)
Definition: BasicTTIImpl.h:766
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Definition: BasicTTIImpl.h:614
unsigned getNumberOfRegisters(bool Vector)
Definition: BasicTTIImpl.h:500
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Address space for flat memory.
Definition: AMDGPU.h:254
Class to represent vector types.
Definition: DerivedTypes.h:424
amdgpu Simplify well known AMD library false FunctionCallee Callee
bool isShader(CallingConv::ID cc)
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
Provides AMDGPU specific target descriptions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
Parameters that control the generic loop unrolling transformation.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:811
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:208
aarch64 promote const
LLVM Value Representation.
Definition: Value.h:72
static const Function * getParent(const Value *V)
unsigned getInliningThresholdMultiplier()
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
const DataLayout & getDataLayout() const
OperandValueKind
Additional information about an operand's possible values.
This pass exposes codegen information to IR-level passes.
Information about a load/store intrinsic defined by the target.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
ShuffleKind
The various kinds of shuffle patterns for vector queries.