LLVM  6.0.0svn
AMDGPUTargetTransformInfo.h
Go to the documentation of this file.
1 //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file a TargetTransformInfo::Concept conforming object specific to the
12 /// AMDGPU target machine. It uses the target's detailed information to
13 /// provide more precise answers to certain TTI queries, while letting the
14 /// target independent and default TTI implementations handle the rest.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
19 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
20 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
32 
33 namespace llvm {
34 
35 class AMDGPUTargetLowering;
36 class Loop;
37 class ScalarEvolution;
38 class Type;
39 class Value;
40 
41 class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
43  using TTI = TargetTransformInfo;
44 
45  friend BaseT;
46 
47  const AMDGPUSubtarget *ST;
48  const AMDGPUTargetLowering *TLI;
49  bool IsGraphicsShader;
50 
51  const FeatureBitset InlineFeatureIgnoreList = {
52  // Codegen control options which don't matter.
53  AMDGPU::FeatureEnableLoadStoreOpt,
54  AMDGPU::FeatureEnableSIScheduler,
55  AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
56  AMDGPU::FeatureFlatForGlobal,
57  AMDGPU::FeaturePromoteAlloca,
58  AMDGPU::FeatureUnalignedBufferAccess,
59  AMDGPU::FeatureUnalignedScratchAccess,
60 
61  AMDGPU::FeatureAutoWaitcntBeforeBarrier,
62  AMDGPU::FeatureDebuggerEmitPrologue,
63  AMDGPU::FeatureDebuggerInsertNops,
64  AMDGPU::FeatureDebuggerReserveRegs,
65 
66  // Property of the kernel/environment which can't actually differ.
67  AMDGPU::FeatureSGPRInitBug,
68  AMDGPU::FeatureXNACK,
69  AMDGPU::FeatureTrapHandler,
70 
71  // Perf-tuning features
72  AMDGPU::FeatureFastFMAF32,
73  AMDGPU::HalfRate64Ops
74  };
75 
76  const AMDGPUSubtarget *getST() const { return ST; }
77  const AMDGPUTargetLowering *getTLI() const { return TLI; }
78 
79  static inline int getFullRateInstrCost() {
81  }
82 
83  static inline int getHalfRateInstrCost() {
85  }
86 
87  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
88  // should be 2 or 4.
89  static inline int getQuarterRateInstrCost() {
91  }
92 
93  // On some parts, normal fp64 operations are half rate, and others
94  // quarter. This also applies to some integer operations.
95  inline int get64BitInstrCost() const {
96  return ST->hasHalfRate64Ops() ?
97  getHalfRateInstrCost() : getQuarterRateInstrCost();
98  }
99 
100 public:
101  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
102  : BaseT(TM, F.getParent()->getDataLayout()),
103  ST(TM->getSubtargetImpl(F)),
104  TLI(ST->getTargetLowering()),
105  IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
106 
107  bool hasBranchDivergence() { return true; }
108 
111 
113  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
114  return TTI::PSK_FastHardware;
115  }
116 
117  unsigned getHardwareNumberOfRegisters(bool Vector) const;
118  unsigned getNumberOfRegisters(bool Vector) const;
119  unsigned getRegisterBitWidth(bool Vector) const;
120  unsigned getMinVectorRegisterBitWidth() const;
121  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
122 
123  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
124  unsigned Alignment,
125  unsigned AddrSpace) const;
126  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
127  unsigned Alignment,
128  unsigned AddrSpace) const;
129  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
130  unsigned Alignment,
131  unsigned AddrSpace) const;
132 
133  unsigned getMaxInterleaveFactor(unsigned VF);
134 
136  unsigned Opcode, Type *Ty,
142 
143  unsigned getCFInstrCost(unsigned Opcode);
144 
145  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
146  bool isSourceOfDivergence(const Value *V) const;
147  bool isAlwaysUniform(const Value *V) const;
148 
149  unsigned getFlatAddressSpace() const {
150  // Don't bother running InferAddressSpaces pass on graphics shaders which
151  // don't use flat addressing.
152  if (IsGraphicsShader)
153  return -1;
154  return ST->hasFlatAddressSpace() ?
156  }
157 
158  unsigned getVectorSplitCost() { return 0; }
159 
160  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
161  Type *SubTp);
162 
163  bool areInlineCompatible(const Function *Caller,
164  const Function *Callee) const;
165 
166  unsigned getInliningThresholdMultiplier() { return 9; }
167 };
168 
169 } // end namespace llvm
170 
171 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
unsigned getNumberOfRegisters(bool Vector) const
AMDGPU specific subclass of TargetSubtarget.
bool isAlwaysUniform(const Value *V) const
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
The main scalar evolution driver.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
F(f)
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:78
unsigned getHardwareNumberOfRegisters(bool Vector) const
unsigned getFlatAddressSpace() const
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
unsigned getMaxInterleaveFactor(unsigned VF)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
PopcntSupportKind
Flags indicating the kind of support for population count.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
amdgpu Simplify well known AMD library false Value * Callee
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Container class for subtarget features.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:421
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
unsigned getRegisterBitWidth(bool Vector) const
unsigned getMinVectorRegisterBitWidth() const
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
The AMDGPU TargetMachine interface definition for hw codegen targets.
OperandValueProperties
Additional properties of an operand's values.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
bool hasFlatAddressSpace() const
unsigned getCFInstrCost(unsigned Opcode)
bool isShader(CallingConv::ID cc)
AMDGPUAS getAMDGPUAS() const
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index)
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
Parameters that control the generic loop unrolling transformation.
bool hasHalfRate64Ops() const
unsigned FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:217
bool isSourceOfDivergence(const Value *V) const
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
LLVM Value Representation.
Definition: Value.h:73
static const Function * getParent(const Value *V)
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
const DataLayout & getDataLayout() const
OperandValueKind
Additional information about an operand's possible values.
This pass exposes codegen information to IR-level passes.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
ShuffleKind
The various kinds of shuffle patterns for vector queries.