LLVM  14.0.0git
R600TargetTransformInfo.cpp
Go to the documentation of this file.
1 //===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // R600 target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
18 #include "AMDGPU.h"
19 #include "AMDGPUTargetMachine.h"
20 #include "R600Subtarget.h"
21 
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "R600tti"
25 
27  : BaseT(TM, F.getParent()->getDataLayout()),
28  ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
29  TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
30 
32  return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
33 }
34 
35 unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
36  return getHardwareNumberOfRegisters(Vec);
37 }
38 
41  return TypeSize::getFixed(32);
42 }
43 
44 unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
45 
46 unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
47  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
48  AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
49  return 128;
50  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
51  AddrSpace == AMDGPUAS::REGION_ADDRESS)
52  return 64;
53  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
54  return 32;
55 
56  if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
57  AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
58  (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
59  AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
60  return 128;
61  llvm_unreachable("unhandled address space");
62 }
63 
64 bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
65  Align Alignment,
66  unsigned AddrSpace) const {
67  // We allow vectorization of flat stores, even though we may need to decompose
68  // them later if they may access private memory. We don't have enough context
69  // here, and legalization can handle it.
70  return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
71 }
72 
73 bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
74  Align Alignment,
75  unsigned AddrSpace) const {
76  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
77 }
78 
79 bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
80  Align Alignment,
81  unsigned AddrSpace) const {
82  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
83 }
84 
85 unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
86  // Disable unrolling if the loop is not vectorized.
87  // TODO: Enable this again.
88  if (VF == 1)
89  return 1;
90 
91  return 8;
92 }
93 
96  const Instruction *I) {
98  return Opcode == Instruction::PHI ? 0 : 1;
99 
100  // XXX - For some reason this isn't called for switch.
101  switch (Opcode) {
102  case Instruction::Br:
103  case Instruction::Ret:
104  return 10;
105  default:
106  return BaseT::getCFInstrCost(Opcode, CostKind, I);
107  }
108 }
109 
111  unsigned Index) {
112  switch (Opcode) {
113  case Instruction::ExtractElement:
114  case Instruction::InsertElement: {
115  unsigned EltSize =
116  DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
117  if (EltSize < 32) {
118  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
119  }
120 
121  // Extracts are just reads of a subregister, so are free. Inserts are
122  // considered free because we don't want to have any cost for scalarizing
123  // operations, and we don't have to copy into a different register class.
124 
125  // Dynamic indexing isn't free and is best avoided.
126  return Index == ~0u ? 2 : 0;
127  }
128  default:
129  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
130  }
131 }
132 
136  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
137 }
138 
141  CommonTTI.getPeelingPreferences(L, SE, PP);
142 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::R600TTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: R600TargetTransformInfo.cpp:94
llvm::BasicTTIImplBase< R600TTIImpl >::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:38
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:664
llvm::BasicTTIImplBase< R600TTIImpl >::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1055
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::R600TTIImpl::getHardwareNumberOfRegisters
unsigned getHardwareNumberOfRegisters(bool Vec) const
Definition: R600TargetTransformInfo.cpp:31
R600TargetTransformInfo.h
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::R600TTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index)
Definition: R600TargetTransformInfo.cpp:110
llvm::R600TTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: R600TargetTransformInfo.cpp:139
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::AMDGPUAS::PARAM_D_ADDRESS
@ PARAM_D_ADDRESS
Address space for direct addressable parameter memory (CONST0).
Definition: AMDGPU.h:371
llvm::R600TTIImpl::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: R600TargetTransformInfo.cpp:79
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::AMDGPUTargetMachine
Definition: AMDGPUTargetMachine.h:29
llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition: AMDGPU.h:396
llvm::BasicTTIImplBase< R600TTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1108
llvm::R600TTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: R600TargetTransformInfo.cpp:85
llvm::R600TTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: R600TargetTransformInfo.cpp:133
llvm::Instruction
Definition: Instruction.h:45
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AMDGPUTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: AMDGPUTargetTransformInfo.cpp:262
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
llvm::R600Subtarget
Definition: R600Subtarget.h:35
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::R600TTIImpl::isLegalToVectorizeMemChain
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: R600TargetTransformInfo.cpp:64
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::BasicTTIImplBase< R600TTIImpl >
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
llvm::R600TTIImpl::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: R600TargetTransformInfo.cpp:73
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition: AMDGPU.h:373
llvm::AMDGPUTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: AMDGPUTargetTransformInfo.cpp:104
AMDGPU.h
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:890
llvm::R600TTIImpl::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: R600TargetTransformInfo.cpp:46
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::TypeSize
Definition: TypeSize.h:417
llvm::R600TTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const
Definition: R600TargetTransformInfo.cpp:40
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:381
R600Subtarget.h
llvm::R600TTIImpl::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: R600TargetTransformInfo.cpp:44
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:907
llvm::R600TTIImpl::R600TTIImpl
R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
Definition: R600TargetTransformInfo.cpp:26
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::R600TTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(bool Vec) const
Definition: R600TargetTransformInfo.cpp:35
AMDGPUTargetMachine.h