LLVM  9.0.0svn
AMDGPUCallLowering.cpp
Go to the documentation of this file.
1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUCallLowering.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUISelLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "SIRegisterInfo.h"
26 
27 using namespace llvm;
28 
30  : CallLowering(&TLI) {
31 }
32 
34  const Value *Val,
35  ArrayRef<unsigned> VRegs) const {
36  // FIXME: Add support for non-void returns.
37  if (Val)
38  return false;
39 
40  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
41  return true;
42 }
43 
/// Build the address of a kernel argument located \p Offset bytes past the
/// virtual register returned by MRI.getLiveInVirtReg(KernArgSegmentPtr).
///
/// \param MIRBuilder builder used to emit the G_CONSTANT and G_GEP.
/// \param ParamTy    IR type of the parameter.
/// \param Offset     byte offset added to the base pointer.
/// \returns the freshly created generic virtual register holding the address.
///
/// NOTE(review): the declarations of MRI and PtrTy and the initializer of
/// KernArgSegmentPtr are not visible in this listing (lines dropped by the
/// page extraction) -- confirm against the real source file.
44 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
45  Type *ParamTy,
46  uint64_t Offset) const {
47 
48  MachineFunction &MF = MIRBuilder.getMF();
51  const Function &F = MF.getFunction();
52  const DataLayout &DL = F.getParent()->getDataLayout();
// The result register gets the low-level type derived from the pointer type.
54  LLT PtrType = getLLTForType(*PtrTy, DL);
55  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
56  unsigned KernArgSegmentPtr =
// Look up the virtual register recorded as live-in for that register.
58  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
59 
// Materialize the byte offset as a 64-bit scalar constant.
60  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
61  MIRBuilder.buildConstant(OffsetReg, Offset);
62 
// DstReg = G_GEP KernArgSegmentVReg, OffsetReg
63  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
64 
65  return DstReg;
66 }
67 
/// Emit a load of one parameter into \p DstReg.
///
/// The address is obtained from lowerParameterPtr(MIRBuilder, ParamTy, Offset)
/// and the load is described by a MachineMemOperand whose size is the store
/// size of \p ParamTy and whose alignment is \p Align.
///
/// \param MIRBuilder builder used to emit the address computation and G_LOAD.
/// \param ParamTy    IR type of the parameter being loaded.
/// \param Offset     byte offset forwarded to lowerParameterPtr.
/// \param Align      alignment recorded on the memory operand.
/// \param DstReg     virtual register that receives the loaded value.
///
/// NOTE(review): the declaration of PtrTy and the first arguments of the
/// memory-operand construction are not visible in this listing (lines dropped
/// by the page extraction) -- confirm against the real source file.
68 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
69  Type *ParamTy, uint64_t Offset,
70  unsigned Align,
71  unsigned DstReg) const {
72  MachineFunction &MF = MIRBuilder.getMF();
73  const Function &F = MF.getFunction();
74  const DataLayout &DL = F.getParent()->getDataLayout();
// The pointer info is built from an undef value of the pointer type.
76  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
// Size of the access in bytes, as reported by the data layout.
77  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
78  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
79 
80  MachineMemOperand *MMO =
84  TypeSize, Align);
85 
// DstReg = G_LOAD PtrReg, MMO
86  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
87 }
88 
90  const Function &F,
91  ArrayRef<unsigned> VRegs) const {
92  // AMDGPU_GS and AMDGPU_HS are not supported yet.
95  return false;
96 
97  MachineFunction &MF = MIRBuilder.getMF();
98  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
101  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
102  const DataLayout &DL = F.getParent()->getDataLayout();
103 
105  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
106 
107  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
108  if (Info->hasPrivateSegmentBuffer()) {
109  unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
110  MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
111  CCInfo.AllocateReg(PrivateSegmentBufferReg);
112  }
113 
114  if (Info->hasDispatchPtr()) {
115  unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
116  // FIXME: Need to add reg as live-in
117  CCInfo.AllocateReg(DispatchPtrReg);
118  }
119 
120  if (Info->hasQueuePtr()) {
121  unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
122  // FIXME: Need to add reg as live-in
123  CCInfo.AllocateReg(QueuePtrReg);
124  }
125 
126  if (Info->hasKernargSegmentPtr()) {
127  unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
129  unsigned VReg = MRI.createGenericVirtualRegister(P2);
130  MRI.addLiveIn(InputPtrReg, VReg);
131  MIRBuilder.getMBB().addLiveIn(InputPtrReg);
132  MIRBuilder.buildCopy(VReg, InputPtrReg);
133  CCInfo.AllocateReg(InputPtrReg);
134  }
135 
136  if (Info->hasDispatchID()) {
137  unsigned DispatchIDReg = Info->addDispatchID(*TRI);
138  // FIXME: Need to add reg as live-in
139  CCInfo.AllocateReg(DispatchIDReg);
140  }
141 
142  if (Info->hasFlatScratchInit()) {
143  unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
144  // FIXME: Need to add reg as live-in
145  CCInfo.AllocateReg(FlatScratchInitReg);
146  }
147 
148  // The infrastructure for normal calling convention lowering is essentially
149  // useless for kernels. We want to avoid any kind of legalization or argument
150  // splitting.
152  unsigned i = 0;
153  const unsigned KernArgBaseAlign = 16;
154  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
155  uint64_t ExplicitArgOffset = 0;
156 
157  // TODO: Align down to dword alignment and extract bits for extending loads.
158  for (auto &Arg : F.args()) {
159  Type *ArgTy = Arg.getType();
160  unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
161  if (AllocSize == 0)
162  continue;
163 
164  unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
165 
166  uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
167  ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
168 
169  unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
170  ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
171  lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
172  ++i;
173  }
174 
175  return true;
176  }
177 
178  unsigned NumArgs = F.arg_size();
179  Function::const_arg_iterator CurOrigArg = F.arg_begin();
180  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
181  unsigned PSInputNum = 0;
182  BitVector Skipped(NumArgs);
183  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
184  EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
185 
186  // We can only handle simple value types at the moment.
187  ISD::ArgFlagsTy Flags;
188  ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
189  setArgFlags(OrigArg, i + 1, DL, F);
190  Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
191 
193  !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
194  PSInputNum <= 15) {
195  if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
196  Skipped.set(i);
197  ++PSInputNum;
198  continue;
199  }
200 
201  Info->markPSInputAllocated(PSInputNum);
202  if (!CurOrigArg->use_empty())
203  Info->markPSInputEnabled(PSInputNum);
204 
205  ++PSInputNum;
206  }
207 
209  /*IsVarArg=*/false);
210 
211  if (ValEVT.isVector()) {
212  EVT ElemVT = ValEVT.getVectorElementType();
213  if (!ValEVT.isSimple())
214  return false;
215  MVT ValVT = ElemVT.getSimpleVT();
216  bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
217  OrigArg.Flags, CCInfo);
218  if (!Res)
219  return false;
220  } else {
221  MVT ValVT = ValEVT.getSimpleVT();
222  if (!ValEVT.isSimple())
223  return false;
224  bool Res =
225  AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
226 
227  // Fail if we don't know how to handle this type.
228  if (Res)
229  return false;
230  }
231  }
232 
234 
237  for (unsigned i = 0, OrigArgIdx = 0;
238  OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
239  if (Skipped.test(OrigArgIdx))
240  continue;
241  CCValAssign &VA = ArgLocs[i++];
242  MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
243  MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
244  MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
245  }
246  return true;
247  }
248 
249  return false;
250 }
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:176
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
A parsed version of the target data layout string, and methods for querying it. ...
Definition: DataLayout.h:110
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:397
Interface definition for SIRegisterInfo.
static LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1)
Build and insert Res = G_GEP Op0, Op1.
AMDGPU specific subclass of TargetSubtarget.
bool isPSInputAllocated(unsigned Index) const
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
This file describes how to lower LLVM calls to machine code calls.
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
bool test(unsigned Idx) const
Definition: BitVector.h:501
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
unsigned const TargetRegisterInfo * TRI
F(f)
void markPSInputEnabled(unsigned Index)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
A description of a memory reference used in the backend.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:369
Address space for constant memory (VTX2)
Definition: AMDGPU.h:258
unsigned addDispatchID(const SIRegisterInfo &TRI)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
MachineFunction & getMF()
Getter for the function we currently build.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
void setOrigAlign(unsigned A)
void markPSInputAllocated(unsigned Index)
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:188
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
Class to represent pointers.
Definition: DerivedTypes.h:498
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned const MachineRegisterInfo * MRI
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Helper class to build MachineInstr.
SI DAG Lowering interface definition.
unsigned addQueuePtr(const SIRegisterInfo &TRI)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
size_t arg_size() const
Definition: Function.h:697
arg_iterator arg_begin()
Definition: Function.h:670
The memory access is non-temporal.
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
Extended Value Type.
Definition: ValueTypes.h:33
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1414
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
This class contains a discriminated union of information about pointers in memory operands...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
CCState - This class holds information needed while lowering arguments and return values...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:264
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
Interface definition of the TargetLowering class that is common to all AMD GPUs.
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:839
CCValAssign - Represent assignment of one arg/retval to a location.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:729
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef< unsigned > VRegs) const override
This hook must be implemented to lower the incoming (formal) arguments, described by Args...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
This file declares the MachineIRBuilder class.
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:435
Provides AMDGPU specific target descriptions.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:150
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
The memory access always returns the same value (or traps).
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef< unsigned > VRegs) const override
This hook must be implemented to lower outgoing return values, described by Val, into the specified v...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:418
MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
unsigned getLocReg() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
bool use_empty() const
Definition: Value.h:322
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:200
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
iterator_range< arg_iterator > args()
Definition: Function.h:688