LLVM  9.0.0svn
AMDGPUCallLowering.cpp
Go to the documentation of this file.
1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUCallLowering.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUISelLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "SIRegisterInfo.h"
23 #include "llvm/CodeGen/Analysis.h"
28 
29 using namespace llvm;
30 
31 namespace {
32 
/// ValueHandler that attaches outgoing values to a machine instruction under
/// construction. Each register-assigned value is copied into its physical
/// register and that register is added as a use operand of the instruction
/// held in MIB. Values that would need a stack location are not supported.
///
/// NOTE(review): this listing is an extract with gaps. The declaration of the
/// MIB member (listing line 38) and of the `Info` parameter of assignArg
/// (listing line 58) are not visible here.
33 struct OutgoingArgHandler : public CallLowering::ValueHandler {
  // Forwards the builder, register info and assignment function to the base
  // handler and stores the instruction builder that receives the operands.
34  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
35  MachineInstrBuilder MIB, CCAssignFn *AssignFn)
36  : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
37 
39 
  // Stack-passed values are not implemented: reaching this is a hard error.
40  unsigned getStackAddress(uint64_t Size, int64_t Offset,
41  MachinePointerInfo &MPO) override {
42  llvm_unreachable("not implemented");
43  }
44 
  // Stores to a stack address are likewise not implemented.
45  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
46  MachinePointerInfo &MPO, CCValAssign &VA) override {
47  llvm_unreachable("not implemented");
48  }
49 
  // Records PhysReg as a use operand on MIB, then emits PhysReg = COPY ValVReg.
50  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
51  CCValAssign &VA) override {
52  MIB.addUse(PhysReg);
53  MIRBuilder.buildCopy(PhysReg, ValVReg);
54  }
55 
  // Delegates the location decision to AssignFn, passing Info.Flags as the
  // argument flags, and returns AssignFn's result unchanged.
56  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
57  CCValAssign::LocInfo LocInfo,
59  CCState &State) override {
60  return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
61  }
62 };
63 
64 }
65 
// Constructor: hands the address of TLI to the CallLowering base class and
// has an empty body.
// NOTE(review): the signature line (listing line 66) is missing from this
// extract; only the initializer list and body are visible.
67  : CallLowering(&TLI) {
68 }
69 
// lowerReturn (tail of the signature; the first signature line, listing line
// 70, is missing from this extract).
//
// Emits the return sequence for the current function.
//   - Val == nullptr: emits S_ENDPGM with immediate 0 and reports success.
//   - otherwise: splits the return type into value types, assigns each piece
//     through OutgoingArgHandler, and inserts SI_RETURN_TO_EPILOG carrying
//     the assigned registers.
// Returns false when lowering is declined or assignment fails, true otherwise.
//
// NOTE(review): the declarations of MRI, MFI and Offsets (listing lines
// 75-76, 94) and the condition guarding the early `return false` at listing
// line 89 are not visible here.
71  const Value *Val,
72  ArrayRef<unsigned> VRegs) const {
73 
74  MachineFunction &MF = MIRBuilder.getMF();
  // Record on the function info whether this function returns void.
77  MFI->setIfReturnsVoid(!Val);
78 
  // Void return: just end the program.
79  if (!Val) {
80  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
81  return true;
82  }
83 
84  unsigned VReg = VRegs[0];
85 
86  const Function &F = MF.getFunction();
87  auto &DL = F.getParent()->getDataLayout();
  // NOTE(review): the `if` that guards this bail-out is missing from the extract.
89  return false;
90 
91 
92  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
93  SmallVector<EVT, 4> SplitVTs;
  // Describe the whole return value, then break its type into value types
  // (SplitVTs) with their byte offsets (Offsets).
95  ArgInfo OrigArg{VReg, Val->getType()};
96  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
97  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
98 
99  SmallVector<ArgInfo, 8> SplitArgs;
100  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
  // One ArgInfo per split piece, all sharing the original flags.
  // NOTE(review): indexes VRegs[i] up to Offsets.size(); assumes VRegs has at
  // least that many entries -- TODO confirm against the caller.
101  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
102  Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
103  SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
104  }
  // Build the return instruction without inserting it, so the copies emitted
  // by the handler land before it; insert it only after assignment succeeds.
105  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
106  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
107  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
108  return false;
109  MIRBuilder.insertInstr(RetInstr);
110 
111  return true;
112 }
113 
/// Builds a pointer to a kernel parameter: emits a 64-bit G_CONSTANT holding
/// \p Offset and a G_GEP that adds it to the live-in virtual register of the
/// kernarg segment pointer.
///
/// \param MIRBuilder builder used to emit the instructions.
/// \param ParamTy    type of the parameter. It is not referenced in the lines
///                   visible here -- presumably it feeds the missing PtrTy
///                   definition; TODO confirm.
/// \param Offset     constant added to the kernarg segment pointer.
/// \returns the new generic virtual register holding the computed pointer.
///
/// NOTE(review): this listing has gaps. The definitions of MRI and PtrTy
/// (listing lines 119-120, 123) and the initializer of KernArgSegmentPtr
/// (listing line 127) are not visible.
114 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
115  Type *ParamTy,
116  uint64_t Offset) const {
117 
118  MachineFunction &MF = MIRBuilder.getMF();
121  const Function &F = MF.getFunction();
122  const DataLayout &DL = F.getParent()->getDataLayout();
  // The result register takes the low-level type derived from PtrTy.
124  LLT PtrType = getLLTForType(*PtrTy, DL);
125  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
126  unsigned KernArgSegmentPtr =
128  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
129 
  // Materialize the offset as a 64-bit scalar constant.
130  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
131  MIRBuilder.buildConstant(OffsetReg, Offset);
132 
  // DstReg = G_GEP KernArgSegmentVReg, OffsetReg
133  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
134 
135  return DstReg;
136 }
137 
/// Loads one kernel parameter into \p DstReg: computes its address with
/// lowerParameterPtr and emits a G_LOAD from it.
///
/// \param MIRBuilder builder used to emit the instructions.
/// \param ParamTy    type of the parameter; its store size is used as the
///                   size of the memory access.
/// \param Offset     offset of the parameter, forwarded to lowerParameterPtr.
/// \param Align      alignment passed to the memory operand.
/// \param DstReg     register that receives the loaded value.
///
/// NOTE(review): this listing has gaps. The definition of PtrTy (listing line
/// 145) and the call that creates the memory operand, including its flags
/// (listing lines 151-153), are not visible.
138 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
139  Type *ParamTy, uint64_t Offset,
140  unsigned Align,
141  unsigned DstReg) const {
142  MachineFunction &MF = MIRBuilder.getMF();
143  const Function &F = MF.getFunction();
144  const DataLayout &DL = F.getParent()->getDataLayout();
  // The pointer info is built around an undef value of type PtrTy.
146  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
147  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
148  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
149 
150  MachineMemOperand *MMO =
154  TypeSize, Align);
155 
  // DstReg = G_LOAD PtrReg, described by MMO.
156  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
157 }
158 
// lowerFormalArguments (tail of the signature; the first signature line,
// listing line 159, is missing from this extract).
//
// Lowers the incoming arguments of F into the virtual registers in VRegs.
// Visible structure:
//   1. Reserve the preloaded registers that the function info says are
//      present (private segment buffer, dispatch ptr, queue ptr, kernarg
//      segment ptr, dispatch id, flat scratch init).
//   2. A region that loads every argument from the kernarg segment and
//      returns true.
//   3. A loop that runs the calling-convention assignment function on each
//      argument, followed by a region that copies the assigned physical
//      registers into VRegs and returns true.
//   4. Otherwise returns false.
//
// NOTE(review): the conditions that open regions 2 and 3, the first early
// return, and the declarations of MRI, Info, ArgLocs, P2, AssignFn and Arg
// (listing lines 163-164, 169-170, 174, 198, 221, 262, 278, 303, 305-306)
// are not visible in this extract.
160  const Function &F,
161  ArrayRef<unsigned> VRegs) const {
162  // AMDGPU_GS and AMDGPU_HS are not supported yet.
165  return false;
166 
167  MachineFunction &MF = MIRBuilder.getMF();
168  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
171  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
172  const DataLayout &DL = F.getParent()->getDataLayout();
173 
175  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
176 
177  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
178  if (Info->hasPrivateSegmentBuffer()) {
179  unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
180  MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
181  CCInfo.AllocateReg(PrivateSegmentBufferReg);
182  }
183 
184  if (Info->hasDispatchPtr()) {
185  unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
186  // FIXME: Need to add reg as live-in
187  CCInfo.AllocateReg(DispatchPtrReg);
188  }
189 
190  if (Info->hasQueuePtr()) {
191  unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
192  // FIXME: Need to add reg as live-in
193  CCInfo.AllocateReg(QueuePtrReg);
194  }
195 
  // The kernarg segment pointer is the only preloaded input that is also
  // made live-in to the block and copied into a fresh virtual register;
  // lowerParameterPtr later looks that register up via getLiveInVirtReg.
196  if (Info->hasKernargSegmentPtr()) {
197  unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
199  unsigned VReg = MRI.createGenericVirtualRegister(P2);
200  MRI.addLiveIn(InputPtrReg, VReg);
201  MIRBuilder.getMBB().addLiveIn(InputPtrReg);
202  MIRBuilder.buildCopy(VReg, InputPtrReg);
203  CCInfo.AllocateReg(InputPtrReg);
204  }
205 
206  if (Info->hasDispatchID()) {
207  unsigned DispatchIDReg = Info->addDispatchID(*TRI);
208  // FIXME: Need to add reg as live-in
209  CCInfo.AllocateReg(DispatchIDReg);
210  }
211 
212  if (Info->hasFlatScratchInit()) {
213  unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
214  // FIXME: Need to add reg as live-in
215  CCInfo.AllocateReg(FlatScratchInitReg);
216  }
217 
218  // The infrastructure for normal calling convention lowering is essentially
219  // useless for kernels. We want to avoid any kind of legalization or argument
220  // splitting.
  // NOTE(review): the `if` that opens this region (listing line 221) is
  // missing from the extract; the region ends at the `}` on listing line 246.
222  unsigned i = 0;
223  const unsigned KernArgBaseAlign = 16;
224  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
225  uint64_t ExplicitArgOffset = 0;
226 
227  // TODO: Align down to dword alignment and extract bits for extending loads.
228  for (auto &Arg : F.args()) {
229  Type *ArgTy = Arg.getType();
230  unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
  // Zero-sized arguments are skipped and do not consume a VRegs slot.
231  if (AllocSize == 0)
232  continue;
233 
234  unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
235 
  // ArgOffset is where this argument lives (explicit offset rounded up to
  // the ABI alignment, plus BaseOffset); ExplicitArgOffset then advances
  // past the argument.
236  uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
237  ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
238 
  // NOTE(review): Align is derived from ArgOffset before the re-alignment on
  // the next line, which recomputes the same ABI alignment as ABIAlign --
  // confirm this ordering is intended.
239  unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
240  ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
241  lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
242  ++i;
243  }
244 
245  return true;
246  }
247 
248  unsigned NumArgs = F.arg_size();
249  Function::const_arg_iterator CurOrigArg = F.arg_begin();
250  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
251  unsigned PSInputNum = 0;
  // Skipped marks the original-argument indices that received no location.
252  BitVector Skipped(NumArgs);
253  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
254  EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
255 
256  // We can only handle simple value types at the moment.
  // NOTE(review): Flags is written below but not read anywhere in the
  // visible lines; the assignment calls use OrigArg.Flags instead.
257  ISD::ArgFlagsTy Flags;
258  ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
259  setArgFlags(OrigArg, i + 1, DL, F);
260  Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
261 
  // NOTE(review): the first line of this condition (listing line 262) is
  // missing. Within the branch, an unused argument whose PS input slot is
  // not already allocated is skipped; otherwise the slot is marked
  // allocated, and enabled too when the argument has uses.
263  !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
264  PSInputNum <= 15) {
265  if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
266  Skipped.set(i);
267  ++PSInputNum;
268  continue;
269  }
270 
271  Info->markPSInputAllocated(PSInputNum);
272  if (!CurOrigArg->use_empty())
273  Info->markPSInputEnabled(PSInputNum);
274 
275  ++PSInputNum;
276  }
277 
279  /*IsVarArg=*/false);
280 
  // Vectors are assigned using their element type.
281  if (ValEVT.isVector()) {
282  EVT ElemVT = ValEVT.getVectorElementType();
283  if (!ValEVT.isSimple())
284  return false;
285  MVT ValVT = ElemVT.getSimpleVT();
286  bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
287  OrigArg.Flags, CCInfo);
  // NOTE(review): this branch bails out when Res is false, while the scalar
  // branch below bails out when Res is true. Presumably AssignFn returns
  // true on failure (as the scalar branch's comment implies), in which case
  // this check is inverted -- confirm against the CCAssignFn contract.
288  if (!Res)
289  return false;
290  } else {
  // NOTE(review): getSimpleVT() is called before the isSimple() guard; if it
  // asserts on extended types the guard comes too late -- confirm.
291  MVT ValVT = ValEVT.getSimpleVT();
292  if (!ValEVT.isSimple())
293  return false;
294  bool Res =
295  AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
296 
297  // Fail if we don't know how to handle this type.
298  if (Res)
299  return false;
300  }
301  }
302 
304 
  // Walk the original arguments and the assigned locations in step:
  // OrigArgIdx advances for every argument, i only for arguments that were
  // not skipped. Each assigned physical register becomes a live-in and is
  // copied into the argument's virtual register.
  // NOTE(review): the declaration of Arg (listing line 303) and the condition
  // opening this region (listing lines 305-306) are missing from the extract.
307  for (unsigned i = 0, OrigArgIdx = 0;
308  OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
309  if (Skipped.test(OrigArgIdx))
310  continue;
311  CCValAssign &VA = ArgLocs[i++];
312  MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
313  MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
314  MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
315  }
316  return true;
317  }
318 
  // No lowering path applied.
319  return false;
320 }
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:176
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:397
Interface definition for SIRegisterInfo.
static LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1)
Build and insert Res = G_GEP Op0, Op1.
AMDGPU specific subclass of TargetSubtarget.
bool isPSInputAllocated(unsigned Index) const
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
This file describes how to lower LLVM calls to machine code calls.
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
bool test(unsigned Idx) const
Definition: BitVector.h:501
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1034
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
unsigned const TargetRegisterInfo * TRI
F(f)
void markPSInputEnabled(unsigned Index)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef< ArgInfo > Args, ValueHandler &Handler) const
Invoke Handler::assignArg on each of the given Args and then use Callback to move them to the assigne...
A description of a memory reference used in the backend.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:369
unsigned addDispatchID(const SIRegisterInfo &TRI)
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
void setOrigAlign(unsigned A)
void markPSInputAllocated(unsigned Index)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:83
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
Class to represent pointers.
Definition: DerivedTypes.h:498
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned const MachineRegisterInfo * MRI
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
Helper class to build MachineInstr.
SI DAG Lowering interface definition.
unsigned addQueuePtr(const SIRegisterInfo &TRI)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
size_t arg_size() const
Definition: Function.h:700
arg_iterator arg_begin()
Definition: Function.h:673
The memory access is non-temporal.
Address space for constant memory (VTX2).
Definition: AMDGPU.h:254
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
Extended Value Type.
Definition: ValueTypes.h:33
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1424
size_t size() const
Definition: SmallVector.h:52
Argument handling is mostly uniform between the four places that make these decisions: function forma...
Definition: CallLowering.h:64
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:188
CCState - This class holds information needed while lowering arguments and return values...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:264
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
Interface definition of the TargetLowering class that is common to all AMD GPUs.
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
CCValAssign - Represent assignment of one arg/retval to a location.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:749
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef< unsigned > VRegs) const override
This hook must be implemented to lower the incoming (formal) arguments, described by Args...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
This file declares the MachineIRBuilder class.
bool isShader(CallingConv::ID cc)
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:200
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:461
Provides AMDGPU specific target descriptions.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:150
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
The memory access always returns the same value (or traps).
uint32_t Size
Definition: Profile.cpp:46
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef< unsigned > VRegs) const override
This hook must be implemented to lower outgoing return values, described by Val, into the specified v...
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
unsigned getLocReg() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
bool use_empty() const
Definition: Value.h:322
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
iterator_range< arg_iterator > args()
Definition: Function.h:691