LLVM 10.0.0svn
AMDGPUCallLowering.cpp
//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

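// Value handler for outgoing return values: each value is extended to at
// least 32 bits, copied into the physical register assigned by the calling
// convention, and recorded as an implicit use of the return instruction.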
struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

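// Value handler for incoming arguments: values either arrive in physical
// registers, which are copied (and truncated if needed) into the value's
// virtual registers, or on the stack, which is addressed through a fixed
// frame index.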
struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
      MPO, MachineMemOperand::MOLoad, Size, 1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

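// Split an original argument or return value into the legal register pieces
// required by the calling convention, appending one ArgInfo per piece to
// SplitArgs. PerformArgSplit is invoked for each value that needs more than
// one part, so the caller can emit the code that packs or unpacks the pieces.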
void AMDGPUCallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    Type *Ty = VT.getTypeForEVT(Ctx);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
                             OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    LLT LLTy = getLLTForType(*Ty, DL);

    SmallVector<Register, 8> SplitRegs;

    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
    Type *PartTy = PartVT.getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}

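// Distribute SrcReg (of type SrcTy) into DstRegs, each of type PartTy:
// either unmerge it directly, any-extend the unmerged elements of a
// scalarized vector, or pad it into a larger register and extract the parts.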
// TODO: Move to generic code
static void unpackRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = MIRBuilder.buildUnmerge(SrcTy.getElementType(),
                                                  SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      MIRBuilder.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    MIRBuilder.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = MIRBuilder.buildUndef(BigTy);

  Register BigReg = MRI.createGenericVirtualRegister(BigTy);
  MIRBuilder.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0).getReg(0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    MIRBuilder.buildExtract(DstRegs[i], BigReg, Offset);
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = MIRBuilder.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  ArgInfo OrigRetInfo(VRegs, Val->getType());
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;

  splitToValueTypes(
    OrigRetInfo, SplitRetInfos, DL, MRI, CC,
    [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
      unpackRegsToOrigType(MIRBuilder, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
    });

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());

  OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
  return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
}

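// Kernels, and shaders that return void, simply end the wave with S_ENDPGM.
// Shaders returning values use SI_RETURN_TO_EPILOG; callable functions
// return through S_SETPC_B64_return using the incoming return address.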
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {

  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = MIRBuilder.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  auto const &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = MIRBuilder.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    MIRBuilder.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  MIRBuilder.insertInstr(Ret);
  return true;
}

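// Compute a constant-address-space pointer to the kernel argument at the
// given byte offset from the kernarg segment base pointer.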
Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {

  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

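// Load a single kernel argument of type ParamTy from the kernarg segment
// into DstReg.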
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

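// Kernel arguments are not passed in registers; each one is loaded from the
// kernarg segment at an offset derived from the ABI alignment and allocation
// size of its type.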
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

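// Inverse of unpackRegsToOrigType: reassemble the original value in
// OrigRegs[0] from the split part registers, covering merged scalars,
// concatenated vectors (including the padded v3s16 case), and vectors whose
// elements were themselves split or promoted.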
// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    MIRBuilder.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = MIRBuilder.buildMerge(DstEltTy,
                                         Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
    MIRBuilder.buildTrunc(OrigRegs[0], BV);
  }
}

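// Lower incoming arguments for shaders and callable functions. Kernels are
// redirected to lowerFormalArgumentsKernel, since they read their arguments
// from memory instead of registers.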
bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    MIRBuilder.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          MIRBuilder.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(
      OrigArg, SplitArgs, DL, MRI, CC,
      // FIXME: We should probably be passing multiple registers to
      // handleAssignments to do this
      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
                                LLTy, PartLLT);
      });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
            countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
    CCInfo.AllocateReg(Info->getFrameOffsetReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}