LLVM  10.0.0svn
AMDGPUCallLowering.cpp
//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//
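//
// Editorial overview (not in the original source): kernel entry points
// (AMDGPU_KERNEL) bypass the normal argument-assignment machinery entirely;
// their arguments are loaded from the kernarg segment at computed byte
// offsets. Shaders and other calling conventions go through the usual
// CCAssignFn-driven register assignment below.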

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg);
    MIRBuilder.buildCopy(PhysReg, ValVReg);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  }
};

struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in) and a call (it's an
  /// implicit-def of the call instruction).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isArgumentHandler() const override { return true; }
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
    : CallLowering(&TLI) {
}

void AMDGPUCallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    Type *Ty = VT.getTypeForEVT(Ctx);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g.
      // [1 x double] -> double).
      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
                             OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    LLT LLTy = getLLTForType(*Ty, DL);

    SmallVector<Register, 8> SplitRegs;

    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
    Type *PartTy = PartVT.getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

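    // Editorial note: e.g. a <3 x half> argument is reported by
    // getRegisterTypeForCallingConv as two v2s16 parts, so two part registers
    // are created above and PerformArgSplit repacks them into the original
    // v3s16 value (see packSplitRegsToOrigType below).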
    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {

  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  if (!Val) {
    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
    return true;
  }

  Register VReg = VRegs[0];

  const Function &F = MF.getFunction();
  auto &DL = F.getParent()->getDataLayout();
  if (!AMDGPU::isShader(F.getCallingConv()))
    return false;

  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ArgInfo OrigArg{VReg, Val->getType()};
  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  SmallVector<ArgInfo, 8> SplitArgs;
  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
  }
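  // Editorial note: the return is built with buildInstrNoInsert so that the
  // physreg copies emitted by OutgoingArgHandler come first; the
  // SI_RETURN_TO_EPILOG (carrying implicit uses of those registers) is
  // inserted afterwards.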
  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;
  MIRBuilder.insertInstr(RetInstr);

  return true;
}

Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {

  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
  Register KernArgSegmentPtr =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

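  // Editorial note: the argument's address is the preloaded kernarg segment
  // pointer plus the constant byte offset, formed with a pointer G_GEP.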
  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        Register DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MODereferenceable |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
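    // Editorial note: unlike the other user SGPRs above, the kernarg segment
    // pointer is also copied into a generic virtual register here, since
    // lowerParameterPtr later looks it up with getLiveInVirtReg.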
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    unsigned DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we
  // read these from the dispatch pointer.
}


bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);

  unsigned i = 0;
  const unsigned KernArgBaseAlign = 16;
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
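    // Editorial note: worked example, assuming BaseOffset == 0: for kernel
    // arguments (i32, i64, i32), the first i32 lands at byte offset 0, the
    // i64 is aligned up to offset 8, and the trailing i32 follows at 16.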

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
        OrigArgRegs.size() == 1
            ? OrigArgRegs[0]
            : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}


static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  if (!LLTy.isVector() && !PartLLT.isVector()) {
    MIRBuilder.buildMerge(OrigRegs[0], Regs);
    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(LLTy.getElementType() == PartLLT.getElementType());

    int DstElts = LLTy.getNumElements();
    int PartElts = PartLLT.getNumElements();
    if (DstElts % PartElts == 0)
      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
    else {
      // Deal with v3s16 split into v2s16
      assert(PartElts == 2 && DstElts % 2 != 0);
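      // Editorial note: e.g. v3s16 from two v2s16 parts gives RoundedElts = 4:
      // concat into v4s16, then G_EXTRACT the low v3s16 at bit offset 0.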
      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);

      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();
  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.
    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
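    // Editorial note: e.g. v2s64 from four s32 parts: merge each pair of s32
    // into an s64, then build the v2s64 from the two merged elements.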

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = MIRBuilder.buildMerge(DstEltTy,
                                         Regs.take_front(PartsPerElt));
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
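    // Editorial note: e.g. a v2s16 value passed as two s32 parts: build a
    // v2s32 from the parts, then G_TRUNC down to v2s16.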
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
    MIRBuilder.buildTrunc(OrigRegs[0], BV);
  }
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);

  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
    return false;

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    // TODO: Handle sret.
    if (Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          MIRBuilder.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(
        OrigArg, SplitArgs, DL, MRI, CC,
        // FIXME: We should probably be passing multiple registers to
        // handleAssignments to do this
        [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
          packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
                                  LLTy, PartLLT);
        });

    ++Idx;
  }


  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
            countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());
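  // Editorial note: the insertion point is moved to the top of the entry
  // block so the live-in copies created by the handler precede any
  // instructions already emitted; setMBB below moves it back to the end.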

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
    CCInfo.AllocateReg(Info->getFrameOffsetReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}