LLVM 16.0.0git
AMDGPUCallLowering.cpp
1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the lowering of LLVM calls to machine code calls for
11 /// GlobalISel.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUCallLowering.h"
16 #include "AMDGPU.h"
17 #include "AMDGPULegalizerInfo.h"
18 #include "AMDGPUTargetMachine.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIRegisterInfo.h"
21 #include "llvm/CodeGen/Analysis.h"
25 #include "llvm/IR/IntrinsicsAMDGPU.h"
26 
27 #define DEBUG_TYPE "amdgpu-call-lowering"
28 
29 using namespace llvm;
30 
31 namespace {
32 
33 /// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
34 static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
35  Register ValVReg, CCValAssign &VA) {
36  if (VA.getLocVT().getSizeInBits() < 32) {
37  // 16-bit types are reported as legal for 32-bit registers. We need to
38  // extend and do a 32-bit copy to avoid the verifier complaining about it.
39  return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
40  }
41 
42  return Handler.extendRegister(ValVReg, VA);
43 }
44 
45 struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
46  AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
47  MachineInstrBuilder MIB)
48  : OutgoingValueHandler(B, MRI), MIB(MIB) {}
49 
50  MachineInstrBuilder MIB;
51 
52  Register getStackAddress(uint64_t Size, int64_t Offset,
53  MachinePointerInfo &MPO,
54  ISD::ArgFlagsTy Flags) override {
55  llvm_unreachable("not implemented");
56  }
57 
58  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
59  MachinePointerInfo &MPO, CCValAssign &VA) override {
60  llvm_unreachable("not implemented");
61  }
62 
63  void assignValueToReg(Register ValVReg, Register PhysReg,
64  CCValAssign VA) override {
65  Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
66 
67  // If this is a scalar return, insert a readfirstlane just in case the value
68  // ends up in a VGPR.
69  // FIXME: Assert this is a shader return.
70  const SIRegisterInfo *TRI
71  = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
72  if (TRI->isSGPRReg(MRI, PhysReg)) {
73  auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
74  {MRI.getType(ExtReg)}, false)
75  .addReg(ExtReg);
76  ExtReg = ToSGPR.getReg(0);
77  }
78 
79  MIRBuilder.buildCopy(PhysReg, ExtReg);
80  MIB.addUse(PhysReg, RegState::Implicit);
81  }
82 };
83 
84 struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
85  uint64_t StackUsed = 0;
86 
87  AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
88  : IncomingValueHandler(B, MRI) {}
89 
90  Register getStackAddress(uint64_t Size, int64_t Offset,
91  MachinePointerInfo &MPO,
92  ISD::ArgFlagsTy Flags) override {
93  auto &MFI = MIRBuilder.getMF().getFrameInfo();
94 
95  // Byval is assumed to be writable memory, but other stack passed arguments
96  // are not.
97  const bool IsImmutable = !Flags.isByVal();
98  int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
99  MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
100  auto AddrReg = MIRBuilder.buildFrameIndex(
101  LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
102  StackUsed = std::max(StackUsed, Size + Offset);
103  return AddrReg.getReg(0);
104  }
105 
106  void assignValueToReg(Register ValVReg, Register PhysReg,
107  CCValAssign VA) override {
108  markPhysRegUsed(PhysReg);
109 
110  if (VA.getLocVT().getSizeInBits() < 32) {
111  // 16-bit types are reported as legal for 32-bit registers. We need to do
112  // a 32-bit copy, and truncate to avoid the verifier complaining about it.
113  auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
114 
115  // If we have signext/zeroext, it applies to the whole 32-bit register
116  // before truncation.
117  auto Extended =
118  buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
119  MIRBuilder.buildTrunc(ValVReg, Extended);
120  return;
121  }
122 
123  IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
124  }
125 
126  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
127  MachinePointerInfo &MPO, CCValAssign &VA) override {
128  MachineFunction &MF = MIRBuilder.getMF();
129 
130  auto MMO = MF.getMachineMemOperand(
131  MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
132  inferAlignFromPtrInfo(MF, MPO));
133  MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
134  }
135 
136  /// How the physical register gets marked varies between formal
137  /// parameters (it's a basic-block live-in), and a call instruction
138  /// (it's an implicit-def of the call instruction).
139  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
140 };
141 
142 struct FormalArgHandler : public AMDGPUIncomingArgHandler {
143  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
144  : AMDGPUIncomingArgHandler(B, MRI) {}
145 
146  void markPhysRegUsed(unsigned PhysReg) override {
147  MIRBuilder.getMBB().addLiveIn(PhysReg);
148  }
149 };
150 
151 struct CallReturnHandler : public AMDGPUIncomingArgHandler {
152  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
153  MachineInstrBuilder MIB)
154  : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
155 
156  void markPhysRegUsed(unsigned PhysReg) override {
157  MIB.addDef(PhysReg, RegState::Implicit);
158  }
159 
160  MachineInstrBuilder MIB;
161 };
162 
163 struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
164  /// For tail calls, the byte offset of the call's argument area from the
165  /// callee's. Unused elsewhere.
166  int FPDiff;
167 
168  // Cache the SP register vreg if we need it more than once in this call site.
169  Register SPReg;
170 
171  bool IsTailCall;
172 
173  AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
174  MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
175  bool IsTailCall = false, int FPDiff = 0)
176  : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
177  IsTailCall(IsTailCall) {}
178 
179  Register getStackAddress(uint64_t Size, int64_t Offset,
180  MachinePointerInfo &MPO,
181  ISD::ArgFlagsTy Flags) override {
182  MachineFunction &MF = MIRBuilder.getMF();
183  const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
184  const LLT S32 = LLT::scalar(32);
185 
186  if (IsTailCall) {
187  Offset += FPDiff;
188  int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
189  auto FIReg = MIRBuilder.buildFrameIndex(PtrTy, FI);
190  MPO = MachinePointerInfo::getFixedStack(MF, FI);
191  return FIReg.getReg(0);
192  }
193 
194  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
195 
196  if (!SPReg) {
197  const GCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
198  if (ST.enableFlatScratch()) {
199  // The stack is accessed unswizzled, so we can use a regular copy.
200  SPReg = MIRBuilder.buildCopy(PtrTy,
201  MFI->getStackPtrOffsetReg()).getReg(0);
202  } else {
203  // The address we produce here, without knowing the use context, is going
204  // to be interpreted as a vector address, so we need to convert to a
205  // swizzled address.
206  SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
207  {MFI->getStackPtrOffsetReg()}).getReg(0);
208  }
209  }
210 
211  auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);
212 
213  auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
214  MPO = MachinePointerInfo::getStack(MF, Offset);
215  return AddrReg.getReg(0);
216  }
217 
218  void assignValueToReg(Register ValVReg, Register PhysReg,
219  CCValAssign VA) override {
220  MIB.addUse(PhysReg, RegState::Implicit);
221  Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
222  MIRBuilder.buildCopy(PhysReg, ExtReg);
223  }
224 
225  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
226  MachinePointerInfo &MPO, CCValAssign &VA) override {
227  MachineFunction &MF = MIRBuilder.getMF();
228  uint64_t LocMemOffset = VA.getLocMemOffset();
229  const auto &ST = MF.getSubtarget<GCNSubtarget>();
230 
231  auto MMO = MF.getMachineMemOperand(
232  MPO, MachineMemOperand::MOStore, MemTy,
233  commonAlignment(ST.getStackAlignment(), LocMemOffset));
234  MIRBuilder.buildStore(ValVReg, Addr, *MMO);
235  }
236 
237  void assignValueToAddress(const CallLowering::ArgInfo &Arg,
238  unsigned ValRegIndex, Register Addr, LLT MemTy,
239  MachinePointerInfo &MPO, CCValAssign &VA) override {
240  Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
241  ? extendRegister(Arg.Regs[ValRegIndex], VA)
242  : Arg.Regs[ValRegIndex];
243  assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
244  }
245 };
246 }
247 
248 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
249  : CallLowering(&TLI) {
250 }
251 
252 // FIXME: Compatibility shim
253 static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
254  switch (MIOpc) {
255  case TargetOpcode::G_SEXT:
256  return ISD::SIGN_EXTEND;
257  case TargetOpcode::G_ZEXT:
258  return ISD::ZERO_EXTEND;
259  case TargetOpcode::G_ANYEXT:
260  return ISD::ANY_EXTEND;
261  default:
262  llvm_unreachable("not an extend opcode");
263  }
264 }
265 
266 bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
267  CallingConv::ID CallConv,
268  SmallVectorImpl<BaseArgInfo> &Outs,
269  bool IsVarArg) const {
270  // For shaders. Vector types should be explicitly handled by CC.
271  if (AMDGPU::isEntryFunctionCC(CallConv))
272  return true;
273 
274  SmallVector<CCValAssign, 16> ArgLocs;
275  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
276  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
277  MF.getFunction().getContext());
278 
279  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
280 }
281 
282 /// Lower the return value for the already existing \p Ret. This assumes that
283 /// \p B's insertion point is correct.
284 bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
285  const Value *Val, ArrayRef<Register> VRegs,
286  MachineInstrBuilder &Ret) const {
287  if (!Val)
288  return true;
289 
290  auto &MF = B.getMF();
291  const auto &F = MF.getFunction();
292  const DataLayout &DL = MF.getDataLayout();
293  MachineRegisterInfo *MRI = B.getMRI();
294  LLVMContext &Ctx = F.getContext();
295 
296  CallingConv::ID CC = F.getCallingConv();
297  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
298 
299  SmallVector<EVT, 8> SplitEVTs;
300  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
301  assert(VRegs.size() == SplitEVTs.size() &&
302  "For each split Type there should be exactly one VReg.");
303 
304  SmallVector<ArgInfo, 8> SplitRetInfos;
305 
306  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
307  EVT VT = SplitEVTs[i];
308  Register Reg = VRegs[i];
309  ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx), 0);
310  setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
311 
312  if (VT.isScalarInteger()) {
313  unsigned ExtendOp = TargetOpcode::G_ANYEXT;
314  if (RetInfo.Flags[0].isSExt()) {
315  assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
316  ExtendOp = TargetOpcode::G_SEXT;
317  } else if (RetInfo.Flags[0].isZExt()) {
318  assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
319  ExtendOp = TargetOpcode::G_ZEXT;
320  }
321 
322  EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
323  extOpcodeToISDExtOpcode(ExtendOp));
324  if (ExtVT != VT) {
325  RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
326  LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
327  Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
328  }
329  }
330 
331  if (Reg != RetInfo.Regs[0]) {
332  RetInfo.Regs[0] = Reg;
333  // Reset the arg flags after modifying Reg.
334  setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
335  }
336 
337  splitToValueTypes(RetInfo, SplitRetInfos, DL, CC);
338  }
339 
340  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
341 
342  OutgoingValueAssigner Assigner(AssignFn);
343  AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
344  return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
345  CC, F.isVarArg());
346 }
347 
348 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
349  ArrayRef<Register> VRegs,
350  FunctionLoweringInfo &FLI) const {
351 
352  MachineFunction &MF = B.getMF();
353  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
354  MFI->setIfReturnsVoid(!Val);
355 
356  assert(!Val == VRegs.empty() && "Return value without a vreg");
357 
358  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
359  const bool IsShader = AMDGPU::isShader(CC);
360  const bool IsWaveEnd =
361  (IsShader && MFI->returnsVoid()) || AMDGPU::isKernel(CC);
362  if (IsWaveEnd) {
363  B.buildInstr(AMDGPU::S_ENDPGM)
364  .addImm(0);
365  return true;
366  }
367 
368  unsigned ReturnOpc =
369  IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
370  auto Ret = B.buildInstrNoInsert(ReturnOpc);
371 
372  if (!FLI.CanLowerReturn)
373  insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
374  else if (!lowerReturnVal(B, Val, VRegs, Ret))
375  return false;
376 
377  // TODO: Handle CalleeSavedRegsViaCopy.
378 
379  B.insertInstr(Ret);
380  return true;
381 }
382 
383 void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
384  uint64_t Offset) const {
385  MachineFunction &MF = B.getMF();
386  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
387  MachineRegisterInfo &MRI = MF.getRegInfo();
388  Register KernArgSegmentPtr =
389  Info->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
390  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
391 
392  auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);
393 
394  B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
395 }
396 
397 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
398  uint64_t Offset,
399  Align Alignment) const {
400  MachineFunction &MF = B.getMF();
401  const Function &F = MF.getFunction();
402  const DataLayout &DL = F.getParent()->getDataLayout();
403  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
404 
405  LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
406 
407  SmallVector<ArgInfo, 32> SplitArgs;
408  SmallVector<uint64_t> FieldOffsets;
409  splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);
410 
411  unsigned Idx = 0;
412  for (ArgInfo &SplitArg : SplitArgs) {
413  Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
414  lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);
415 
416  LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
417  if (SplitArg.Flags[0].isPointer()) {
418  // Compensate for losing pointeriness in splitValueTypes.
419  LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),
420  ArgTy.getScalarSizeInBits());
421  ArgTy = ArgTy.isVector() ? LLT::vector(ArgTy.getElementCount(), PtrTy)
422  : PtrTy;
423  }
424 
425  MachineMemOperand *MMO = MF.getMachineMemOperand(
424 
426  PtrInfo,
429  ArgTy, commonAlignment(Alignment, FieldOffsets[Idx]));
430 
431  assert(SplitArg.Regs.size() == 1);
432 
433  B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
434  ++Idx;
435  }
436 }
437 
438 // Allocate special inputs passed in user SGPRs.
439 static void allocateHSAUserSGPRs(CCState &CCInfo,
440  MachineIRBuilder &B,
441  MachineFunction &MF,
442  const SIRegisterInfo &TRI,
443  SIMachineFunctionInfo &Info) {
444  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
445  if (Info.hasPrivateSegmentBuffer()) {
446  Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
447  MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
448  CCInfo.AllocateReg(PrivateSegmentBufferReg);
449  }
450 
451  if (Info.hasDispatchPtr()) {
452  Register DispatchPtrReg = Info.addDispatchPtr(TRI);
453  MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
454  CCInfo.AllocateReg(DispatchPtrReg);
455  }
456 
457  if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
458  Register QueuePtrReg = Info.addQueuePtr(TRI);
459  MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
460  CCInfo.AllocateReg(QueuePtrReg);
461  }
462 
463  if (Info.hasKernargSegmentPtr()) {
464  MachineRegisterInfo &MRI = MF.getRegInfo();
465  Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
466  const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
467  Register VReg = MRI.createGenericVirtualRegister(P4);
468  MRI.addLiveIn(InputPtrReg, VReg);
469  B.getMBB().addLiveIn(InputPtrReg);
470  B.buildCopy(VReg, InputPtrReg);
471  CCInfo.AllocateReg(InputPtrReg);
472  }
473 
474  if (Info.hasDispatchID()) {
475  Register DispatchIDReg = Info.addDispatchID(TRI);
476  MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
477  CCInfo.AllocateReg(DispatchIDReg);
478  }
479 
480  if (Info.hasFlatScratchInit()) {
481  Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
482  MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
483  CCInfo.AllocateReg(FlatScratchInitReg);
484  }
485 
486  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
487  // these from the dispatch pointer.
488 }
489 
490 bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
491  MachineIRBuilder &B, const Function &F,
492  ArrayRef<ArrayRef<Register>> VRegs) const {
493  MachineFunction &MF = B.getMF();
494  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
495  MachineRegisterInfo &MRI = MF.getRegInfo();
496  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
497  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
498  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
499  const DataLayout &DL = F.getParent()->getDataLayout();
500 
501  Info->allocateKnownAddressLDSGlobal(F);
502 
503  SmallVector<CCValAssign, 16> ArgLocs;
504  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
505 
506  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);
507 
508  unsigned i = 0;
509  const Align KernArgBaseAlign(16);
510  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
511  uint64_t ExplicitArgOffset = 0;
512 
513  // TODO: Align down to dword alignment and extract bits for extending loads.
514  for (auto &Arg : F.args()) {
515  const bool IsByRef = Arg.hasByRefAttr();
516  Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
517  unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
518  if (AllocSize == 0)
519  continue;
520 
521  MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : None;
522  Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
523 
524  uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
525  ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
526 
527  if (Arg.use_empty()) {
528  ++i;
529  continue;
530  }
531 
532  Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);
533 
534  if (IsByRef) {
535  unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
536 
537  assert(VRegs[i].size() == 1 &&
538  "expected only one register for byval pointers");
539  if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
540  lowerParameterPtr(VRegs[i][0], B, ArgOffset);
541  } else {
542  const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
543  Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
544  lowerParameterPtr(PtrReg, B, ArgOffset);
545 
546  B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
547  }
548  } else {
549  ArgInfo OrigArg(VRegs[i], Arg, i);
550  const unsigned OrigArgIdx = i + AttributeList::FirstArgIndex;
551  setArgFlags(OrigArg, OrigArgIdx, DL, F);
552  lowerParameter(B, OrigArg, ArgOffset, Alignment);
553  }
554 
555  ++i;
556  }
557 
558  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
559  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
560  return true;
561 }
562 
563 bool AMDGPUCallLowering::lowerFormalArguments(
564  MachineIRBuilder &B, const Function &F, ArrayRef<ArrayRef<Register>> VRegs,
565  FunctionLoweringInfo &FLI) const {
566  CallingConv::ID CC = F.getCallingConv();
567 
568  // The infrastructure for normal calling convention lowering is essentially
569  // useless for kernels. We want to avoid any kind of legalization or argument
570  // splitting.
571  if (CC == CallingConv::AMDGPU_KERNEL)
572  return lowerFormalArgumentsKernel(B, F, VRegs);
573 
574  const bool IsGraphics = AMDGPU::isGraphics(CC);
575  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
576 
577  MachineFunction &MF = B.getMF();
578  MachineBasicBlock &MBB = B.getMBB();
579  MachineRegisterInfo &MRI = MF.getRegInfo();
580  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
581  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
582  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
583  const DataLayout &DL = F.getParent()->getDataLayout();
584 
585  Info->allocateKnownAddressLDSGlobal(F);
586 
587  SmallVector<CCValAssign, 16> ArgLocs;
588  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
589 
590  if (Info->hasImplicitBufferPtr()) {
591  Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
592  MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
593  CCInfo.AllocateReg(ImplicitBufferPtrReg);
594  }
595 
596  // FIXME: This probably isn't defined for mesa
597  if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
598  Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
599  MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
600  CCInfo.AllocateReg(FlatScratchInitReg);
601  }
602 
603  SmallVector<ArgInfo, 32> SplitArgs;
604  unsigned Idx = 0;
605  unsigned PSInputNum = 0;
606 
607  // Insert the hidden sret parameter if the return value won't fit in the
608  // return registers.
609  if (!FLI.CanLowerReturn)
610  insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
611 
612  for (auto &Arg : F.args()) {
613  if (DL.getTypeStoreSize(Arg.getType()) == 0)
614  continue;
615 
616  const bool InReg = Arg.hasAttribute(Attribute::InReg);
617 
618  // SGPR arguments to functions not implemented.
619  if (!IsGraphics && InReg)
620  return false;
621 
622  if (Arg.hasAttribute(Attribute::SwiftSelf) ||
623  Arg.hasAttribute(Attribute::SwiftError) ||
624  Arg.hasAttribute(Attribute::Nest))
625  return false;
626 
627  if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
628  const bool ArgUsed = !Arg.use_empty();
629  bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
630 
631  if (!SkipArg) {
632  Info->markPSInputAllocated(PSInputNum);
633  if (ArgUsed)
634  Info->markPSInputEnabled(PSInputNum);
635  }
636 
637  ++PSInputNum;
638 
639  if (SkipArg) {
640  for (Register R : VRegs[Idx])
641  B.buildUndef(R);
642 
643  ++Idx;
644  continue;
645  }
646  }
647 
648  ArgInfo OrigArg(VRegs[Idx], Arg, Idx);
649  const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
650  setArgFlags(OrigArg, OrigArgIdx, DL, F);
651 
652  splitToValueTypes(OrigArg, SplitArgs, DL, CC);
653  ++Idx;
654  }
655 
656  // At least one interpolation mode must be enabled or else the GPU will
657  // hang.
658  //
659  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
660  // set PSInputAddr, the user wants to enable some bits after the compilation
661  // based on run-time states. Since we can't know what the final PSInputEna
662  // will look like, we shouldn't do anything here and the user should take
663  // responsibility for the correct programming.
664  //
665  // Otherwise, the following restrictions apply:
666  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
667  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
668  // enabled too.
669  if (CC == CallingConv::AMDGPU_PS) {
670  if ((Info->getPSInputAddr() & 0x7F) == 0 ||
671  ((Info->getPSInputAddr() & 0xF) == 0 &&
672  Info->isPSInputAllocated(11))) {
673  CCInfo.AllocateReg(AMDGPU::VGPR0);
674  CCInfo.AllocateReg(AMDGPU::VGPR1);
675  Info->markPSInputAllocated(0);
676  Info->markPSInputEnabled(0);
677  }
678 
679  if (Subtarget.isAmdPalOS()) {
680  // For isAmdPalOS, the user does not enable some bits after compilation
681  // based on run-time states; the register values being generated here are
682  // the final ones set in hardware. Therefore we need to apply the
683  // workaround to PSInputAddr and PSInputEnable together. (The case where
684  // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
685  // set up an input arg for a particular interpolation mode, but nothing
686  // uses that input arg. Really we should have an earlier pass that removes
687  // such an arg.)
688  unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
689  if ((PsInputBits & 0x7F) == 0 ||
690  ((PsInputBits & 0xF) == 0 &&
691  (PsInputBits >> 11 & 1)))
692  Info->markPSInputEnabled(
693  countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
694  }
695  }
696 
697  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
698  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
699 
700  if (!MBB.empty())
701  B.setInstr(*MBB.begin());
702 
703  if (!IsEntryFunc && !IsGraphics) {
704  // For the fixed ABI, pass workitem IDs in the last argument register.
705  TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
706  }
707 
708  IncomingValueAssigner Assigner(AssignFn);
709  if (!determineAssignments(Assigner, SplitArgs, CCInfo))
710  return false;
711 
712  FormalArgHandler Handler(B, MRI);
713  if (!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, B))
714  return false;
715 
716  uint64_t StackOffset = Assigner.StackOffset;
717 
718  // Start adding system SGPRs.
719  if (IsEntryFunc) {
720  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
721  } else {
722  if (!Subtarget.enableFlatScratch())
723  CCInfo.AllocateReg(Info->getScratchRSrcReg());
724  TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
725  }
726 
727  // When we tail call, we need to check if the callee's arguments will fit on
728  // the caller's stack. So, whenever we lower formal arguments, we should keep
729  // track of this information, since we might lower a tail call in this
730  // function later.
731  Info->setBytesInStackArgArea(StackOffset);
732 
733  // Move back to the end of the basic block.
734  B.setMBB(MBB);
735 
736  return true;
737 }
738 
739 bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
740  CCState &CCInfo,
741  SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
742  CallLoweringInfo &Info) const {
743  MachineFunction &MF = MIRBuilder.getMF();
744 
745  // If there's no call site, this doesn't correspond to a call from the IR and
746  // doesn't need implicit inputs.
747  if (!Info.CB)
748  return true;
749 
750  const AMDGPUFunctionArgInfo *CalleeArgInfo
751  = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
752 
753  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
754  const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();
755 
756 
757  // TODO: Unify with private memory register handling. This is complicated by
758  // the fact that at least in kernels, the input argument is not necessarily
759  // in the same location as the input.
760  static constexpr AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
761  AMDGPUFunctionArgInfo::DISPATCH_PTR,
762  AMDGPUFunctionArgInfo::QUEUE_PTR,
763  AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
764  AMDGPUFunctionArgInfo::DISPATCH_ID,
765  AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
766  AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
767  AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
768  AMDGPUFunctionArgInfo::LDS_KERNEL_ID,
769  };
770 
771  static constexpr StringLiteral ImplicitAttrNames[] = {
772  "amdgpu-no-dispatch-ptr",
773  "amdgpu-no-queue-ptr",
774  "amdgpu-no-implicitarg-ptr",
775  "amdgpu-no-dispatch-id",
776  "amdgpu-no-workgroup-id-x",
777  "amdgpu-no-workgroup-id-y",
778  "amdgpu-no-workgroup-id-z",
779  "amdgpu-no-lds-kernel-id",
780  };
781 
782  MachineRegisterInfo &MRI = MF.getRegInfo();
783 
784  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
785  const AMDGPULegalizerInfo *LI
786  = static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());
787 
788  unsigned I = 0;
789  for (auto InputID : InputRegs) {
790  const ArgDescriptor *OutgoingArg;
791  const TargetRegisterClass *ArgRC;
792  LLT ArgTy;
793 
794  // If the callee does not use the attribute value, skip copying the value.
795  if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
796  continue;
797 
798  std::tie(OutgoingArg, ArgRC, ArgTy) =
799  CalleeArgInfo->getPreloadedValue(InputID);
800  if (!OutgoingArg)
801  continue;
802 
803  const ArgDescriptor *IncomingArg;
804  const TargetRegisterClass *IncomingArgRC;
805  std::tie(IncomingArg, IncomingArgRC, ArgTy) =
806  CallerArgInfo.getPreloadedValue(InputID);
807  assert(IncomingArgRC == ArgRC);
808 
809  Register InputReg = MRI.createGenericVirtualRegister(ArgTy);
810 
811  if (IncomingArg) {
812  LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
813  } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
814  LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
815  } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
816  Optional<uint32_t> Id =
817  AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction());
818  if (Id.has_value()) {
819  MIRBuilder.buildConstant(InputReg, Id.value());
820  } else {
821  MIRBuilder.buildUndef(InputReg);
822  }
823  } else {
824  // We may have proven the input wasn't needed, although the ABI is
825  // requiring it. We just need to allocate the register appropriately.
826  MIRBuilder.buildUndef(InputReg);
827  }
828 
829  if (OutgoingArg->isRegister()) {
830  ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
831  if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
832  report_fatal_error("failed to allocate implicit input argument");
833  } else {
834  LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
835  return false;
836  }
837  }
838 
839  // Pack workitem IDs into a single register or pass it as is if already
840  // packed.
841  const ArgDescriptor *OutgoingArg;
842  const TargetRegisterClass *ArgRC;
843  LLT ArgTy;
844 
845  std::tie(OutgoingArg, ArgRC, ArgTy) =
846  CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
847  if (!OutgoingArg)
848  std::tie(OutgoingArg, ArgRC, ArgTy) =
849  CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
850  if (!OutgoingArg)
851  std::tie(OutgoingArg, ArgRC, ArgTy) =
852  CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
853  if (!OutgoingArg)
854  return false;
855 
856  auto WorkitemIDX =
857  CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
858  auto WorkitemIDY =
859  CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
860  auto WorkitemIDZ =
861  CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
862 
863  const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
864  const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
865  const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
866  const LLT S32 = LLT::scalar(32);
867 
868  const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
869  const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
870  const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
871 
872  // If the incoming IDs are not packed, we need to pack them.
873  // FIXME: Should consider known workgroup size to eliminate known 0 cases.
874  Register InputReg;
875  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
876  NeedWorkItemIDX) {
877  if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
878  InputReg = MRI.createGenericVirtualRegister(S32);
879  LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
880  std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
881  } else {
882  InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
883  }
884  }
885 
886  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
887  NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
888  Register Y = MRI.createGenericVirtualRegister(S32);
889  LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
890  std::get<2>(WorkitemIDY));
891 
892  Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
893  InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
894  }
895 
896  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
897  NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
898  Register Z = MRI.createGenericVirtualRegister(S32);
899  LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
900  std::get<2>(WorkitemIDZ));
901 
902  Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
903  InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
904  }
905 
906  if (!InputReg &&
907  (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
908  InputReg = MRI.createGenericVirtualRegister(S32);
909  if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
910  // We're in a situation where the outgoing function requires the workitem
911  // ID, but the calling function does not have it (e.g. a graphics function
912  // calling a C calling convention function). This is illegal, but we need
913  // to produce something.
914  MIRBuilder.buildUndef(InputReg);
915  } else {
916  // Workitem IDs are already packed; any of the present incoming arguments will
917  // carry all required fields.
918  ArgDescriptor IncomingArg = ArgDescriptor::createArg(
919  IncomingArgX ? *IncomingArgX :
920  IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
921  LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
922  &AMDGPU::VGPR_32RegClass, S32);
923  }
924  }
925 
926  if (OutgoingArg->isRegister()) {
927  if (InputReg)
928  ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
929 
930  if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
931  report_fatal_error("failed to allocate implicit input argument");
932  } else {
933  LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
934  return false;
935  }
936 
937  return true;
938 }
939 
940 /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
941 /// CC.
942 static std::pair<CCAssignFn *, CCAssignFn *>
943 getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
944  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
945 }
946 
947 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
948  bool IsTailCall) {
949  assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
950  "because the address can be divergent");
951  return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::G_SI_CALL;
952 }
953 
954 // Add operands to call instruction to track the callee.
955 static bool addCallTargetOperands(MachineInstrBuilder &CallInst,
956  MachineIRBuilder &MIRBuilder,
957  AMDGPUCallLowering::CallLoweringInfo &Info) {
958  if (Info.Callee.isReg()) {
959  CallInst.addReg(Info.Callee.getReg());
960  CallInst.addImm(0);
961  } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
962  // The call lowering lightly assumed we can directly encode a call target in
963  // the instruction, which is not the case. Materialize the address here.
964  const GlobalValue *GV = Info.Callee.getGlobal();
965  auto Ptr = MIRBuilder.buildGlobalValue(
966  LLT::pointer(GV->getAddressSpace(), 64), GV);
967  CallInst.addReg(Ptr.getReg(0));
968  CallInst.add(Info.Callee);
969  } else
970  return false;
971 
972  return true;
973 }
974 
975 bool AMDGPUCallLowering::doCallerAndCalleePassArgsTheSameWay(
976  CallLoweringInfo &Info, MachineFunction &MF,
977  SmallVectorImpl<ArgInfo> &InArgs) const {
978  const Function &CallerF = MF.getFunction();
979  CallingConv::ID CalleeCC = Info.CallConv;
980  CallingConv::ID CallerCC = CallerF.getCallingConv();
981 
982  // If the calling conventions match, then everything must be the same.
983  if (CalleeCC == CallerCC)
984  return true;
985 
986  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
987 
988  // Make sure that the caller and callee preserve all of the same registers.
989  auto TRI = ST.getRegisterInfo();
990 
991  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
992  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
993  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
994  return false;
995 
996  // Check if the caller and callee will handle arguments in the same way.
997  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
998  CCAssignFn *CalleeAssignFnFixed;
999  CCAssignFn *CalleeAssignFnVarArg;
1000  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
1001  getAssignFnsForCC(CalleeCC, TLI);
1002 
1003  CCAssignFn *CallerAssignFnFixed;
1004  CCAssignFn *CallerAssignFnVarArg;
1005  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
1006  getAssignFnsForCC(CallerCC, TLI);
1007 
1008  // FIXME: We are not accounting for potential differences in implicitly passed
1009  // inputs, but only the fixed ABI is supported now anyway.
1010  IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
1011  CalleeAssignFnVarArg);
1012  IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
1013  CallerAssignFnVarArg);
1014  return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
1015 }
1016 
1017 bool AMDGPUCallLowering::areCalleeOutgoingArgsTailCallable(
1018  CallLoweringInfo &Info, MachineFunction &MF,
1019  SmallVectorImpl<ArgInfo> &OutArgs) const {
1020  // If there are no outgoing arguments, then we are done.
1021  if (OutArgs.empty())
1022  return true;
1023 
1024  const Function &CallerF = MF.getFunction();
1025  CallingConv::ID CalleeCC = Info.CallConv;
1026  CallingConv::ID CallerCC = CallerF.getCallingConv();
1027  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1028 
1029  CCAssignFn *AssignFnFixed;
1030  CCAssignFn *AssignFnVarArg;
1031  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1032 
1033  // We have outgoing arguments. Make sure that we can tail call with them.
1034  SmallVector<CCValAssign, 16> OutLocs;
1035  CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
1036  OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1037 
1038  if (!determineAssignments(Assigner, OutArgs, OutInfo)) {
1039  LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
1040  return false;
1041  }
1042 
1043  // Make sure that they can fit on the caller's stack.
1044  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1045  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
1046  LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
1047  return false;
1048  }
1049 
1050  // Verify that the parameters in callee-saved registers match.
1051  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1052  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1053  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
1054  MachineRegisterInfo &MRI = MF.getRegInfo();
1055  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
1056 }
1057 
1058 /// Return true if the calling convention is one that we can guarantee TCO for.
1059 static bool canGuaranteeTCO(CallingConv::ID CC) {
1060  return CC == CallingConv::Fast;
1061 }
1062 
1063 /// Return true if we might ever do TCO for calls with this calling convention.
1064 static bool mayTailCallThisCC(CallingConv::ID CC) {
1065  switch (CC) {
1066  case CallingConv::C:
1067  case CallingConv::AMDGPU_Gfx:
1068  return true;
1069  default:
1070  return canGuaranteeTCO(CC);
1071  }
1072 }
1073 
1074 bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
1075  MachineIRBuilder &B, CallLoweringInfo &Info,
1076  SmallVectorImpl<ArgInfo> &InArgs, SmallVectorImpl<ArgInfo> &OutArgs) const {
1077  // Must pass all target-independent checks in order to tail call optimize.
1078  if (!Info.IsTailCall)
1079  return false;
1080 
1081  // Indirect calls can't be tail calls, because the address can be divergent.
1082  // TODO Check divergence info if the call really is divergent.
1083  if (Info.Callee.isReg())
1084  return false;
1085 
1086  MachineFunction &MF = B.getMF();
1087  const Function &CallerF = MF.getFunction();
1088  CallingConv::ID CalleeCC = Info.CallConv;
1089  CallingConv::ID CallerCC = CallerF.getCallingConv();
1090 
1091  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1092  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1093  // Kernels aren't callable, and don't have a live-in return address, so it
1094  // doesn't make sense to do a tail call with entry functions.
1095  if (!CallerPreserved)
1096  return false;
1097 
1098  if (!mayTailCallThisCC(CalleeCC)) {
1099  LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
1100  return false;
1101  }
1102 
1103  if (any_of(CallerF.args(), [](const Argument &A) {
1104  return A.hasByValAttr() || A.hasSwiftErrorAttr();
1105  })) {
1106  LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
1107  "or swifterror arguments\n");
1108  return false;
1109  }
1110 
1111  // If we have -tailcallopt, then we're done.
1112  if (MF.getTarget().Options.GuaranteedTailCallOpt)
1113  return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
1114 
1115  // Verify that the incoming and outgoing arguments from the callee are
1116  // safe to tail call.
1117  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1118  LLVM_DEBUG(
1119  dbgs()
1120  << "... Caller and callee have incompatible calling conventions.\n");
1121  return false;
1122  }
1123 
1124  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
1125  return false;
1126 
1127  LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
1128  return true;
1129 }
1130 
1131 // Insert outgoing implicit arguments for a call, by inserting copies to the
1132 // implicit argument registers and adding the necessary implicit uses to the
1133 // call instruction.
1134 void AMDGPUCallLowering::handleImplicitCallArguments(
1135  MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
1136  const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo,
1137  ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
1138  if (!ST.enableFlatScratch()) {
1139  // Insert copies for the SRD. In the HSA case, this should be an identity
1140  // copy.
1141  auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
1142  FuncInfo.getScratchRSrcReg());
1143  MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
1144  CallInst.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit);
1145  }
1146 
1147  for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
1148  MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
1149  CallInst.addReg(ArgReg.first, RegState::Implicit);
1150  }
1151 }
1152 
1153 bool AMDGPUCallLowering::lowerTailCall(
1154  MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1155  SmallVectorImpl<ArgInfo> &OutArgs) const {
1156  MachineFunction &MF = MIRBuilder.getMF();
1157  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1158  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1159  const Function &F = MF.getFunction();
1160  MachineRegisterInfo &MRI = MF.getRegInfo();
1161  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1162 
1163  // True when we're tail calling, but without -tailcallopt.
1164  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
1165 
1166  // Find out which ABI gets to decide where things go.
1167  CallingConv::ID CalleeCC = Info.CallConv;
1168  CCAssignFn *AssignFnFixed;
1169  CCAssignFn *AssignFnVarArg;
1170  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1171 
1172  MachineInstrBuilder CallSeqStart;
1173  if (!IsSibCall)
1174  CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
1175 
1176  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
1177  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1178  if (!addCallTargetOperands(MIB, MIRBuilder, Info))
1179  return false;
1180 
1181  // Byte offset for the tail call. When we are sibcalling, this will always
1182  // be 0.
1183  MIB.addImm(0);
1184 
1185  // Tell the call which registers are clobbered.
1186  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1187  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1188  MIB.addRegMask(Mask);
1189 
1190  // FPDiff is the byte offset of the call's argument area from the callee's.
1191  // Stores to callee stack arguments will be placed in FixedStackSlots offset
1192  // by this amount for a tail call. In a sibling call it must be 0 because the
1193  // caller will deallocate the entire stack and the callee still expects its
1194  // arguments to begin at SP+0.
1195  int FPDiff = 0;
1196 
1197  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1198  // by -tailcallopt. For sibcalls, the memory operands for the call are
1199  // already available in the caller's incoming argument space.
1200  unsigned NumBytes = 0;
1201  if (!IsSibCall) {
1202  // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1203  // before handling assignments, because FPDiff must be known for memory
1204  // arguments.
1205  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1206  SmallVector<CCValAssign, 16> OutLocs;
1207  CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1208 
1209  // FIXME: Not accounting for callee implicit inputs
1210  OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg);
1211  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
1212  return false;
1213 
1214  // The callee will pop the argument stack as a tail call. Thus, we must
1215  // keep it 16-byte aligned.
1216  NumBytes = alignTo(OutInfo.getNextStackOffset(), ST.getStackAlignment());
1217 
1218  // FPDiff will be negative if this tail call requires more space than we
1219  // would automatically have in our incoming argument space. Positive if we
1220  // actually shrink the stack.
1221  FPDiff = NumReusableBytes - NumBytes;
1222 
1223  // The stack pointer must be 16-byte aligned at all times it's used for a
1224  // memory operation, which in practice means at *all* times and in
1225  // particular across call boundaries. Therefore our own arguments started at
1226  // a 16-byte aligned SP and the delta applied for the tail call should
1227  // satisfy the same constraint.
1228  assert(isAligned(ST.getStackAlignment(), FPDiff) &&
1229  "unaligned stack on tail call");
1230  }
1231 
1232  SmallVector<CCValAssign, 16> ArgLocs;
1233  CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1234 
1235  // We could pass MIB and directly add the implicit uses to the call
1236  // now. However, as an aesthetic choice, place implicit argument operands
1237  // after the ordinary user argument registers.
1238  SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
1239 
1240  if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
1241  // With a fixed ABI, allocate fixed registers before user arguments.
1242  if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
1243  return false;
1244  }
1245 
1246  OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1247 
1248  if (!determineAssignments(Assigner, OutArgs, CCInfo))
1249  return false;
1250 
1251  // Do the actual argument marshalling.
1252  AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);
1253  if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1254  return false;
1255 
1256  handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, ImplicitArgRegs);
1257 
1258  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1259  // sequence start and end here.
1260  if (!IsSibCall) {
1261  MIB->getOperand(1).setImm(FPDiff);
1262  CallSeqStart.addImm(NumBytes).addImm(0);
1263  // End the call sequence *before* emitting the call. Normally, we would
1264  // tidy the frame up after the call. However, here, we've laid out the
1265  // parameters so that when SP is reset, they will be in the correct
1266  // location.
1267  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN).addImm(NumBytes).addImm(0);
1268  }
1269 
1270  // Now we can add the actual call instruction to the correct basic block.
1271  MIRBuilder.insertInstr(MIB);
1272 
1273  // If Callee is a reg, since it is used by a target specific
1274  // instruction, it must have a register class matching the
1275  // constraint of that instruction.
1276 
1277  // FIXME: We should define regbankselectable call instructions to handle
1278  // divergent call targets.
1279  if (MIB->getOperand(0).isReg()) {
1280  MIB->getOperand(0).setReg(constrainOperandRegClass(
1281  MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
1282  MIB->getDesc(), MIB->getOperand(0), 0));
1283  }
1284 
1285  MF.getFrameInfo().setHasTailCall();
1286  Info.LoweredTailCall = true;
1287  return true;
1288 }
1289 
1290 bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1291  CallLoweringInfo &Info) const {
1292  if (Info.IsVarArg) {
1293  LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
1294  return false;
1295  }
1296 
1297  MachineFunction &MF = MIRBuilder.getMF();
1298  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1299  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1300 
1301  const Function &F = MF.getFunction();
1302  MachineRegisterInfo &MRI = MF.getRegInfo();
1303  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1304  const DataLayout &DL = F.getParent()->getDataLayout();
1305 
1306  SmallVector<ArgInfo, 8> OutArgs;
1307  for (auto &OrigArg : Info.OrigArgs)
1308  splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
1309 
1310  SmallVector<ArgInfo, 8> InArgs;
1311  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
1312  splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
1313 
1314  // If we can lower as a tail call, do that instead.
1315  bool CanTailCallOpt =
1316  isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1317 
1318  // We must emit a tail call if we have musttail.
1319  if (Info.IsMustTailCall && !CanTailCallOpt) {
1320  LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1321  return false;
1322  }
1323 
1324  Info.IsTailCall = CanTailCallOpt;
1325  if (CanTailCallOpt)
1326  return lowerTailCall(MIRBuilder, Info, OutArgs);
1327 
1328  // Find out which ABI gets to decide where things go.
1329  CCAssignFn *AssignFnFixed;
1330  CCAssignFn *AssignFnVarArg;
1331  std::tie(AssignFnFixed, AssignFnVarArg) =
1332  getAssignFnsForCC(Info.CallConv, TLI);
1333 
1334  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
1335  .addImm(0)
1336  .addImm(0);
1337 
1338  // Create a temporarily-floating call instruction so we can add the implicit
1339  // uses of arg registers.
1340  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
1341 
1342  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1343  MIB.addDef(TRI->getReturnAddressReg(MF));
1344 
1345  if (!addCallTargetOperands(MIB, MIRBuilder, Info))
1346  return false;
1347 
1348  // Tell the call which registers are clobbered.
1349  const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
1350  MIB.addRegMask(Mask);
1351 
1352  SmallVector<CCValAssign, 16> ArgLocs;
1353  CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1354 
1355  // We could pass MIB and directly add the implicit uses to the call
1356  // now. However, as an aesthetic choice, place implicit argument operands
1357  // after the ordinary user argument registers.
1358  SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
1359 
1360  if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
1361  // With a fixed ABI, allocate fixed registers before user arguments.
1362  if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
1363  return false;
1364  }
1365 
1366  // Do the actual argument marshalling.
1367  SmallVector<Register, 8> PhysRegs;
1368 
1369  OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1370  if (!determineAssignments(Assigner, OutArgs, CCInfo))
1371  return false;
1372 
1373  AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
1374  if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1375  return false;
1376 
1377  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1378 
1379  handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs);
1380 
1381  // Get a count of how many bytes are to be pushed on the stack.
1382  unsigned NumBytes = CCInfo.getNextStackOffset();
1383 
1384  // If Callee is a reg, since it is used by a target specific
1385  // instruction, it must have a register class matching the
1386  // constraint of that instruction.
1387 
1388  // FIXME: We should define regbankselectable call instructions to handle
1389  // divergent call targets.
1390  if (MIB->getOperand(1).isReg()) {
1391  MIB->getOperand(1).setReg(constrainOperandRegClass(
1392  MF, *TRI, MRI, *ST.getInstrInfo(),
1393  *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
1394  1));
1395  }
1396 
1397  // Now we can add the actual call instruction to the correct position.
1398  MIRBuilder.insertInstr(MIB);
1399 
1400  // Finally we can copy the returned value back into its virtual-register. In
1401  // symmetry with the arguments, the physical register must be an
1402  // implicit-define of the call instruction.
1403  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1404  CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
1405  Info.IsVarArg);
1406  IncomingValueAssigner Assigner(RetAssignFn);
1407  CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1408  if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
1409  Info.CallConv, Info.IsVarArg))
1410  return false;
1411  }
1412 
1413  uint64_t CalleePopBytes = NumBytes;
1414 
1415  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
1416  .addImm(0)
1417  .addImm(CalleePopBytes);
1418 
1419  if (!Info.CanLowerReturn) {
1420  insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1421  Info.DemoteRegister, Info.DemoteStackIndex);
1422  }
1423 
1424  return true;
1425 }
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:151
llvm::MachineRegisterInfo::addLiveIn
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Definition: MachineRegisterInfo.h:959
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
llvm::AMDGPUFunctionArgInfo::PreloadedValue
PreloadedValue
Definition: AMDGPUArgumentUsageInfo.h:98
llvm::SIMachineFunctionInfo::setIfReturnsVoid
void setIfReturnsVoid(bool Value)
Definition: SIMachineFunctionInfo.h:898
llvm::AMDGPUFunctionArgInfo::QUEUE_PTR
@ QUEUE_PTR
Definition: AMDGPUArgumentUsageInfo.h:102
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::AMDGPUCallLowering::handleImplicitCallArguments
void handleImplicitCallArguments(MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI, ArrayRef< std::pair< MCRegister, Register >> ImplicitArgRegs) const
Definition: AMDGPUCallLowering.cpp:1134
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:146
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::TargetOptions::GuaranteedTailCallOpt
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
Definition: TargetOptions.h:221
llvm::MachineIRBuilder::buildGlobalValue
MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV)
Build and insert Res = G_GLOBAL_VALUE GV.
Definition: MachineIRBuilder.cpp:154
llvm::Function::args
iterator_range< arg_iterator > args()
Definition: Function.h:746
llvm::MachineIRBuilder::buildOr
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_OR Op0, Op1.
Definition: MachineIRBuilder.h:1615
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::SIMachineFunctionInfo::getPreloadedReg
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Definition: SIMachineFunctionInfo.h:758
llvm::LLT::getScalarSizeInBits
unsigned getScalarSizeInBits() const
Definition: LowLevelTypeImpl.h:224
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:189
SIMachineFunctionInfo.h
llvm::ArgDescriptor::createArg
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Definition: AMDGPUArgumentUsageInfo.h:54
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::SITargetLowering::allocateSystemSGPRs
void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const
Definition: SIISelLowering.cpp:2147
llvm::ArgDescriptor
Definition: AMDGPUArgumentUsageInfo.h:23
llvm::Function
Definition: Function.h:60
allocateHSAUserSGPRs
static void allocateHSAUserSGPRs(CCState &CCInfo, MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
Definition: AMDGPUCallLowering.cpp:439
llvm::AMDGPUCallLowering::AMDGPUCallLowering
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
Definition: AMDGPUCallLowering.cpp:248
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::SIMachineFunctionInfo::getArgInfo
AMDGPUFunctionArgInfo & getArgInfo()
Definition: SIMachineFunctionInfo.h:745
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:454
llvm::CallLowering::ValueHandler::extendRegister
Register extendRegister(Register ValReg, CCValAssign &VA, unsigned MaxSizeBits=0)
Extend a register to the location type given in VA, capped at extending to at most MaxSize bits.
Definition: CallLowering.cpp:1134
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:151
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
Definition: AMDGPUArgumentUsageInfo.h:168
llvm::CallLowering::ValueHandler
Definition: CallLowering.h:229
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::AMDGPU::getAmdhsaCodeObjectVersion
unsigned getAmdhsaCodeObjectVersion()
Definition: AMDGPUBaseInfo.cpp:148
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::CallLowering::OutgoingValueHandler
Base class for ValueHandlers used for arguments passed to a function call, or for return values.
Definition: CallLowering.h:333
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
llvm::MachineRegisterInfo::getLiveInVirtReg
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
Definition: MachineRegisterInfo.cpp:456
llvm::CallLowering::splitToValueTypes
void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl< ArgInfo > &SplitArgs, const DataLayout &DL, CallingConv::ID CallConv, SmallVectorImpl< uint64_t > *Offsets=nullptr) const
Break OrigArgInfo into one or more pieces the calling convention can process, returned in SplitArgs.
Definition: CallLowering.cpp:250
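splitToValueTypes is typically applied to an ArgInfo whose flags were first populated with setArgFlags (listed later in this index). A minimal sketch of that pairing for a return value, assuming it runs inside a CallLowering-derived member with B, VRegs, Val, F, and DL in scope (all of those names are assumptions, not part of this index):
  // Sketch only; requires llvm/CodeGen/GlobalISel/CallLowering.h.
  ArgInfo OrigRetInfo(VRegs, Val->getType(), /*OrigArgIndex=*/0);
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;
  splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, F.getCallingConv());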
llvm::Optional< uint32_t >
addCallTargetOperands
static bool addCallTargetOperands(MachineInstrBuilder &CallInst, MachineIRBuilder &MIRBuilder, AMDGPUCallLowering::CallLoweringInfo &Info)
Definition: AMDGPUCallLowering.cpp:955
llvm::AMDGPUCallLowering::lowerCall
bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override
This hook must be implemented to lower the given call instruction, including argument and return valu...
Definition: AMDGPUCallLowering.cpp:1290
llvm::LLT::vector
static LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelTypeImpl.h:56
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MachineIRBuilder::buildInstrNoInsert
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
Definition: MachineIRBuilder.cpp:39
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
MachineIRBuilder.h
llvm::AMDGPUCallLowering::isEligibleForTailCallOptimization
bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &InArgs, SmallVectorImpl< ArgInfo > &OutArgs) const
Returns true if the call can be lowered as a tail call.
Definition: AMDGPUCallLowering.cpp:1074
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::FunctionLoweringInfo::CanLowerReturn
bool CanLowerReturn
CanLowerReturn - true iff the function's return value can be lowered to registers.
Definition: FunctionLoweringInfo.h:62
llvm::ZB_Undefined
@ ZB_Undefined
The returned value is undefined.
Definition: MathExtras.h:42
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:228
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:159
extOpcodeToISDExtOpcode
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc)
Definition: AMDGPUCallLowering.cpp:253
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::AMDGPUFunctionArgInfo::WORKGROUP_ID_X
@ WORKGROUP_ID_X
Definition: AMDGPUArgumentUsageInfo.h:107
llvm::AMDGPULegalizerInfo
This class provides the legalization rules for the AMDGPU target.
Definition: AMDGPULegalizerInfo.h:31
llvm::MachineIRBuilder::buildConstant
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Definition: MachineIRBuilder.cpp:293
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:213
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:121
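ComputeValueVTs underlies the argument splitting above: it expands one IR type into the sequence of EVTs the calling convention sees. A short hedged sketch (TLI, DL, RetTy, and F are assumed to be in scope):
  // Sketch: enumerate the value types that make up RetTy.
  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, RetTy, SplitVTs);
  for (EVT VT : SplitVTs) {
    Type *PartTy = VT.getTypeForEVT(F.getContext()); // back to an IR type if needed
    (void)PartTy;
  }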
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:1067
llvm::getLLTForType
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
Definition: LowLevelType.cpp:20
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::LLT::fixed_vector
static LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelTypeImpl.h:74
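The LLT factory functions referenced in this index (scalar, vector, fixed_vector, pointer) describe register contents throughout the lowering code. A brief illustrative sketch; the 32-bit private-pointer width is an assumption made here for the example, and AMDGPUAS::PRIVATE_ADDRESS comes from AMDGPU.h:
  // Requires llvm/Support/LowLevelTypeImpl.h.
  LLT S32 = LLT::scalar(32);                                    // 32-bit "bag of bits"
  LLT V2S16 = LLT::fixed_vector(2, 16);                         // <2 x s16>
  LLT PrivatePtr = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32); // p5, assumed 32-bit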
Arg
Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::SIMachineFunctionInfo::returnsVoid
bool returnsVoid() const
Definition: SIMachineFunctionInfo.h:894
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::StringLiteral
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:845
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::FunctionLoweringInfo::DemoteRegister
Register DemoteRegister
DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to ...
Definition: FunctionLoweringInfo.h:69
llvm::AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR
@ KERNARG_SEGMENT_PTR
Definition: AMDGPUArgumentUsageInfo.h:103
llvm::AMDGPUFunctionArgInfo
Definition: AMDGPUArgumentUsageInfo.h:97
llvm::CallLowering::resultsCompatible
bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs, ValueAssigner &CalleeAssigner, ValueAssigner &CallerAssigner) const
Definition: CallLowering.cpp:1034
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1779
llvm::AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR
@ IMPLICIT_ARG_PTR
Definition: AMDGPUArgumentUsageInfo.h:112
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::MachineIRBuilder::buildShl
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Definition: MachineIRBuilder.h:1571
getReg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Definition: MipsDisassembler.cpp:517
FunctionLoweringInfo.h
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:755
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition: AMDGPUArgumentUsageInfo.h:150
llvm::CallLowering::ArgInfo
Definition: CallLowering.h:62
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AMDGPUFunctionArgInfo::WORKITEM_ID_Y
@ WORKITEM_ID_Y
Definition: AMDGPUArgumentUsageInfo.h:116
llvm::FormalArgHandler
Definition: M68kCallLowering.h:66
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::CallLowering::OutgoingValueAssigner
Definition: CallLowering.h:223
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel
bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register >> VRegs) const
Definition: AMDGPUCallLowering.cpp:490
llvm::CallLowering::determineAssignments
bool determineAssignments(ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, CCState &CCInfo) const
Analyze the argument list in Args, using Assigner to populate CCInfo.
Definition: CallLowering.cpp:587
llvm::SIMachineFunctionInfo::getStackPtrOffsetReg
Register getStackPtrOffsetReg() const
Definition: SIMachineFunctionInfo.h:814
llvm::MachineIRBuilder::getMF
MachineFunction & getMF()
Getter for the function we currently build.
Definition: MachineIRBuilder.h:271
llvm::CallLowering::IncomingValueHandler
Base class for ValueHandlers used for arguments coming into the current function, or for return value...
Definition: CallLowering.h:318
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::AMDGPUCallLowering::passSpecialInputs
bool passSpecialInputs(MachineIRBuilder &MIRBuilder, CCState &CCInfo, SmallVectorImpl< std::pair< MCRegister, Register >> &ArgRegs, CallLoweringInfo &Info) const
Definition: AMDGPUCallLowering.cpp:739
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:153
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:149
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:182
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::AMDGPUTargetLowering::getTypeForExtReturn
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override
Return the type that should be used to zero or sign extend a zeroext/signext integer return value.
Definition: AMDGPUISelLowering.cpp:598
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::SITargetLowering::allocateSpecialInputVGPRsFixed
void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments in fixed registers.
Definition: SIISelLowering.cpp:2035
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1802
llvm::AMDGPUCallLowering::lowerTailCall
bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &OutArgs) const
Definition: AMDGPUCallLowering.cpp:1153
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:175
llvm::TargetRegisterInfo::regmaskSubsetEqual
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const
Return true if all bits that are set in mask0 are also set in mask1.
Definition: TargetRegisterInfo.cpp:492
llvm::LLT::pointer
static LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelTypeImpl.h:49
llvm::AMDGPUMachineFunction::getLDSKernelIdMetadata
static Optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
Definition: AMDGPUMachineFunction.cpp:147
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::AttributeList::ReturnIndex
@ ReturnIndex
Definition: Attributes.h:433
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::AMDGPUFunctionArgInfo::getPreloadedValue
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const
Definition: AMDGPUArgumentUsageInfo.cpp:90
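getPreloadedValue returns the descriptor, register class, and low-level type for one of the implicit kernel/function arguments. A hedged sketch of how a caller might query it, assuming MFI is a SIMachineFunctionInfo pointer (the variable names are illustrative):
  // Sketch only; requires <tuple>.
  const ArgDescriptor *ArgDesc;
  const TargetRegisterClass *RC;
  LLT ArgTy;
  std::tie(ArgDesc, RC, ArgTy) =
      MFI->getArgInfo().getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  if (ArgDesc && ArgDesc->isRegister()) {
    MCRegister PhysReg = ArgDesc->getRegister(); // the preloaded SGPR
    (void)PhysReg;
  }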
llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:221
llvm::AMDGPUCallLowering::doCallerAndCalleePassArgsTheSameWay
bool doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs) const
Definition: AMDGPUCallLowering.cpp:975
llvm::AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
@ WORKGROUP_ID_Z
Definition: AMDGPUArgumentUsageInfo.h:109
canGuaranteeTCO
static bool canGuaranteeTCO(CallingConv::ID CC)
Return true if the calling convention is one that we can guarantee TCO for.
Definition: AMDGPUCallLowering.cpp:1059
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
uint64_t
llvm::AMDGPUFunctionArgInfo::WORKITEM_ID_Z
@ WORKITEM_ID_Z
Definition: AMDGPUArgumentUsageInfo.h:117
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:238
llvm::CallLowering::checkReturn
bool checkReturn(CCState &CCInfo, SmallVectorImpl< BaseArgInfo > &Outs, CCAssignFn *Fn) const
Definition: CallLowering.cpp:937
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
llvm::AMDGPUTargetLowering::CCAssignFnForReturn
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
Definition: AMDGPUISelLowering.cpp:1092
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:39
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::AMDGPUFunctionArgInfo::WORKITEM_ID_X
@ WORKITEM_ID_X
Definition: AMDGPUArgumentUsageInfo.h:115
llvm::MachineIRBuilder::buildPtrAdd
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_PTR_ADD Op0, Op1.
Definition: MachineIRBuilder.cpp:190
llvm::inferAlignFromPtrInfo
Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO)
Definition: Utils.cpp:711
I
#define I(x, y, z)
Definition: MD5.cpp:58
Analysis.h
llvm::LLT::isVector
bool isVector() const
Definition: LowLevelTypeImpl.h:122
llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition: MachineFrameInfo.h:639
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:118
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition: MachineFrameInfo.cpp:83
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:52
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:894
llvm::MachineIRBuilder::getMBB
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
Definition: MachineIRBuilder.h:296
llvm::AMDGPUTargetLowering::CCAssignFnForCall
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
Definition: AMDGPUISelLowering.cpp:1087
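These CCAssignFn selectors feed a CCState that records where each argument lands. A minimal hedged sketch of setting one up; CallConv, IsVarArg, MF, and F are assumed to be in scope:
  // Sketch: prepare to analyze arguments for calling convention CallConv.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext());
  CCAssignFn *AssignFnFixed = AMDGPUTargetLowering::CCAssignFnForCall(CallConv, IsVarArg);
  // CCInfo is then handed to determineAssignments()/handleAssignments().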
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:673
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:377
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::MachineRegisterInfo::createGenericVirtualRegister
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Definition: MachineRegisterInfo.cpp:186
llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition: MachineInstrBuilder.h:123
llvm::AMDGPUSubtarget::getExplicitKernelArgOffset
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition: AMDGPUSubtarget.h:218
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition: AMDGPUArgumentUsageInfo.h:152
llvm::ArgDescriptor::isRegister
bool isRegister() const
Definition: AMDGPUArgumentUsageInfo.h:67
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:679
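addLiveIn, together with MachineBasicBlock::addLiveIn and MachineRegisterInfo::getLiveInVirtReg listed elsewhere in this index, is how preloaded physical registers become visible to GlobalISel. A hypothetical helper, for illustration only; the names are assumptions:
  static Register registerLiveIn(MachineIRBuilder &B, MCRegister PhysReg,
                                 const TargetRegisterClass *RC) {
    MachineFunction &MF = B.getMF();
    Register VReg = MF.addLiveIn(PhysReg, RC); // creates/returns the live-in vreg
    B.getMBB().addLiveIn(PhysReg);             // keep the block's live-in list in sync
    return VReg;
  }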
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1690
llvm::MachineIRBuilder::buildInstr
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Definition: MachineIRBuilder.h:383
llvm::AMDGPUFunctionArgInfo::DISPATCH_ID
@ DISPATCH_ID
Definition: AMDGPUArgumentUsageInfo.h:104
llvm::AMDGPUFunctionArgInfo::LDS_KERNEL_ID
@ LDS_KERNEL_ID
Definition: AMDGPUArgumentUsageInfo.h:106
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::CCState::AllocateReg
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
Definition: CallingConvLower.h:349
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1716
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: MathExtras.h:152
AMDGPU.h
llvm::MachineIRBuilder::insertInstr
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
Definition: MachineIRBuilder.cpp:43
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:46
llvm::CallLowering::insertSRetLoads
void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg, int FI) const
Load the returned value from the stack into virtual registers in VRegs.
Definition: CallLowering.cpp:832
llvm::MachineIRBuilder::buildCopy
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
Definition: MachineIRBuilder.cpp:288
llvm::ArgDescriptor::isMasked
bool isMasked() const
Definition: AMDGPUArgumentUsageInfo.h:85
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::AMDGPU::isGraphics
bool isGraphics(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1794
getAssignFnsForCC
static std::pair< CCAssignFn *, CCAssignFn * > getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI)
Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for CC.
Definition: AMDGPUCallLowering.cpp:943
llvm::SIMachineFunctionInfo::getScratchRSrcReg
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
Definition: SIMachineFunctionInfo.h:787
llvm::ArgInfo
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:49
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineIRBuilder::buildAnyExt
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
Definition: MachineIRBuilder.cpp:452
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:404
llvm::MachineIRBuilder::buildFrameIndex
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
Definition: MachineIRBuilder.cpp:145
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:201
llvm::CallLowering::insertSRetIncomingArgument
void insertSRetIncomingArgument(const Function &F, SmallVectorImpl< ArgInfo > &SplitArgs, Register &DemoteReg, MachineRegisterInfo &MRI, const DataLayout &DL) const
Insert the hidden sret ArgInfo to the beginning of SplitArgs.
Definition: CallLowering.cpp:893
llvm::SITargetLowering::allocateSpecialEntryInputVGPRs
void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Definition: SIISelLowering.cpp:1892
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
mayTailCallThisCC
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
Definition: AMDGPUCallLowering.cpp:1064
llvm::CallLowering::insertSRetStores
void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg) const
Store the return value given by VRegs into stack starting at the offset specified in DemoteReg.
Definition: CallLowering.cpp:862
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:375
llvm::AMDGPUFunctionArgInfo::WORKGROUP_ID_Y
@ WORKGROUP_ID_Y
Definition: AMDGPUArgumentUsageInfo.h:108
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::CallLowering::ValueHandler::MRI
MachineRegisterInfo & MRI
Definition: CallLowering.h:231
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Definition: MachineFunction.h:653
MachineFrameInfo.h
llvm::MachineIRBuilder::buildUndef
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
Definition: MachineIRBuilder.cpp:596
llvm::CallLowering::handleAssignments
bool handleAssignments(ValueHandler &Handler, SmallVectorImpl< ArgInfo > &Args, CCState &CCState, SmallVectorImpl< CCValAssign > &ArgLocs, MachineIRBuilder &MIRBuilder, ArrayRef< Register > ThisReturnRegs=None) const
Use Handler to insert code to handle the argument/return values represented by Args.
Definition: CallLowering.cpp:648
llvm::CallLowering::IncomingValueAssigner
Definition: CallLowering.h:217
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::CCState::getNextStackOffset
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Definition: CallingConvLower.h:262
llvm::CallLowering::CallLoweringInfo
Definition: CallLowering.h:102
llvm::CallLowering::ValueAssigner::StackOffset
uint64_t StackOffset
Stack offset for next argument.
Definition: CallLowering.h:204
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::CallLowering::ValueHandler::MIRBuilder
MachineIRBuilder & MIRBuilder
Definition: CallLowering.h:230
llvm::SIMachineFunctionInfo::getBytesInStackArgArea
unsigned getBytesInStackArgArea() const
Definition: SIMachineFunctionInfo.h:605
llvm::AMDGPUFunctionArgInfo::DISPATCH_PTR
@ DISPATCH_PTR
Definition: AMDGPUArgumentUsageInfo.h:101
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: Globals.cpp:121
AMDGPULegalizerInfo.h
AMDGPUCallLowering.h
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1018
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:305
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:745
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition: AMDGPUArgumentUsageInfo.h:151
llvm::LLT::getElementCount
ElementCount getElementCount() const
Definition: LowLevelTypeImpl.h:143
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::ArgDescriptor::getRegister
MCRegister getRegister() const
Definition: AMDGPUArgumentUsageInfo.h:71
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:351
llvm::constrainOperandRegClass
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:53
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:285
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:277
llvm::CallingConv::AMDGPU_Gfx
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:233
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::AMDGPUCallLowering::lowerFormalArguments
bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register >> VRegs, FunctionLoweringInfo &FLI) const override
This hook must be implemented to lower the incoming (formal) arguments, described by VRegs,...
Definition: AMDGPUCallLowering.cpp:563
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1473
llvm::CallReturnHandler
Definition: M68kCallLowering.h:71
llvm::MachineIRBuilder::buildStore
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
Definition: MachineIRBuilder.cpp:425
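buildStore, buildPtrAdd, getMachineMemOperand, and MachinePointerInfo::getStack from this index combine into the usual pattern for writing an outgoing stack argument. A hypothetical sketch under assumed names; SPReg, Offset, Size, and the 32-bit private-pointer width are illustrative assumptions:
  static void storeStackArg(MachineIRBuilder &B, Register ValReg, Register SPReg,
                            int64_t Offset, uint64_t Size) {
    MachineFunction &MF = B.getMF();
    LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
    auto OffsetC = B.buildConstant(LLT::scalar(32), Offset);
    auto Addr = B.buildPtrAdd(PtrTy, SPReg, OffsetC);
    MachinePointerInfo PtrInfo = MachinePointerInfo::getStack(MF, Offset);
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOStore, Size,
        inferAlignFromPtrInfo(MF, PtrInfo));
    B.buildStore(ValReg, Addr, *MMO);
  }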
getCallOpcode
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
Definition: AMDGPUCallLowering.cpp:947
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
llvm::SITargetLowering::allocateSpecialInputSGPRs
void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Definition: SIISelLowering.cpp:2048
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:241
llvm::AMDGPUCallLowering::areCalleeOutgoingArgsTailCallable
bool areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &OutArgs) const
Definition: AMDGPUCallLowering.cpp:1017
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
llvm::CallLowering::parametersInCSRMatch
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< ArgInfo > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
Definition: CallLowering.cpp:982
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
llvm::CallReturnHandler::CallReturnHandler
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder &MIB)
Definition: M68kCallLowering.h:72
llvm::CallLowering
Definition: CallLowering.h:44
llvm::AMDGPUCallLowering::lowerReturn
bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef< Register > VRegs, FunctionLoweringInfo &FLI) const override
This hook behaves as the extended lowerReturn function, but for targets that do not support swifterro...
Definition: AMDGPUCallLowering.cpp:348
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1031
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AttributeList::FirstArgIndex
@ FirstArgIndex
Definition: Attributes.h:435
SIRegisterInfo.h
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:483
AMDGPUTargetMachine.h
llvm::FormalArgHandler::FormalArgHandler
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Definition: M68kCallLowering.h:67
llvm::CallLowering::determineAndHandleAssignments
bool determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, bool IsVarArg, ArrayRef< Register > ThisReturnRegs=None) const
Invoke ValueAssigner::assignArg on each of the given Args and then use Handler to move them to the as...
Definition: CallLowering.cpp:562
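determineAndHandleAssignments ties the pieces together: an OutgoingValueAssigner wrapping a CCAssignFn, plus a target ValueHandler that emits the copies or stores. A hedged sketch of a call site; Handler, RetAssignFn, SplitRetInfos, B, CallConv, and F are stand-ins for whatever the caller has built up:
  // Sketch only; Handler is some CallLowering::OutgoingValueHandler subclass.
  CallLowering::OutgoingValueAssigner Assigner(RetAssignFn);
  if (!determineAndHandleAssignments(Handler, Assigner, SplitRetInfos, B,
                                     CallConv, F.isVarArg()))
    return false;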
llvm::LLT
Definition: LowLevelTypeImpl.h:39
llvm::CallLowering::setArgFlags
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const
Definition: CallLowering.cpp:192