LLVM 12.0.0git
AMDGPUCallLowering.cpp
//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

namespace {

struct OutgoingValueHandler : public CallLowering::ValueHandler {
  OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : ValueHandler(B, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  bool isIncomingArgumentHandler() const override { return false; }

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg;
    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to
      // extend and do a 32-bit copy to avoid the verifier complaining about it.
      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
    } else
      ExtReg = extendRegister(ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the value
    // ends up in a VGPR.
    // FIXME: Assert this is a shader return.
    const SIRegisterInfo *TRI
      = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                              {MRI.getType(ExtReg)}, false)
        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
  }
};
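
// Illustrative sketch (not part of the original source): for a shader that
// returns a uniform value in an SGPR, assignValueToReg above emits roughly
//   %ext:_(s32) = G_ANYEXT %val(s16)   ; only for values narrower than 32 bits
//   %uni:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %ext
//   $sgpr0 = COPY %uni
// and then adds $sgpr0 as an implicit use of the return instruction, so the
// verifier is satisfied even if %val was actually computed in a VGPR.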

struct IncomingArgHandler : public CallLowering::ValueHandler {
  uint64_t StackUsed = 0;

  IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
    : ValueHandler(B, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(
      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      return;
    }

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    // FIXME: Get alignment
    auto MMO = MF.getMachineMemOperand(
      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
      inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  // FIXME: What is the point of this being a callback?
  bool isIncomingArgumentHandler() const override { return true; }
};
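
// Illustrative sketch (not part of the original source): an incoming i16
// argument arriving in $vgpr0 is handled by assignValueToReg above as
//   %tmp:_(s32) = COPY $vgpr0
//   %arg:_(s16) = G_TRUNC %tmp
// since 16-bit values are passed in full 32-bit registers on this target.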

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
    : IncomingArgHandler(B, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

}

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}

void AMDGPUCallLowering::splitToValueTypes(
    MachineIRBuilder &B,
    const ArgInfo &OrigArg, unsigned OrigArgIdx,
    SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, CallingConv::ID CallConv,
    SplitArgTy PerformArgSplit) const {
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);

  assert(OrigArg.Regs.size() == SplitVTs.size());

  int SplitIdx = 0;
  for (EVT VT : SplitVTs) {
    Register Reg = OrigArg.Regs[SplitIdx];
    Type *Ty = VT.getTypeForEVT(Ctx);
    LLT LLTy = getLLTForType(*Ty, DL);

    if (OrigArgIdx == AttributeList::ReturnIndex && VT.isScalarInteger()) {
      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
      if (OrigArg.Flags[0].isSExt()) {
        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_SEXT;
      } else if (OrigArg.Flags[0].isZExt()) {
        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_ZEXT;
      }

      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
                                          extOpcodeToISDExtOpcode(ExtendOp));
      if (ExtVT != VT) {
        VT = ExtVT;
        Ty = ExtVT.getTypeForEVT(Ctx);
        LLTy = getLLTForType(*Ty, DL);
        Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
      }
    }

    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
    MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);

    if (NumParts == 1) {
      // No splitting to do, but we want to replace the original type (e.g. [1 x
      // double] -> double).
      SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);

      ++SplitIdx;
      continue;
    }

    SmallVector<Register, 8> SplitRegs;
    Type *PartTy = EVT(RegVT).getTypeForEVT(Ctx);
    LLT PartLLT = getLLTForType(*PartTy, DL);
    MachineRegisterInfo &MRI = *B.getMRI();

    // FIXME: Should we be reporting all of the part registers for a single
    // argument, and let handleAssignments take care of the repacking?
    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
      SplitRegs.push_back(PartReg);
      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
    }

    PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);

    ++SplitIdx;
  }
}
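
// Illustrative sketch (not part of the original source): assuming a value
// type that getNumRegistersForCallingConv reports as four i32 parts (e.g. a
// v4i32 argument), the loop above creates four s32 part registers, appends
// one ArgInfo per part to SplitArgs, and invokes PerformArgSplit to connect
// the parts to the original v4s32 register.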

// Get the appropriate type to make \p OrigTy \p Factor times bigger.
static LLT getMultipleType(LLT OrigTy, int Factor) {
  if (OrigTy.isVector()) {
    return LLT::vector(OrigTy.getNumElements() * Factor,
                       OrigTy.getElementType());
  }

  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
}
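
// Illustrative examples (derived from the code above): getMultipleType(s32, 3)
// yields s96, and getMultipleType(<2 x s16>, 2) yields <4 x s16>.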

// TODO: Move to generic code
static void unpackRegsToOrigType(MachineIRBuilder &B,
                                 ArrayRef<Register> DstRegs,
                                 Register SrcReg,
                                 const CallLowering::ArgInfo &Info,
                                 LLT SrcTy,
                                 LLT PartTy) {
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned PartSize = PartTy.getSizeInBits();

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(),
                                         SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcSize % PartSize == 0) {
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;

  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
  auto ImpDef = B.buildUndef(BigTy);

  auto Big = B.buildInsert(BigTy, ImpDef.getReg(0), SrcReg, 0).getReg(0);

  int64_t Offset = 0;
  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
    B.buildExtract(DstRegs[i], Big, Offset);
}
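
// Illustrative sketch (not part of the original source): unpacking an s96
// source into two s64 parts takes the round-up path above, since 96 is not a
// multiple of 64:
//   %undef:_(s128) = G_IMPLICIT_DEF
//   %big:_(s128)   = G_INSERT %undef, %src(s96), 0
//   %dst0:_(s64)   = G_EXTRACT %big(s128), 0
//   %dst1:_(s64)   = G_EXTRACT %big(s128), 64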

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = B.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();
  MachineRegisterInfo *MRI = B.getMRI();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  ArgInfo OrigRetInfo(VRegs, Val->getType());
  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
  SmallVector<ArgInfo, 4> SplitRetInfos;

  splitToValueTypes(
    B, OrigRetInfo, AttributeList::ReturnIndex, SplitRetInfos, DL, CC,
    [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
        int VTSplitIdx) {
      unpackRegsToOrigType(B, Regs, SrcReg,
                           SplitRetInfos[VTSplitIdx],
                           LLTy, PartLLT);
    });

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
  OutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
  return handleAssignments(B, SplitRetInfos, RetHandler);
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
                                     const Value *Val,
                                     ArrayRef<Register> VRegs) const {

  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
                         AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  auto const &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = B.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    B.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}
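
// Illustrative sketch (not part of the original source): for a callable
// (non-shader, non-kernel) function, the code above builds an epilog of the
// form
//   %ra:ccr_sgpr_64 = COPY %return-address-live-in
//   ... copies of the return value into the ABI registers ...
//   S_SETPC_B64_return %ra, implicit-use of the return value registers
// while shaders end in SI_RETURN_TO_EPILOG and kernels simply in S_ENDPGM 0.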

Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
                                               Type *ParamTy,
                                               uint64_t Offset) const {

  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);

  return B.buildPtrAdd(PtrType, KernArgSegmentVReg, OffsetReg).getReg(0);
}
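
// Illustrative sketch (not part of the original source): with the kernarg
// segment pointer preloaded into %kernarg(p4), lowerParameterPtr with
// Offset == 8 emits
//   %off:_(s64) = G_CONSTANT i64 8
//   %ptr:_(p4)  = G_PTR_ADD %kernarg, %off(s64)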

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy,
                                        uint64_t Offset, Align Alignment,
                                        Register DstReg) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo,
    MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
    MachineMemOperand::MOInvariant,
    TypeSize, Alignment);

  B.buildLoad(DstReg, PtrReg, *MMO);
}
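
// Illustrative sketch (not part of the original source): for a 4-byte i32
// kernel argument, lowerParameter produces a single
//   %dst:_(s32) = G_LOAD %ptr(p4) :: (dereferenceable invariant load 4 ...)
// where %ptr comes from lowerParameterPtr above.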

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    Register DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    Register QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    Register DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}
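
// Illustrative note (not part of the original source, and assuming the HSA
// kernel ABI): the user SGPRs above are allocated in order, so with every
// input enabled the private segment buffer takes s[0:3], the dispatch
// pointer s[4:5], the queue pointer s[6:7], the kernarg segment pointer
// s[8:9], the dispatch id s[10:11], and flat scratch init s[12:13].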

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const Align KernArgBaseAlign(16);
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    Align ABIAlign = DL.getABITypeAlign(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    if (Arg.use_empty()) {
      ++i;
      continue;
    }

    ArrayRef<Register> OrigArgRegs = VRegs[i];
    Register ArgReg =
      OrigArgRegs.size() == 1
      ? OrigArgRegs[0]
      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));

    Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);
    lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
    if (OrigArgRegs.size() > 1)
      unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}
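
// Illustrative sketch (not part of the original source, assuming an HSA
// target where getExplicitKernelArgOffset returns 0): for
//   amdgpu_kernel void @k(i32 %a, double %b)
// the loop above loads %a from kernarg offset 0 and %b from offset 8
// (alignTo(4, 8) == 8), with alignments of 16 and 8 respectively, derived
// from the 16-byte kernarg base alignment.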

/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
static MachineInstrBuilder mergeVectorRegsToResultRegs(
  MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT LLTy = MRI.getType(DstRegs[0]);
  LLT PartLLT = MRI.getType(SrcRegs[0]);

  // Deal with v3s16 split into v2s16
  LLT LCMTy = getLCMType(LLTy, PartLLT);
  if (LCMTy == LLTy) {
    // Common case where no padding is needed.
    assert(DstRegs.size() == 1);
    return B.buildConcatVectors(DstRegs[0], SrcRegs);
  }

  const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
  Register Undef = B.buildUndef(PartLLT).getReg(0);

  // Build vector of undefs.
  SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);

  // Replace the first sources with the real registers.
  std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());

  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();

  SmallVector<Register, 8> PadDstRegs(NumDst);
  std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());

  // Create the excess dead defs for the unmerge.
  for (int I = DstRegs.size(); I != NumDst; ++I)
    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

  return B.buildUnmerge(PadDstRegs, Widened);
}
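
// Illustrative sketch (not part of the original source): merging two v2s16
// parts into a v3s16 result takes the padding path above. LCM(v3s16, v2s16)
// is v6s16, so three v2s16 sources (the third a G_IMPLICIT_DEF) are
// concatenated into a v6s16, which is then unmerged into the v3s16 result
// plus one dead v3s16 def.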

// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &B,
                                    ArrayRef<Register> OrigRegs,
                                    ArrayRef<Register> Regs,
                                    LLT LLTy,
                                    LLT PartLLT) {
  MachineRegisterInfo &MRI = *B.getMRI();

  if (!LLTy.isVector() && !PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    LLT OrigTy = MRI.getType(OrigRegs[0]);

    unsigned SrcSize = PartLLT.getSizeInBits() * Regs.size();
    if (SrcSize == OrigTy.getSizeInBits())
      B.buildMerge(OrigRegs[0], Regs);
    else {
      auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
      B.buildTrunc(OrigRegs[0], Widened);
    }

    return;
  }

  if (LLTy.isVector() && PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    assert(LLTy.getElementType() == PartLLT.getElementType());
    mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();

  // Pointer information was discarded. We'll need to coerce some register types
  // to avoid violating type constraints.
  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();

  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());

  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.

    if (RealDstEltTy.isPointer()) {
      for (Register Reg : Regs)
        MRI.setType(Reg, RealDstEltTy);
    }

    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
      // Fix the type in case this is really a vector of pointers.
      MRI.setType(Merge.getReg(0), RealDstEltTy);
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
    auto BV = B.buildBuildVector(BVType, Regs);
    B.buildTrunc(OrigRegs[0], BV);
  }
}
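
// Illustrative sketch (not part of the original source): rebuilding a v2s64
// value from four s32 parts takes the middle vector branch above:
//   %e0:_(s64) = G_MERGE_VALUES %p0(s32), %p1(s32)
//   %e1:_(s64) = G_MERGE_VALUES %p2(s32), %p3(s32)
//   %vec:_(<2 x s64>) = G_BUILD_VECTOR %e0, %e1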

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    B.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsShader && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          B.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
    const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
    setArgFlags(OrigArg, OrigArgIdx, DL, F);

    splitToValueTypes(
      B, OrigArg, OrigArgIdx, SplitArgs, DL, CC,
      // FIXME: We should probably be passing multiple registers to
      // handleAssignments to do this
      [&](ArrayRef<Register> Regs, Register DstReg,
          LLT LLTy, LLT PartLLT, int VTSplitIdx) {
        assert(DstReg == VRegs[Idx][VTSplitIdx]);
        packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
                                LLTy, PartLLT);
      });

    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  if (!IsEntryFunc) {
    // For the fixed ABI, pass workitem IDs in the last argument register.
    if (AMDGPUTargetMachine::EnableFixedFunctionABI)
      TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
  }

  FormalArgHandler Handler(B, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
  } else {
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}