1//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the lowering of LLVM calls to machine code calls for
11/// GlobalISel.
12///
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUCallLowering.h"
16#include "AMDGPU.h"
17#include "AMDGPULegalizerInfo.h"
18#include "AMDGPUTargetMachine.h"
19#include "SIMachineFunctionInfo.h"
20#include "SIRegisterInfo.h"
21#include "llvm/CodeGen/Analysis.h"
22#include "llvm/CodeGen/FunctionLoweringInfo.h"
23#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25#include "llvm/IR/IntrinsicsAMDGPU.h"
26
27#define DEBUG_TYPE "amdgpu-call-lowering"
28
29using namespace llvm;
30
31namespace {
32
33/// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
34static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
35 Register ValVReg, CCValAssign &VA) {
36 if (VA.getLocVT().getSizeInBits() < 32) {
37 // 16-bit types are reported as legal for 32-bit registers. We need to
38 // extend and do a 32-bit copy to avoid the verifier complaining about it.
39 return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
40 }
41
42 return Handler.extendRegister(ValVReg, VA);
43}
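// Illustrative sketch (not from the original source): for an s16 value bound
// for a 32-bit location, the helper above yields roughly the following gMIR,
// so the physreg copy is always 32 bits wide:
//   %ext:_(s32) = G_ANYEXT %val:_(s16)
//   $vgpr0 = COPY %ext:_(s32)
// Values that are already 32 bits or wider go through the normal
// extendRegister() path instead.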
44
45struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
46 AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
47 MachineInstrBuilder MIB)
48 : OutgoingValueHandler(B, MRI), MIB(MIB) {}
49
50 MachineInstrBuilder MIB;
51
52 Register getStackAddress(uint64_t Size, int64_t Offset,
53 MachinePointerInfo &MPO,
54 ISD::ArgFlagsTy Flags) override {
55 llvm_unreachable("not implemented");
56 }
57
58 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
59 MachinePointerInfo &MPO, CCValAssign &VA) override {
60 llvm_unreachable("not implemented");
61 }
62
63 void assignValueToReg(Register ValVReg, Register PhysReg,
64 CCValAssign VA) override {
65 Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
66
67 // If this is a scalar return, insert a readfirstlane just in case the value
68 // ends up in a VGPR.
69 // FIXME: Assert this is a shader return.
70 const SIRegisterInfo *TRI
71 = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
72 if (TRI->isSGPRReg(MRI, PhysReg)) {
73 LLT Ty = MRI.getType(ExtReg);
74 LLT S32 = LLT::scalar(32);
75 if (Ty != S32) {
76 // FIXME: We should probably support readfirstlane intrinsics with all
77 // legal 32-bit types.
78 assert(Ty.getSizeInBits() == 32);
79 if (Ty.isPointer())
80 ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
81 else
82 ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
83 }
84
85 auto ToSGPR = MIRBuilder
86 .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
87 {MRI.getType(ExtReg)})
88 .addReg(ExtReg);
89 ExtReg = ToSGPR.getReg(0);
90 }
91
92 MIRBuilder.buildCopy(PhysReg, ExtReg);
93 MIB.addUse(PhysReg, RegState::Implicit);
94 }
95};
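// Illustrative sketch (assumed gMIR, not part of the original file): a scalar
// return assigned to an SGPR is first turned into a plain s32 (via ptrtoint or
// bitcast if needed) and made uniform with readfirstlane before the final copy:
//   %int:_(s32) = G_PTRTOINT %val:_(p3)
//   %uni:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %int:_(s32)
//   $sgpr0 = COPY %uni:_(s32)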
96
97struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
98 uint64_t StackUsed = 0;
99
100 AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
101 : IncomingValueHandler(B, MRI) {}
102
103 Register getStackAddress(uint64_t Size, int64_t Offset,
104 MachinePointerInfo &MPO,
105 ISD::ArgFlagsTy Flags) override {
106 auto &MFI = MIRBuilder.getMF().getFrameInfo();
107
108 // Byval is assumed to be writable memory, but other stack passed arguments
109 // are not.
110 const bool IsImmutable = !Flags.isByVal();
111 int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
112 MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
113 auto AddrReg = MIRBuilder.buildFrameIndex(
114 LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
115 StackUsed = std::max(StackUsed, Size + Offset);
116 return AddrReg.getReg(0);
117 }
118
119 void assignValueToReg(Register ValVReg, Register PhysReg,
120 CCValAssign VA) override {
121 markPhysRegUsed(PhysReg);
122
123 if (VA.getLocVT().getSizeInBits() < 32) {
124 // 16-bit types are reported as legal for 32-bit registers. We need to do
125 // a 32-bit copy, and truncate to avoid the verifier complaining about it.
126 auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
127
128 // If we have signext/zeroext, it applies to the whole 32-bit register
129 // before truncation.
130 auto Extended =
131 buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
132 MIRBuilder.buildTrunc(ValVReg, Extended);
133 return;
134 }
135
136 IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
137 }
138
139 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
140 MachinePointerInfo &MPO, CCValAssign &VA) override {
141 MachineFunction &MF = MIRBuilder.getMF();
142
143 auto MMO = MF.getMachineMemOperand(
144 MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
145 inferAlignFromPtrInfo(MF, MPO));
146 MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
147 }
148
149 /// How the physical register gets marked varies between formal
150 /// parameters (it's a basic-block live-in), and a call instruction
151/// (it's an implicit-def of the call instruction).
152 virtual void markPhysRegUsed(unsigned PhysReg) = 0;
153};
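// Illustrative sketch (assumed gMIR, not part of the original file): a 16-bit
// incoming argument is copied out of its 32-bit location, has the signext or
// zeroext hint applied to the full register, and is then truncated:
//   %whole:_(s32) = COPY $vgpr0
//   %hint:_(s32) = G_ASSERT_ZEXT %whole:_(s32), 16   ; for a zeroext argument
//   %arg:_(s16) = G_TRUNC %hint:_(s32)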
154
155struct FormalArgHandler : public AMDGPUIncomingArgHandler {
156 FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
157 : AMDGPUIncomingArgHandler(B, MRI) {}
158
159 void markPhysRegUsed(unsigned PhysReg) override {
160 MIRBuilder.getMBB().addLiveIn(PhysReg);
161 }
162};
163
164struct CallReturnHandler : public AMDGPUIncomingArgHandler {
165 CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
166 MachineInstrBuilder MIB)
167 : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
168
169 void markPhysRegUsed(unsigned PhysReg) override {
170 MIB.addDef(PhysReg, RegState::Implicit);
171 }
172
174};
175
176struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
177 /// For tail calls, the byte offset of the call's argument area from the
178 /// callee's. Unused elsewhere.
179 int FPDiff;
180
181 // Cache the SP register vreg if we need it more than once in this call site.
182 Register SPReg;
183
184 bool IsTailCall;
185
186 AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
187 MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
188 bool IsTailCall = false, int FPDiff = 0)
189 : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
190 IsTailCall(IsTailCall) {}
191
192 Register getStackAddress(uint64_t Size, int64_t Offset,
193 MachinePointerInfo &MPO,
194 ISD::ArgFlagsTy Flags) override {
195 MachineFunction &MF = MIRBuilder.getMF();
196 const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
197 const LLT S32 = LLT::scalar(32);
198
199 if (IsTailCall) {
200 Offset += FPDiff;
201 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
202 auto FIReg = MIRBuilder.buildFrameIndex(PtrTy, FI);
203 MPO = MachinePointerInfo::getFixedStack(MF, FI);
204 return FIReg.getReg(0);
205 }
206
207 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
208
209 if (!SPReg) {
210 const GCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
211 if (ST.enableFlatScratch()) {
212 // The stack is accessed unswizzled, so we can use a regular copy.
213 SPReg = MIRBuilder.buildCopy(PtrTy,
214 MFI->getStackPtrOffsetReg()).getReg(0);
215 } else {
216 // The address we produce here, without knowing the use context, is going
217 // to be interpreted as a vector address, so we need to convert to a
218 // swizzled address.
219 SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
220 {MFI->getStackPtrOffsetReg()}).getReg(0);
221 }
222 }
223
224 auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);
225
226 auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
227 MPO = MachinePointerInfo::getStack(MF, Offset);
228 return AddrReg.getReg(0);
229 }
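// Illustrative sketch (assumed gMIR, not part of the original file): an
// outgoing stack argument at offset 16 in a non-tail call is addressed off the
// wave-relative stack pointer:
//   %sp:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32    ; plain COPY when flat scratch is enabled
//   %off:_(s32) = G_CONSTANT i32 16
//   %addr:_(p5) = G_PTR_ADD %sp:_(p5), %off:_(s32)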
230
231 void assignValueToReg(Register ValVReg, Register PhysReg,
232 CCValAssign VA) override {
233 MIB.addUse(PhysReg, RegState::Implicit);
234 Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
235 MIRBuilder.buildCopy(PhysReg, ExtReg);
236 }
237
238 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
239 MachinePointerInfo &MPO, CCValAssign &VA) override {
240 MachineFunction &MF = MIRBuilder.getMF();
241 uint64_t LocMemOffset = VA.getLocMemOffset();
242 const auto &ST = MF.getSubtarget<GCNSubtarget>();
243
244 auto MMO = MF.getMachineMemOperand(
245 MPO, MachineMemOperand::MOStore, MemTy,
246 commonAlignment(ST.getStackAlignment(), LocMemOffset));
247 MIRBuilder.buildStore(ValVReg, Addr, *MMO);
248 }
249
250 void assignValueToAddress(const CallLowering::ArgInfo &Arg,
251 unsigned ValRegIndex, Register Addr, LLT MemTy,
252 MachinePointerInfo &MPO, CCValAssign &VA) override {
253 Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
254 ? extendRegister(Arg.Regs[ValRegIndex], VA)
255 : Arg.Regs[ValRegIndex];
256 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
257 }
258};
259}
260
261AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
262 : CallLowering(&TLI) {
263}
264
265// FIXME: Compatibility shim
266static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
267 switch (MIOpc) {
268 case TargetOpcode::G_SEXT:
269 return ISD::SIGN_EXTEND;
270 case TargetOpcode::G_ZEXT:
271 return ISD::ZERO_EXTEND;
272 case TargetOpcode::G_ANYEXT:
273 return ISD::ANY_EXTEND;
274 default:
275 llvm_unreachable("not an extend opcode");
276 }
277}
278
279bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
280 CallingConv::ID CallConv,
281 SmallVectorImpl<BaseArgInfo> &Outs,
282 bool IsVarArg) const {
283 // For shaders. Vector types should be explicitly handled by CC.
284 if (AMDGPU::isEntryFunctionCC(CallConv))
285 return true;
286
287 SmallVector<CCValAssign, 16> ArgLocs;
288 const SITargetLowering &TLI = *getTLI<SITargetLowering>();
289 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
290 MF.getFunction().getContext());
291
292 return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
293}
294
295/// Lower the return value for the already existing \p Ret. This assumes that
296/// \p B's insertion point is correct.
297bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
298 const Value *Val, ArrayRef<Register> VRegs,
299 MachineInstrBuilder &Ret) const {
300 if (!Val)
301 return true;
302
303 auto &MF = B.getMF();
304 const auto &F = MF.getFunction();
305 const DataLayout &DL = MF.getDataLayout();
306 MachineRegisterInfo *MRI = B.getMRI();
307 LLVMContext &Ctx = F.getContext();
308
309 CallingConv::ID CC = F.getCallingConv();
310 const SITargetLowering &TLI = *getTLI<SITargetLowering>();
311
312 SmallVector<EVT, 8> SplitEVTs;
313 ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
314 assert(VRegs.size() == SplitEVTs.size() &&
315 "For each split Type there should be exactly one VReg.");
316
317 SmallVector<ArgInfo, 8> SplitRetInfos;
318
319 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
320 EVT VT = SplitEVTs[i];
321 Register Reg = VRegs[i];
322 ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx), 0);
323 setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
324
325 if (VT.isScalarInteger()) {
326 unsigned ExtendOp = TargetOpcode::G_ANYEXT;
327 if (RetInfo.Flags[0].isSExt()) {
328 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
329 ExtendOp = TargetOpcode::G_SEXT;
330 } else if (RetInfo.Flags[0].isZExt()) {
331 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
332 ExtendOp = TargetOpcode::G_ZEXT;
333 }
334
335 EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
336 extOpcodeToISDExtOpcode(ExtendOp));
337 if (ExtVT != VT) {
338 RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
339 LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
340 Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
341 }
342 }
343
344 if (Reg != RetInfo.Regs[0]) {
345 RetInfo.Regs[0] = Reg;
346 // Reset the arg flags after modifying Reg.
347 setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
348 }
349
350 splitToValueTypes(RetInfo, SplitRetInfos, DL, CC);
351 }
352
353 CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
354
355 OutgoingValueAssigner Assigner(AssignFn);
356 AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
357 return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
358 CC, F.isVarArg());
359}
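// Illustrative example (not from the original source): for a function declared
// as 'define signext i8 @f()', getTypeForExtReturn() widens the return type to
// i32, so the value is G_SEXT'ed to s32 above and the s32 is what the outgoing
// value handler assigns to the return register.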
360
361bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
362 ArrayRef<Register> VRegs,
363 FunctionLoweringInfo &FLI) const {
364
365 MachineFunction &MF = B.getMF();
366 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
367 MFI->setIfReturnsVoid(!Val);
368
369 assert(!Val == VRegs.empty() && "Return value without a vreg");
370
371 CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
372 const bool IsShader = AMDGPU::isShader(CC);
373 const bool IsWaveEnd =
374 (IsShader && MFI->returnsVoid()) || AMDGPU::isKernel(CC);
375 if (IsWaveEnd) {
376 B.buildInstr(AMDGPU::S_ENDPGM)
377 .addImm(0);
378 return true;
379 }
380
381 unsigned ReturnOpc =
382 IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
383 auto Ret = B.buildInstrNoInsert(ReturnOpc);
384
385 if (!FLI.CanLowerReturn)
386 insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
387 else if (!lowerReturnVal(B, Val, VRegs, Ret))
388 return false;
389
390 // TODO: Handle CalleeSavedRegsViaCopy.
391
392 B.insertInstr(Ret);
393 return true;
394}
395
396void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
397 uint64_t Offset) const {
398 MachineFunction &MF = B.getMF();
399 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
400 MachineRegisterInfo &MRI = MF.getRegInfo();
401 Register KernArgSegmentPtr =
402 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
403 Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
404
405 auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);
406
407 B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
408}
409
410void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
411 uint64_t Offset,
412 Align Alignment) const {
413 MachineFunction &MF = B.getMF();
414 const Function &F = MF.getFunction();
415 const DataLayout &DL = F.getParent()->getDataLayout();
416 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
417
418 LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
419
420 SmallVector<ArgInfo, 32> SplitArgs;
421 SmallVector<uint64_t> FieldOffsets;
422 splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);
423
424 unsigned Idx = 0;
425 for (ArgInfo &SplitArg : SplitArgs) {
426 Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
427 lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);
428
429 LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
430 if (SplitArg.Flags[0].isPointer()) {
431 // Compensate for losing pointeriness in splitValueTypes.
432 LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),
433 ArgTy.getScalarSizeInBits());
434 ArgTy = ArgTy.isVector() ? LLT::vector(ArgTy.getElementCount(), PtrTy)
435 : PtrTy;
436 }
437
438 MachineMemOperand *MMO = B.getMF().getMachineMemOperand(
439 PtrInfo,
440 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
441 MachineMemOperand::MOInvariant,
442 ArgTy, commonAlignment(Alignment, FieldOffsets[Idx]));
443
444 assert(SplitArg.Regs.size() == 1);
445
446 B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
447 ++Idx;
448 }
449}
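// Illustrative sketch (assumed gMIR, not part of the original file): a kernel
// argument at byte offset 36 of the kernarg segment is loaded as an invariant,
// dereferenceable load through a constant-address-space pointer:
//   %kernarg:_(p4) = COPY $sgpr4_sgpr5            ; preloaded kernarg segment ptr
//   %off:_(s64) = G_CONSTANT i64 36
//   %addr:_(p4) = G_PTR_ADD %kernarg:_(p4), %off:_(s64)
//   %arg:_(s32) = G_LOAD %addr:_(p4) :: (dereferenceable invariant load (s32), addrspace 4)
// The physical SGPR pair shown is only an example; the actual register depends
// on which user SGPRs were allocated.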
450
451// Allocate special inputs passed in user SGPRs.
452static void allocateHSAUserSGPRs(CCState &CCInfo,
453 MachineIRBuilder &B,
454 MachineFunction &MF,
455 const SIRegisterInfo &TRI,
456 SIMachineFunctionInfo &Info) {
457 // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
458 const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
459 if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
460 Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
461 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
462 CCInfo.AllocateReg(PrivateSegmentBufferReg);
463 }
464
465 if (UserSGPRInfo.hasDispatchPtr()) {
466 Register DispatchPtrReg = Info.addDispatchPtr(TRI);
467 MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
468 CCInfo.AllocateReg(DispatchPtrReg);
469 }
470
471 const Module *M = MF.getFunction().getParent();
472 if (UserSGPRInfo.hasQueuePtr() &&
474 Register QueuePtrReg = Info.addQueuePtr(TRI);
475 MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
476 CCInfo.AllocateReg(QueuePtrReg);
477 }
478
479 if (UserSGPRInfo.hasKernargSegmentPtr()) {
480 MachineRegisterInfo &MRI = MF.getRegInfo();
481 Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
482 const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
483 Register VReg = MRI.createGenericVirtualRegister(P4);
484 MRI.addLiveIn(InputPtrReg, VReg);
485 B.getMBB().addLiveIn(InputPtrReg);
486 B.buildCopy(VReg, InputPtrReg);
487 CCInfo.AllocateReg(InputPtrReg);
488 }
489
490 if (UserSGPRInfo.hasDispatchID()) {
491 Register DispatchIDReg = Info.addDispatchID(TRI);
492 MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
493 CCInfo.AllocateReg(DispatchIDReg);
494 }
495
496 if (UserSGPRInfo.hasFlatScratchInit()) {
497 Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
498 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
499 CCInfo.AllocateReg(FlatScratchInitReg);
500 }
501
502 // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
503 // these from the dispatch pointer.
504}
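// Illustrative example (assumed layout, not from the original source): an HSA
// kernel that needs the private segment buffer, the kernarg segment pointer,
// and flat scratch init would typically have its user SGPRs allocated in that
// order, e.g. s[0:3], s[4:5] and s[6:7]; each enabled input consumes the next
// free SGPRs in the fixed sequence handled above.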
505
506bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
507 MachineIRBuilder &B, const Function &F,
508 ArrayRef<ArrayRef<Register>> VRegs) const {
509 MachineFunction &MF = B.getMF();
510 const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
511 MachineRegisterInfo &MRI = MF.getRegInfo();
512 SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
513 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
514 const SITargetLowering &TLI = *getTLI<SITargetLowering>();
515 const DataLayout &DL = F.getParent()->getDataLayout();
516
517 SmallVector<CCValAssign, 16> ArgLocs;
518 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
519
520 allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);
521
522 unsigned i = 0;
523 const Align KernArgBaseAlign(16);
524 const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset();
525 uint64_t ExplicitArgOffset = 0;
526
527 // TODO: Align down to dword alignment and extract bits for extending loads.
528 for (auto &Arg : F.args()) {
529 const bool IsByRef = Arg.hasByRefAttr();
530 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
531 unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
532 if (AllocSize == 0)
533 continue;
534
535 MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
536 Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
537
538 uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
539 ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
540
541 if (Arg.use_empty()) {
542 ++i;
543 continue;
544 }
545
546 Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);
547
548 if (IsByRef) {
549 unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
550
551 assert(VRegs[i].size() == 1 &&
552 "expected only one register for byval pointers");
553 if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
554 lowerParameterPtr(VRegs[i][0], B, ArgOffset);
555 } else {
556 const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
557 Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
558 lowerParameterPtr(PtrReg, B, ArgOffset);
559
560 B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
561 }
562 } else {
563 ArgInfo OrigArg(VRegs[i], Arg, i);
564 const unsigned OrigArgIdx = i + AttributeList::FirstArgIndex;
565 setArgFlags(OrigArg, OrigArgIdx, DL, F);
566 lowerParameter(B, OrigArg, ArgOffset, Alignment);
567 }
568
569 ++i;
570 }
571
572 TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
573 TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
574 return true;
575}
576
577bool AMDGPUCallLowering::lowerFormalArguments(
578 MachineIRBuilder &B, const Function &F, ArrayRef<ArrayRef<Register>> VRegs,
579 FunctionLoweringInfo &FLI) const {
580 CallingConv::ID CC = F.getCallingConv();
581
582 // The infrastructure for normal calling convention lowering is essentially
583 // useless for kernels. We want to avoid any kind of legalization or argument
584 // splitting.
585 if (CC == CallingConv::AMDGPU_KERNEL)
586 return lowerFormalArgumentsKernel(B, F, VRegs);
587
588 const bool IsGraphics = AMDGPU::isGraphics(CC);
589 const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
590
591 MachineFunction &MF = B.getMF();
592 MachineBasicBlock &MBB = B.getMBB();
593 MachineRegisterInfo &MRI = MF.getRegInfo();
594 SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
595 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
596 const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
597 const DataLayout &DL = F.getParent()->getDataLayout();
598
599 SmallVector<CCValAssign, 16> ArgLocs;
600 CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
601 const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo();
602
603 if (UserSGPRInfo.hasImplicitBufferPtr()) {
604 Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
605 MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
606 CCInfo.AllocateReg(ImplicitBufferPtrReg);
607 }
608
609 // FIXME: This probably isn't defined for mesa
610 if (UserSGPRInfo.hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
611 Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
612 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
613 CCInfo.AllocateReg(FlatScratchInitReg);
614 }
615
616 SmallVector<ArgInfo, 32> SplitArgs;
617 unsigned Idx = 0;
618 unsigned PSInputNum = 0;
619
620 // Insert the hidden sret parameter if the return value won't fit in the
621 // return registers.
622 if (!FLI.CanLowerReturn)
623 insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
624
625 for (auto &Arg : F.args()) {
626 if (DL.getTypeStoreSize(Arg.getType()) == 0)
627 continue;
628
629 const bool InReg = Arg.hasAttribute(Attribute::InReg);
630
631 // SGPR arguments to functions not implemented.
632 if (!IsGraphics && InReg)
633 return false;
634
635 if (Arg.hasAttribute(Attribute::SwiftSelf) ||
636 Arg.hasAttribute(Attribute::SwiftError) ||
637 Arg.hasAttribute(Attribute::Nest))
638 return false;
639
640 if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
641 const bool ArgUsed = !Arg.use_empty();
642 bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
643
644 if (!SkipArg) {
645 Info->markPSInputAllocated(PSInputNum);
646 if (ArgUsed)
647 Info->markPSInputEnabled(PSInputNum);
648 }
649
650 ++PSInputNum;
651
652 if (SkipArg) {
653 for (Register R : VRegs[Idx])
654 B.buildUndef(R);
655
656 ++Idx;
657 continue;
658 }
659 }
660
661 ArgInfo OrigArg(VRegs[Idx], Arg, Idx);
662 const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
663 setArgFlags(OrigArg, OrigArgIdx, DL, F);
664
665 splitToValueTypes(OrigArg, SplitArgs, DL, CC);
666 ++Idx;
667 }
668
669 // At least one interpolation mode must be enabled or else the GPU will
670 // hang.
671 //
672 // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
673 // set PSInputAddr, the user wants to enable some bits after the compilation
674 // based on run-time states. Since we can't know what the final PSInputEna
675// will look like, we shouldn't do anything here and the user should take
676 // responsibility for the correct programming.
677 //
678 // Otherwise, the following restrictions apply:
679 // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
680 // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
681 // enabled too.
682 if (CC == CallingConv::AMDGPU_PS) {
683 if ((Info->getPSInputAddr() & 0x7F) == 0 ||
684 ((Info->getPSInputAddr() & 0xF) == 0 &&
685 Info->isPSInputAllocated(11))) {
686 CCInfo.AllocateReg(AMDGPU::VGPR0);
687 CCInfo.AllocateReg(AMDGPU::VGPR1);
688 Info->markPSInputAllocated(0);
689 Info->markPSInputEnabled(0);
690 }
691
692 if (Subtarget.isAmdPalOS()) {
693 // For isAmdPalOS, the user does not enable some bits after compilation
694 // based on run-time states; the register values being generated here are
695 // the final ones set in hardware. Therefore we need to apply the
696 // workaround to PSInputAddr and PSInputEnable together. (The case where
697 // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
698 // set up an input arg for a particular interpolation mode, but nothing
699 // uses that input arg. Really we should have an earlier pass that removes
700 // such an arg.)
701 unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
702 if ((PsInputBits & 0x7F) == 0 ||
703 ((PsInputBits & 0xF) == 0 &&
704 (PsInputBits >> 11 & 1)))
705 Info->markPSInputEnabled(llvm::countr_zero(Info->getPSInputAddr()));
706 }
707 }
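// Illustrative example (not from the original source): a pixel shader whose
// only input is POS_W_FLOAT (PSInputAddr bit 11) would otherwise enable no
// PERSP_*/LINEAR_* mode, so the code above force-enables input 0 and reserves
// VGPR0/VGPR1 to keep the hardware from hanging.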
708
709 const SITargetLowering &TLI = *getTLI<SITargetLowering>();
710 CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
711
712 if (!MBB.empty())
713 B.setInstr(*MBB.begin());
714
715 if (!IsEntryFunc && !IsGraphics) {
716 // For the fixed ABI, pass workitem IDs in the last argument register.
717 TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
718 }
719
720 IncomingValueAssigner Assigner(AssignFn);
721 if (!determineAssignments(Assigner, SplitArgs, CCInfo))
722 return false;
723
724 FormalArgHandler Handler(B, MRI);
725 if (!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, B))
726 return false;
727
728 uint64_t StackSize = Assigner.StackSize;
729
730 // Start adding system SGPRs.
731 if (IsEntryFunc) {
732 TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
733 } else {
734 if (!Subtarget.enableFlatScratch())
735 CCInfo.AllocateReg(Info->getScratchRSrcReg());
736 TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
737 }
738
739 // When we tail call, we need to check if the callee's arguments will fit on
740 // the caller's stack. So, whenever we lower formal arguments, we should keep
741 // track of this information, since we might lower a tail call in this
742 // function later.
743 Info->setBytesInStackArgArea(StackSize);
744
745 // Move back to the end of the basic block.
746 B.setMBB(MBB);
747
748 return true;
749}
750
751bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
752 CCState &CCInfo,
753 SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
754 CallLoweringInfo &Info) const {
755 MachineFunction &MF = MIRBuilder.getMF();
756
757 // If there's no call site, this doesn't correspond to a call from the IR and
758 // doesn't need implicit inputs.
759 if (!Info.CB)
760 return true;
761
762 const AMDGPUFunctionArgInfo *CalleeArgInfo
763 = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
764
765 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
766 const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();
767
768
769 // TODO: Unify with private memory register handling. This is complicated by
770 // the fact that at least in kernels, the input argument is not necessarily
771 // in the same location as the input.
772 static constexpr AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
773 AMDGPUFunctionArgInfo::DISPATCH_PTR,
774 AMDGPUFunctionArgInfo::QUEUE_PTR,
775 AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
776 AMDGPUFunctionArgInfo::DISPATCH_ID,
777 AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
778 AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
779 AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
780 AMDGPUFunctionArgInfo::LDS_KERNEL_ID,
781 };
782
783 static constexpr StringLiteral ImplicitAttrNames[] = {
784 "amdgpu-no-dispatch-ptr",
785 "amdgpu-no-queue-ptr",
786 "amdgpu-no-implicitarg-ptr",
787 "amdgpu-no-dispatch-id",
788 "amdgpu-no-workgroup-id-x",
789 "amdgpu-no-workgroup-id-y",
790 "amdgpu-no-workgroup-id-z",
791 "amdgpu-no-lds-kernel-id",
792 };
793
794 MachineRegisterInfo &MRI = MF.getRegInfo();
795
796 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
797 const AMDGPULegalizerInfo *LI
798 = static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());
799
800 unsigned I = 0;
801 for (auto InputID : InputRegs) {
802 const ArgDescriptor *OutgoingArg;
803 const TargetRegisterClass *ArgRC;
804 LLT ArgTy;
805
806 // If the callee does not use the attribute value, skip copying the value.
807 if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
808 continue;
809
810 std::tie(OutgoingArg, ArgRC, ArgTy) =
811 CalleeArgInfo->getPreloadedValue(InputID);
812 if (!OutgoingArg)
813 continue;
814
815 const ArgDescriptor *IncomingArg;
816 const TargetRegisterClass *IncomingArgRC;
817 std::tie(IncomingArg, IncomingArgRC, ArgTy) =
818 CallerArgInfo.getPreloadedValue(InputID);
819 assert(IncomingArgRC == ArgRC);
820
821 Register InputReg = MRI.createGenericVirtualRegister(ArgTy);
822
823 if (IncomingArg) {
824 LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
825 } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
826 LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
827 } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
828 std::optional<uint32_t> Id =
829 AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction());
830 if (Id) {
831 MIRBuilder.buildConstant(InputReg, *Id);
832 } else {
833 MIRBuilder.buildUndef(InputReg);
834 }
835 } else {
836 // We may have proven the input wasn't needed, although the ABI is
837 // requiring it. We just need to allocate the register appropriately.
838 MIRBuilder.buildUndef(InputReg);
839 }
840
841 if (OutgoingArg->isRegister()) {
842 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
843 if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
844 report_fatal_error("failed to allocate implicit input argument");
845 } else {
846 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
847 return false;
848 }
849 }
850
851 // Pack workitem IDs into a single register, or pass them as-is if already
852 // packed.
853 const ArgDescriptor *OutgoingArg;
854 const TargetRegisterClass *ArgRC;
855 LLT ArgTy;
856
857 std::tie(OutgoingArg, ArgRC, ArgTy) =
858 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
859 if (!OutgoingArg)
860 std::tie(OutgoingArg, ArgRC, ArgTy) =
861 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
862 if (!OutgoingArg)
863 std::tie(OutgoingArg, ArgRC, ArgTy) =
864 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
865 if (!OutgoingArg)
866 return false;
867
868 auto WorkitemIDX =
869 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
870 auto WorkitemIDY =
871 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
872 auto WorkitemIDZ =
873 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
874
875 const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
876 const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
877 const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
878 const LLT S32 = LLT::scalar(32);
879
880 const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
881 const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
882 const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
883
884 // If incoming ids are not packed we need to pack them.
885 // FIXME: Should consider known workgroup size to eliminate known 0 cases.
886 Register InputReg;
887 if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
888 NeedWorkItemIDX) {
889 if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
890 InputReg = MRI.createGenericVirtualRegister(S32);
891 LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
892 std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
893 } else {
894 InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
895 }
896 }
897
898 if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
899 NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
900 Register Y = MRI.createGenericVirtualRegister(S32);
901 LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
902 std::get<2>(WorkitemIDY));
903
904 Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
905 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
906 }
907
908 if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
909 NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
910 Register Z = MRI.createGenericVirtualRegister(S32);
911 LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
912 std::get<2>(WorkitemIDZ));
913
914 Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
915 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
916 }
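// Illustrative sketch (assumed gMIR, not part of the original file): with all
// three workitem IDs live, the packed register holds X in bits [9:0], Y in
// bits [19:10] and Z in bits [29:20]:
//   %ys:_(s32) = G_SHL %y:_(s32), %c10:_(s32)
//   %xy:_(s32) = G_OR %x:_(s32), %ys:_(s32)
//   %zs:_(s32) = G_SHL %z:_(s32), %c20:_(s32)
//   %xyz:_(s32) = G_OR %xy:_(s32), %zs:_(s32)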
917
918 if (!InputReg &&
919 (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
920 InputReg = MRI.createGenericVirtualRegister(S32);
921 if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
922 // We're in a situation where the outgoing function requires the workitem
923 // ID, but the calling function does not have it (e.g a graphics function
924 // calling a C calling convention function). This is illegal, but we need
925 // to produce something.
926 MIRBuilder.buildUndef(InputReg);
927 } else {
928 // Workitem ids are already packed; any of the present incoming arguments
929 // will carry all required fields.
930 ArgDescriptor IncomingArg = ArgDescriptor::createArg(
931 IncomingArgX ? *IncomingArgX :
932 IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
933 LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
934 &AMDGPU::VGPR_32RegClass, S32);
935 }
936 }
937
938 if (OutgoingArg->isRegister()) {
939 if (InputReg)
940 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
941
942 if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
943 report_fatal_error("failed to allocate implicit input argument");
944 } else {
945 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
946 return false;
947 }
948
949 return true;
950}
951
952/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
953/// CC.
954static std::pair<CCAssignFn *, CCAssignFn *>
955getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
956 return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
957}
958
959static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
960 bool IsTailCall, CallingConv::ID CC) {
961 assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
962 "because the address can be divergent");
963 if (!IsTailCall)
964 return AMDGPU::G_SI_CALL;
965
966 return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX :
967 AMDGPU::SI_TCRETURN;
968}
969
970// Add operands to call instruction to track the callee.
971static bool addCallTargetOperands(MachineInstrBuilder &CallInst,
972 MachineIRBuilder &MIRBuilder,
973 AMDGPUCallLowering::CallLoweringInfo &Info) {
974 if (Info.Callee.isReg()) {
975 CallInst.addReg(Info.Callee.getReg());
976 CallInst.addImm(0);
977 } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
978 // The call lowering lightly assumed we can directly encode a call target in
979 // the instruction, which is not the case. Materialize the address here.
980 const GlobalValue *GV = Info.Callee.getGlobal();
981 auto Ptr = MIRBuilder.buildGlobalValue(
982 LLT::pointer(GV->getAddressSpace(), 64), GV);
983 CallInst.addReg(Ptr.getReg(0));
984 CallInst.add(Info.Callee);
985 } else
986 return false;
987
988 return true;
989}
990
991bool AMDGPUCallLowering::doCallerAndCalleePassArgsTheSameWay(
992 CallLoweringInfo &Info, MachineFunction &MF,
993 SmallVectorImpl<ArgInfo> &InArgs) const {
994 const Function &CallerF = MF.getFunction();
995 CallingConv::ID CalleeCC = Info.CallConv;
996 CallingConv::ID CallerCC = CallerF.getCallingConv();
997
998 // If the calling conventions match, then everything must be the same.
999 if (CalleeCC == CallerCC)
1000 return true;
1001
1002 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1003
1004 // Make sure that the caller and callee preserve all of the same registers.
1005 auto TRI = ST.getRegisterInfo();
1006
1007 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1008 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1009 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1010 return false;
1011
1012 // Check if the caller and callee will handle arguments in the same way.
1013 const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1014 CCAssignFn *CalleeAssignFnFixed;
1015 CCAssignFn *CalleeAssignFnVarArg;
1016 std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
1017 getAssignFnsForCC(CalleeCC, TLI);
1018
1019 CCAssignFn *CallerAssignFnFixed;
1020 CCAssignFn *CallerAssignFnVarArg;
1021 std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
1022 getAssignFnsForCC(CallerCC, TLI);
1023
1024 // FIXME: We are not accounting for potential differences in implicitly passed
1025 // inputs, but only the fixed ABI is supported now anyway.
1026 IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
1027 CalleeAssignFnVarArg);
1028 IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
1029 CallerAssignFnVarArg);
1030 return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
1031}
1032
1033bool AMDGPUCallLowering::areCalleeOutgoingArgsTailCallable(
1034 CallLoweringInfo &Info, MachineFunction &MF,
1035 SmallVectorImpl<ArgInfo> &OutArgs) const {
1036 // If there are no outgoing arguments, then we are done.
1037 if (OutArgs.empty())
1038 return true;
1039
1040 const Function &CallerF = MF.getFunction();
1041 CallingConv::ID CalleeCC = Info.CallConv;
1042 CallingConv::ID CallerCC = CallerF.getCallingConv();
1043 const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1044
1045 CCAssignFn *AssignFnFixed;
1046 CCAssignFn *AssignFnVarArg;
1047 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1048
1049 // We have outgoing arguments. Make sure that we can tail call with them.
1050 SmallVector<CCValAssign, 16> OutLocs;
1051 CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
1052 OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1053
1054 if (!determineAssignments(Assigner, OutArgs, OutInfo)) {
1055 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
1056 return false;
1057 }
1058
1059 // Make sure that they can fit on the caller's stack.
1060 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1061 if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
1062 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
1063 return false;
1064 }
1065
1066 // Verify that the parameters in callee-saved registers match.
1067 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1068 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1069 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
1070 const MachineRegisterInfo &MRI = MF.getRegInfo();
1071 return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
1072}
1073
1074/// Return true if the calling convention is one that we can guarantee TCO for.
1075static bool canGuaranteeTCO(CallingConv::ID CC) {
1076 return CC == CallingConv::Fast;
1077}
1078
1079/// Return true if we might ever do TCO for calls with this calling convention.
1080static bool mayTailCallThisCC(CallingConv::ID CC) {
1081 switch (CC) {
1082 case CallingConv::C:
1083 case CallingConv::AMDGPU_Gfx:
1084 return true;
1085 default:
1086 return canGuaranteeTCO(CC);
1087 }
1088}
1089
1090bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
1091 MachineIRBuilder &B, CallLoweringInfo &Info,
1092 SmallVectorImpl<ArgInfo> &InArgs, SmallVectorImpl<ArgInfo> &OutArgs) const {
1093 // Must pass all target-independent checks in order to tail call optimize.
1094 if (!Info.IsTailCall)
1095 return false;
1096
1097 // Indirect calls can't be tail calls, because the address can be divergent.
1098 // TODO Check divergence info if the call really is divergent.
1099 if (Info.Callee.isReg())
1100 return false;
1101
1102 MachineFunction &MF = B.getMF();
1103 const Function &CallerF = MF.getFunction();
1104 CallingConv::ID CalleeCC = Info.CallConv;
1105 CallingConv::ID CallerCC = CallerF.getCallingConv();
1106
1107 const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1108 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1109 // Kernels aren't callable, and don't have a live-in return address, so it
1110 // doesn't make sense to do a tail call with entry functions.
1111 if (!CallerPreserved)
1112 return false;
1113
1114 if (!mayTailCallThisCC(CalleeCC)) {
1115 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
1116 return false;
1117 }
1118
1119 if (any_of(CallerF.args(), [](const Argument &A) {
1120 return A.hasByValAttr() || A.hasSwiftErrorAttr();
1121 })) {
1122 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
1123 "or swifterror arguments\n");
1124 return false;
1125 }
1126
1127 // If we have -tailcallopt, then we're done.
1128 if (MF.getTarget().Options.GuaranteedTailCallOpt)
1129 return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
1130
1131 // Verify that the incoming and outgoing arguments from the callee are
1132 // safe to tail call.
1133 if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1134 LLVM_DEBUG(
1135 dbgs()
1136 << "... Caller and callee have incompatible calling conventions.\n");
1137 return false;
1138 }
1139
1140 if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
1141 return false;
1142
1143 LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
1144 return true;
1145}
1146
1147// Insert outgoing implicit arguments for a call, by inserting copies to the
1148// implicit argument registers and adding the necessary implicit uses to the
1149// call instruction.
1150void AMDGPUCallLowering::handleImplicitCallArguments(
1151 MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
1152 const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo,
1153 ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
1154 if (!ST.enableFlatScratch()) {
1155 // Insert copies for the SRD. In the HSA case, this should be an identity
1156 // copy.
1157 auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
1158 FuncInfo.getScratchRSrcReg());
1159 MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
1160 CallInst.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit);
1161 }
1162
1163 for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
1164 MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
1165 CallInst.addReg(ArgReg.first, RegState::Implicit);
1166 }
1167}
1168
1169bool AMDGPUCallLowering::lowerTailCall(
1170 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1171 SmallVectorImpl<ArgInfo> &OutArgs) const {
1172 MachineFunction &MF = MIRBuilder.getMF();
1173 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1174 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1175 const Function &F = MF.getFunction();
1176 MachineRegisterInfo &MRI = MF.getRegInfo();
1177 const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1178
1179 // True when we're tail calling, but without -tailcallopt.
1180 bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
1181
1182 // Find out which ABI gets to decide where things go.
1183 CallingConv::ID CalleeCC = Info.CallConv;
1184 CCAssignFn *AssignFnFixed;
1185 CCAssignFn *AssignFnVarArg;
1186 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1187
1188 MachineInstrBuilder CallSeqStart;
1189 if (!IsSibCall)
1190 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
1191
1192 unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, CalleeCC);
1193 auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1194 if (!addCallTargetOperands(MIB, MIRBuilder, Info))
1195 return false;
1196
1197 // Byte offset for the tail call. When we are sibcalling, this will always
1198 // be 0.
1199 MIB.addImm(0);
1200
1201 // Tell the call which registers are clobbered.
1202 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1203 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1204 MIB.addRegMask(Mask);
1205
1206 // FPDiff is the byte offset of the call's argument area from the callee's.
1207 // Stores to callee stack arguments will be placed in FixedStackSlots offset
1208 // by this amount for a tail call. In a sibling call it must be 0 because the
1209 // caller will deallocate the entire stack and the callee still expects its
1210 // arguments to begin at SP+0.
1211 int FPDiff = 0;
1212
1213 // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1214 // by -tailcallopt. For sibcalls, the memory operands for the call are
1215 // already available in the caller's incoming argument space.
1216 unsigned NumBytes = 0;
1217 if (!IsSibCall) {
1218 // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1219 // before handling assignments, because FPDiff must be known for memory
1220 // arguments.
1221 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1222 SmallVector<CCValAssign, 16> OutLocs;
1223 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1224
1225 // FIXME: Not accounting for callee implicit inputs
1226 OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg);
1227 if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
1228 return false;
1229
1230 // The callee will pop the argument stack as a tail call. Thus, we must
1231 // keep it 16-byte aligned.
1232 NumBytes = alignTo(OutInfo.getStackSize(), ST.getStackAlignment());
1233
1234 // FPDiff will be negative if this tail call requires more space than we
1235 // would automatically have in our incoming argument space. Positive if we
1236 // actually shrink the stack.
1237 FPDiff = NumReusableBytes - NumBytes;
1238
1239 // The stack pointer must be 16-byte aligned at all times it's used for a
1240 // memory operation, which in practice means at *all* times and in
1241 // particular across call boundaries. Therefore our own arguments started at
1242 // a 16-byte aligned SP and the delta applied for the tail call should
1243 // satisfy the same constraint.
1244 assert(isAligned(ST.getStackAlignment(), FPDiff) &&
1245 "unaligned stack on tail call");
1246 }
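// Illustrative arithmetic (not from the original source): if the caller
// reserved 32 bytes of incoming stack arguments (NumReusableBytes = 32) and
// the callee needs 48 bytes, NumBytes = 48 and FPDiff = 32 - 48 = -16, i.e.
// the tail call needs 16 more bytes than the incoming argument area provides.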
1247
1248 SmallVector<CCValAssign, 16> ArgLocs;
1249 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1250
1251 // We could pass MIB and directly add the implicit uses to the call
1252 // now. However, as an aesthetic choice, place implicit argument operands
1253 // after the ordinary user argument registers.
1254 SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
1255
1256 if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
1257 // With a fixed ABI, allocate fixed registers before user arguments.
1258 if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
1259 return false;
1260 }
1261
1262 OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1263
1264 if (!determineAssignments(Assigner, OutArgs, CCInfo))
1265 return false;
1266
1267 // Do the actual argument marshalling.
1268 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);
1269 if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1270 return false;
1271
1272 handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, ImplicitArgRegs);
1273
1274 // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1275 // sequence start and end here.
1276 if (!IsSibCall) {
1277 MIB->getOperand(1).setImm(FPDiff);
1278 CallSeqStart.addImm(NumBytes).addImm(0);
1279 // End the call sequence *before* emitting the call. Normally, we would
1280 // tidy the frame up after the call. However, here, we've laid out the
1281 // parameters so that when SP is reset, they will be in the correct
1282 // location.
1283 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN).addImm(NumBytes).addImm(0);
1284 }
1285
1286 // Now we can add the actual call instruction to the correct basic block.
1287 MIRBuilder.insertInstr(MIB);
1288
1289 // If Callee is a reg, since it is used by a target specific
1290 // instruction, it must have a register class matching the
1291 // constraint of that instruction.
1292
1293 // FIXME: We should define regbankselectable call instructions to handle
1294 // divergent call targets.
1295 if (MIB->getOperand(0).isReg()) {
1296 MIB->getOperand(0).setReg(constrainOperandRegClass(
1297 MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
1298 MIB->getDesc(), MIB->getOperand(0), 0));
1299 }
1300
1302 Info.LoweredTailCall = true;
1303 return true;
1304}
1305
1306bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1307 CallLoweringInfo &Info) const {
1308 if (Info.IsVarArg) {
1309 LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
1310 return false;
1311 }
1312
1313 MachineFunction &MF = MIRBuilder.getMF();
1314 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1315 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1316
1317 const Function &F = MF.getFunction();
1318 MachineRegisterInfo &MRI = MF.getRegInfo();
1319 const SITargetLowering &TLI = *getTLI<SITargetLowering>();
1320 const DataLayout &DL = F.getParent()->getDataLayout();
1321
1322 SmallVector<ArgInfo, 8> OutArgs;
1323 for (auto &OrigArg : Info.OrigArgs)
1324 splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
1325
1326 SmallVector<ArgInfo, 8> InArgs;
1327 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
1328 splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
1329
1330 // If we can lower as a tail call, do that instead.
1331 bool CanTailCallOpt =
1332 isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1333
1334 // We must emit a tail call if we have musttail.
1335 if (Info.IsMustTailCall && !CanTailCallOpt) {
1336 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1337 return false;
1338 }
1339
1340 Info.IsTailCall = CanTailCallOpt;
1341 if (CanTailCallOpt)
1342 return lowerTailCall(MIRBuilder, Info, OutArgs);
1343
1344 // Find out which ABI gets to decide where things go.
1345 CCAssignFn *AssignFnFixed;
1346 CCAssignFn *AssignFnVarArg;
1347 std::tie(AssignFnFixed, AssignFnVarArg) =
1348 getAssignFnsForCC(Info.CallConv, TLI);
1349
1350 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
1351 .addImm(0)
1352 .addImm(0);
1353
1354 // Create a temporarily-floating call instruction so we can add the implicit
1355 // uses of arg registers.
1356 unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.CallConv);
1357
1358 auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1359 MIB.addDef(TRI->getReturnAddressReg(MF));
1360
1361 if (!Info.IsConvergent)
1362 MIB.setMIFlag(MachineInstr::NoConvergent);
1363
1364 if (!addCallTargetOperands(MIB, MIRBuilder, Info))
1365 return false;
1366
1367 // Tell the call which registers are clobbered.
1368 const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
1369 MIB.addRegMask(Mask);
1370
1371 SmallVector<CCValAssign, 16> ArgLocs;
1372 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1373
1374 // We could pass MIB and directly add the implicit uses to the call
1375 // now. However, as an aesthetic choice, place implicit argument operands
1376 // after the ordinary user argument registers.
1377 SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
1378
1379 if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
1380 // With a fixed ABI, allocate fixed registers before user arguments.
1381 if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
1382 return false;
1383 }
1384
1385 // Do the actual argument marshalling.
1386 SmallVector<Register, 8> PhysRegs;
1387
1388 OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
1389 if (!determineAssignments(Assigner, OutArgs, CCInfo))
1390 return false;
1391
1392 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
1393 if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1394 return false;
1395
1396 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1397
1398 handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs);
1399
1400 // Get a count of how many bytes are to be pushed on the stack.
1401 unsigned NumBytes = CCInfo.getStackSize();
1402
1403 // If Callee is a reg, since it is used by a target specific
1404 // instruction, it must have a register class matching the
1405 // constraint of that instruction.
1406
1407 // FIXME: We should define regbankselectable call instructions to handle
1408 // divergent call targets.
1409 if (MIB->getOperand(1).isReg()) {
1410 MIB->getOperand(1).setReg(constrainOperandRegClass(
1411 MF, *TRI, MRI, *ST.getInstrInfo(),
1412 *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
1413 1));
1414 }
1415
1416 // Now we can add the actual call instruction to the correct position.
1417 MIRBuilder.insertInstr(MIB);
1418
1419 // Finally we can copy the returned value back into its virtual-register. In
1420 // symmetry with the arguments, the physical register must be an
1421 // implicit-define of the call instruction.
1422 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1423 CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
1424 Info.IsVarArg);
1425 IncomingValueAssigner Assigner(RetAssignFn);
1426 CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1427 if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
1428 Info.CallConv, Info.IsVarArg))
1429 return false;
1430 }
1431
1432 uint64_t CalleePopBytes = NumBytes;
1433
1434 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
1435 .addImm(0)
1436 .addImm(CalleePopBytes);
1437
1438 if (!Info.CanLowerReturn) {
1439 insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1440 Info.DemoteRegister, Info.DemoteStackIndex);
1441 }
1442
1443 return true;
1444}
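// Illustrative sketch (assumed gMIR, not part of the original file): a simple
// non-tail call lowered by the code above ends up bracketed by the call-frame
// pseudos, with the callee address materialized and the return value copied
// back out of the implicit-def'd physreg:
//   ADJCALLSTACKUP 0, 0
//   %addr:_(p0) = G_GLOBAL_VALUE @callee
//   $vgpr0 = COPY %arg:_(s32)
//   $sgpr30_sgpr31 = G_SI_CALL %addr:_(p0), @callee, <regmask>, implicit $vgpr0, implicit-def $vgpr0
//   %ret:_(s32) = COPY $vgpr0
//   ADJCALLSTACKDOWN 0, <bytes popped>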
unsigned const MachineRegisterInfo * MRI
static std::pair< CCAssignFn *, CCAssignFn * > getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI)
Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for CC.
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool addCallTargetOperands(MachineInstrBuilder &CallInst, MachineIRBuilder &MIRBuilder, AMDGPUCallLowering::CallLoweringInfo &Info)
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc)
static void allocateHSAUserSGPRs(CCState &CCInfo, MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &OutArgs) const
bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &InArgs, SmallVectorImpl< ArgInfo > &OutArgs) const
Returns true if the call can be lowered as a tail call.
bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs) const
bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef< Register > VRegs, FunctionLoweringInfo &FLI) const override
This hook behaves as the extended lowerReturn function, but for targets that do not support swifterro...
bool areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &OutArgs) const
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
bool passSpecialInputs(MachineIRBuilder &MIRBuilder, CCState &CCInfo, SmallVectorImpl< std::pair< MCRegister, Register > > &ArgRegs, CallLoweringInfo &Info) const
bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs, FunctionLoweringInfo &FLI) const override
This hook must be implemented to lower the incoming (formal) arguments, described by VRegs,...
bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override
This hook must be implemented to lower the given call instruction, including argument and return valu...
bool doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs) const
void handleImplicitCallArguments(MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI, ArrayRef< std::pair< MCRegister, Register > > ImplicitArgRegs) const
This class provides the information for the target register banks.
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override
Return the type that should be used to zero or sign extend a zeroext/signext integer return value.
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
CCState - This class holds information needed while lowering arguments and return values.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool handleAssignments(ValueHandler &Handler, SmallVectorImpl< ArgInfo > &Args, CCState &CCState, SmallVectorImpl< CCValAssign > &ArgLocs, MachineIRBuilder &MIRBuilder, ArrayRef< Register > ThisReturnRegs=std::nullopt) const
Use Handler to insert code to handle the argument/return values represented by Args.
void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg, int FI) const
Load the returned value from the stack into virtual registers in VRegs.
bool determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, bool IsVarArg, ArrayRef< Register > ThisReturnRegs=std::nullopt) const
Invoke ValueAssigner::assignArg on each of the given Args and then use Handler to move them to the as...
bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs, ValueAssigner &CalleeAssigner, ValueAssigner &CallerAssigner) const
void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl< ArgInfo > &SplitArgs, const DataLayout &DL, CallingConv::ID CallConv, SmallVectorImpl< uint64_t > *Offsets=nullptr) const
Break OrigArgInfo into one or more pieces the calling convention can process, returned in SplitArgs.
void insertSRetIncomingArgument(const Function &F, SmallVectorImpl< ArgInfo > &SplitArgs, Register &DemoteReg, MachineRegisterInfo &MRI, const DataLayout &DL) const
Insert the hidden sret ArgInfo to the beginning of SplitArgs.
void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg) const
Store the return value given by VRegs into the stack starting at the offset specified in DemoteReg.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< ArgInfo > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool determineAssignments(ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, CCState &CCInfo) const
Analyze the argument list in Args, using Assigner to populate CCInfo.
bool checkReturn(CCState &CCInfo, SmallVectorImpl< BaseArgInfo > &Outs, CCAssignFn *Fn) const
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const
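splitToValueTypes, determineAndHandleAssignments and the other helpers above are protected members of CallLowering, so they are only reachable from a CallLowering subclass. The sketch below shows the usual call sequence under that assumption; MyCallLowering, MyOutgoingHandler and lowerOutgoingArgs are hypothetical names, not symbols from this file.

```cpp
// Sketch: MyCallLowering derives from CallLowering, MyOutgoingHandler from
// CallLowering::OutgoingValueHandler, and AssignFnFixed is a CCAssignFn
// chosen as in the previous sketch.
bool MyCallLowering::lowerOutgoingArgs(MachineIRBuilder &MIRBuilder,
                                       CallLoweringInfo &Info,
                                       MachineInstrBuilder &MIB,
                                       CCAssignFn *AssignFnFixed) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &DL = MF.getDataLayout();

  // Break each IR-level argument into pieces the calling convention can
  // process (aggregate splitting, illegal-type splitting, flag propagation).
  SmallVector<ArgInfo, 8> OutArgs;
  for (const ArgInfo &Orig : Info.OrigArgs)
    splitToValueTypes(Orig, OutArgs, DL, Info.CallConv);

  // Run the CC analysis and emit the register copies / stack stores in one
  // step: the assigner produces CCValAssigns, the handler materializes them.
  OutgoingValueAssigner Assigner(AssignFnFixed);
  MyOutgoingHandler Handler(MIRBuilder, MF.getRegInfo(), MIB);
  return determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                       Info.CallConv, Info.IsVarArg);
}
```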
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Register DemoteRegister
DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to ...
bool CanLowerReturn
CanLowerReturn - true iff the function's return value can be lowered to registers.
iterator_range< arg_iterator > args()
Definition: Function.h:802
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:239
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:320
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:247
bool hasKernargSegmentPtr() const
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getAddressSpace() const
Definition: GlobalValue.h:201
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:249
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:56
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isVector() const
Definition: LowLevelType.h:145
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:49
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:175
constexpr bool isPointer() const
Definition: LowLevelType.h:141
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:166
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:92
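LLT is GlobalISel's machine-level type; the factory functions listed above cover the scalar, pointer and vector cases. A small self-contained illustration (the header path differs slightly across LLVM versions):

```cpp
#include "llvm/CodeGen/LowLevelType.h" // llvm/CodeGenTypes/LowLevelType.h on newer trees

void lltExamples() {
  using namespace llvm;
  LLT S32 = LLT::scalar(32);            // 32-bit "bag of bits"
  LLT P5 = LLT::pointer(5, 32);         // 32-bit pointer in addrspace(5)
  LLT V4S16 = LLT::fixed_vector(4, 16); // <4 x s16>

  (void)S32.getSizeInBits();         // fixed TypeSize of 32 bits
  (void)P5.isPointer();              // true
  (void)V4S16.isVector();            // true
  (void)V4S16.getElementCount();     // fixed element count of 4
  (void)V4S16.getScalarSizeInBits(); // 16
}
```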
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setHasTailCall(bool V=true)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
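The MachineFunction accessors above are how per-function state is reached throughout the lowering code. A hedged sketch of the usual preamble in an AMDGPU routine (GCNSubtarget.h and SIMachineFunctionInfo.h are backend-internal headers, so this only compiles inside the AMDGPU target directory):

```cpp
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

static void gatherFunctionState(llvm::MachineFunction &MF) {
  using namespace llvm;
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const DataLayout &DL = MF.getDataLayout();
  const Function &F = MF.getFunction();
  (void)TRI; (void)MFI; (void)MRI; (void)FrameInfo; (void)DL; (void)F;
}
```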
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV)
Build and insert Res = G_GLOBAL_VALUE GV.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
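The buildXXX helpers above construct generic MIR. As a hedged example, the snippet below stores a 32-bit constant into an existing fixed stack slot; the frame index FI, the choice of the AMDGPU private address space, and the 4-byte size/alignment are assumptions made only for illustration.

```cpp
#include "AMDGPU.h" // backend-internal, for AMDGPUAS::PRIVATE_ADDRESS
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"

static void storeConstantToFixedSlot(llvm::MachineIRBuilder &B, int FI) {
  using namespace llvm;
  MachineFunction &MF = B.getMF();

  const LLT S32 = LLT::scalar(32);
  const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);

  auto Addr = B.buildFrameIndex(PtrTy, FI); // %addr = G_FRAME_INDEX %fixed-stack.FI
  auto Val = B.buildConstant(S32, 42);      // %val  = G_CONSTANT i32 42

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, 4, Align(4));

  B.buildStore(Val, Addr, *MMO); // G_STORE %val(s32), %addr :: (store (s32))
}
```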
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
AMDGPUFunctionArgInfo & getArgInfo()
void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments in fixed registers.
void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const
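SIMachineFunctionInfo tracks which ABI inputs were preloaded into physical registers for the current function. A small hedged query sketch (KERNARG_SEGMENT_PTR is one of the AMDGPUFunctionArgInfo::PreloadedValue enumerators; the helper name is hypothetical):

```cpp
#include "SIMachineFunctionInfo.h" // backend-internal header

// Returns the physical register holding the kernarg segment pointer, or an
// invalid MCRegister if it was not preloaded for this function.
static llvm::MCRegister
kernargSegmentPtr(const llvm::SIMachineFunctionInfo &Info) {
  using namespace llvm;
  return Info.getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
}
```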
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:857
TargetOptions Options
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:394
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:396
unsigned getCodeObjectVersion(const Module &M)
bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
bool isShader(CallingConv::ID cc)
bool isGraphics(CallingConv::ID cc)
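The AMDGPU::is* predicates above classify calling conventions (kernels, shaders, graphics, entry functions). A trivial hedged use, with a hypothetical helper name:

```cpp
#include "Utils/AMDGPUBaseInfo.h" // backend-internal header
#include "llvm/IR/CallingConv.h"

// True for compute kernels: entry functions that are not graphics shaders.
static bool isComputeKernelCC(llvm::CallingConv::ID CC) {
  using namespace llvm;
  return AMDGPU::isEntryFunctionCC(CC) && !AMDGPU::isShader(CC);
}
```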
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:197
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:229
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:191
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:780
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:777
@ Implicit
Not emitted register (e.g. carry, or temporary result).
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< TypeSize > *Offsets, TypeSize StartingOffset)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:122
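ComputeValueVTs is the DAG-era analogue of splitToValueTypes: it flattens an IR type into the EVTs of its leaf values plus their byte offsets. A hedged sketch using the overload listed above:

```cpp
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/TargetLowering.h"

static void flattenType(const llvm::TargetLowering &TLI,
                        const llvm::DataLayout &DL, llvm::Type *Ty) {
  using namespace llvm;
  SmallVector<EVT, 4> ValueVTs;
  SmallVector<TypeSize, 4> Offsets;
  ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, TypeSize::getFixed(0));
  // ValueVTs[i] is the EVT of the i-th leaf value; Offsets[i] is its byte
  // offset from the start of the original value.
}
```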
@ Offset
Definition: DWP.cpp:440
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:53
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1685
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:179
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
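Two of the generic helpers above in isolation; plain values, no CodeGen context needed:

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/bit.h"
#include <cstdint>

static void genericHelpers() {
  // Trailing-zero count, i.e. log2 of the lowest set bit: 0x10 -> 4.
  int Shift = llvm::countr_zero<uint32_t>(0x10);

  // any_of over a whole range instead of an iterator pair.
  int Vals[] = {1, 2, 3, 4};
  bool HasEven = llvm::any_of(Vals, [](int V) { return V % 2 == 0; }); // true
  (void)Shift;
  (void)HasEven;
}
```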
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
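The alignment helpers above do the arithmetic used when laying out stack-passed values; for example, with 8-byte stack slots:

```cpp
#include "llvm/Support/Alignment.h"
#include <cstdint>

static void alignmentMath() {
  using namespace llvm;
  const Align SlotAlign(8);

  // Round a raw size up to the next multiple of the alignment: 13 -> 16.
  uint64_t Padded = alignTo(13, SlotAlign);

  // True only when the value is already a multiple of the alignment.
  bool Ok = isAligned(SlotAlign, Padded); // true

  // Alignment guaranteed at "base + 4" when the base is 8-byte aligned: 4.
  Align AtOffset = commonAlignment(SlotAlign, 4);

  (void)Ok;
  (void)AtOffset;
}
```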
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO)
Definition: Utils.cpp:716
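getLLTForType and inferAlignFromPtrInfo bridge IR-level and MIR-level information. A hedged sketch for an existing fixed stack slot (FI is assumed to be a valid frame index; the getLLTForType header path differs across LLVM versions):

```cpp
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h" // location of getLLTForType in recent trees
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

static void irToMIRInfo(llvm::MachineFunction &MF, llvm::Type &IRTy, int FI) {
  using namespace llvm;
  const DataLayout &DL = MF.getDataLayout();

  // MIR type (scalar, pointer or vector LLT) corresponding to the IR type.
  LLT MemTy = getLLTForType(IRTy, DL);

  // Best known alignment for an access through this fixed stack slot.
  MachinePointerInfo MPO = MachinePointerInfo::getFixedStack(MF, FI);
  Align A = inferAlignFromPtrInfo(MF, MPO);

  (void)MemTy;
  (void)A;
}
```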
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
MCRegister getRegister() const
static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
SmallVector< Register, 4 > Regs
Definition: CallLowering.h:63
Base class for ValueHandlers used for arguments coming into the current function, or for return value...
Definition: CallLowering.h:320
Register buildExtensionHint(CCValAssign &VA, Register SrcReg, LLT NarrowTy)
Insert G_ASSERT_ZEXT/G_ASSERT_SEXT or other hint instruction based on VA, returning the new register ...
void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign VA) override
Provides a default implementation for argument handling.
Base class for ValueHandlers used for arguments passed to a function call, or for return values.
Definition: CallLowering.h:335
uint64_t StackSize
The size of the currently allocated portion of the stack.
Definition: CallLowering.h:206
Register extendRegister(Register ValReg, CCValAssign &VA, unsigned MaxSizeBits=0)
Extend a register to the location type given in VA, capped at extending to at most MaxSize bits.
virtual Register getStackAddress(uint64_t MemSize, int64_t Offset, MachinePointerInfo &MPO, ISD::ArgFlagsTy Flags)=0
Materialize a VReg containing the address of the specified stack-based object.
virtual void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA)=0
The specified value has been assigned to a stack location.
virtual void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign VA)=0
The specified value has been assigned to a physical register, handle the appropriate COPY (either to ...
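The three pure-virtual ValueHandler hooks above are what each target provides. As a generic, hedged sketch (not this file's handlers), an incoming-side assignValueToAddress typically builds a load from the stack address produced by getStackAddress; MyIncomingHandler is a hypothetical IncomingValueHandler subclass.

```cpp
// Sketch: MyIncomingHandler derives from CallLowering::IncomingValueHandler,
// so MIRBuilder is the inherited MachineIRBuilder member.
void MyIncomingHandler::assignValueToAddress(Register ValVReg, Register Addr,
                                             LLT MemTy, MachinePointerInfo &MPO,
                                             CCValAssign &VA) {
  MachineFunction &MF = MIRBuilder.getMF();

  // Incoming stack arguments do not change for the duration of the function,
  // so the access can be marked invariant in addition to being a load.
  auto *MMO = MF.getMachineMemOperand(
      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
      inferAlignFromPtrInfo(MF, MPO));

  MIRBuilder.buildLoad(ValVReg, Addr, *MMO); // %val = G_LOAD %addr :: MMO
}
```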
Extended Value Type.
Definition: ValueTypes.h:34
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:194
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117