//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-call-lowering"

using namespace llvm;

namespace {

/// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
                                    Register ValVReg, CCValAssign &VA) {
  if (VA.getLocVT().getSizeInBits() < 32) {
    // 16-bit types are reported as legal for 32-bit registers. We need to
    // extend and do a 32-bit copy to avoid the verifier complaining about it.
    return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
  }

  return Handler.extendRegister(ValVReg, VA);
}

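/// Value handler for outgoing return values. Copies each (possibly extended)
/// value into its assigned physical register and records the register as an
/// implicit use on the return instruction \p MIB. Stack returns are not
/// implemented.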
struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
  AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                             MachineInstrBuilder MIB)
      : OutgoingValueHandler(B, MRI), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the value
    // ends up in a VGPR.
    // FIXME: Assert this is a shader return.
    const SIRegisterInfo *TRI
      = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      LLT Ty = MRI.getType(ExtReg);
      LLT S32 = LLT::scalar(32);
      if (Ty != S32) {
        // FIXME: We should probably support readfirstlane intrinsics with all
        // legal 32-bit types.
        assert(Ty.getSizeInBits() == 32);
        if (Ty.isPointer())
          ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
        else
          ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
      }

      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                              {MRI.getType(ExtReg)}, false)
                        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }
};

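/// Common handler for incoming values, shared by formal argument lowering and
/// call result lowering. Sub-32-bit locations are copied as full 32-bit
/// registers and then truncated; stack-passed values are loaded from fixed
/// frame objects created in getStackAddress().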
struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
  uint64_t StackUsed = 0;

  AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
      : IncomingValueHandler(B, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();
    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);

      // If we have signext/zeroext, it applies to the whole 32-bit register
      // before truncation.
      auto Extended =
          buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
      MIRBuilder.buildTrunc(ValVReg, Extended);
      return;
    }

    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad, MemTy,
        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the call instruction).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
};

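/// Incoming value handler for formal arguments: physical argument registers
/// are marked as live-ins of the entry basic block.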
struct FormalArgHandler : public AMDGPUIncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
      : AMDGPUIncomingArgHandler(B, MRI) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

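/// Incoming value handler for values returned by a call: physical return
/// registers are added as implicit defs of the call instruction \p MIB.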
struct CallReturnHandler : public AMDGPUIncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

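/// Value handler for outgoing call arguments. Register arguments become
/// implicit uses of the call; stack arguments are stored relative to the
/// (possibly wave-scaled) stack pointer, or to fixed frame objects offset by
/// FPDiff for tail calls.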
struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  bool IsTailCall;

  AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
                           MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
                           bool IsTailCall = false, int FPDiff = 0)
      : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
        IsTailCall(IsTailCall) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
    const LLT S32 = LLT::scalar(32);

    if (IsTailCall) {
      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(PtrTy, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

    if (!SPReg) {
      const GCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
      if (ST.enableFlatScratch()) {
        // The stack is accessed unswizzled, so we can use a regular copy.
        SPReg = MIRBuilder.buildCopy(PtrTy,
                                     MFI->getStackPtrOffsetReg()).getReg(0);
      } else {
        // The address we produce here, without knowing the use context, is
        // going to be interpreted as a vector address, so we need to convert
        // to a swizzled address.
        SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
                                      {MFI->getStackPtrOffsetReg()}).getReg(0);
      }
    }

    auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    uint64_t LocMemOffset = VA.getLocMemOffset();
    const auto &ST = MF.getSubtarget<GCNSubtarget>();

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOStore, MemTy,
        commonAlignment(ST.getStackAlignment(), LocMemOffset));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg,
                            unsigned ValRegIndex, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
                           ? extendRegister(Arg.Regs[ValRegIndex], VA)
                           : Arg.Regs[ValRegIndex];
    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }
};
} // anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}

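/// Return true if the return value described by \p Outs can be lowered
/// directly in registers for \p CallConv; entry functions are always treated
/// as lowerable here.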
bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
                                        CallingConv::ID CallConv,
                                        SmallVectorImpl<BaseArgInfo> &Outs,
                                        bool IsVarArg) const {
  // For shaders. Vector types should be explicitly handled by CC.
  if (AMDGPU::isEntryFunctionCC(CallConv))
    return true;

  SmallVector<CCValAssign, 16> ArgLocs;
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
                 MF.getFunction().getContext());

  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = B.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();
  MachineRegisterInfo *MRI = B.getMRI();
  LLVMContext &Ctx = F.getContext();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  SmallVector<EVT, 8> SplitEVTs;
  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
  assert(VRegs.size() == SplitEVTs.size() &&
         "For each split Type there should be exactly one VReg.");

  SmallVector<ArgInfo, 8> SplitRetInfos;

  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
    EVT VT = SplitEVTs[i];
    Register Reg = VRegs[i];
    ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx), 0);
    setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);

    if (VT.isScalarInteger()) {
      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
      if (RetInfo.Flags[0].isSExt()) {
        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_SEXT;
      } else if (RetInfo.Flags[0].isZExt()) {
        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_ZEXT;
      }

      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
                                          extOpcodeToISDExtOpcode(ExtendOp));
      if (ExtVT != VT) {
        RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
        LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
        Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
      }
    }

    if (Reg != RetInfo.Regs[0]) {
      RetInfo.Regs[0] = Reg;
      // Reset the arg flags after modifying Reg.
      setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
    }

    splitToValueTypes(RetInfo, SplitRetInfos, DL, CC);
  }

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());

  OutgoingValueAssigner Assigner(AssignFn);
  AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
  return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
                                       CC, F.isVarArg());
}

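/// Lower a return, either by ending the wave for kernels and void shader
/// returns, or by building SI_RETURN_TO_EPILOG / SI_RETURN and passing the
/// return value through lowerReturnVal (or sret stores when it cannot be
/// returned in registers).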
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
                                     ArrayRef<Register> VRegs,
                                     FunctionLoweringInfo &FLI) const {

  MachineFunction &MF = B.getMF();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd =
      (IsShader && MFI->returnsVoid()) || AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
        .addImm(0);
    return true;
  }

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
  auto Ret = B.buildInstrNoInsert(ReturnOpc);

  if (!FLI.CanLowerReturn)
    insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
  else if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

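/// Materialize in \p DstReg a pointer to the kernarg segment at byte
/// \p Offset, based on the preloaded kernarg segment pointer.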
void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
                                           uint64_t Offset) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);

  B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
}

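/// Load a kernel argument described by \p OrigArg from the kernarg segment at
/// byte \p Offset, splitting it into the value types the calling convention
/// expects and emitting one load per piece.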
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
                                        uint64_t Offset,
                                        Align Alignment) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);

  LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);

  SmallVector<ArgInfo, 32> SplitArgs;
  SmallVector<uint64_t> FieldOffsets;
  splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);

  unsigned Idx = 0;
  for (ArgInfo &SplitArg : SplitArgs) {
    Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
    lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);

    LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
    if (SplitArg.Flags[0].isPointer()) {
      // Compensate for losing pointeriness in splitValueTypes.
      LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),
                               ArgTy.getScalarSizeInBits());
      ArgTy = ArgTy.isVector() ? LLT::vector(ArgTy.getElementCount(), PtrTy)
                               : PtrTy;
    }

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        ArgTy, commonAlignment(Alignment, FieldOffsets[Idx]));

    assert(SplitArg.Regs.size() == 1);

    B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
    ++Idx;
  }
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    Register DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  const Module *M = MF.getFunction().getParent();
  if (Info.hasQueuePtr() &&
      AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
    Register QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    Register DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

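/// Lower the incoming arguments of a kernel (AMDGPU_KERNEL). Kernel arguments
/// live in the kernarg segment, so they are loaded from constant address space
/// rather than assigned by the normal calling-convention machinery.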
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  const DataLayout &DL = F.getParent()->getDataLayout();

  Info->allocateKnownAddressLDSGlobal(F);

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const Align KernArgBaseAlign(16);
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
    Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    if (Arg.use_empty()) {
      ++i;
      continue;
    }

    Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);

    if (IsByRef) {
      unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();

      assert(VRegs[i].size() == 1 &&
             "expected only one register for byval pointers");
      if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
        lowerParameterPtr(VRegs[i][0], B, ArgOffset);
      } else {
        const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
        Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
        lowerParameterPtr(PtrReg, B, ArgOffset);

        B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
      }
    } else {
      ArgInfo OrigArg(VRegs[i], Arg, i);
      const unsigned OrigArgIdx = i + AttributeList::FirstArgIndex;
      setArgFlags(OrigArg, OrigArgIdx, DL, F);
      lowerParameter(B, OrigArg, ArgOffset, Alignment);
    }

    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

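/// Lower incoming formal arguments for non-kernel functions (shaders and
/// callable functions), including PS input bookkeeping and allocation of the
/// special SGPR/VGPR inputs.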
bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F, ArrayRef<ArrayRef<Register>> VRegs,
    FunctionLoweringInfo &FLI) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsGraphics = AMDGPU::isGraphics(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  Info->allocateKnownAddressLDSGlobal(F);

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  // FIXME: This probably isn't defined for mesa
  if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
    Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  // Insert the hidden sret parameter if the return value won't fit in the
  // return registers.
  if (!FLI.CanLowerReturn)
    insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsGraphics && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (Register R : VRegs[Idx])
          B.buildUndef(R);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg, Idx);
    const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
    setArgFlags(OrigArg, OrigArgIdx, DL, F);

    splitToValueTypes(OrigArg, SplitArgs, DL, CC);
    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here, and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(llvm::countr_zero(Info->getPSInputAddr()));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  if (!IsEntryFunc && !IsGraphics) {
    // For the fixed ABI, pass workitem IDs in the last argument register.
    TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
  }

  IncomingValueAssigner Assigner(AssignFn);
  if (!determineAssignments(Assigner, SplitArgs, CCInfo))
    return false;

  FormalArgHandler Handler(B, MRI);
  if (!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, B))
    return false;

  uint64_t StackOffset = Assigner.StackOffset;

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
  } else {
    if (!Subtarget.enableFlatScratch())
      CCInfo.AllocateReg(Info->getScratchRSrcReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // When we tail call, we need to check if the callee's arguments will fit on
  // the caller's stack. So, whenever we lower formal arguments, we should keep
  // track of this information, since we might lower a tail call in this
  // function later.
  Info->setBytesInStackArgArea(StackOffset);

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}

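/// Set up the implicit SGPR/VGPR inputs (dispatch pointer, queue pointer,
/// workgroup and workitem IDs, ...) an outgoing call requires under the fixed
/// ABI. The registers to copy into are appended to \p ArgRegs; workitem IDs
/// are packed into a single VGPR when necessary.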
bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
                                           CCState &CCInfo,
                                           SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
                                           CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();

  // If there's no call site, this doesn't correspond to a call from the IR and
  // doesn't need implicit inputs.
  if (!Info.CB)
    return true;

  const AMDGPUFunctionArgInfo *CalleeArgInfo
    = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();


  // TODO: Unify with private memory register handling. This is complicated by
  // the fact that at least in kernels, the input argument is not necessarily
  // in the same location as the input.
  static constexpr AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
    AMDGPUFunctionArgInfo::DISPATCH_PTR,
    AMDGPUFunctionArgInfo::QUEUE_PTR,
    AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
    AMDGPUFunctionArgInfo::DISPATCH_ID,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
    AMDGPUFunctionArgInfo::LDS_KERNEL_ID,
  };

  static constexpr StringLiteral ImplicitAttrNames[] = {
    "amdgpu-no-dispatch-ptr",
    "amdgpu-no-queue-ptr",
    "amdgpu-no-implicitarg-ptr",
    "amdgpu-no-dispatch-id",
    "amdgpu-no-workgroup-id-x",
    "amdgpu-no-workgroup-id-y",
    "amdgpu-no-workgroup-id-z",
    "amdgpu-no-lds-kernel-id",
  };

  MachineRegisterInfo &MRI = MF.getRegInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI
    = static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());

  unsigned I = 0;
  for (auto InputID : InputRegs) {
    const ArgDescriptor *OutgoingArg;
    const TargetRegisterClass *ArgRC;
    LLT ArgTy;

    // If the callee does not use the attribute value, skip copying the value.
    if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
      continue;

    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(InputID);
    if (!OutgoingArg)
      continue;

    const ArgDescriptor *IncomingArg;
    const TargetRegisterClass *IncomingArgRC;
    std::tie(IncomingArg, IncomingArgRC, ArgTy) =
        CallerArgInfo.getPreloadedValue(InputID);
    assert(IncomingArgRC == ArgRC);

    Register InputReg = MRI.createGenericVirtualRegister(ArgTy);

    if (IncomingArg) {
      LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
    } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
      LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
    } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
      std::optional<uint32_t> Id =
          AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction());
      if (Id) {
        MIRBuilder.buildConstant(InputReg, *Id);
      } else {
        MIRBuilder.buildUndef(InputReg);
      }
    } else {
      // We may have proven the input wasn't needed, although the ABI is
      // requiring it. We just need to allocate the register appropriately.
      MIRBuilder.buildUndef(InputReg);
    }

    if (OutgoingArg->isRegister()) {
      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
      if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
        report_fatal_error("failed to allocate implicit input argument");
    } else {
      LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
      return false;
    }
  }

  // Pack workitem IDs into a single register or pass it as is if already
  // packed.
  const ArgDescriptor *OutgoingArg;
  const TargetRegisterClass *ArgRC;
  LLT ArgTy;

  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  if (!OutgoingArg)
    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  if (!OutgoingArg)
    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
  if (!OutgoingArg)
    return false;

  auto WorkitemIDX =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  auto WorkitemIDY =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  auto WorkitemIDZ =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);

  const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
  const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
  const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
  const LLT S32 = LLT::scalar(32);

  const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
  const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
  const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");

  // If incoming ids are not packed we need to pack them.
  // FIXME: Should consider known workgroup size to eliminate known 0 cases.
  Register InputReg;
  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
      NeedWorkItemIDX) {
    if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
      InputReg = MRI.createGenericVirtualRegister(S32);
      LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                         std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
    } else {
      InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
    }
  }

  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
      NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
    Register Y = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                       std::get<2>(WorkitemIDY));

    Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
  }

  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
      NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
    Register Z = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                       std::get<2>(WorkitemIDZ));

    Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
  }

  if (!InputReg &&
      (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
    InputReg = MRI.createGenericVirtualRegister(S32);
    if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
      // We're in a situation where the outgoing function requires the workitem
      // ID, but the calling function does not have it (e.g. a graphics function
      // calling a C calling convention function). This is illegal, but we need
      // to produce something.
      MIRBuilder.buildUndef(InputReg);
    } else {
      // Workitem ids are already packed, any of present incoming arguments will
      // carry all required fields.
      ArgDescriptor IncomingArg = ArgDescriptor::createArg(
        IncomingArgX ? *IncomingArgX :
        IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
      LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
                         &AMDGPU::VGPR_32RegClass, S32);
    }
  }

  if (OutgoingArg->isRegister()) {
    if (InputReg)
      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);

    if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
      report_fatal_error("failed to allocate implicit input argument");
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
    return false;
  }

  return true;
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
/// CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
                                        "because the address can be divergent");
  return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::G_SI_CALL;
}

// Add operands to call instruction to track the callee.
static bool addCallTargetOperands(MachineInstrBuilder &CallInst,
                                  MachineIRBuilder &MIRBuilder,
                                  AMDGPUCallLowering::CallLoweringInfo &Info) {
  if (Info.Callee.isReg()) {
    CallInst.addReg(Info.Callee.getReg());
    CallInst.addImm(0);
  } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
    // The call lowering lightly assumed we can directly encode a call target in
    // the instruction, which is not the case. Materialize the address here.
    const GlobalValue *GV = Info.Callee.getGlobal();
    auto Ptr = MIRBuilder.buildGlobalValue(
        LLT::pointer(GV->getAddressSpace(), 64), GV);
    CallInst.addReg(Ptr.getReg(0));
    CallInst.add(Info.Callee);
  } else
    return false;

  return true;
}

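/// Return true if the caller and the callee of \p Info agree on how the
/// incoming values in \p InArgs are assigned, i.e. the calling conventions and
/// preserved register masks are compatible for a tail call.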
bool AMDGPUCallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = ST.getRegisterInfo();

  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
    return false;

  // Check if the caller and callee will handle arguments in the same way.
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  // FIXME: We are not accounting for potential differences in implicitly passed
  // inputs, but only the fixed ABI is supported now anyway.
  IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                       CalleeAssignFnVarArg);
  IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                       CallerAssignFnVarArg);
  return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
}

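/// Return true if the outgoing arguments in \p OutArgs can be passed to the
/// callee without growing the caller's stack argument area or clobbering
/// callee-saved registers that hold incoming parameters.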
bool AMDGPUCallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
  OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);

  if (!determineAssignments(Assigner, OutArgs, OutInfo)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();
  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
  return CC == CallingConv::Fast;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return canGuaranteeTCO(CC);
  }
}

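/// Returns true if the call described by \p Info can be lowered as a tail
/// call: the callee must be direct, the calling conventions compatible, and
/// the outgoing arguments must fit in the caller's stack argument area.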
bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &B, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs, SmallVectorImpl<ArgInfo> &OutArgs) const {
  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  // Indirect calls can't be tail calls, because the address can be divergent.
  // TODO: Check divergence info if the call really is divergent.
  if (Info.Callee.isReg())
    return false;

  MachineFunction &MF = B.getMF();
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  // Kernels aren't callable, and don't have a live in return address so it
  // doesn't make sense to do a tail call with entry functions.
  if (!CallerPreserved)
    return false;

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
                         "or swifterror arguments\n");
    return false;
  }

  // If we have -tailcallopt, then we're done.
  if (MF.getTarget().Options.GuaranteedTailCallOpt)
    return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}

// Insert outgoing implicit arguments for a call, by inserting copies to the
// implicit argument registers and adding the necessary implicit uses to the
// call instruction.
void AMDGPUCallLowering::handleImplicitCallArguments(
    MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
    const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo,
    ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
  if (!ST.enableFlatScratch()) {
    // Insert copies for the SRD. In the HSA case, this should be an identity
    // copy.
    auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
                                               FuncInfo.getScratchRSrcReg());
    MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
    CallInst.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit);
  }

  for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
    MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
    CallInst.addReg(ArgReg.first, RegState::Implicit);
  }
}

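/// Lower a call that was proven eligible for tail calling: build SI_TCRETURN,
/// marshal the outgoing arguments into the caller's incoming argument area
/// (offset by FPDiff when -tailcallopt is in effect), and add the implicit
/// argument copies.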
bool AMDGPUCallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);

  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  if (!addCallTargetOperands(MIB, MIRBuilder, Info))
    return false;

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  MIB.addRegMask(Mask);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    // FIXME: Not accounting for callee implicit inputs
    OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), ST.getStackAlignment());

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started at
    // a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(isAligned(ST.getStackAlignment(), FPDiff) &&
           "unaligned stack on tail call");
  }

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());

  // We could pass MIB and directly add the implicit uses to the call
  // now. However, as an aesthetic choice, place implicit argument operands
  // after the ordinary user argument registers.
  SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;

  if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
    // With a fixed ABI, allocate fixed registers before user arguments.
    if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
      return false;
  }

  OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);

  if (!determineAssignments(Assigner, OutArgs, CCInfo))
    return false;

  // Do the actual argument marshalling.
  AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);
  if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
    return false;

  handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, ImplicitArgRegs);

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(NumBytes).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN).addImm(NumBytes).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.

  // FIXME: We should define regbankselectable call instructions to handle
  // divergent call targets.
  if (MIB->getOperand(0).isReg()) {
    MIB->getOperand(0).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
        MIB->getDesc(), MIB->getOperand(0), 0));
  }

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

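/// Main entry point for lowering a call for GlobalISel: splits argument and
/// return values, tries the tail-call path first, and otherwise emits a
/// G_SI_CALL wrapped in ADJCALLSTACKUP/ADJCALLSTACKDOWN with the return value
/// copied out of the ABI registers.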
bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                   CallLoweringInfo &Info) const {
  if (Info.IsVarArg) {
    LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
    return false;
  }

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs)
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);

  SmallVector<ArgInfo, 8> InArgs;
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  Info.IsTailCall = CanTailCallOpt;
  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
      .addImm(0)
      .addImm(0);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.addDef(TRI->getReturnAddressReg(MF));

  if (!addCallTargetOperands(MIB, MIRBuilder, Info))
    return false;

  // Tell the call which registers are clobbered.
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
  MIB.addRegMask(Mask);

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());

  // We could pass MIB and directly add the implicit uses to the call
  // now. However, as an aesthetic choice, place implicit argument operands
  // after the ordinary user argument registers.
  SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;

  if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
    // With a fixed ABI, allocate fixed registers before user arguments.
    if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
      return false;
  }

  // Do the actual argument marshalling.
  SmallVector<Register, 8> PhysRegs;

  OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
  if (!determineAssignments(Assigner, OutArgs, CCInfo))
    return false;

  AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
  if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
    return false;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.

  // FIXME: We should define regbankselectable call instructions to handle
  // divergent call targets.
  if (MIB->getOperand(1).isReg()) {
    MIB->getOperand(1).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *ST.getInstrInfo(),
        *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
        1));
  }

  // Now we can add the actual call instruction to the correct position.
  MIRBuilder.insertInstr(MIB);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
                                                      Info.IsVarArg);
    IncomingValueAssigner Assigner(RetAssignFn);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
                                       Info.CallConv, Info.IsVarArg))
      return false;
  }

  uint64_t CalleePopBytes = NumBytes;

  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
      .addImm(0)
      .addImm(CalleePopBytes);

  if (!Info.CanLowerReturn) {
    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                    Info.DemoteRegister, Info.DemoteStackIndex);
  }

  return true;
}
unsigned const MachineRegisterInfo * MRI
static std::pair< CCAssignFn *, CCAssignFn * > getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI)
Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for CC.
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool addCallTargetOperands(MachineInstrBuilder &CallInst, MachineIRBuilder &MIRBuilder, AMDGPUCallLowering::CallLoweringInfo &Info)
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc)
static void allocateHSAUserSGPRs(CCState &CCInfo, MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
@ Flags
Definition: TextStubV5.cpp:93
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &OutArgs) const
bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &InArgs, SmallVectorImpl< ArgInfo > &OutArgs) const
Returns true if the call can be lowered as a tail call.
bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs) const
bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef< Register > VRegs, FunctionLoweringInfo &FLI) const override
This hook behaves as the extended lowerReturn function, but for targets that do not support swifterro...
bool areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &OutArgs) const
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
bool passSpecialInputs(MachineIRBuilder &MIRBuilder, CCState &CCInfo, SmallVectorImpl< std::pair< MCRegister, Register > > &ArgRegs, CallLoweringInfo &Info) const
bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs, FunctionLoweringInfo &FLI) const override
This hook must be implemented to lower the incoming (formal) arguments, described by VRegs,...
bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override
This hook must be implemented to lower the given call instruction, including argument and return valu...
bool doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs) const
void handleImplicitCallArguments(MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI, ArrayRef< std::pair< MCRegister, Register > > ImplicitArgRegs) const
This class provides the information for the target register banks.
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override
Return the type that should be used to zero or sign extend a zeroext/signext integer return value.
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
CCState - This class holds information needed while lowering arguments and return values.
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
CCValAssign - Represent assignment of one arg/retval to a location.
unsigned getLocMemOffset() const
LocInfo getLocInfo() const
This class represents a function call, abstracting a target machine's calling convention.
bool handleAssignments(ValueHandler &Handler, SmallVectorImpl< ArgInfo > &Args, CCState &CCState, SmallVectorImpl< CCValAssign > &ArgLocs, MachineIRBuilder &MIRBuilder, ArrayRef< Register > ThisReturnRegs=std::nullopt) const
Use Handler to insert code to handle the argument/return values represented by Args.
void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg, int FI) const
Load the returned value from the stack into virtual registers in VRegs.
bool determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, bool IsVarArg, ArrayRef< Register > ThisReturnRegs=std::nullopt) const
Invoke ValueAssigner::assignArg on each of the given Args and then use Handler to move them to the as...
bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs, ValueAssigner &CalleeAssigner, ValueAssigner &CallerAssigner) const
void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl< ArgInfo > &SplitArgs, const DataLayout &DL, CallingConv::ID CallConv, SmallVectorImpl< uint64_t > *Offsets=nullptr) const
Break OrigArgInfo into one or more pieces the calling convention can process, returned in SplitArgs.
void insertSRetIncomingArgument(const Function &F, SmallVectorImpl< ArgInfo > &SplitArgs, Register &DemoteReg, MachineRegisterInfo &MRI, const DataLayout &DL) const
Insert the hidden sret ArgInfo to the beginning of SplitArgs.
void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg) const
Store the return value given by VRegs into stack starting at the offset specified in DemoteReg.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< ArgInfo > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the calling function.
bool determineAssignments(ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, CCState &CCInfo) const
Analyze the argument list in Args, using Assigner to populate CCInfo.
bool checkReturn(CCState &CCInfo, SmallVectorImpl< BaseArgInfo > &Outs, CCAssignFn *Fn) const
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
FunctionLoweringInfo - This contains information that is global to a function that is used when lowering a region of the function.
Register DemoteRegister
DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to the hidden sret parameter.
bool CanLowerReturn
CanLowerReturn - true iff the function's return value can be lowered to registers.
iterator_range< arg_iterator > args()
Definition: Function.h:795
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
Definition: Function.h:237
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:239
unsigned getAddressSpace() const
Definition: GlobalValue.h:201
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
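A few concrete LLT shapes of the kind this lowering builds (illustrative values only; the address-space numbers follow the AMDGPUAS enums referenced later in this list):
  LLT S32   = LLT::scalar(32);                              // 32-bit "bag of bits"
  LLT P4    = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); // 64-bit constant-memory pointer
  LLT V2S16 = LLT::fixed_vector(2, 16);                     // two packed 16-bit lanes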
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setHasTailCall(bool V=true)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
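For instance, the memory operand for a 4-byte outgoing stack store could be built as follows (a sketch; the 16-byte base stack alignment and the Offset variable are assumptions):
  MachinePointerInfo MPO = MachinePointerInfo::getStack(MF, Offset);
  Align StoreAlign = commonAlignment(Align(16), Offset);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, 4, StoreAlign);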
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual register for it.
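Illustrative use only (the choice of SGPR4 and its register class are assumptions, not necessarily what this file does):
  // Make an incoming physical SGPR usable as a virtual register; the entry
  // block must also list it as a live-in.
  Register VReg = MF.addLiveIn(AMDGPU::SGPR4, &AMDGPU::SGPR_32RegClass);
  B.getMBB().addLiveIn(AMDGPU::SGPR4);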
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV)
Build and insert Res = G_GLOBAL_VALUE GV.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
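The builder calls above compose into short G_* sequences; for example, storing a 32-bit value a few bytes into a private-memory stack object (a sketch with made-up FI, Val, and MMO):
  LLT P5  = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
  LLT S32 = LLT::scalar(32);
  auto FIAddr = B.buildFrameIndex(P5, FI);      // %fi  = G_FRAME_INDEX
  auto Off    = B.buildConstant(S32, 4);        // %off = G_CONSTANT i32 4
  auto Addr   = B.buildPtrAdd(P5, FIAddr, Off); // %p   = G_PTR_ADD %fi, %off
  B.buildStore(Val, Addr, *MMO);                // G_STORE %val(s32), %p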
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
AMDGPUFunctionArgInfo & getArgInfo()
void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments in fixed registers.
void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const
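These allocation hooks live on SITargetLowering and are driven from the argument-lowering paths. A heavily simplified sketch of the non-entry-function case, where TLI, TRI, and FuncInfo are assumed to be the SITargetLowering, SIRegisterInfo, and SIMachineFunctionInfo for the function (the call order shown is an assumption):
  TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *FuncInfo);
  // ... assign the explicit IR-level arguments ...
  TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *FuncInfo);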
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:577
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:36
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs with the length computed at compile time.
Definition: StringRef.h:840
TargetOptions Options
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the command line.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:380
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:382
unsigned getCodeObjectVersion(const Module &M)
bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
bool isShader(CallingConv::ID cc)
bool isGraphics(CallingConv::ID cc)
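These predicates typically gate which lowering path is taken; a small illustrative check (F assumed in scope):
  CallingConv::ID CC = F.getCallingConv();
  if (AMDGPU::isKernel(CC)) {
    // Kernel arguments are read from the kernarg segment, not from registers
    // or the stack.
  } else if (AMDGPU::isShader(CC)) {
    // Shader inputs arrive preloaded in SGPRs/VGPRs.
  }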
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:197
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:229
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:191
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default LLVM calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:779
@ SIGN_EXTEND
SIGN_EXTEND - Used for integer types, replicating the sign bit in the new bits.
Definition: ISDOpcodes.h:773
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:776
@ Implicit
Not emitted register (e.g. carry, or temporary result).
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed as an argument (RegClass).
Definition: Utils.cpp:53
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1777
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:179
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual underlying non-aggregate types that comprise it.
Definition: Analysis.cpp:121
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
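Taken together, these two helpers map IR types into the generic MIR type system; a small sketch (TLI, DL, and Arg are assumed to be in scope):
  SmallVector<EVT, 4> ValueVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(TLI, DL, Arg.getType(), ValueVTs, &Offsets, /*StartingOffset=*/0);
  LLT ArgTy = getLLTForType(*Arg.getType(), DL);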
Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO)
Definition: Utils.cpp:712
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
MCRegister getRegister() const
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Base class for ValueHandlers used for arguments coming into the current function, or for return values received from a call.
Definition: CallLowering.h:318
Register buildExtensionHint(CCValAssign &VA, Register SrcReg, LLT NarrowTy)
Insert G_ASSERT_ZEXT/G_ASSERT_SEXT or other hint instruction based on VA, returning the new register if a hint was inserted.
void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign VA) override
Provides a default implementation for argument handling.
Base class for ValueHandlers used for arguments passed to a function call, or for return values.
Definition: CallLowering.h:333
uint64_t StackOffset
Stack offset for next argument.
Definition: CallLowering.h:204
Register extendRegister(Register ValReg, CCValAssign &VA, unsigned MaxSizeBits=0)
Extend a register to the location type given in VA, capped at extending to at most MaxSize bits.
virtual Register getStackAddress(uint64_t MemSize, int64_t Offset, MachinePointerInfo &MPO, ISD::ArgFlagsTy Flags)=0
Materialize a VReg containing the address of the specified stack-based object.
virtual void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA)=0
The specified value has been assigned to a stack location.
virtual void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign VA)=0
The specified value has been assigned to a physical register, handle the appropriate COPY (either to or from) and mark any relevant uses/defines as needed.
Extended Value Type.
Definition: ValueTypes.h:34
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:194
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
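For an incoming stack argument, the fixed-stack form pairs naturally with alignment inference (a sketch; FI and the 4-byte access size are assumptions, and MOInvariant reflects an immutable incoming slot):
  MachinePointerInfo MPO = MachinePointerInfo::getFixedStack(MF, FI);
  Align LoadAlign = inferAlignFromPtrInfo(MF, MPO);
  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, 4, LoadAlign);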
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117