#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-call-lowering"
struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
  AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                             MachineInstrBuilder MIB)
      : OutgoingValueHandler(B, MRI), MIB(MIB) {}

  MachineInstrBuilder MIB;

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the
    // value ends up in a VGPR.
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      LLT Ty = MRI.getType(ExtReg);
      LLT S32 = LLT::scalar(32);
      if (Ty != S32) {
        if (Ty.isPointer())
          ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
        else
          ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
      }

      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                              {MRI.getType(ExtReg)}, false)
                        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }
};
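// --- Illustrative sketch (not part of this file): the handler above routes
// SGPR-destined return values through amdgcn_readfirstlane so a value that may
// live in a VGPR becomes wave-uniform before the copy to the physical SGPR.
// A hypothetical standalone helper for the 32-bit scalar case:
//
//   static Register readFirstLane32(MachineIRBuilder &B, Register Reg) {
//     return B.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
//                             {LLT::scalar(32)}, /*HasSideEffects=*/false)
//         .addUse(Reg)
//         .getReg(0);
//   }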
struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
  uint64_t StackUsed = 0;

  AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
      : IncomingValueHandler(B, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();
    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      auto Extended =
          buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
      MIRBuilder.buildTrunc(ValVReg, Extended);
      return;
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal parameters
  /// (it's a basic block live-in) and a call return (it's an implicit-def of
  /// the call instruction).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
};
struct FormalArgHandler : public AMDGPUIncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
      : AMDGPUIncomingArgHandler(B, MRI) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};
struct CallReturnHandler : public AMDGPUIncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};
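// Note: the two subclasses differ only in how the physical register is
// recorded. For formal arguments the register must be a live-in of the entry
// block; for values returned by a call it must be an implicit def of the call
// instruction itself:
//   FormalArgHandler:  MIRBuilder.getMBB().addLiveIn(PhysReg);
//   CallReturnHandler: MIB.addDef(PhysReg, RegState::Implicit);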
struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
  // For tail calls, the byte offset of the call's argument area from the
  // callee's. Unused elsewhere.
  int FPDiff;
  Register SPReg; // Cached SP vreg for this call site.
  bool IsTailCall;

  AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
                           MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
                           bool IsTailCall = false, int FPDiff = 0)
      : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
        IsTailCall(IsTailCall) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);

    if (IsTailCall) {
      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(PtrTy, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    if (!SPReg) {
      if (ST.enableFlatScratch()) {
        // The stack is accessed unswizzled, so we can use a regular copy.
        SPReg = MIRBuilder.buildCopy(PtrTy,
                                     MFI->getStackPtrOffsetReg()).getReg(0);
      } else {
        // The address needs to be divided by the wave size.
        SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
                                      {MFI->getStackPtrOffsetReg()}).getReg(0);
      }
    }

    auto OffsetReg = MIRBuilder.buildConstant(LLT::scalar(32), Offset);
    auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg,
                            unsigned ValRegIndex, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FullValue
                           ? extendRegister(Arg.Regs[ValRegIndex], VA)
                           : Arg.Regs[ValRegIndex];
    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }
};
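// Sketch (hypothetical helper, mirroring getStackAddress above): outgoing
// stack arguments are addressed as SP + Offset in the 32-bit private address
// space. Without flat scratch, SP holds a wave-scaled (swizzled) value, so it
// must first pass through G_AMDGPU_WAVE_ADDRESS to obtain a byte address.
//
//   static Register stackSlotAddr(MachineIRBuilder &B, Register SPReg,
//                                 int64_t Offset) {
//     const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
//     auto Off = B.buildConstant(LLT::scalar(32), Offset);
//     return B.buildPtrAdd(PtrTy, SPReg, Off).getReg(0);
//   }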
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
                                        bool IsVarArg) const {
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
                 MF.getFunction().getContext());

  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
}

  auto &MF = B.getMF();

  assert(VRegs.size() == SplitEVTs.size() &&
         "For each split Type there should be exactly one VReg.");

  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
    EVT VT = SplitEVTs[i];

    unsigned ExtendOp = TargetOpcode::G_ANYEXT;
    if (RetInfo.Flags[0].isSExt()) {
      assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
      ExtendOp = TargetOpcode::G_SEXT;
    } else if (RetInfo.Flags[0].isZExt()) {
      assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
      ExtendOp = TargetOpcode::G_ZEXT;
    }

    if (Reg != RetInfo.Regs[0]) {
      RetInfo.Regs[0] = Reg;
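// The flag-to-extend mapping above reduces to a small predicate (sketch,
// assuming a single-register return value, as the asserts require):
//
//   static unsigned pickExtendOp(const ISD::ArgFlagsTy &Flags) {
//     if (Flags.isSExt()) return TargetOpcode::G_SEXT;
//     if (Flags.isZExt()) return TargetOpcode::G_ZEXT;
//     return TargetOpcode::G_ANYEXT; // high bits undefined
//   }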
  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
  OutgoingValueAssigner Assigner(AssignFn);
  AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
  return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
                                       CC, F.isVarArg());
}

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  const bool IsWaveEnd =
      (IsShader && MFI->returnsVoid()) || AMDGPU::isKernel(CC);
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
    return true;
  }

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
  auto Ret = B.buildInstrNoInsert(ReturnOpc);

  if (!FLI.CanLowerReturn)
    insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
  else if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
}

                                   Align Alignment) const {
  MachineFunction &MF = B.getMF();

  for (ArgInfo &SplitArg : SplitArgs) {
    Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
    lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);

    LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
    if (SplitArg.Flags[0].isPointer()) {
      // Compensate for losing pointeriness in splitValueTypes.
      LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),
                               ArgTy.getScalarSizeInBits());
      ArgTy = ArgTy.isVector() ? LLT::vector(ArgTy.getElementCount(), PtrTy)
                               : PtrTy;
    }

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo(AMDGPUAS::CONSTANT_ADDRESS),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        ArgTy, commonAlignment(Alignment, FieldOffsets[Idx]));

    assert(SplitArg.Regs.size() == 1);
    B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);

    ++Idx;
  }
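// Sketch of the kernarg access pattern used by lowerParameterPtr/lowerParameter
// (hypothetical helper; the real code threads MachinePointerInfo and split
// field offsets through, omitted here): each field is an invariant,
// dereferenceable load from the kernarg segment pointer at a byte offset.
//
//   static void loadKernArg(MachineIRBuilder &B, Register KernArgSegPtr,
//                           Register Dst, int64_t Offset, LLT Ty, Align A) {
//     MachineFunction &MF = B.getMF();
//     auto Off = B.buildConstant(LLT::scalar(64), Offset);
//     auto Ptr = B.buildPtrAdd(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64),
//                              KernArgSegPtr, Off);
//     MachineMemOperand *MMO = MF.getMachineMemOperand(
//         MachinePointerInfo(AMDGPUAS::CONSTANT_ADDRESS),
//         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
//             MachineMemOperand::MOInvariant,
//         Ty, A);
//     B.buildLoad(Dst, Ptr, *MMO);
//   }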
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.hasPrivateSegmentBuffer()) {
    Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    Register DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr() &&
      AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) <
          AMDGPU::AMDHSA_COV5) {
    Register QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    Register DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }
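// The checks above follow the fixed HSA user-SGPR order: private segment
// buffer (4 SGPRs), dispatch ptr (2), queue ptr (2), kernarg segment ptr (2),
// dispatch id (2), flat scratch init (2). Each present input is added as a
// function live-in and allocated in CCInfo so later arguments skip over it.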
  Info->allocateKnownAddressLDSGlobal(F);

  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  unsigned i = 0;
  const Align KernArgBaseAlign(16);
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  for (auto &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
    Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    if (Arg.use_empty()) {
      ++i;
      continue;
    }

    Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);

    if (IsByRef) {
      unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();

      assert(VRegs[i].size() == 1 &&
             "expected only one register for byval pointers");
      if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
        lowerParameterPtr(VRegs[i][0], B, ArgOffset);
      } else {
        const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
        Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
        lowerParameterPtr(PtrReg, B, ArgOffset);

        B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
      }
    } else {
      ArgInfo OrigArg(VRegs[i], Arg, i);
      setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
      lowerParameter(B, OrigArg, ArgOffset, Alignment);
    }

    ++i;
  }
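// Worked example of the offset bookkeeping above (illustrative values,
// BaseOffset = 0): for arguments (i32, align 4) then (double, align 8):
//   arg0: ArgOffset = alignTo(0, 4) = 0;  ExplicitArgOffset = 0 + 4 = 4
//   arg1: ArgOffset = alignTo(4, 8) = 8;  ExplicitArgOffset = 8 + 8 = 16
// so the second field starts at byte 8 of the kernarg segment.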
  Info->allocateKnownAddressLDSGlobal(F);

  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  // FIXME: This probably isn't defined for mesa
  if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
    Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions other than entry points are not supported.
    if (!IsGraphics && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();

      // We can safely skip PS inputs.
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      Info->markPSInputAllocated(PSInputNum);
      if (ArgUsed)
        Info->markPSInputEnabled(PSInputNum);
    }
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  if ((Info->getPSInputAddr() & 0x7F) == 0 ||
      ((Info->getPSInputAddr() & 0xF) == 0 &&
       Info->isPSInputAllocated(11))) {
    CCInfo.AllocateReg(AMDGPU::VGPR0);
    CCInfo.AllocateReg(AMDGPU::VGPR1);
    Info->markPSInputAllocated(0);
    Info->markPSInputEnabled(0);
  }

  if (Subtarget.isAmdPalOS()) {
    unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
    if ((PsInputBits & 0x7F) == 0 ||
        ((PsInputBits & 0xF) == 0 &&
         (PsInputBits >> 11 & 1)))
      Info->markPSInputEnabled(countr_zero(Info->getPSInputAddr()));
  }
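// Minimal sketch of the "at least one PS input" rule applied above (the
// hardware requires an enabled input or it hangs; bit 11 corresponds to one
// of the position inputs):
//
//   static bool needsForcedPSInput0(unsigned PSInputAddr, bool In11Allocated) {
//     return (PSInputAddr & 0x7F) == 0 ||
//            ((PSInputAddr & 0xF) == 0 && In11Allocated);
//   }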
  if (!IsEntryFunc && !IsGraphics) {
    // For the fixed ABI, pass workitem IDs in the last argument register.
    TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
  }

  if (!Subtarget.enableFlatScratch())
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
785 "amdgpu-no-dispatch-ptr",
786 "amdgpu-no-queue-ptr",
787 "amdgpu-no-implicitarg-ptr",
788 "amdgpu-no-dispatch-id",
789 "amdgpu-no-workgroup-id-x",
790 "amdgpu-no-workgroup-id-y",
791 "amdgpu-no-workgroup-id-z",
792 "amdgpu-no-lds-kernel-id",
  unsigned I = 0;

  for (auto InputID : InputRegs) {
    const ArgDescriptor *OutgoingArg;
    const TargetRegisterClass *ArgRC;
    LLT ArgTy;

    // If the callee does not use the attribute value, skip copying the value.
    if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
      continue;

    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(InputID);
    if (!OutgoingArg)
      continue;

    const ArgDescriptor *IncomingArg;
    const TargetRegisterClass *IncomingArgRC;
    std::tie(IncomingArg, IncomingArgRC, ArgTy) =
        CallerArgInfo.getPreloadedValue(InputID);
    assert(IncomingArgRC == ArgRC);

    Register InputReg = MRI.createGenericVirtualRegister(ArgTy);

    if (IncomingArg) {
      LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
    } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
      LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
    } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
      std::optional<uint32_t> Id =
          AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
      if (Id)
        MIRBuilder.buildConstant(InputReg, *Id);
      else
        MIRBuilder.buildUndef(InputReg);
    }

    if (OutgoingArg->isRegister()) {
      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
      if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
        report_fatal_error("failed to allocate implicit input argument");
    } else {
      LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
      return false;
    }
  }
  // Pack workitem IDs into a single register or pass it as is if already
  // packed.
  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  if (!OutgoingArg)
    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  if (!OutgoingArg)
    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
  if (!OutgoingArg)
    return false;

  auto WorkitemIDX =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  auto WorkitemIDY =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  auto WorkitemIDZ =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);

  const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
  const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
  const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
  const LLT S32 = LLT::scalar(32);

  const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
  const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
  const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");

  // If incoming ids are not packed we need to pack them.
  Register InputReg;
  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
      NeedWorkItemIDX) {
    if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
      InputReg = MRI.createGenericVirtualRegister(S32);
      LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                         std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
    } else {
      InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
    }
  }

  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
      NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
    Register Y = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                       std::get<2>(WorkitemIDY));

    Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
  }

  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
      NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
    Register Z = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                       std::get<2>(WorkitemIDZ));

    Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
  }

  if (!InputReg &&
      (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
    InputReg = MRI.createGenericVirtualRegister(S32);
    if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
      // We're in a situation where the outgoing function requires the workitem
      // ID, but the calling function does not have it (e.g a graphics function
      // calling a C calling convention function). This is illegal, but we need
      // to produce something.
      MIRBuilder.buildUndef(InputReg);
    } else {
      // Workitem ids are already packed; any of the present incoming arguments
      // will carry all required fields.
      ArgDescriptor IncomingArg = ArgDescriptor::createArg(
          IncomingArgX ? *IncomingArgX :
          IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
      LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
                         &AMDGPU::VGPR_32RegClass, S32);
    }
  }

  if (OutgoingArg->isRegister()) {
    if (InputReg)
      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);

    if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
      report_fatal_error("failed to allocate implicit input argument");
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
    return false;
  }
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  assert(!(IsIndirect && IsTailCall) &&
         "Indirect calls can't be tail calls, "
         "because the address can be divergent");
  return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::G_SI_CALL;
}

  if (Info.Callee.isReg()) {
    CallInst.addReg(Info.Callee.getReg());
    CallInst.addImm(0);
  } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = ST.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
    return false;

  // Check if the caller and callee will handle arguments in the same way.
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                       CalleeAssignFnVarArg);
  IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                       CallerAssignFnVarArg);
  return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
}

  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  // Indirect calls can't be tail calls, because the address can be divergent.
  if (Info.Callee.isReg())
    return false;

  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  // Kernels aren't callable, and don't have a live in return address so it
  // doesn't make sense to do a tail call with entry functions.
  if (!CallerPreserved)
    return false;

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
                         "or swifterror arguments\n");
    return false;
  }

  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
void AMDGPUCallLowering::handleImplicitCallArguments(
    MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
    const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo,
    ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
  if (!ST.enableFlatScratch()) {
    // Insert copies for the SRD. In the HSA case, this should be an identity
    // copy.
    auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32),
                                               FuncInfo.getScratchRSrcReg());
    MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
    CallInst.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit);
  }

  for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
    MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
    CallInst.addReg(ArgReg.first, RegState::Implicit);
  }
}

  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  MIB.addRegMask(Mask);

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt.
  unsigned NumBytes = 0;

  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
  CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

  // FPDiff is negative when the callee needs more argument stack space than
  // the caller's incoming argument area provides.
  FPDiff = NumReusableBytes - NumBytes;

  assert(isAligned(ST.getStackAlignment(), FPDiff) &&
         "unaligned stack on tail call");

  AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);

  MIB->getOperand(1).setImm(FPDiff);

  // If the callee is a register, it must satisfy the register class
  // constraint of the call instruction.
  if (MIB->getOperand(0).isReg()) {
    MIB->getOperand(0).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
        MIB->getDesc(), MIB->getOperand(0), 0));
  }

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
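// Worked example of the FPDiff computation above (illustrative numbers): if
// the caller reserved 32 bytes for its own incoming stack arguments
// (NumReusableBytes) and the callee needs NumBytes = 48, then
// FPDiff = 32 - 48 = -16: the argument area grows by 16 bytes, and the fixed
// stack objects for the callee's arguments are created relative to that
// adjusted base before the jump.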
  if (Info.IsVarArg) {
    LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
    return false;
  }

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs)
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);

  SmallVector<ArgInfo, 8> InArgs;
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  Info.IsTailCall = CanTailCallOpt;
  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
      .addImm(0)
      .addImm(0);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.addDef(TRI->getReturnAddressReg(MF));

  if (!addCallTargetOperands(MIB, MIRBuilder, Info))
    return false;

  // Tell the call which registers are clobbered.
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
  MIB.addRegMask(Mask);

  AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // If the callee is a register, it must satisfy the register class
  // constraint of the call instruction.
  if (MIB->getOperand(1).isReg()) {
    MIB->getOperand(1).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *ST.getInstrInfo(),
        *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1), 1));
  }

  // Now we can add the actual call instruction to the correct position.
  MIRBuilder.insertInstr(MIB);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
                                                      Info.IsVarArg);
    IncomingValueAssigner Assigner(RetAssignFn);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
                                       Info.CallConv, Info.IsVarArg))
      return false;
  }

  uint64_t CalleePopBytes = NumBytes;

  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
      .addImm(0)
      .addImm(CalleePopBytes);

  if (!Info.CanLowerReturn) {
    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                    Info.DemoteRegister, Info.DemoteStackIndex);
  }

  return true;
}
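// The emitted sequence is bracketed by call-frame pseudos, roughly:
//   ADJCALLSTACKUP 0, 0
//   <copies into argument registers / stores to stack slots>
//   G_SI_CALL @callee, <implicit arg-reg uses, regmask>
//   ADJCALLSTACKDOWN 0, CalleePopBytes
// In the tail-call path, SI_TCRETURN replaces the call and the
// ADJCALLSTACKDOWN, with its immediate operand set to FPDiff (see
// MIB->getOperand(1).setImm(FPDiff) above).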