// AMDGPUCallLowering.cpp: annotated excerpts from the AMDGPU GlobalISel call
// lowering. Elided source is marked with "// ...".
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-call-lowering"
// AMDGPUOutgoingValueHandler: moves return values into their assigned
// locations.
AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                           MachineInstrBuilder MIB)
    : OutgoingValueHandler(B, MRI), MIB(MIB) {}

// In assignValueToReg: widen sub-32-bit values before the copy.
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

// If this is a scalar return, insert a readfirstlane just in case the value
// ends up in a VGPR.
if (TRI->isSGPRReg(MRI, PhysReg)) {
  auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                          {MRI.getType(ExtReg)}, false)
                    .addReg(ExtReg);
  ExtReg = ToSGPR.getReg(0);
}

MIRBuilder.buildCopy(PhysReg, ExtReg);
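// --- Illustrative aside (not part of AMDGPUCallLowering.cpp) ---
// The readfirstlane above is what makes a possibly-divergent (per-lane) value
// safe to place in a scalar register. A minimal sketch of its semantics under
// an assumed 64-lane wave; names are illustrative, not the LLVM API:
#include <cassert>
#include <cstdint>

// Return the value held by the lowest-numbered active lane of the wave.
uint32_t readFirstLane(const uint32_t LaneValues[64], uint64_t ExecMask) {
  assert(ExecMask != 0 && "no active lanes");
  unsigned FirstActive = __builtin_ctzll(ExecMask); // lowest set bit (GCC/Clang builtin)
  return LaneValues[FirstActive];
}
// --- end aside ---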
// AMDGPUIncomingArgHandler: handles formal arguments and call results.
AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
    : IncomingValueHandler(B, MRI) {}

// In getStackAddress: create a fixed frame object for a stack-passed value.
auto &MFI = MIRBuilder.getMF().getFrameInfo();

// Byval is assumed to be writable memory, but other stack passed arguments
// are not.
const bool IsImmutable = !Flags.isByVal();
int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);

auto AddrReg = MIRBuilder.buildFrameIndex(
    LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
StackUsed = std::max(StackUsed, Size + Offset);
return AddrReg.getReg(0);
// In assignValueToReg:
markPhysRegUsed(PhysReg);

if (VA.getLocVT().getSizeInBits() < 32) {
  // 16-bit types are reported as legal for 32-bit registers. We need to do
  // a 32-bit copy, and truncate to avoid the verifier complaining about it.
  auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);

  // If we have signext/zeroext, it applies to the whole 32-bit register
  // before truncation.
  auto Extended = buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
  MIRBuilder.buildTrunc(ValVReg, Extended);
  return;
}

IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);

// In assignValueToAddress: load a stack-passed argument.
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
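// --- Illustrative aside (not part of AMDGPUCallLowering.cpp) ---
// A scalar-code analogy for the buildCopy(s32) + buildTrunc sequence above,
// assuming a 16-bit value that arrives in a 32-bit physical register:
#include <cstdint>

uint16_t receiveHalfWord(uint32_t PhysRegValue) {
  uint32_t Copy = PhysRegValue;        // full-width copy of the register
  return static_cast<uint16_t>(Copy);  // explicit truncation to the value type
}
// --- end aside ---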
// How the physical register gets marked varies between formal arguments
// (it's a basic block live-in) and a call instruction's implicit uses.
virtual void markPhysRegUsed(unsigned PhysReg) = 0;

// FormalArgHandler: formal arguments become block live-ins.
FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
    : AMDGPUIncomingArgHandler(B, MRI) {}

void markPhysRegUsed(unsigned PhysReg) override {
  MIRBuilder.getMBB().addLiveIn(PhysReg);
}

// CallReturnHandler: call results are implicit defs of the call instruction.
void markPhysRegUsed(unsigned PhysReg) override {
  MIB.addDef(PhysReg, RegState::Implicit);
}
struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  bool IsTailCall;

  AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
                           MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
                           bool IsTailCall = false, int FPDiff = 0)
      : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
        IsTailCall(IsTailCall) {}

  // In getStackAddress: tail calls address outgoing arguments through a
  // fixed frame index; other calls address them relative to the stack
  // pointer.
  if (IsTailCall) {
    // ...
    return FIReg.getReg(0);
  }

  if (ST.enableFlatScratch()) {
    // The stack is accessed unswizzled, so a plain copy of SP suffices.
    // ...
  } else {
    // The address may be interpreted as a vector address, so convert SP to a
    // swizzled per-wave address first.
    SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
                                  {MFI->getStackPtrOffsetReg()}).getReg(0);
  }

  auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
  return AddrReg.getReg(0);
  // In assignValueToReg:
  Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
  MIRBuilder.buildCopy(PhysReg, ExtReg);

  // In the ArgInfo overload of assignValueToAddress: extend to the location
  // type first, unless the location was FP-extended.
  Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
                         ? extendRegister(Arg.Regs[ValRegIndex], VA)
                         : Arg.Regs[ValRegIndex];
  assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}
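// --- Illustrative aside (not part of AMDGPUCallLowering.cpp) ---
// The mapping above is mechanical, but the three extension kinds differ in
// what they promise about the widened bits. A small self-contained sketch on
// a 16-to-32-bit widening:
#include <cassert>
#include <cstdint>

// G_SEXT: replicate the sign bit into the new high bits.
int32_t signExtend16(uint16_t V) { return static_cast<int16_t>(V); }
// G_ZEXT: fill the new high bits with zeros.
uint32_t zeroExtend16(uint16_t V) { return V; }
// G_ANYEXT promises nothing about the high bits; zero is one valid choice.
uint32_t anyExtend16(uint16_t V) { return V; }

int main() {
  assert(signExtend16(0x8000) == -32768);
  assert(zeroExtend16(0x8000) == 0x8000u);
  (void)anyExtend16(0x8000);
  return 0;
}
// --- end aside ---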
// canLowerReturn: check whether the return value can be lowered to registers
// for this calling convention.
bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
                                        CallingConv::ID CallConv,
                                        SmallVectorImpl<BaseArgInfo> &Outs,
                                        bool IsVarArg) const {
  // ...
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
                 MF.getFunction().getContext());
  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
}

// lowerReturnVal:
auto &MF = B.getMF();
// ...
assert(VRegs.size() == SplitEVTs.size() &&
       "For each split Type there should be exactly one VReg.");
for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
  EVT VT = SplitEVTs[i];
  Register Reg = VRegs[i];
  // ...
  // Pick the extension implied by the return attributes.
  unsigned ExtendOp = TargetOpcode::G_ANYEXT;
  if (RetInfo.Flags[0].isSExt()) {
    assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
    ExtendOp = TargetOpcode::G_SEXT;
  } else if (RetInfo.Flags[0].isZExt()) {
    assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
    ExtendOp = TargetOpcode::G_ZEXT;
  }
  // ...
  if (Reg != RetInfo.Regs[0]) {
    RetInfo.Regs[0] = Reg;
    // Reset the arg flags after modifying Reg.
    setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
  }
  // ...
}
OutgoingValueAssigner Assigner(AssignFn);
AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
// lowerReturn:
assert(!Val == VRegs.empty() && "Return value without a vreg");

// Wave-end returns (kernels, and shaders that return void) end the program
// instead of returning to a caller.
const bool IsWaveEnd =
    (IsShader && MFI->returnsVoid()) || AMDGPU::isKernel(CC);
if (IsWaveEnd) {
  B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
  return true;
}

unsigned ReturnOpc = IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
auto Ret = B.buildInstrNoInsert(ReturnOpc);

if (!FLI.CanLowerReturn)
  insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
else if (!lowerReturnVal(B, Val, VRegs, Ret))
  return false;
// lowerParameterPtr: compute a pointer into the kernarg segment at Offset.
auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);
B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
                                        uint64_t Offset,
                                        Align Alignment) const {
  // ...
  for (ArgInfo &SplitArg : SplitArgs) {
    Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
    lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);

    if (SplitArg.Flags[0].isPointer()) {
      // Compensate for losing pointeriness in splitValueTypes.
      // ...
    }
    // ...
    assert(SplitArg.Regs.size() == 1);
    B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
    ++Idx;
  }
}
// allocateHSAUserSGPRs: reserve the user SGPRs that the kernel's ABI
// requires, in their fixed order.
if (Info.hasPrivateSegmentBuffer()) {
  Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
  MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
  CCInfo.AllocateReg(PrivateSegmentBufferReg);
}

if (Info.hasDispatchPtr()) {
  Register DispatchPtrReg = Info.addDispatchPtr(TRI);
  MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
  CCInfo.AllocateReg(DispatchPtrReg);
}

if (Info.hasQueuePtr()) {
  Register QueuePtrReg = Info.addQueuePtr(TRI);
  MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
  CCInfo.AllocateReg(QueuePtrReg);
}

if (Info.hasKernargSegmentPtr()) {
  Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
  // ...
  B.getMBB().addLiveIn(InputPtrReg);
  B.buildCopy(VReg, InputPtrReg);
  CCInfo.AllocateReg(InputPtrReg);
}

if (Info.hasDispatchID()) {
  Register DispatchIDReg = Info.addDispatchID(TRI);
  MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
  CCInfo.AllocateReg(DispatchIDReg);
}

if (Info.hasFlatScratchInit()) {
  Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
  MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
  CCInfo.AllocateReg(FlatScratchInitReg);
}
// lowerFormalArgumentsKernel:
Info->allocateModuleLDSGlobal(F);

CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
// ...

// The kernarg segment is aligned to at least 16 bytes; each argument gets
// the alignment implied by its offset within the segment.
const Align KernArgBaseAlign(16);
const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
uint64_t ExplicitArgOffset = 0;

for (auto &Arg : F.args()) {
  const bool IsByRef = Arg.hasByRefAttr();
  Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
  unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
  // ...
  ABIAlign = DL.getABITypeAlign(ArgTy);

  uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
  ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

  if (Arg.use_empty()) {
    ++i;
    continue;
  }

  if (IsByRef) {
    unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
    assert(VRegs[i].size() == 1 &&
           "expected only one register for byval pointers");
    if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
      lowerParameterPtr(VRegs[i][0], B, ArgOffset);
    } else {
      // Materialize the pointer in the constant address space, then cast it
      // to the byref argument's address space.
      lowerParameterPtr(PtrReg, B, ArgOffset);
      B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
    }
  } else {
    // ...
    lowerParameter(B, OrigArg, ArgOffset, Alignment);
  }
  ++i;
}
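// --- Illustrative aside (not part of AMDGPUCallLowering.cpp) ---
// The two alignTo lines above are the whole kernarg layout algorithm: each
// argument lands at the next offset compatible with its ABI alignment, and
// the running offset advances by its allocation size. A worked example;
// alignUp is a stand-in for llvm::alignTo, and the (i32, i64, i16) kernel
// signature is hypothetical:
#include <cassert>
#include <cstdint>

uint64_t alignUp(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) & ~(Align - 1);
}

int main() {
  uint64_t Explicit = 0;
  uint64_t Arg0 = alignUp(Explicit, 4); Explicit = Arg0 + 4; // i32 at 0
  uint64_t Arg1 = alignUp(Explicit, 8); Explicit = Arg1 + 8; // i64 at 8, 4 bytes of padding
  uint64_t Arg2 = alignUp(Explicit, 2);                      // i16 at 16
  assert(Arg0 == 0 && Arg1 == 8 && Arg2 == 16);
  return 0;
}
// --- end aside ---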
// lowerFormalArguments (non-kernel functions and shaders):
Info->allocateModuleLDSGlobal(F);

CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

if (Info->hasImplicitBufferPtr()) {
  Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
  MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
  CCInfo.AllocateReg(ImplicitBufferPtrReg);
}

if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
  Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
  MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
  CCInfo.AllocateReg(FlatScratchInitReg);
}

unsigned PSInputNum = 0;

for (auto &Arg : F.args()) {
  if (DL.getTypeStoreSize(Arg.getType()) == 0)
    continue;

  const bool InReg = Arg.hasAttribute(Attribute::InReg);

  // SGPR arguments to non-graphics functions are not implemented.
  if (!IsGraphics && InReg)
    return false;

  if (Arg.hasAttribute(Attribute::SwiftSelf) ||
      Arg.hasAttribute(Attribute::SwiftError) ||
      Arg.hasAttribute(Attribute::Nest))
    return false;

  // Track which pixel-shader inputs are actually used.
  const bool ArgUsed = !Arg.use_empty();
  bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

  if (!SkipArg) {
    Info->markPSInputAllocated(PSInputNum);
    if (ArgUsed)
      Info->markPSInputEnabled(PSInputNum);
  }
  // ...
}

// At least one interpolation mode must be enabled, or else the GPU will
// hang.
if ((Info->getPSInputAddr() & 0x7F) == 0 ||
    ((Info->getPSInputAddr() & 0xF) == 0 &&
     Info->isPSInputAllocated(11))) {
  CCInfo.AllocateReg(AMDGPU::VGPR0);
  CCInfo.AllocateReg(AMDGPU::VGPR1);
  Info->markPSInputAllocated(0);
  Info->markPSInputEnabled(0);
}

if (Subtarget.isAmdPalOS()) {
  // For AMDPAL, the register values generated here are final; no bits are
  // enabled later at run time. Apply the same hardware workaround here by
  // enabling at least one PS input if none are enabled.
  unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
  if ((PsInputBits & 0x7F) == 0 ||
      ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
    Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr(),
                                                ZB_Undefined));
}
// ...
if (!IsEntryFunc && !IsGraphics) {
  // For the fixed ABI, pass workitem IDs in the last argument register.
  TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
// ...
if (!Subtarget.enableFlatScratch())
  CCInfo.AllocateReg(Info->getScratchRSrcReg());
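// --- Illustrative aside (not part of AMDGPUCallLowering.cpp) ---
// The two bit tests above guard a hardware requirement: a pixel shader must
// enable at least one input or the GPU can hang. In SPI_PS_INPUT_ENA, bits
// 0-6 cover the interpolation modes and bit 11 is the POS_W float input,
// which is unusable without an interpolation mode from bits 0-3 (my reading
// of the checks; the helper name below is illustrative):
#include <cassert>
#include <cstdint>

bool needsFallbackPSInput(uint32_t PsInputBits) {
  return (PsInputBits & 0x7F) == 0 ||
         ((PsInputBits & 0xF) == 0 && ((PsInputBits >> 11) & 1));
}

int main() {
  assert(needsFallbackPSInput(0));          // nothing enabled
  assert(needsFallbackPSInput(1u << 11));   // only POS_W
  assert(!needsFallbackPSInput(1u << 0));   // an interpolation mode is set
  return 0;
}
// --- end aside ---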
772 "amdgpu-no-dispatch-ptr",
773 "amdgpu-no-queue-ptr",
774 "amdgpu-no-implicitarg-ptr",
775 "amdgpu-no-dispatch-id",
776 "amdgpu-no-workgroup-id-x",
777 "amdgpu-no-workgroup-id-y",
778 "amdgpu-no-workgroup-id-z"
788 for (
auto InputID : InputRegs) {
794 if (
Info.CB->hasFnAttr(ImplicitAttrNames[
I++]))
797 std::tie(OutgoingArg, ArgRC, ArgTy) =
804 std::tie(IncomingArg, IncomingArgRC, ArgTy) =
805 CallerArgInfo.getPreloadedValue(InputID);
806 assert(IncomingArgRC == ArgRC);
811 LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
813 LI->getImplicitArgPtr(InputReg,
MRI, MIRBuilder);
821 ArgRegs.emplace_back(OutgoingArg->
getRegister(), InputReg);
825 LLVM_DEBUG(
dbgs() <<
"Unhandled stack passed implicit input argument\n");
// Try the {X, Y, Z} workitem ID registers in order; a callee may use only
// the later ones.
std::tie(OutgoingArg, ArgRC, ArgTy) =
    CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
if (!OutgoingArg)
  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
if (!OutgoingArg)
  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
// ...

const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);

const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");

// Pack the needed IDs into one VGPR: X in bits [0,9], Y in [10,19],
// Z in [20,29].
if (NeedWorkItemIDX && ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
  LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                     std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
}

if (NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
  LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                     std::get<2>(WorkitemIDY));
  // ... shift Y left by 10 ...
  InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
}

if (NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
  LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                     std::get<2>(WorkitemIDZ));
  // ... shift Z left by 20 ...
  InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
}

if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
  if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
    // The callee requires a workitem ID the caller does not have (e.g. a
    // graphics function calling a C-convention function). This is illegal,
    // but produce something (an undef value).
    // ...
  } else {
    // Incoming workitem IDs are already packed; any one that is present
    // carries all required fields.
    ArgDescriptor IncomingArg = ArgDescriptor::createArg(
        IncomingArgX ? *IncomingArgX :
        IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
    LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
                       &AMDGPU::VGPR_32RegClass, S32);
  }
}

if (OutgoingArg->isRegister()) {
  ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
  // ...
} else {
  LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
  return false;
}
// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn
// for CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  assert(!(IsIndirect && IsTailCall) &&
         "Indirect calls can't be tail calls, "
         "because the address can be divergent");
  return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::G_SI_CALL;
}

// addCallTargetOperands(CallInst, MIRBuilder, Info): add the call target,
// either directly as a register or by materializing the global's address.
if (Info.Callee.isReg()) {
  CallInst.addReg(Info.Callee.getReg());
  CallInst.addImm(0);
} else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
  // A global address is not directly encodable in the call instruction;
  // build a G_GLOBAL_VALUE and call through the register.
  // ...
}
// doCallerAndCalleePassArgsTheSameWay:
if (CalleeCC == CallerCC)
  return true;
// ...
auto TRI = ST.getRegisterInfo();
// ...
std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
    getAssignFnsForCC(CalleeCC, TLI);
std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
    getAssignFnsForCC(CallerCC, TLI);
OutgoingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                     CalleeAssignFnVarArg);
OutgoingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                     CallerAssignFnVarArg);
return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);

// areCalleeOutgoingArgsTailCallable:
// If there are no outgoing arguments, then we are done.
if (OutArgs.empty())
  return true;
// ...
LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
return false;

// isEligibleForTailCallOptimization:
if (!Info.IsTailCall)
  return false;

// Indirect calls can't be tail calls, because the address can be divergent.
if (Info.Callee.isReg())
  return false;
// ...
if (!CallerPreserved)
  return false;

if (!mayTailCallThisCC(CalleeCC)) {
  LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
  return false;
}

if (any_of(CallerF.args(), [](const Argument &A) {
      return A.hasByValAttr() || A.hasSwiftErrorAttr();
    })) {
  LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
                       "or swifterror arguments\n");
  return false;
}
// ...
LLVM_DEBUG(
    dbgs()
    << "... Caller and callee have incompatible calling conventions.\n");
// ...
LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
return true;
void AMDGPUCallLowering::handleImplicitCallArguments(
    MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst,
    const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo,
    ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
  if (!ST.enableFlatScratch()) {
    // Insert copies for the SRD. In the HSA case, this should be an
    // identity copy.
    // ...
    MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
  }

  for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
    MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
    CallInst.addReg(ArgReg.first, RegState::Implicit);
  }
}
// lowerTailCall:
CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
// ...
MIB.addRegMask(Mask);

// Byte offset of this call's argument area from the callee's; always 0 when
// sibcalling.
int FPDiff = 0;
unsigned NumBytes = 0;
if (!IsSibCall) {
  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
  CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
  // ...
  // FPDiff is negative when this tail call needs more stack space than the
  // incoming argument area already provides.
  FPDiff = NumReusableBytes - NumBytes;
  assert(isAligned(ST.getStackAlignment(), FPDiff) &&
         "unaligned stack on tail call");
}

// Do the actual argument marshalling.
AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);
// ...
MIB->getOperand(1).setImm(FPDiff);
// ...
// If Callee is a reg, it is used by a target-specific instruction, so it
// must have a register class matching that instruction's constraint.
if (MIB->getOperand(0).isReg()) {
  MIB->getOperand(0).setReg(constrainOperandRegClass(
      MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
      MIB->getDesc(), MIB->getOperand(0), 0));
}
// ...
Info.LoweredTailCall = true;
return true;
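// --- Illustrative aside (not part of AMDGPUCallLowering.cpp) ---
// FPDiff above is the signed distance between the stack-argument area the
// caller already reserved and the one this tail call needs; it is negative
// when the callee needs more space. A worked example with hypothetical
// sizes:
#include <cassert>

int main() {
  unsigned NumReusableBytes = 32; // caller's incoming stack-argument area
  unsigned NumBytes = 48;         // bytes this tail call's arguments need
  int FPDiff = (int)NumReusableBytes - (int)NumBytes;
  assert(FPDiff == -16);          // callee's area starts 16 bytes lower
  assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  return 0;
}
// --- end aside ---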
// lowerCall:
if (Info.IsVarArg) {
  LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
  return false;
}
// ...
for (auto &OrigArg : Info.OrigArgs)
  splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);

if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
  splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

// If we can lower as a tail call, do that instead.
bool CanTailCallOpt =
    isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

// We must emit a tail call if we have musttail.
if (Info.IsMustTailCall && !CanTailCallOpt) {
  LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
  return false;
}

Info.IsTailCall = CanTailCallOpt;
if (CanTailCallOpt)
  return lowerTailCall(MIRBuilder, Info, OutArgs);

// Find out which ABI gets to decide where things go.
std::tie(AssignFnFixed, AssignFnVarArg) =
    getAssignFnsForCC(Info.CallConv, TLI);

MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
    .addImm(0)
    .addImm(0);

// Create a temporarily-floating call instruction so we can add the implicit
// uses of argument registers.
// ...
MIB.addDef(TRI->getReturnAddressReg(MF));
// ...
MIB.addRegMask(Mask);

// Do the actual argument marshalling.
AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
// ...
// If the callee is a register, constrain it to the register class required
// by the call instruction.
if (MIB->getOperand(1).isReg()) {
  MIB->getOperand(1).setReg(constrainOperandRegClass(
      MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
      MIB->getDesc(), MIB->getOperand(1), 1));
}

// Finally we can copy the returned value back into its virtual register.
if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
  // ...
}

uint64_t CalleePopBytes = NumBytes;

MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
    .addImm(0)
    .addImm(CalleePopBytes);

if (!Info.CanLowerReturn) {
  insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                  Info.DemoteRegister, Info.DemoteStackIndex);
}
return true;