#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-call-lowering"

// AMDGPUValueHandler: shared base of the value handlers below. Its constructor
// forwards its arguments to the generic ValueHandler base, and
// extendRegisterMin32 widens values narrower than 32 bits (G_ANYEXT to s32)
// before they are assigned to their location.
      : ValueHandler(IsIncoming, B, MRI, AssignFn) {}

      return MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);

    return extendRegister(ValVReg, VA);
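// AMDGPUOutgoingValueHandler: assigns return values to their physical
// registers. Values are widened to at least 32 bits first, and values headed
// for an SGPR are made uniform with llvm.amdgcn.readfirstlane before the
// final copy.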
struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler {

      : AMDGPUValueHandler(false, B, MRI, AssignFn), MIB(MIB) {}

    Register ExtReg = extendRegisterMin32(ValVReg, VA);

    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                              {MRI.getType(ExtReg)}, false)
                        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,

    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
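// AMDGPUIncomingArgHandler: common handler for incoming values (formal
// arguments and call results). It materializes fixed stack objects for
// stack-passed values and truncates register values that were promoted to a
// wider location type back to the expected virtual register type.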
struct AMDGPUIncomingArgHandler : public AMDGPUValueHandler {
  uint64_t StackUsed = 0;

      : AMDGPUValueHandler(true, B, MRI, AssignFn) {}

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    return AddrReg.getReg(0);

    markPhysRegUsed(PhysReg);

      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);

    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);

      MIRBuilder.buildCopy(ValVReg, PhysReg);

    const LLT RegTy = MRI.getType(ValVReg);

    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);

  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
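// FormalArgHandler marks incoming physical registers as live-in to the entry
// block; CallReturnHandler instead records them on the call instruction being
// built (MIB).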
struct FormalArgHandler : public AMDGPUIncomingArgHandler {

      : AMDGPUIncomingArgHandler(B, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }

struct CallReturnHandler : public AMDGPUIncomingArgHandler {

      : AMDGPUIncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
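// AMDGPUOutgoingArgHandler: assigns outgoing call arguments, either to
// registers (again widened to at least 32 bits) or to stack locations
// addressed relative to the stack pointer via G_PTR_ADD.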
struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {

                          bool IsTailCall = false, int FPDiff = 0)
      : AMDGPUValueHandler(false, MIRBuilder, MRI, AssignFn), MIB(MIB),
        AssignFnVarArg(AssignFnVarArg), FPDiff(FPDiff), IsTailCall(IsTailCall) {

    auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
    return AddrReg.getReg(0);

    Register ExtReg = extendRegisterMin32(ValVReg, VA);

        ? extendRegister(Arg.Regs[0], VA)

    const LLT RegTy = MRI.getType(ValVReg);

    assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
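// extOpcodeToISDExtOpcode: maps generic extension opcodes to the
// corresponding ISD extension node kinds.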
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
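// AMDGPUCallLowering::splitToValueTypes: breaks an ArgInfo down into one
// entry per legal value type, copying the original flags and marking runs of
// values that must occupy consecutive registers.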
                                           const ArgInfo &OrigArg,

  assert(OrigArg.Regs.size() == SplitVTs.size());

  if (SplitVTs.size() == 0)

  if (SplitVTs.size() == 1) {
    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                           OrigArg.Flags[0], OrigArg.IsFixed);

  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");

      OrigArg.Ty, CallConv, false);

  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
                           OrigArg.IsFixed);

      SplitArgs.back().Flags[0].setInConsecutiveRegs();

      SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
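// AMDGPUCallLowering::processSplitArgs: further splits each value-type piece
// into register-sized parts, invoking the PerformArgSplit callback so the
// caller can emit the merge/unmerge code connecting the part registers to the
// original virtual registers.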
void AMDGPUCallLowering::processSplitArgs(

    SplitArgTy PerformArgSplit) const {

  for (int SplitIdx = 0, e = SplitArg.size(); SplitIdx != e; ++SplitIdx) {
    const ArgInfo &CurSplitArg = SplitArg[SplitIdx];

    for (unsigned i = 0; i < NumParts; ++i) {
      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);

    PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);
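// unpackRegsToOrigType: emits the G_UNMERGE_VALUES / G_MERGE_VALUES sequence
// that splits a single source register into the destination part registers,
// padding up to a least-common-multiple type when the sizes do not divide
// evenly.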
  assert(DstRegs.size() > 1 && "Nothing to unpack");

  for (int i = 0, e = DstRegs.size(); i != e; ++i)
    B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));

  if (GCDTy == PartTy) {
    B.buildUnmerge(DstRegs, SrcReg);

  LLT DstTy = MRI.getType(DstRegs[0]);

  if (LCMSize != SrcSize) {

    for (unsigned Size = SrcSize; Size != LCMSize; Size += SrcSize)

    UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);

  for (unsigned Size = DstSize * DstRegs.size(); Size != LCMSize;
       Size += DstSize)
    UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));

  B.buildUnmerge(UnmergeResults, UnmergeSrc);
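// AMDGPUCallLowering::canLowerReturn: runs the return-value CCAssignFn over
// the split return types to decide whether the result can be returned in
// registers (otherwise it is demoted to an sret pointer).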
                                        bool IsVarArg) const {

  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
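// lowerReturnVal: splits the returned IR value into legal pieces, applies the
// sign/zero extension requested by the return attributes, and hands the
// pieces to AMDGPUOutgoingValueHandler for assignment to the return
// registers.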
  auto &MF = B.getMF();

         "For each split Type there should be exactly one VReg.");

  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
    EVT VT = SplitEVTs[i];

      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
      if (RetInfo.Flags[0].isSExt()) {
        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_SEXT;
      } else if (RetInfo.Flags[0].isZExt()) {
        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_ZEXT;

      if (Reg != RetInfo.Regs[0]) {
        RetInfo.Regs[0] = Reg;

    splitToValueTypes(B, RetInfo, PreSplitRetInfos, DL, CC);

    processSplitArgs(B, RetInfo,
                     PreSplitRetInfos, SplitRetInfos, DL, CC, true,
                      LLT PartLLT, int VTSplitIdx) {
                            PreSplitRetInfos[VTSplitIdx], LLTy,

    PreSplitRetInfos.clear();

  AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
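// lowerReturn: entry points that end the wave emit S_ENDPGM; otherwise a
// return pseudo is built (SI_RETURN_TO_EPILOG for shaders,
// S_SETPC_B64_return for functions), with the return address carried in an
// SGPR pair in the S_SETPC_B64_return case.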
  assert(!Val == VRegs.empty() && "Return value without a vreg");

  const bool IsWaveEnd =

    B.buildInstr(AMDGPU::S_ENDPGM)

      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = B.buildInstrNoInsert(ReturnOpc);

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);

  } else if (!lowerReturnVal(B, Val, VRegs, Ret))

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {

                                         &AMDGPU::SGPR_64RegClass);
    B.buildCopy(ReturnAddrVReg, LiveInReturn);
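// lowerParameterPtr / lowerParameter: compute a pointer into the kernarg
// segment at the given offset and load the kernel argument from it.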
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);

  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);

  Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
  lowerParameterPtr(PtrReg, B, ParamTy, Offset);

  B.buildLoad(DstReg, PtrReg, *MMO);
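// allocateHSAUserSGPRs: reserves the user SGPRs requested by the kernel
// (private segment buffer, dispatch ptr, queue ptr, kernarg segment ptr,
// dispatch id, flat scratch init) and marks them live-in.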
  if (Info.hasPrivateSegmentBuffer()) {
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);

  if (Info.hasDispatchPtr()) {
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);

  if (Info.hasQueuePtr()) {
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);

  if (Info.hasKernargSegmentPtr()) {
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);

  if (Info.hasDispatchID()) {
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);

  if (Info.hasFlatScratchInit()) {
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
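// lowerFormalArgumentsKernel: walks the kernel's explicit arguments, computes
// each argument's offset in the kernarg segment, and either materializes a
// pointer (for byref arguments) or loads the value. Unused arguments are
// skipped.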
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  const Align KernArgBaseAlign(16);

  uint64_t ExplicitArgOffset = 0;

  for (auto &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);

      ABIAlign = DL.getABITypeAlign(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

    if (Arg.use_empty()) {

      unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();

             "expected only one register for byval pointers");

        lowerParameterPtr(VRegs[i][0], B, ArgTy, ArgOffset);

        Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
        lowerParameterPtr(PtrReg, B, ArgTy, ArgOffset);

        B.buildAddrSpaceCast(VRegs[i][0], PtrReg);

          OrigArgRegs.size() == 1

      lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
      if (OrigArgRegs.size() > 1)
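// mergeVectorRegsToResultRegs: reassembles vector results from part
// registers, concatenating the sources and, if needed, padding up to a wider
// least-common-multiple type before unmerging into the destination registers.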
  LLT LLTy = MRI.getType(DstRegs[0]);
  LLT PartLLT = MRI.getType(SrcRegs[0]);

    return B.buildConcatVectors(DstRegs[0], SrcRegs);

  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);

  for (int I = DstRegs.size(); I != NumDst; ++I)
    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

  return B.buildUnmerge(PadDstRegs, Widened);
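// packSplitRegsToOrigType: the inverse of unpackRegsToOrigType; rebuilds the
// original scalar or vector value from its split part registers using
// G_MERGE_VALUES / G_BUILD_VECTOR, truncating when the merged value is wider
// than the original type.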
  LLT OrigTy = MRI.getType(OrigRegs[0]);

  if (SrcSize == OrigTy.getSizeInBits())
    B.buildMerge(OrigRegs[0], Regs);
  else {
    auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
    B.buildTrunc(OrigRegs[0], Widened);

  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();

  if (DstEltTy == PartLLT) {

      MRI.setType(Reg, RealDstEltTy);

    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {

    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

      MRI.setType(Merge.getReg(0), RealDstEltTy);

    B.buildBuildVector(OrigRegs[0], EltMerges);

    auto BV = B.buildBuildVector(BVType, Regs);
    B.buildTrunc(OrigRegs[0], BV);
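// lowerFormalArguments: non-kernel entry points and callable functions.
// Graphics shaders get special PS input handling (PSInputAddr/PSInputEna),
// arguments with swiftself/swifterror/nest attributes are not supported, and
// the remaining arguments are split and assigned via FormalArgHandler using
// the calling convention's CCAssignFn.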
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);

                                             &AMDGPU::SGPR_64RegClass);
    B.buildCopy(LiveInReturn, ReturnAddrReg);

  if (Info->hasImplicitBufferPtr()) {
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);

  unsigned PSInputNum = 0;

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    if (!IsGraphics && InReg)

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))

    const bool ArgUsed = !Arg.use_empty();
    bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      Info->markPSInputAllocated(PSInputNum);

        Info->markPSInputEnabled(PSInputNum);

      for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
        B.buildUndef(VRegs[Idx][I]);

    splitToValueTypes(B, OrigArg, SplitArg, DL, CC);

    processSplitArgs(B, OrigArg, SplitArg, SplitArgs, DL, CC, false,
                      LLT PartLLT, int VTSplitIdx) {
      assert(DstReg == VRegs[Idx][VTSplitIdx]);

  if ((Info->getPSInputAddr() & 0x7F) == 0 ||
      ((Info->getPSInputAddr() & 0xF) == 0 &&
       Info->isPSInputAllocated(11))) {
    CCInfo.AllocateReg(AMDGPU::VGPR0);
    CCInfo.AllocateReg(AMDGPU::VGPR1);
    Info->markPSInputAllocated(0);
    Info->markPSInputEnabled(0);

  if (Subtarget.isAmdPalOS()) {

    unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
    if ((PsInputBits & 0x7F) == 0 ||
        ((PsInputBits & 0xF) == 0 &&
         (PsInputBits >> 11 & 1)))
      Info->markPSInputEnabled(

  FormalArgHandler Handler(B, MRI, AssignFn);

  if (!Subtarget.enableFlatScratch())
    CCInfo.AllocateReg(Info->getScratchRSrcReg());
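// passSpecialInputs: forwards the caller's implicit argument registers
// (special SGPR inputs such as the dispatch, queue and kernarg segment
// pointers, plus the workitem ID VGPRs) to the callee, OR-ing the X/Y/Z
// workitem IDs into a single packed register when needed.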
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  for (auto InputID : InputRegs) {

    std::tie(OutgoingArg, ArgRC, ArgTy) =

    std::tie(IncomingArg, IncomingArgRC, ArgTy) =
        CallerArgInfo.getPreloadedValue(InputID);
    assert(IncomingArgRC == ArgRC);

    Register InputReg = MRI.createGenericVirtualRegister(ArgTy);

      LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);

      LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);

      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);

      LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");

  std::tie(OutgoingArg, ArgRC, ArgTy) =

    std::tie(OutgoingArg, ArgRC, ArgTy) =

    std::tie(OutgoingArg, ArgRC, ArgTy) =

  const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
  const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
  const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);

    InputReg = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                       std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));

    LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                       std::get<2>(WorkitemIDY));
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;

    LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                       std::get<2>(WorkitemIDZ));
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;

    InputReg = MRI.createGenericVirtualRegister(S32);

        IncomingArgX ? *IncomingArgX
                     : IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
    LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
                       &AMDGPU::VGPR_32RegClass, S32);

    ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);

    LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
// Helpers for call lowering: getAssignFnsForCC returns the fixed and vararg
// CCAssignFn for a calling convention, getCallOpcode picks the call pseudo
// (SI_CALL here), and addCallTargetOperands adds the callee operand, which
// may be a register or a global value.
static std::pair<CCAssignFn *, CCAssignFn *>

  return AMDGPU::SI_CALL;

                                  AMDGPUCallLowering::CallLoweringInfo &Info) {
  if (Info.Callee.isReg()) {

  } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
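// lowerCall: splits the outgoing arguments, brackets the call with the
// ADJCALLSTACKUP / ADJCALLSTACKDOWN pseudos, assigns arguments with
// AMDGPUOutgoingArgHandler, copies the scratch resource descriptor into
// SGPR0-SGPR3 when flat scratch is not enabled, forwards the implicit inputs
// from passSpecialInputs, and finally lowers the returned value with
// CallReturnHandler (or through an sret demotion when it cannot be returned
// in registers).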
  if (Info.IsVarArg) {

  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(MIRBuilder, OrigArg, SplitArg, DL, Info.CallConv);

        MIRBuilder, OrigArg, SplitArg, OutArgs, DL, Info.CallConv, true,

          unpackRegsToOrigType(MIRBuilder, Regs, SrcReg, OrigArg, LLTy, PartLLT);

  bool CanTailCallOpt = false;

  if (Info.IsMustTailCall && !CanTailCallOpt) {
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");

  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)

  MIB.addDef(TRI->getReturnAddressReg(MF));

  MIB.addRegMask(Mask);

  AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
                                   AssignFnVarArg, false);

  if (!ST.enableFlatScratch()) {

    MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);

  for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {

  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (MIB->getOperand(1).isReg()) {

        *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),

  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN);

  if (!Info.CanLowerReturn) {

                    Info.DemoteRegister, Info.DemoteStackIndex);
  } else if (!Info.OrigRet.Ty->isVoidTy()) {

        MIRBuilder, Info.OrigRet, PreSplitRetInfos, DL, Info.CallConv);

    processSplitArgs(MIRBuilder, Info.OrigRet,
                     PreSplitRetInfos, InArgs, DL, Info.CallConv, false,
                      LLT LLTy, LLT PartLLT, int VTSplitIdx) {
                       assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
                       packSplitRegsToOrigType(MIRBuilder,
                                               Info.OrigRet.Regs[VTSplitIdx],
                                               Regs, LLTy, PartLLT);

  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);

  uint64_t CalleePopBytes = NumBytes;
      .addImm(CalleePopBytes);