//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<unsigned> VRegs) const {
  // FIXME: Add support for non-void returns.
  if (Val)
    return false;

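  // Every return reaching this point is from a kernel or shader entry point
  // with no return value, so just terminate the wave with S_ENDPGM instead of
  // copying results into ABI locations.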
  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
  return true;
}

unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
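  // Kernel arguments live in the constant address space at a fixed byte
  // offset from the kernarg segment base, so build the address as a
  // constant-offset GEP off the preloaded KERNARG_SEGMENT_PTR register.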
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
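  // Each kernel argument becomes a load from the kernarg segment. The memory
  // operand is marked invariant (and non-temporal) because kernarg memory is
  // read-only for the lifetime of the dispatch.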
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                              TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

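  // Dispatch setup values (private segment buffer, dispatch and queue
  // pointers, kernarg segment pointer, etc.) arrive preloaded in SGPRs.
  // Reserve those registers with CCInfo up front so the normal argument
  // assignment below cannot reuse them.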
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

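    // Each explicit argument is placed at the next ABI-aligned offset in the
    // kernarg segment. For example, for a kernel taking (i32, i64) and
    // assuming a zero explicit base offset, the i32 lands at offset 0 and the
    // i64 is padded up to offset 8; the load alignment is then the common
    // alignment of the 16-byte kernarg base and that offset
    // (MinAlign(16, 8) == 8).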
    // TODO: Align down to dword alignment and extract bits for extending loads.
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }

  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
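  // For pixel shaders, track which hardware PS inputs are actually used.
  // Arguments that are unused in the IR are recorded in Skipped so no
  // location is assigned or copied for them below.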
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

    if (ValEVT.isVector()) {
      if (!ValEVT.isSimple())
        return false;
      EVT ElemVT = ValEVT.getVectorElementType();
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);

      // CCAssignFn returns true if it failed to assign a location, so fail
      // if we don't know how to handle this type.
      if (Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

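  // For the graphics calling conventions handled so far, bind each assigned
  // physical register as a live-in of the entry block and copy it into the
  // argument's virtual register, skipping the PS inputs marked above.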
  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }
    return true;
  }

  return false;
}