1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "GCNSubtarget.h"
20#include "SIRegisterInfo.h"
26
27using namespace llvm;
28
29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
31
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling SGPRs to VGPRs"),
36 cl::init(true));
37
38std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
40
41// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
42// Valid indexes are shifted by 1, so that a 0 mapping means unsupported.
43// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
44// meaning index 7 in SubRegFromChannelTable.
45static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
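// For example, SubRegFromChannelTableWidthMap[2] == 2, so 2-DWORD (64-bit)
// tuples are looked up in row 1 of SubRegFromChannelTable; widths of 9-15
// DWORDs map to 0 and are unsupported.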
47
48namespace llvm {
49
50// A temporary struct to spill SGPRs.
51// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
52// just v_writelane and v_readlane.
53//
54// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
55// is saved to scratch (or the other way around for loads).
56// For this, a VGPR is required where the needed lanes can be clobbered. The
57// RegScavenger can provide a VGPR where currently active lanes can be
58// clobbered, but we still need to save inactive lanes.
59// The high-level steps are:
60// - Try to scavenge SGPR(s) to save exec
61// - Try to scavenge VGPR
62// - Save the lanes of TmpVGPR that need saving (all lanes, or only the inactive ones)
63// - Spill/Restore SGPRs using TmpVGPR
64// - Restore TmpVGPR
65//
66// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
67// cannot scavenge temporary SGPRs to save exec, we use the following code:
68// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
69// s_not exec, exec
70// buffer_store_dword TmpVGPR ; save inactive lanes
71// s_not exec, exec
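//
// In wave32 the same sequence uses s_not_b32 and exec_lo instead of s_not_b64
// and exec (see the MovOpc/NotOpc selection in the constructor below).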
72struct SGPRSpillBuilder {
73 struct PerVGPRData {
74 unsigned PerVGPR;
75 unsigned NumVGPRs;
76 int64_t VGPRLanes;
77 };
78
79 // The SGPR to save
80 Register SuperReg;
81 MachineBasicBlock::iterator MI;
82 ArrayRef<int16_t> SplitParts;
83 unsigned NumSubRegs;
84 bool IsKill;
85 const DebugLoc &DL;
86
87 /* When spilling to stack */
88 // The SGPRs are written into this VGPR, which is then written to scratch
89 // (or vice versa for loads).
90 Register TmpVGPR = AMDGPU::NoRegister;
91 // Temporary spill slot to save TmpVGPR to.
92 int TmpVGPRIndex = 0;
93 // If TmpVGPR is live before the spill or if it is scavenged.
94 bool TmpVGPRLive = false;
95 // Scavenged SGPR to save EXEC.
96 Register SavedExecReg = AMDGPU::NoRegister;
97 // Stack index to write the SGPRs to.
98 int Index;
99 unsigned EltSize = 4;
100
101 RegScavenger *RS;
102 MachineBasicBlock *MBB;
103 MachineFunction &MF;
104 SIMachineFunctionInfo &MFI;
105 const SIInstrInfo &TII;
106 const SIRegisterInfo &TRI;
107 bool IsWave32;
108 Register ExecReg;
109 unsigned MovOpc;
110 unsigned NotOpc;
111
113 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
114 bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
115 : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
116 MI->getOperand(0).isKill(), Index, RS) {}
117
118 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
119 bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
120 bool IsKill, int Index, RegScavenger *RS)
121 : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
122 Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
123 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
124 IsWave32(IsWave32) {
125 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
126 SplitParts = TRI.getRegSplitParts(RC, EltSize);
127 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
128
129 if (IsWave32) {
130 ExecReg = AMDGPU::EXEC_LO;
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
133 } else {
134 ExecReg = AMDGPU::EXEC;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
137 }
138
139 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
140 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
142 }
143
144 PerVGPRData getPerVGPRData() {
145 PerVGPRData Data;
146 Data.PerVGPR = IsWave32 ? 32 : 64;
147 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
148 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
149 return Data;
150 }
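  // getPerVGPRData example: a 4-SGPR tuple in wave64 yields PerVGPR = 64,
  // NumVGPRs = 1 and VGPRLanes = 0xf, i.e. only the first four lanes of the
  // temporary VGPR are touched.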
151
152 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
153 // free.
154 // Writes these instructions if an SGPR can be scavenged:
155 // s_mov_b64 s[6:7], exec ; Save exec
156 // s_mov_b64 exec, 3 ; Wanted lanemask
157 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
158 //
159 // Writes these instructions if no SGPR can be scavenged:
160 // buffer_store_dword v0 ; Only if no free VGPR was found
161 // s_not_b64 exec, exec
162 // buffer_store_dword v0 ; Save inactive lanes
163 // ; exec stays inverted, it is flipped back in
164 // ; restore.
165 void prepare() {
166 // Scavenged temporary VGPR to use. It must be scavenged once for any number
167 // of spilled subregs.
168 // FIXME: The liveness analysis is limited and does not tell if a register
169 // is in use in lanes that are currently inactive. We can never be sure if
170 // a register is actually in use in another lane, so we need to save all
171 // used lanes of the chosen VGPR.
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
173 TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
174 0, false);
175
176 // Reserve temporary stack slot
178 if (TmpVGPR) {
179 // Found a register that is dead in the currently active lanes; we only
180 // need to spill the inactive lanes.
181 TmpVGPRLive = false;
182 } else {
183 // Pick v0 because it doesn't make a difference.
184 TmpVGPR = AMDGPU::VGPR0;
185 TmpVGPRLive = true;
186 }
187
188 if (TmpVGPRLive) {
189 // We need to inform the scavenger that this index is already in use until
190 // we're done with the custom emergency spill.
192 }
193
194 // We may end up recursively calling the scavenger, and don't want to re-use
195 // the same register.
197
198 // Try to scavenge SGPRs to save exec
199 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
200 const TargetRegisterClass &RC =
201 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
203 SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);
204
205 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
206
207 if (SavedExecReg) {
209 // Set exec to needed lanes
211 auto I =
212 BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
213 if (!TmpVGPRLive)
215 // Spill needed lanes
216 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
217 } else {
218 // The modify and restore of exec clobber SCC, which we would have to save
219 // and restore. FIXME: We probably would need to reserve a register for
220 // this.
221 if (RS->isRegUsed(AMDGPU::SCC))
222 MI->emitError("unhandled SGPR spill to memory");
223
224 // Spill active lanes
225 if (TmpVGPRLive)
226 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
227 /*IsKill*/ false);
228 // Spill inactive lanes
229 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
230 if (!TmpVGPRLive)
232 I->getOperand(2).setIsDead(); // Mark SCC as dead.
233 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
234 }
235 }
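  // On exit from prepare(), exec is either set to the wanted lanemask (if an
  // SGPR was scavenged to save it) or left inverted; restore() undoes this.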
236
237 // Writes these instructions if an SGPR can be scavenged:
238 // buffer_load_dword v1 ; Reload scavenged VGPR from emergency slot
239 // s_waitcnt vmcnt(0) ; If a free VGPR was found
240 // s_mov_b64 exec, s[6:7] ; Restore exec
241 //
242 // Writes these instructions if no SGPR can be scavenged:
243 // buffer_load_dword v0 ; Restore inactive lanes
244 // s_waitcnt vmcnt(0) ; If a free VGPR was found
245 // s_not_b64 exec, exec
246 // buffer_load_dword v0 ; Only if no free VGPR was found
247 void restore() {
248 if (SavedExecReg) {
249 // Restore used lanes
250 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
251 /*IsKill*/ false);
252 // Restore exec
253 auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
255 // Add an implicit use of the load so it is not dead.
256 // FIXME This inserts an unnecessary waitcnt
257 if (!TmpVGPRLive) {
259 }
260 } else {
261 // Restore inactive lanes
262 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
263 /*IsKill*/ false);
264 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
265 if (!TmpVGPRLive)
267 I->getOperand(2).setIsDead(); // Mark SCC as dead.
268
269 // Restore active lanes
270 if (TmpVGPRLive)
271 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
272 }
273
274 // Inform the scavenger where we're releasing our custom scavenged register.
275 if (TmpVGPRLive) {
276 MachineBasicBlock::iterator RestorePt = std::prev(MI);
278 }
279 }
280
281 // Write TmpVGPR to memory or read TmpVGPR from memory.
282 // Either using a single buffer_load/store if exec is set to the needed mask
283 // or using
284 // buffer_load
285 // s_not exec, exec
286 // buffer_load
287 // s_not exec, exec
288 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
289 if (SavedExecReg) {
290 // Spill needed lanes
291 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
292 } else {
293 // The modify and restore of exec clobber SCC, which we would have to save
294 // and restore. FIXME: We probably would need to reserve a register for
295 // this.
296 if (RS->isRegUsed(AMDGPU::SCC))
297 MI->emitError("unhandled SGPR spill to memory");
298
299 // Spill active lanes
300 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
301 /*IsKill*/ false);
302 // Spill inactive lanes
303 auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
304 Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
305 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
306 auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
307 Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
308 }
309 }
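  // Note that without a saved exec each access above is issued twice: once for
  // the active lanes and once, with exec inverted, for the inactive lanes.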
310
311 void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
312 assert(MBB->getParent() == &MF);
313 MI = NewMI;
314 MBB = NewMBB;
315 }
316};
317
318} // namespace llvm
319
320SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
321 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
322 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
323
324 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
325 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
326 (getSubRegIndexLaneMask(AMDGPU::lo16) |
327 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
328 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
329 "getNumCoveredRegs() will not work with generated subreg masks!");
330
331 RegPressureIgnoredUnits.resize(getNumRegUnits());
332 RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
333 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
334 RegPressureIgnoredUnits.set(*regunits(Reg).begin());
335
336 // HACK: Until this is fully tablegen'd.
337 static llvm::once_flag InitializeRegSplitPartsFlag;
338
339 static auto InitializeRegSplitPartsOnce = [this]() {
340 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
341 unsigned Size = getSubRegIdxSize(Idx);
342 if (Size & 31)
343 continue;
344 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
345 unsigned Pos = getSubRegIdxOffset(Idx);
346 if (Pos % Size)
347 continue;
348 Pos /= Size;
349 if (Vec.empty()) {
350 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
351 Vec.resize(MaxNumParts);
352 }
353 Vec[Pos] = Idx;
354 }
355 };
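  // For example, the 64-bit index at bit offset 64 (sub2_sub3) lands in
  // RegSplitParts[1][1]: row 1 holds the 64-bit indexes and the position is
  // offset / size.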
356
357 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
358
359 static auto InitializeSubRegFromChannelTableOnce = [this]() {
360 for (auto &Row : SubRegFromChannelTable)
361 Row.fill(AMDGPU::NoSubRegister);
362 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
363 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
364 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
366 Width = SubRegFromChannelTableWidthMap[Width];
367 if (Width == 0)
368 continue;
369 unsigned TableIdx = Width - 1;
370 assert(TableIdx < SubRegFromChannelTable.size());
371 assert(Offset < SubRegFromChannelTable[TableIdx].size());
372 SubRegFromChannelTable[TableIdx][Offset] = Idx;
373 }
374 };
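  // For example, sub1 (32 bits at channel 1) is recorded as
  // SubRegFromChannelTable[0][1] and is what getSubRegFromChannel(1) returns.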
375
376 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
377 llvm::call_once(InitializeSubRegFromChannelTableFlag,
378 InitializeSubRegFromChannelTableOnce);
379}
380
381void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
382 MCRegister Reg) const {
383 for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
384 Reserved.set(*R);
385}
386
387// Forced to be here by one .inc
389 const MachineFunction *MF) const {
391 switch (CC) {
392 case CallingConv::C:
395 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
396 : CSR_AMDGPU_SaveList;
398 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
399 : CSR_AMDGPU_SI_Gfx_SaveList;
401 return CSR_AMDGPU_CS_ChainPreserve_SaveList;
402 default: {
403 // Dummy to not crash RegisterClassInfo.
404 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
405 return &NoCalleeSavedReg;
406 }
407 }
408}
409
410const MCPhysReg *
412 return nullptr;
413}
414
416 CallingConv::ID CC) const {
417 switch (CC) {
418 case CallingConv::C:
421 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
422 : CSR_AMDGPU_RegMask;
424 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
425 : CSR_AMDGPU_SI_Gfx_RegMask;
428 // Calls to these functions never return, so we can pretend everything is
429 // preserved.
430 return AMDGPU_AllVGPRs_RegMask;
431 default:
432 return nullptr;
433 }
434}
435
437 return CSR_AMDGPU_NoRegs_RegMask;
438}
439
441 return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
442}
443
446 const MachineFunction &MF) const {
447 // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
448 // equivalent AV class. If one were used here, the verifier would crash after
449 // RegBankSelect in the GISel flow, because the aligned regclasses are not fully
450 // set up until instruction selection.
451 if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
452 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
453 return &AMDGPU::AV_32RegClass;
454 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
455 return &AMDGPU::AV_64RegClass;
456 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
457 RC == &AMDGPU::AReg_64_Align2RegClass)
458 return &AMDGPU::AV_64_Align2RegClass;
459 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
460 return &AMDGPU::AV_96RegClass;
461 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
462 RC == &AMDGPU::AReg_96_Align2RegClass)
463 return &AMDGPU::AV_96_Align2RegClass;
464 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
465 return &AMDGPU::AV_128RegClass;
466 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
467 RC == &AMDGPU::AReg_128_Align2RegClass)
468 return &AMDGPU::AV_128_Align2RegClass;
469 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
470 return &AMDGPU::AV_160RegClass;
471 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
472 RC == &AMDGPU::AReg_160_Align2RegClass)
473 return &AMDGPU::AV_160_Align2RegClass;
474 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
475 return &AMDGPU::AV_192RegClass;
476 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
477 RC == &AMDGPU::AReg_192_Align2RegClass)
478 return &AMDGPU::AV_192_Align2RegClass;
479 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
480 return &AMDGPU::AV_256RegClass;
481 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
482 RC == &AMDGPU::AReg_256_Align2RegClass)
483 return &AMDGPU::AV_256_Align2RegClass;
484 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
485 return &AMDGPU::AV_512RegClass;
486 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
487 RC == &AMDGPU::AReg_512_Align2RegClass)
488 return &AMDGPU::AV_512_Align2RegClass;
489 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
490 return &AMDGPU::AV_1024RegClass;
491 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
492 RC == &AMDGPU::AReg_1024_Align2RegClass)
493 return &AMDGPU::AV_1024_Align2RegClass;
494 }
495
497}
498
500 const SIFrameLowering *TFI = ST.getFrameLowering();
502 // During ISel lowering we always reserve the stack pointer in entry and chain
503 // functions, but never actually want to reference it when accessing our own
504 // frame. If we need a frame pointer we use it, but otherwise we can just use
505 // an immediate "0" which we represent by returning NoRegister.
506 if (FuncInfo->isEntryFunction() || FuncInfo->isChainFunction()) {
507 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
508 }
509 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
510 : FuncInfo->getStackPtrOffsetReg();
511}
512
514 // When we need stack realignment, we can't reference off of the
515 // stack pointer, so we reserve a base pointer.
516 const MachineFrameInfo &MFI = MF.getFrameInfo();
517 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
518}
519
520Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
521
523 return AMDGPU_AllVGPRs_RegMask;
524}
525
527 return AMDGPU_AllAGPRs_RegMask;
528}
529
531 return AMDGPU_AllVectorRegs_RegMask;
532}
533
535 return AMDGPU_AllAllocatableSRegs_RegMask;
536}
537
538unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
539 unsigned NumRegs) {
540 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
541 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
542 assert(NumRegIndex && "Not implemented");
543 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
544 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
545}
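// For example, getSubRegFromChannel(2, 2) returns the 64-bit index covering
// channels 2 and 3 (sub2_sub3), taken from row 1 of the table.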
546
549 const unsigned Align,
550 const TargetRegisterClass *RC) const {
551 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
552 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
553 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
554}
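// For example, with 102 addressable SGPRs and Align = 4 this yields
// BaseIdx = alignDown(102, 4) - 4 = 96, i.e. the s[96:99] tuple for a 128-bit
// register class.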
555
557 const MachineFunction &MF) const {
558 return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
559}
560
562 BitVector Reserved(getNumRegs());
563 Reserved.set(AMDGPU::MODE);
564
566
567 // Reserve special purpose registers.
568 //
569 // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
570 // this seems likely to result in bugs, so I'm marking them as reserved.
571 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
572 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
573
574 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
575 reserveRegisterTuples(Reserved, AMDGPU::M0);
576
577 // Reserve src_vccz, src_execz, src_scc.
578 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
579 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
580 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
581
582 // Reserve the memory aperture registers
583 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
584 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
585 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
586 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
587
588 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
589 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
590
591 // Reserve xnack_mask registers - support is not implemented in Codegen.
592 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
593
594 // Reserve lds_direct register - support is not implemented in Codegen.
595 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
596
597 // Reserve Trap Handler registers - support is not implemented in Codegen.
598 reserveRegisterTuples(Reserved, AMDGPU::TBA);
599 reserveRegisterTuples(Reserved, AMDGPU::TMA);
600 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
601 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
602 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
603 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
604 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
605 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
606 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
607 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
608
609 // Reserve null register - it shall never be allocated
610 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
611
612 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
613 // will result in bugs.
614 if (isWave32) {
615 Reserved.set(AMDGPU::VCC);
616 Reserved.set(AMDGPU::VCC_HI);
617 }
618
619 // Reserve SGPRs.
620 //
621 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
622 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
623 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
624 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
625 reserveRegisterTuples(Reserved, Reg);
626 }
627
628 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
629 if (ScratchRSrcReg != AMDGPU::NoRegister) {
630 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
631 // need to spill.
632 // TODO: May need to reserve a VGPR if doing LDS spilling.
633 reserveRegisterTuples(Reserved, ScratchRSrcReg);
634 }
635
636 Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
637 if (LongBranchReservedReg)
638 reserveRegisterTuples(Reserved, LongBranchReservedReg);
639
640 // We have to assume the SP is needed in case there are calls in the function,
641 // which is detected after the function is lowered. If we aren't really going
642 // to need SP, don't bother reserving it.
643 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
644 if (StackPtrReg) {
645 reserveRegisterTuples(Reserved, StackPtrReg);
646 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
647 }
648
649 MCRegister FrameReg = MFI->getFrameOffsetReg();
650 if (FrameReg) {
651 reserveRegisterTuples(Reserved, FrameReg);
652 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
653 }
654
655 if (hasBasePointer(MF)) {
656 MCRegister BasePtrReg = getBaseRegister();
657 reserveRegisterTuples(Reserved, BasePtrReg);
658 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
659 }
660
661 // FIXME: Use same reserved register introduced in D149775
662 // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
663 Register ExecCopyReg = MFI->getSGPRForEXECCopy();
664 if (ExecCopyReg)
665 reserveRegisterTuples(Reserved, ExecCopyReg);
666
667 // Reserve VGPRs/AGPRs.
668 //
669 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
670 unsigned MaxNumAGPRs = MaxNumVGPRs;
671 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
672
673 // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
674 // a wave may have up to 512 total vector registers combining together both
675 // VGPRs and AGPRs. Hence, in an entry function without calls and without
676 // AGPRs used within it, it is possible to use the whole vector register
677 // budget for VGPRs.
678 //
679 // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
680 // register file accordingly.
681 if (ST.hasGFX90AInsts()) {
682 if (MFI->usesAGPRs(MF)) {
683 MaxNumVGPRs /= 2;
684 MaxNumAGPRs = MaxNumVGPRs;
685 } else {
686 if (MaxNumVGPRs > TotalNumVGPRs) {
687 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
688 MaxNumVGPRs = TotalNumVGPRs;
689 } else
690 MaxNumAGPRs = 0;
691 }
692 }
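  // For example, a 512-register budget on gfx90a splits into 256 VGPRs plus
  // 256 AGPRs; a 256-register budget with no AGPR use leaves all 256 for
  // VGPRs and no allocatable AGPRs.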
693
694 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
695 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
696 reserveRegisterTuples(Reserved, Reg);
697 }
698
699 if (ST.hasMAIInsts()) {
700 for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
701 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
702 reserveRegisterTuples(Reserved, Reg);
703 }
704 } else {
705 // Reserve all the AGPRs if there are no instructions to use them.
706 for (MCRegister Reg : AMDGPU::AGPR_32RegClass)
707 reserveRegisterTuples(Reserved, Reg);
708 }
709
710 // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
711 // VGPR available at all times.
712 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
713 reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
714 }
715
716 for (Register Reg : MFI->getWWMReservedRegs())
717 reserveRegisterTuples(Reserved, Reg);
718
719 // FIXME: Stop using reserved registers for this.
720 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
721 reserveRegisterTuples(Reserved, Reg);
722
723 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
724 reserveRegisterTuples(Reserved, Reg);
725
726 return Reserved;
727}
728
730 MCRegister PhysReg) const {
731 return !MF.getRegInfo().isReserved(PhysReg);
732}
733
736 // On entry or in chain functions, the base address is 0, so it can't possibly
737 // need any more alignment.
738
739 // FIXME: Should be able to specify the entry frame alignment per calling
740 // convention instead.
741 if (Info->isEntryFunction() || Info->isChainFunction())
742 return false;
743
745}
746
749 if (Info->isEntryFunction()) {
750 const MachineFrameInfo &MFI = Fn.getFrameInfo();
751 return MFI.hasStackObjects() || MFI.hasCalls();
752 }
753
754 // May need scavenger for dealing with callee saved registers.
755 return true;
756}
757
759 const MachineFunction &MF) const {
760 // Do not use frame virtual registers. They used to be used for SGPRs, but
761 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
762 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
763 // spill.
764 return false;
765}
766
768 const MachineFunction &MF) const {
769 const MachineFrameInfo &MFI = MF.getFrameInfo();
770 return MFI.hasStackObjects();
771}
772
774 const MachineFunction &) const {
775 // There are no special dedicated stack or frame pointers.
776 return true;
777}
778
781
782 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
783 AMDGPU::OpName::offset);
784 return MI->getOperand(OffIdx).getImm();
785}
786
788 int Idx) const {
790 return 0;
791
792 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
793 AMDGPU::OpName::vaddr) ||
794 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
795 AMDGPU::OpName::saddr))) &&
796 "Should never see frame index on non-address operand");
797
799}
800
803 return false;
804
805 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
806
808 return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
809
810 const SIInstrInfo *TII = ST.getInstrInfo();
811 return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
813}
814
816 int FrameIdx,
817 int64_t Offset) const {
819 DebugLoc DL; // Defaults to "unknown"
820
821 if (Ins != MBB->end())
822 DL = Ins->getDebugLoc();
823
825 const SIInstrInfo *TII = ST.getInstrInfo();
827 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
828 : AMDGPU::V_MOV_B32_e32;
829
830 Register BaseReg = MRI.createVirtualRegister(
831 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
832 : &AMDGPU::VGPR_32RegClass);
833
834 if (Offset == 0) {
835 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
836 .addFrameIndex(FrameIdx);
837 return BaseReg;
838 }
839
840 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
841
842 Register FIReg = MRI.createVirtualRegister(
843 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
844 : &AMDGPU::VGPR_32RegClass);
845
846 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
847 .addImm(Offset);
848 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
849 .addFrameIndex(FrameIdx);
850
851 if (ST.enableFlatScratch() ) {
852 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
853 .addReg(OffsetReg, RegState::Kill)
854 .addReg(FIReg);
855 return BaseReg;
856 }
857
858 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
859 .addReg(OffsetReg, RegState::Kill)
860 .addReg(FIReg)
861 .addImm(0); // clamp bit
862
863 return BaseReg;
864}
865
867 int64_t Offset) const {
868 const SIInstrInfo *TII = ST.getInstrInfo();
869 bool IsFlat = TII->isFLATScratch(MI);
870
871#ifndef NDEBUG
872 // FIXME: Is it possible to be storing a frame index to itself?
873 bool SeenFI = false;
874 for (const MachineOperand &MO: MI.operands()) {
875 if (MO.isFI()) {
876 if (SeenFI)
877 llvm_unreachable("should not see multiple frame indices");
878
879 SeenFI = true;
880 }
881 }
882#endif
883
884 MachineOperand *FIOp =
885 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
886 : AMDGPU::OpName::vaddr);
887
888 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
889 int64_t NewOffset = OffsetOp->getImm() + Offset;
890
891 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
892 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
893
894 if (IsFlat) {
895 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
897 "offset should be legal");
898 FIOp->ChangeToRegister(BaseReg, false);
899 OffsetOp->setImm(NewOffset);
900 return;
901 }
902
903#ifndef NDEBUG
904 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
905 assert(SOffset->isImm() && SOffset->getImm() == 0);
906#endif
907
909 "offset should be legal");
910
911 FIOp->ChangeToRegister(BaseReg, false);
912 OffsetOp->setImm(NewOffset);
913}
914
916 Register BaseReg,
917 int64_t Offset) const {
919 return false;
920
921 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
922
924 return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
925
926 const SIInstrInfo *TII = ST.getInstrInfo();
927 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
929}
930
932 const MachineFunction &MF, unsigned Kind) const {
933 // This is inaccurate. It depends on the instruction and address space. The
934 // only place where we should hit this is for dealing with frame indexes /
935 // private accesses, so this is correct in that case.
936 return &AMDGPU::VGPR_32RegClass;
937}
938
941 if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
942 return getEquivalentVGPRClass(RC);
943 if (RC == &AMDGPU::SCC_CLASSRegClass)
944 return getWaveMaskRegClass();
945
946 return RC;
947}
948
949static unsigned getNumSubRegsForSpillOp(unsigned Op) {
950
951 switch (Op) {
952 case AMDGPU::SI_SPILL_S1024_SAVE:
953 case AMDGPU::SI_SPILL_S1024_RESTORE:
954 case AMDGPU::SI_SPILL_V1024_SAVE:
955 case AMDGPU::SI_SPILL_V1024_RESTORE:
956 case AMDGPU::SI_SPILL_A1024_SAVE:
957 case AMDGPU::SI_SPILL_A1024_RESTORE:
958 case AMDGPU::SI_SPILL_AV1024_SAVE:
959 case AMDGPU::SI_SPILL_AV1024_RESTORE:
960 return 32;
961 case AMDGPU::SI_SPILL_S512_SAVE:
962 case AMDGPU::SI_SPILL_S512_RESTORE:
963 case AMDGPU::SI_SPILL_V512_SAVE:
964 case AMDGPU::SI_SPILL_V512_RESTORE:
965 case AMDGPU::SI_SPILL_A512_SAVE:
966 case AMDGPU::SI_SPILL_A512_RESTORE:
967 case AMDGPU::SI_SPILL_AV512_SAVE:
968 case AMDGPU::SI_SPILL_AV512_RESTORE:
969 return 16;
970 case AMDGPU::SI_SPILL_S384_SAVE:
971 case AMDGPU::SI_SPILL_S384_RESTORE:
972 case AMDGPU::SI_SPILL_V384_SAVE:
973 case AMDGPU::SI_SPILL_V384_RESTORE:
974 case AMDGPU::SI_SPILL_A384_SAVE:
975 case AMDGPU::SI_SPILL_A384_RESTORE:
976 case AMDGPU::SI_SPILL_AV384_SAVE:
977 case AMDGPU::SI_SPILL_AV384_RESTORE:
978 return 12;
979 case AMDGPU::SI_SPILL_S352_SAVE:
980 case AMDGPU::SI_SPILL_S352_RESTORE:
981 case AMDGPU::SI_SPILL_V352_SAVE:
982 case AMDGPU::SI_SPILL_V352_RESTORE:
983 case AMDGPU::SI_SPILL_A352_SAVE:
984 case AMDGPU::SI_SPILL_A352_RESTORE:
985 case AMDGPU::SI_SPILL_AV352_SAVE:
986 case AMDGPU::SI_SPILL_AV352_RESTORE:
987 return 11;
988 case AMDGPU::SI_SPILL_S320_SAVE:
989 case AMDGPU::SI_SPILL_S320_RESTORE:
990 case AMDGPU::SI_SPILL_V320_SAVE:
991 case AMDGPU::SI_SPILL_V320_RESTORE:
992 case AMDGPU::SI_SPILL_A320_SAVE:
993 case AMDGPU::SI_SPILL_A320_RESTORE:
994 case AMDGPU::SI_SPILL_AV320_SAVE:
995 case AMDGPU::SI_SPILL_AV320_RESTORE:
996 return 10;
997 case AMDGPU::SI_SPILL_S288_SAVE:
998 case AMDGPU::SI_SPILL_S288_RESTORE:
999 case AMDGPU::SI_SPILL_V288_SAVE:
1000 case AMDGPU::SI_SPILL_V288_RESTORE:
1001 case AMDGPU::SI_SPILL_A288_SAVE:
1002 case AMDGPU::SI_SPILL_A288_RESTORE:
1003 case AMDGPU::SI_SPILL_AV288_SAVE:
1004 case AMDGPU::SI_SPILL_AV288_RESTORE:
1005 return 9;
1006 case AMDGPU::SI_SPILL_S256_SAVE:
1007 case AMDGPU::SI_SPILL_S256_RESTORE:
1008 case AMDGPU::SI_SPILL_V256_SAVE:
1009 case AMDGPU::SI_SPILL_V256_RESTORE:
1010 case AMDGPU::SI_SPILL_A256_SAVE:
1011 case AMDGPU::SI_SPILL_A256_RESTORE:
1012 case AMDGPU::SI_SPILL_AV256_SAVE:
1013 case AMDGPU::SI_SPILL_AV256_RESTORE:
1014 return 8;
1015 case AMDGPU::SI_SPILL_S224_SAVE:
1016 case AMDGPU::SI_SPILL_S224_RESTORE:
1017 case AMDGPU::SI_SPILL_V224_SAVE:
1018 case AMDGPU::SI_SPILL_V224_RESTORE:
1019 case AMDGPU::SI_SPILL_A224_SAVE:
1020 case AMDGPU::SI_SPILL_A224_RESTORE:
1021 case AMDGPU::SI_SPILL_AV224_SAVE:
1022 case AMDGPU::SI_SPILL_AV224_RESTORE:
1023 return 7;
1024 case AMDGPU::SI_SPILL_S192_SAVE:
1025 case AMDGPU::SI_SPILL_S192_RESTORE:
1026 case AMDGPU::SI_SPILL_V192_SAVE:
1027 case AMDGPU::SI_SPILL_V192_RESTORE:
1028 case AMDGPU::SI_SPILL_A192_SAVE:
1029 case AMDGPU::SI_SPILL_A192_RESTORE:
1030 case AMDGPU::SI_SPILL_AV192_SAVE:
1031 case AMDGPU::SI_SPILL_AV192_RESTORE:
1032 return 6;
1033 case AMDGPU::SI_SPILL_S160_SAVE:
1034 case AMDGPU::SI_SPILL_S160_RESTORE:
1035 case AMDGPU::SI_SPILL_V160_SAVE:
1036 case AMDGPU::SI_SPILL_V160_RESTORE:
1037 case AMDGPU::SI_SPILL_A160_SAVE:
1038 case AMDGPU::SI_SPILL_A160_RESTORE:
1039 case AMDGPU::SI_SPILL_AV160_SAVE:
1040 case AMDGPU::SI_SPILL_AV160_RESTORE:
1041 return 5;
1042 case AMDGPU::SI_SPILL_S128_SAVE:
1043 case AMDGPU::SI_SPILL_S128_RESTORE:
1044 case AMDGPU::SI_SPILL_V128_SAVE:
1045 case AMDGPU::SI_SPILL_V128_RESTORE:
1046 case AMDGPU::SI_SPILL_A128_SAVE:
1047 case AMDGPU::SI_SPILL_A128_RESTORE:
1048 case AMDGPU::SI_SPILL_AV128_SAVE:
1049 case AMDGPU::SI_SPILL_AV128_RESTORE:
1050 return 4;
1051 case AMDGPU::SI_SPILL_S96_SAVE:
1052 case AMDGPU::SI_SPILL_S96_RESTORE:
1053 case AMDGPU::SI_SPILL_V96_SAVE:
1054 case AMDGPU::SI_SPILL_V96_RESTORE:
1055 case AMDGPU::SI_SPILL_A96_SAVE:
1056 case AMDGPU::SI_SPILL_A96_RESTORE:
1057 case AMDGPU::SI_SPILL_AV96_SAVE:
1058 case AMDGPU::SI_SPILL_AV96_RESTORE:
1059 return 3;
1060 case AMDGPU::SI_SPILL_S64_SAVE:
1061 case AMDGPU::SI_SPILL_S64_RESTORE:
1062 case AMDGPU::SI_SPILL_V64_SAVE:
1063 case AMDGPU::SI_SPILL_V64_RESTORE:
1064 case AMDGPU::SI_SPILL_A64_SAVE:
1065 case AMDGPU::SI_SPILL_A64_RESTORE:
1066 case AMDGPU::SI_SPILL_AV64_SAVE:
1067 case AMDGPU::SI_SPILL_AV64_RESTORE:
1068 return 2;
1069 case AMDGPU::SI_SPILL_S32_SAVE:
1070 case AMDGPU::SI_SPILL_S32_RESTORE:
1071 case AMDGPU::SI_SPILL_V32_SAVE:
1072 case AMDGPU::SI_SPILL_V32_RESTORE:
1073 case AMDGPU::SI_SPILL_A32_SAVE:
1074 case AMDGPU::SI_SPILL_A32_RESTORE:
1075 case AMDGPU::SI_SPILL_AV32_SAVE:
1076 case AMDGPU::SI_SPILL_AV32_RESTORE:
1077 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1078 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1079 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1080 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1081 return 1;
1082 default: llvm_unreachable("Invalid spill opcode");
1083 }
1084}
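// The returned value is simply the spilled tuple's width in DWORDs, e.g. 8 for
// the 256-bit spill pseudos above.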
1085
1086static int getOffsetMUBUFStore(unsigned Opc) {
1087 switch (Opc) {
1088 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1089 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1090 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1091 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1092 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1093 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1094 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1095 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1096 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1097 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1098 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1099 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1100 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1101 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1102 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1103 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1104 default:
1105 return -1;
1106 }
1107}
1108
1109static int getOffsetMUBUFLoad(unsigned Opc) {
1110 switch (Opc) {
1111 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1112 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1113 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1114 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1115 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1116 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1117 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1118 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1119 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1120 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1121 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1122 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1123 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1124 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1125 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1126 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1127 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1128 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1129 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1130 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1131 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1132 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1133 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1134 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1135 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1136 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1137 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1138 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1139 default:
1140 return -1;
1141 }
1142}
1143
1144static int getOffenMUBUFStore(unsigned Opc) {
1145 switch (Opc) {
1146 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1147 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1148 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1149 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1150 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1151 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1152 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1153 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1154 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1155 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1156 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1157 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1158 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1159 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1160 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1161 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1162 default:
1163 return -1;
1164 }
1165}
1166
1167static int getOffenMUBUFLoad(unsigned Opc) {
1168 switch (Opc) {
1169 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1170 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1171 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1172 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1173 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1174 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1175 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1176 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1177 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1178 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1179 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1180 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1181 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1182 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1183 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1184 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1185 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1186 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1187 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1188 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1189 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1190 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1191 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1192 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1193 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1194 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1195 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1196 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1197 default:
1198 return -1;
1199 }
1200}
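// These four helpers translate between the immediate-offset (OFFSET) and
// VGPR-addressed (OFFEN) forms of the MUBUF spill opcodes, returning -1 when
// no counterpart exists.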
1201
1205 int Index, unsigned Lane,
1206 unsigned ValueReg, bool IsKill) {
1209 const SIInstrInfo *TII = ST.getInstrInfo();
1210
1211 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
1212
1213 if (Reg == AMDGPU::NoRegister)
1214 return MachineInstrBuilder();
1215
1216 bool IsStore = MI->mayStore();
1218 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1219
1220 unsigned Dst = IsStore ? Reg : ValueReg;
1221 unsigned Src = IsStore ? ValueReg : Reg;
1222 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1223 DebugLoc DL = MI->getDebugLoc();
1224 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1225 // The spiller during regalloc may restore a spilled register to its superclass.
1226 // That can result in AGPR spills being restored to VGPRs or the other way
1227 // around, leaving the source and destination with identical register classes
1228 // at this point. A plain copy is all that is needed in such cases.
1229 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
1230 .addReg(Src, getKillRegState(IsKill));
1232 return CopyMIB;
1233 }
1234 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1235 : AMDGPU::V_ACCVGPR_READ_B32_e64;
1236
1237 auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
1238 .addReg(Src, getKillRegState(IsKill));
1240 return MIB;
1241}
1242
1243// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
1244// need to handle the case where an SGPR may need to be spilled while spilling.
1246 MachineFrameInfo &MFI,
1248 int Index,
1249 int64_t Offset) {
1250 const SIInstrInfo *TII = ST.getInstrInfo();
1251 MachineBasicBlock *MBB = MI->getParent();
1252 const DebugLoc &DL = MI->getDebugLoc();
1253 bool IsStore = MI->mayStore();
1254
1255 unsigned Opc = MI->getOpcode();
1256 int LoadStoreOp = IsStore ?
1258 if (LoadStoreOp == -1)
1259 return false;
1260
1261 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
1262 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
1263 return true;
1264
1265 MachineInstrBuilder NewMI =
1266 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
1267 .add(*Reg)
1268 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1269 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1270 .addImm(Offset)
1271 .addImm(0) // cpol
1272 .addImm(0) // swz
1273 .cloneMemRefs(*MI);
1274
1275 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
1276 AMDGPU::OpName::vdata_in);
1277 if (VDataIn)
1278 NewMI.add(*VDataIn);
1279 return true;
1280}
1281
1283 unsigned LoadStoreOp,
1284 unsigned EltSize) {
1285 bool IsStore = TII->get(LoadStoreOp).mayStore();
1286 bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr);
1287 bool UseST =
1288 !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr);
1289
1290 switch (EltSize) {
1291 case 4:
1292 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1293 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1294 break;
1295 case 8:
1296 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1297 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1298 break;
1299 case 12:
1300 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1301 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1302 break;
1303 case 16:
1304 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1305 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1306 break;
1307 default:
1308 llvm_unreachable("Unexpected spill load/store size!");
1309 }
1310
1311 if (HasVAddr)
1312 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1313 else if (UseST)
1314 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1315
1316 return LoadStoreOp;
1317}
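// For example, a 16-byte store with neither vaddr nor saddr starts as
// SCRATCH_STORE_DWORDX4_SADDR and is then rewritten to its ST form.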
1318
1321 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1322 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1323 RegScavenger *RS, LiveRegUnits *LiveUnits) const {
1324 assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
1325
1327 const SIInstrInfo *TII = ST.getInstrInfo();
1328 const MachineFrameInfo &MFI = MF->getFrameInfo();
1329 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1330
1331 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1332 bool IsStore = Desc->mayStore();
1333 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1334
1335 bool CanClobberSCC = false;
1336 bool Scavenged = false;
1337 MCRegister SOffset = ScratchOffsetReg;
1338
1339 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1340 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1341 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
1342 const unsigned RegWidth = AMDGPU::getRegBitWidth(*RC) / 8;
1343
1344 // Always use 4 byte operations for AGPRs because we need to scavenge
1345 // a temporary VGPR.
1346 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1347 unsigned NumSubRegs = RegWidth / EltSize;
1348 unsigned Size = NumSubRegs * EltSize;
1349 unsigned RemSize = RegWidth - Size;
1350 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1351 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1352 int64_t MaterializedOffset = Offset;
1353
1354 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1355 int64_t ScratchOffsetRegDelta = 0;
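  // For example, a flat-scratch spill of a 160-bit (20-byte) VGPR tuple uses
  // EltSize = 16, so NumSubRegs = 1 with RemSize = 4 and NumRemSubRegs = 1:
  // one dwordx4 access followed by one dword access for the remainder.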
1356
1357 if (IsFlat && EltSize > 4) {
1358 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1359 Desc = &TII->get(LoadStoreOp);
1360 }
1361
1362 Align Alignment = MFI.getObjectAlign(Index);
1363 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1364
1365 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1366 "unexpected VGPR spill offset");
1367
1368 // Track a VGPR to use for a constant offset we need to materialize.
1369 Register TmpOffsetVGPR;
1370
1371 // Track a VGPR to use as an intermediate value.
1372 Register TmpIntermediateVGPR;
1373 bool UseVGPROffset = false;
1374
1375 // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
1376 // combination.
1377 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
1378 int64_t VOffset) {
1379 // We are using a VGPR offset
1380 if (IsFlat && SGPRBase) {
1381 // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free
1382 // SGPR, so perform the add as vector.
1383 // We don't need a base SGPR in the kernel.
1384
1385 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1386 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
1387 .addReg(SGPRBase)
1388 .addImm(VOffset)
1389 .addImm(0); // clamp
1390 } else {
1391 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1392 .addReg(SGPRBase);
1393 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
1394 .addImm(VOffset)
1395 .addReg(TmpOffsetVGPR);
1396 }
1397 } else {
1398 assert(TmpOffsetVGPR);
1399 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1400 .addImm(VOffset);
1401 }
1402 };
1403
1404 bool IsOffsetLegal =
1405 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1408 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1409 SOffset = MCRegister();
1410
1411 // We don't have access to the register scavenger if this function is called
1412 // during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
1413 // TODO: Clobbering SCC is not necessary for scratch instructions in the
1414 // entry.
1415 if (RS) {
1416 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
1417
1418 // Piggy back on the liveness scan we just did to see if SCC is dead.
1419 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1420 } else if (LiveUnits) {
1421 CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
1422 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1423 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1424 SOffset = Reg;
1425 break;
1426 }
1427 }
1428 }
1429
1430 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1431 SOffset = Register();
1432
1433 if (!SOffset) {
1434 UseVGPROffset = true;
1435
1436 if (RS) {
1437 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1438 } else {
1439 assert(LiveUnits);
1440 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1441 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1442 TmpOffsetVGPR = Reg;
1443 break;
1444 }
1445 }
1446 }
1447
1448 assert(TmpOffsetVGPR);
1449 } else if (!SOffset && CanClobberSCC) {
1450 // There are no free SGPRs, and we are in the process of spilling VGPRs too.
1451 // Since we need a VGPR in order to spill SGPRs (this is true
1452 // on SI/CI and on VI it is true until we implement spilling using scalar
1453 // stores), we have no way to free up an SGPR. Our solution here is to
1454 // add the offset directly to the ScratchOffset or StackPtrOffset
1455 // register, and then subtract the offset after the spill to return the
1456 // register to its original value.
1457
1458 // TODO: If we don't have to do an emergency stack slot spill, converting
1459 // to use the VGPR offset is fewer instructions.
1460 if (!ScratchOffsetReg)
1461 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1462 SOffset = ScratchOffsetReg;
1463 ScratchOffsetRegDelta = Offset;
1464 } else {
1465 Scavenged = true;
1466 }
1467
1468 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1469 // we can simplify the adjustment of Offset here to just scale with
1470 // WavefrontSize.
1471 if (!IsFlat && !UseVGPROffset)
1472 Offset *= ST.getWavefrontSize();
1473
1474 if (!UseVGPROffset && !SOffset)
1475 report_fatal_error("could not scavenge SGPR to spill in entry function");
1476
1477 if (UseVGPROffset) {
1478 // We are using a VGPR offset
1479 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1480 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1481 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1482 } else {
1483 assert(Offset != 0);
1484 auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1485 .addReg(ScratchOffsetReg)
1486 .addImm(Offset);
1487 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1488 }
1489
1490 Offset = 0;
1491 }
1492
1493 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1494 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1495 && "Unexpected vaddr for flat scratch with a FI operand");
1496
1497 if (UseVGPROffset) {
1498 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1499 } else {
1501 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1502 }
1503
1504 Desc = &TII->get(LoadStoreOp);
1505 }
1506
1507 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1508 ++i, RegOffset += EltSize) {
1509 if (i == NumSubRegs) {
1510 EltSize = RemSize;
1511 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1512 }
1513 Desc = &TII->get(LoadStoreOp);
1514
1515 if (!IsFlat && UseVGPROffset) {
1516 int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
1517 : getOffenMUBUFLoad(LoadStoreOp);
1518 Desc = &TII->get(NewLoadStoreOp);
1519 }
1520
1521 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1522 // If we are spilling an AGPR beyond the range of the memory instruction
1523 // offset and need to use a VGPR offset, we ideally have at least 2
1524 // scratch VGPRs. If we don't have a second free VGPR without spilling,
1525 // recycle the VGPR used for the offset which requires resetting after
1526 // each subregister.
1527
1528 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1529 }
1530
1531 unsigned NumRegs = EltSize / 4;
1532 Register SubReg = e == 1
1533 ? ValueReg
1534 : Register(getSubReg(ValueReg,
1535 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1536
1537 unsigned SOffsetRegState = 0;
1538 unsigned SrcDstRegState = getDefRegState(!IsStore);
1539 const bool IsLastSubReg = i + 1 == e;
1540 const bool IsFirstSubReg = i == 0;
1541 if (IsLastSubReg) {
1542 SOffsetRegState |= getKillRegState(Scavenged);
1543 // The last implicit use carries the "Kill" flag.
1544 SrcDstRegState |= getKillRegState(IsKill);
1545 }
1546
1547 // Make sure the whole register is defined if there are undef components by
1548 // adding an implicit def of the super-reg on the first instruction.
1549 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1550 bool NeedSuperRegImpOperand = e > 1;
1551
1552 // Remaining element size to spill into memory after some parts of it
1553 // spilled into either AGPRs or VGPRs.
1554 unsigned RemEltSize = EltSize;
1555
1556 // AGPRs used to spill VGPRs (and vice versa) are allocated in reverse order,
1557 // starting from the last lane. If a register cannot be completely spilled
1558 // into another register, this ensures its alignment does not change. For
1559 // targets with a VGPR alignment requirement this is important when flat
1560 // scratch is used, as we might otherwise get a scratch_load or scratch_store
1561 // of an unaligned register.
1562 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1563 LaneE = RegOffset / 4;
1564 Lane >= LaneE; --Lane) {
1565 bool IsSubReg = e > 1 || EltSize > 4;
1566 Register Sub = IsSubReg
1567 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1568 : ValueReg;
1569 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1570 if (!MIB.getInstr())
1571 break;
1572 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1573 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1574 NeedSuperRegDef = false;
1575 }
1576 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1577 NeedSuperRegImpOperand = true;
1578 unsigned State = SrcDstRegState;
1579 if (!IsLastSubReg || (Lane != LaneE))
1580 State &= ~RegState::Kill;
1581 if (!IsFirstSubReg || (Lane != LaneS))
1582 State &= ~RegState::Define;
1583 MIB.addReg(ValueReg, RegState::Implicit | State);
1584 }
1585 RemEltSize -= 4;
1586 }
1587
1588 if (!RemEltSize) // Fully spilled into AGPRs.
1589 continue;
1590
1591 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1592 assert(IsFlat && EltSize > 4);
1593
1594 unsigned NumRegs = RemEltSize / 4;
1595 SubReg = Register(getSubReg(ValueReg,
1596 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1597 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1598 Desc = &TII->get(Opc);
1599 }
1600
1601 unsigned FinalReg = SubReg;
1602
1603 if (IsAGPR) {
1604 assert(EltSize == 4);
1605
1606 if (!TmpIntermediateVGPR) {
1607 TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
1608 assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
1609 }
1610 if (IsStore) {
1611 auto AccRead = BuildMI(MBB, MI, DL,
1612 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1613 TmpIntermediateVGPR)
1614 .addReg(SubReg, getKillRegState(IsKill));
1615 if (NeedSuperRegDef)
1616 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1618 }
1619 SubReg = TmpIntermediateVGPR;
1620 } else if (UseVGPROffset) {
1621 // FIXME: change to scavengeRegisterBackwards()
1622 if (!TmpOffsetVGPR) {
1623 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
1624 MI, false, 0);
1625 RS->setRegUsed(TmpOffsetVGPR);
1626 }
1627 }
1628
1629 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
1630 MachineMemOperand *NewMMO =
1631 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1632 commonAlignment(Alignment, RegOffset));
1633
1634 auto MIB =
1635 BuildMI(MBB, MI, DL, *Desc)
1636 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1637
1638 if (UseVGPROffset) {
1639 // For an AGPR spill, we reuse the same temp VGPR for the offset and the
1640 // intermediate accvgpr_write.
1641 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
1642 }
1643
1644 if (!IsFlat)
1645 MIB.addReg(FuncInfo->getScratchRSrcReg());
1646
1647 if (SOffset == AMDGPU::NoRegister) {
1648 if (!IsFlat) {
1649 if (UseVGPROffset && ScratchOffsetReg) {
1650 MIB.addReg(ScratchOffsetReg);
1651 } else {
1652 assert(FuncInfo->isEntryFunction() || FuncInfo->isChainFunction());
1653 MIB.addImm(0);
1654 }
1655 }
1656 } else {
1657 MIB.addReg(SOffset, SOffsetRegState);
1658 }
1659 MIB.addImm(Offset + RegOffset)
1660 .addImm(0); // cpol
1661 if (!IsFlat)
1662 MIB.addImm(0); // swz
1663 MIB.addMemOperand(NewMMO);
1664
1665 if (!IsAGPR && NeedSuperRegDef)
1666 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1667
1668 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1669 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1670 FinalReg)
1671 .addReg(TmpIntermediateVGPR, RegState::Kill);
1673 }
1674
1675 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1676 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1677
1678 // The epilog restore of a wwm-scratch register can cause undesired
1679 // optimization during machine-cp post PrologEpilogInserter if the same
1680 // register was assigned for return value ABI lowering with a COPY
1681 // instruction. As shown below, with the epilog reload the earlier COPY
1682 // appears to be dead during machine-cp.
1683 // ...
1684 // v0 in WWM operation, needs the WWM spill at prolog/epilog.
1685 // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0
1686 // ...
1687 // Epilog block:
1688 // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0
1689 // ...
1690 // WWM spill restore to preserve the inactive lanes of v0.
1691 // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1
1692 // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0
1693 // $exec = S_MOV_B64 killed $sgpr4_sgpr5
1694 // ...
1695 // SI_RETURN implicit $vgpr0
1696 // ...
1697 // To fix it, mark the same reg as a tied op for such restore instructions
1698 // so that it marks a usage for the preceding COPY.
1699 if (!IsStore && MI != MBB.end() && MI->isReturn() &&
1700 MI->readsRegister(SubReg, this)) {
1701 MIB.addReg(SubReg, RegState::Implicit);
1702 MIB->tieOperands(0, MIB->getNumOperands() - 1);
1703 }
1704 }
1705
1706 if (ScratchOffsetRegDelta != 0) {
1707 // Subtract the offset we added to the ScratchOffset register.
1708 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1709 .addReg(SOffset)
1710 .addImm(-ScratchOffsetRegDelta);
1711 }
1712}
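// Note (sketch): a non-zero ScratchOffsetRegDelta indicates that part of a
// too-large immediate offset was folded into SOffset earlier in this function;
// the trailing S_ADD_I32 with the negated delta restores SOffset so callers
// observe an unmodified scratch offset register.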
1713
 1714void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
 1715                                             int Offset, bool IsLoad,
 1716                                             bool IsKill) const {
1717 // Load/store VGPR
1718 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1720
1721 Register FrameReg =
1722 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1723 ? getBaseRegister()
1724 : getFrameRegister(SB.MF);
1725
 1726 Align Alignment = FrameInfo.getObjectAlign(Index);
 1727 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
 1728 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
 1729     PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
 1730     SB.EltSize, Alignment);
1731
1732 if (IsLoad) {
1733 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1734 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1735 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
1736 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1737 } else {
1738 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1739 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1740 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
1741 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1742 // This only ever adds one VGPR spill
1743 SB.MFI.addToSpilledVGPRs(1);
1744 }
1745}
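// Usage sketch: each call transfers one EltSize-byte piece (4 bytes by default)
// of SB.TmpVGPR to or from the stack slot Index at byte offset
// Offset * SB.EltSize, e.g. Offset == 2 addresses byte 8 of the slot. The
// opcode mirrors the subtarget: SCRATCH_*_DWORD_SADDR when flat scratch is
// enabled, BUFFER_*_DWORD_OFFSET otherwise.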
1746
 1747bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
 1748                               RegScavenger *RS, SlotIndexes *Indexes,
1749 LiveIntervals *LIS, bool OnlyToVGPR,
1750 bool SpillToPhysVGPRLane) const {
1751 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1752
1753 ArrayRef<SpilledReg> VGPRSpills =
 1754 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
 1755                     : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
 1756 bool SpillToVGPR = !VGPRSpills.empty();
1757 if (OnlyToVGPR && !SpillToVGPR)
1758 return false;
1759
1760 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1761 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1762
1763 if (SpillToVGPR) {
1764
1765 assert(SB.NumSubRegs == VGPRSpills.size() &&
1766 "Num of VGPR lanes should be equal to num of SGPRs spilled");
1767
 1768 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
 1769 Register SubReg =
 1770     SB.NumSubRegs == 1
1771 ? SB.SuperReg
1772 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1773 SpilledReg Spill = VGPRSpills[i];
1774
1775 bool IsFirstSubreg = i == 0;
1776 bool IsLastSubreg = i == SB.NumSubRegs - 1;
1777 bool UseKill = SB.IsKill && IsLastSubreg;
1778
1779
1780 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1781 // spill to this specific vgpr in the first basic block.
1782 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1783 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
1784 .addReg(SubReg, getKillRegState(UseKill))
1785 .addImm(Spill.Lane)
1786 .addReg(Spill.VGPR);
1787 if (Indexes) {
1788 if (IsFirstSubreg)
1789 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1790 else
1791 Indexes->insertMachineInstrInMaps(*MIB);
1792 }
1793
1794 if (IsFirstSubreg && SB.NumSubRegs > 1) {
1795 // We may be spilling a super-register which is only partially defined,
1796 // and need to ensure later spills think the value is defined.
1797 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1798 }
1799
1800 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1801 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1802
1803 // FIXME: Since this spills to another register instead of an actual
1804 // frame index, we should delete the frame index when all references to
1805 // it are fixed.
1806 }
1807 } else {
1808 SB.prepare();
1809
1810 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1811 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1812
1813 // Per VGPR helper data
1814 auto PVD = SB.getPerVGPRData();
1815
1816 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1817 unsigned TmpVGPRFlags = RegState::Undef;
1818
1819 // Write sub registers into the VGPR
1820 for (unsigned i = Offset * PVD.PerVGPR,
1821 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
 1822 i < e; ++i) {
 1823 Register SubReg =
 1824     SB.NumSubRegs == 1
1825 ? SB.SuperReg
1826 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1827
1828 MachineInstrBuilder WriteLane =
1829 BuildMI(*SB.MBB, MI, SB.DL,
1830 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
1831 .addReg(SubReg, SubKillState)
1832 .addImm(i % PVD.PerVGPR)
1833 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1834 TmpVGPRFlags = 0;
1835
1836 if (Indexes) {
1837 if (i == 0)
1838 Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
1839 else
1840 Indexes->insertMachineInstrInMaps(*WriteLane);
1841 }
1842
1843 // There could be undef components of a spilled super register.
1844 // TODO: Can we detect this and skip the spill?
1845 if (SB.NumSubRegs > 1) {
1846 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1847 unsigned SuperKillState = 0;
1848 if (i + 1 == SB.NumSubRegs)
1849 SuperKillState |= getKillRegState(SB.IsKill);
1850 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1851 }
1852 }
1853
1854 // Write out VGPR
1855 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1856 }
1857
1858 SB.restore();
1859 }
1860
 1861 MI->eraseFromParent();
 1862 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
 1863
 1864 if (LIS)
 1865   LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
 1866
1867 return true;
1868}
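// Worked example (sketch): in wave32, spilling a 16-dword SGPR tuple to memory
// yields PVD.PerVGPR == 32 and PVD.NumVGPRs == 1, so sub-register i is written
// to lane i % 32 of SB.TmpVGPR and the VGPR is stored once per group via
// readWriteTmpVGPR(). On the SpillToVGPR path no scratch memory is touched at
// all; each SGPR dword lands in its reserved VGPR lane.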
1869
 1870bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
 1871                                 RegScavenger *RS, SlotIndexes *Indexes,
1872 LiveIntervals *LIS, bool OnlyToVGPR,
1873 bool SpillToPhysVGPRLane) const {
1874 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1875
1876 ArrayRef<SpilledReg> VGPRSpills =
 1877 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
 1878                     : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
 1879 bool SpillToVGPR = !VGPRSpills.empty();
1880 if (OnlyToVGPR && !SpillToVGPR)
1881 return false;
1882
1883 if (SpillToVGPR) {
 1884 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
 1885 Register SubReg =
 1886     SB.NumSubRegs == 1
1887 ? SB.SuperReg
1888 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1889
1890 SpilledReg Spill = VGPRSpills[i];
1891 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1892 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1893 .addReg(Spill.VGPR)
1894 .addImm(Spill.Lane);
 1895 if (SB.NumSubRegs > 1 && i == 0)
 1896   MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
 1897 if (Indexes) {
1898 if (i == e - 1)
1899 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1900 else
1901 Indexes->insertMachineInstrInMaps(*MIB);
1902 }
1903 }
1904 } else {
1905 SB.prepare();
1906
1907 // Per VGPR helper data
1908 auto PVD = SB.getPerVGPRData();
1909
1910 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1911 // Load in VGPR data
1912 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1913
1914 // Unpack lanes
1915 for (unsigned i = Offset * PVD.PerVGPR,
1916 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
 1917 i < e; ++i) {
 1918 Register SubReg =
 1919     SB.NumSubRegs == 1
1920 ? SB.SuperReg
1921 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1922
1923 bool LastSubReg = (i + 1 == e);
1924 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1925 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1926 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1927 .addImm(i);
 1928 if (SB.NumSubRegs > 1 && i == 0)
 1929   MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
 1930 if (Indexes) {
1931 if (i == e - 1)
1932 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1933 else
1934 Indexes->insertMachineInstrInMaps(*MIB);
1935 }
1936 }
1937 }
1938
1939 SB.restore();
1940 }
1941
1942 MI->eraseFromParent();
1943
 1944 if (LIS)
 1945   LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
 1946
1947 return true;
1948}
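// Note: the restore mirrors the spill above. For the memory path, each group's
// TmpVGPR is reloaded first (readWriteTmpVGPR with IsLoad == true) and its
// lanes are then unpacked back into the sub-registers; for the VGPR-lane path
// only SI_RESTORE_S32_FROM_VGPR reads are emitted.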
1949
 1950bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
 1951                                        MachineBasicBlock &RestoreMBB,
1952 Register SGPR, RegScavenger *RS) const {
1953 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
1954 RS);
1955 SB.prepare();
1956 // Generate the spill of SGPR to SB.TmpVGPR.
1957 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1958 auto PVD = SB.getPerVGPRData();
1959 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1960 unsigned TmpVGPRFlags = RegState::Undef;
1961 // Write sub registers into the VGPR
1962 for (unsigned i = Offset * PVD.PerVGPR,
1963 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
 1964 i < e; ++i) {
 1965 Register SubReg =
 1966     SB.NumSubRegs == 1
1967 ? SB.SuperReg
1968 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1969
1970 MachineInstrBuilder WriteLane =
1971 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1972 SB.TmpVGPR)
1973 .addReg(SubReg, SubKillState)
1974 .addImm(i % PVD.PerVGPR)
1975 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1976 TmpVGPRFlags = 0;
1977 // There could be undef components of a spilled super register.
1978 // TODO: Can we detect this and skip the spill?
1979 if (SB.NumSubRegs > 1) {
1980 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1981 unsigned SuperKillState = 0;
1982 if (i + 1 == SB.NumSubRegs)
1983 SuperKillState |= getKillRegState(SB.IsKill);
1984 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1985 }
1986 }
1987 // Don't need to write VGPR out.
1988 }
1989
1990 // Restore clobbered registers in the specified restore block.
1991 MI = RestoreMBB.end();
1992 SB.setMI(&RestoreMBB, MI);
1993 // Generate the restore of SGPR from SB.TmpVGPR.
1994 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1995 // Don't need to load VGPR in.
1996 // Unpack lanes
1997 for (unsigned i = Offset * PVD.PerVGPR,
1998 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
 1999 i < e; ++i) {
 2000 Register SubReg =
 2001     SB.NumSubRegs == 1
2002 ? SB.SuperReg
2003 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
2004 bool LastSubReg = (i + 1 == e);
2005 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
2006 SubReg)
2007 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
2008 .addImm(i);
 2009 if (SB.NumSubRegs > 1 && i == 0)
 2010   MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
 2011 }
2012 }
2013 SB.restore();
2014
2016 return false;
2017}
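// Note: this helper intentionally skips the scratch store/load ("Don't need to
// write VGPR out" above); the SGPR value only has to survive in TmpVGPR lanes
// between the spill point and the restore emitted at the end of RestoreMBB.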
2018
2019/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
2020/// a VGPR and the stack slot can be safely eliminated when all other users are
2021/// handled.
 2022bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
 2023     MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
 2024     SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
2025 switch (MI->getOpcode()) {
2026 case AMDGPU::SI_SPILL_S1024_SAVE:
2027 case AMDGPU::SI_SPILL_S512_SAVE:
2028 case AMDGPU::SI_SPILL_S384_SAVE:
2029 case AMDGPU::SI_SPILL_S352_SAVE:
2030 case AMDGPU::SI_SPILL_S320_SAVE:
2031 case AMDGPU::SI_SPILL_S288_SAVE:
2032 case AMDGPU::SI_SPILL_S256_SAVE:
2033 case AMDGPU::SI_SPILL_S224_SAVE:
2034 case AMDGPU::SI_SPILL_S192_SAVE:
2035 case AMDGPU::SI_SPILL_S160_SAVE:
2036 case AMDGPU::SI_SPILL_S128_SAVE:
2037 case AMDGPU::SI_SPILL_S96_SAVE:
2038 case AMDGPU::SI_SPILL_S64_SAVE:
2039 case AMDGPU::SI_SPILL_S32_SAVE:
2040 return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2041 case AMDGPU::SI_SPILL_S1024_RESTORE:
2042 case AMDGPU::SI_SPILL_S512_RESTORE:
2043 case AMDGPU::SI_SPILL_S384_RESTORE:
2044 case AMDGPU::SI_SPILL_S352_RESTORE:
2045 case AMDGPU::SI_SPILL_S320_RESTORE:
2046 case AMDGPU::SI_SPILL_S288_RESTORE:
2047 case AMDGPU::SI_SPILL_S256_RESTORE:
2048 case AMDGPU::SI_SPILL_S224_RESTORE:
2049 case AMDGPU::SI_SPILL_S192_RESTORE:
2050 case AMDGPU::SI_SPILL_S160_RESTORE:
2051 case AMDGPU::SI_SPILL_S128_RESTORE:
2052 case AMDGPU::SI_SPILL_S96_RESTORE:
2053 case AMDGPU::SI_SPILL_S64_RESTORE:
2054 case AMDGPU::SI_SPILL_S32_RESTORE:
2055 return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2056 default:
2057 llvm_unreachable("not an SGPR spill instruction");
2058 }
2059}
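// Note: this dispatcher passes OnlyToVGPR == true, so spillSGPR/restoreSGPR
// return false when no VGPR lanes were reserved for this frame index,
// signalling that the spill pseudo was not eliminated here.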
2060
 2061bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 2062                                         int SPAdj, unsigned FIOperandNum,
2063 RegScavenger *RS) const {
2064 MachineFunction *MF = MI->getParent()->getParent();
 2065 MachineBasicBlock *MBB = MI->getParent();
 2066 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 2067 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2068 const SIInstrInfo *TII = ST.getInstrInfo();
2069 DebugLoc DL = MI->getDebugLoc();
2070
2071 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2072
2073 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
2074 int Index = MI->getOperand(FIOperandNum).getIndex();
2075
2076 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
2077 ? getBaseRegister()
2078 : getFrameRegister(*MF);
2079
2080 switch (MI->getOpcode()) {
2081 // SGPR register spill
2082 case AMDGPU::SI_SPILL_S1024_SAVE:
2083 case AMDGPU::SI_SPILL_S512_SAVE:
2084 case AMDGPU::SI_SPILL_S384_SAVE:
2085 case AMDGPU::SI_SPILL_S352_SAVE:
2086 case AMDGPU::SI_SPILL_S320_SAVE:
2087 case AMDGPU::SI_SPILL_S288_SAVE:
2088 case AMDGPU::SI_SPILL_S256_SAVE:
2089 case AMDGPU::SI_SPILL_S224_SAVE:
2090 case AMDGPU::SI_SPILL_S192_SAVE:
2091 case AMDGPU::SI_SPILL_S160_SAVE:
2092 case AMDGPU::SI_SPILL_S128_SAVE:
2093 case AMDGPU::SI_SPILL_S96_SAVE:
2094 case AMDGPU::SI_SPILL_S64_SAVE:
2095 case AMDGPU::SI_SPILL_S32_SAVE: {
2096 return spillSGPR(MI, Index, RS);
2097 }
2098
2099 // SGPR register restore
2100 case AMDGPU::SI_SPILL_S1024_RESTORE:
2101 case AMDGPU::SI_SPILL_S512_RESTORE:
2102 case AMDGPU::SI_SPILL_S384_RESTORE:
2103 case AMDGPU::SI_SPILL_S352_RESTORE:
2104 case AMDGPU::SI_SPILL_S320_RESTORE:
2105 case AMDGPU::SI_SPILL_S288_RESTORE:
2106 case AMDGPU::SI_SPILL_S256_RESTORE:
2107 case AMDGPU::SI_SPILL_S224_RESTORE:
2108 case AMDGPU::SI_SPILL_S192_RESTORE:
2109 case AMDGPU::SI_SPILL_S160_RESTORE:
2110 case AMDGPU::SI_SPILL_S128_RESTORE:
2111 case AMDGPU::SI_SPILL_S96_RESTORE:
2112 case AMDGPU::SI_SPILL_S64_RESTORE:
2113 case AMDGPU::SI_SPILL_S32_RESTORE: {
2114 return restoreSGPR(MI, Index, RS);
2115 }
2116
2117 // VGPR register spill
2118 case AMDGPU::SI_SPILL_V1024_SAVE:
2119 case AMDGPU::SI_SPILL_V512_SAVE:
2120 case AMDGPU::SI_SPILL_V384_SAVE:
2121 case AMDGPU::SI_SPILL_V352_SAVE:
2122 case AMDGPU::SI_SPILL_V320_SAVE:
2123 case AMDGPU::SI_SPILL_V288_SAVE:
2124 case AMDGPU::SI_SPILL_V256_SAVE:
2125 case AMDGPU::SI_SPILL_V224_SAVE:
2126 case AMDGPU::SI_SPILL_V192_SAVE:
2127 case AMDGPU::SI_SPILL_V160_SAVE:
2128 case AMDGPU::SI_SPILL_V128_SAVE:
2129 case AMDGPU::SI_SPILL_V96_SAVE:
2130 case AMDGPU::SI_SPILL_V64_SAVE:
2131 case AMDGPU::SI_SPILL_V32_SAVE:
2132 case AMDGPU::SI_SPILL_A1024_SAVE:
2133 case AMDGPU::SI_SPILL_A512_SAVE:
2134 case AMDGPU::SI_SPILL_A384_SAVE:
2135 case AMDGPU::SI_SPILL_A352_SAVE:
2136 case AMDGPU::SI_SPILL_A320_SAVE:
2137 case AMDGPU::SI_SPILL_A288_SAVE:
2138 case AMDGPU::SI_SPILL_A256_SAVE:
2139 case AMDGPU::SI_SPILL_A224_SAVE:
2140 case AMDGPU::SI_SPILL_A192_SAVE:
2141 case AMDGPU::SI_SPILL_A160_SAVE:
2142 case AMDGPU::SI_SPILL_A128_SAVE:
2143 case AMDGPU::SI_SPILL_A96_SAVE:
2144 case AMDGPU::SI_SPILL_A64_SAVE:
2145 case AMDGPU::SI_SPILL_A32_SAVE:
2146 case AMDGPU::SI_SPILL_AV1024_SAVE:
2147 case AMDGPU::SI_SPILL_AV512_SAVE:
2148 case AMDGPU::SI_SPILL_AV384_SAVE:
2149 case AMDGPU::SI_SPILL_AV352_SAVE:
2150 case AMDGPU::SI_SPILL_AV320_SAVE:
2151 case AMDGPU::SI_SPILL_AV288_SAVE:
2152 case AMDGPU::SI_SPILL_AV256_SAVE:
2153 case AMDGPU::SI_SPILL_AV224_SAVE:
2154 case AMDGPU::SI_SPILL_AV192_SAVE:
2155 case AMDGPU::SI_SPILL_AV160_SAVE:
2156 case AMDGPU::SI_SPILL_AV128_SAVE:
2157 case AMDGPU::SI_SPILL_AV96_SAVE:
2158 case AMDGPU::SI_SPILL_AV64_SAVE:
2159 case AMDGPU::SI_SPILL_AV32_SAVE:
2160 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2161 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2162 const MachineOperand *VData = TII->getNamedOperand(*MI,
2163 AMDGPU::OpName::vdata);
2164 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2165 MFI->getStackPtrOffsetReg());
2166
2167 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2168 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2169 auto *MBB = MI->getParent();
2170 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2171 if (IsWWMRegSpill) {
2172 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2173 RS->isRegUsed(AMDGPU::SCC));
 2174 }
 2175 buildSpillLoadStore(
 2176     *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2177 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2178 *MI->memoperands_begin(), RS);
2179 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
2180 if (IsWWMRegSpill)
2181 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2182
2183 MI->eraseFromParent();
2184 return true;
2185 }
2186 case AMDGPU::SI_SPILL_V32_RESTORE:
2187 case AMDGPU::SI_SPILL_V64_RESTORE:
2188 case AMDGPU::SI_SPILL_V96_RESTORE:
2189 case AMDGPU::SI_SPILL_V128_RESTORE:
2190 case AMDGPU::SI_SPILL_V160_RESTORE:
2191 case AMDGPU::SI_SPILL_V192_RESTORE:
2192 case AMDGPU::SI_SPILL_V224_RESTORE:
2193 case AMDGPU::SI_SPILL_V256_RESTORE:
2194 case AMDGPU::SI_SPILL_V288_RESTORE:
2195 case AMDGPU::SI_SPILL_V320_RESTORE:
2196 case AMDGPU::SI_SPILL_V352_RESTORE:
2197 case AMDGPU::SI_SPILL_V384_RESTORE:
2198 case AMDGPU::SI_SPILL_V512_RESTORE:
2199 case AMDGPU::SI_SPILL_V1024_RESTORE:
2200 case AMDGPU::SI_SPILL_A32_RESTORE:
2201 case AMDGPU::SI_SPILL_A64_RESTORE:
2202 case AMDGPU::SI_SPILL_A96_RESTORE:
2203 case AMDGPU::SI_SPILL_A128_RESTORE:
2204 case AMDGPU::SI_SPILL_A160_RESTORE:
2205 case AMDGPU::SI_SPILL_A192_RESTORE:
2206 case AMDGPU::SI_SPILL_A224_RESTORE:
2207 case AMDGPU::SI_SPILL_A256_RESTORE:
2208 case AMDGPU::SI_SPILL_A288_RESTORE:
2209 case AMDGPU::SI_SPILL_A320_RESTORE:
2210 case AMDGPU::SI_SPILL_A352_RESTORE:
2211 case AMDGPU::SI_SPILL_A384_RESTORE:
2212 case AMDGPU::SI_SPILL_A512_RESTORE:
2213 case AMDGPU::SI_SPILL_A1024_RESTORE:
2214 case AMDGPU::SI_SPILL_AV32_RESTORE:
2215 case AMDGPU::SI_SPILL_AV64_RESTORE:
2216 case AMDGPU::SI_SPILL_AV96_RESTORE:
2217 case AMDGPU::SI_SPILL_AV128_RESTORE:
2218 case AMDGPU::SI_SPILL_AV160_RESTORE:
2219 case AMDGPU::SI_SPILL_AV192_RESTORE:
2220 case AMDGPU::SI_SPILL_AV224_RESTORE:
2221 case AMDGPU::SI_SPILL_AV256_RESTORE:
2222 case AMDGPU::SI_SPILL_AV288_RESTORE:
2223 case AMDGPU::SI_SPILL_AV320_RESTORE:
2224 case AMDGPU::SI_SPILL_AV352_RESTORE:
2225 case AMDGPU::SI_SPILL_AV384_RESTORE:
2226 case AMDGPU::SI_SPILL_AV512_RESTORE:
2227 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2228 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2229 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
2230 const MachineOperand *VData = TII->getNamedOperand(*MI,
2231 AMDGPU::OpName::vdata);
2232 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2233 MFI->getStackPtrOffsetReg());
2234
2235 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2236 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2237 auto *MBB = MI->getParent();
2238 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2239 if (IsWWMRegSpill) {
2240 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2241 RS->isRegUsed(AMDGPU::SCC));
 2242 }
 2243 buildSpillLoadStore(
 2244     *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2245 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2246 *MI->memoperands_begin(), RS);
2247
2248 if (IsWWMRegSpill)
2249 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2250
2251 MI->eraseFromParent();
2252 return true;
2253 }
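// For the WWM spill/restore opcodes handled in the two cases above, exec is
// switched to all-ones around buildSpillLoadStore (insertScratchExecCopy /
// restoreExec) so that inactive lanes are transferred as well; the
// RS->isRegUsed(AMDGPU::SCC) argument presumably lets the helper avoid
// clobbering a live SCC.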
2254
2255 default: {
2256 // Other access to frame index
2257 const DebugLoc &DL = MI->getDebugLoc();
2258
2259 int64_t Offset = FrameInfo.getObjectOffset(Index);
2260 if (ST.enableFlatScratch()) {
2261 if (TII->isFLATScratch(*MI)) {
2262 assert((int16_t)FIOperandNum ==
2263 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2264 AMDGPU::OpName::saddr));
2265
2266 // The offset is always swizzled, just replace it
2267 if (FrameReg)
2268 FIOp.ChangeToRegister(FrameReg, false);
2269
2270 if (!Offset)
2271 return false;
2272
2273 MachineOperand *OffsetOp =
2274 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2275 int64_t NewOffset = Offset + OffsetOp->getImm();
 2276 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
 2277                            SIInstrFlags::FlatScratch)) {
 2278 OffsetOp->setImm(NewOffset);
2279 if (FrameReg)
2280 return false;
2281 Offset = 0;
2282 }
2283
2284 if (!Offset) {
2285 unsigned Opc = MI->getOpcode();
2286 int NewOpc = -1;
2287 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
2289 } else if (ST.hasFlatScratchSTMode()) {
2290 // On GFX10 we have ST mode to use no registers for an address.
2291 // Otherwise we need to materialize 0 into an SGPR.
2293 }
2294
2295 if (NewOpc != -1) {
2296 // removeOperand doesn't fixup tied operand indexes as it goes, so
2297 // it asserts. Untie vdst_in for now and retie them afterwards.
2298 int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
2299 AMDGPU::OpName::vdst_in);
2300 bool TiedVDst = VDstIn != -1 &&
2301 MI->getOperand(VDstIn).isReg() &&
2302 MI->getOperand(VDstIn).isTied();
2303 if (TiedVDst)
2304 MI->untieRegOperand(VDstIn);
2305
2306 MI->removeOperand(
2307 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2308
2309 if (TiedVDst) {
2310 int NewVDst =
2311 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2312 int NewVDstIn =
2313 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2314 assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2315 MI->tieOperands(NewVDst, NewVDstIn);
2316 }
2317 MI->setDesc(TII->get(NewOpc));
2318 return false;
2319 }
2320 }
2321 }
2322
 2323 if (!FrameReg) {
 2324   FIOp.ChangeToImmediate(Offset);
 2325   if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2326 return false;
2327 }
2328
 2329 // We need to use a register here. Check whether we can use an SGPR or
 2330 // whether a VGPR is required.
2331 FIOp.ChangeToRegister(AMDGPU::M0, false);
2332 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2333
2334 if (!Offset && FrameReg && UseSGPR) {
2335 FIOp.setReg(FrameReg);
2336 return false;
2337 }
2338
2339 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
2340 : &AMDGPU::VGPR_32RegClass;
2341
2342 Register TmpReg =
2343 RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
2344 FIOp.setReg(TmpReg);
2345 FIOp.setIsKill();
2346
2347 if ((!FrameReg || !Offset) && TmpReg) {
2348 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2349 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
2350 if (FrameReg)
2351 MIB.addReg(FrameReg);
2352 else
2353 MIB.addImm(Offset);
2354
2355 return false;
2356 }
2357
2358 bool NeedSaveSCC =
2359 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2360
2361 Register TmpSReg =
2362 UseSGPR ? TmpReg
2363 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2364 MI, false, 0, !UseSGPR);
2365
2366 // TODO: for flat scratch another attempt can be made with a VGPR index
2367 // if no SGPRs can be scavenged.
2368 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2369 report_fatal_error("Cannot scavenge register in FI elimination!");
2370
2371 if (!TmpSReg) {
2372 // Use frame register and restore it after.
2373 TmpSReg = FrameReg;
2374 FIOp.setReg(FrameReg);
2375 FIOp.setIsKill(false);
2376 }
2377
2378 if (NeedSaveSCC) {
2379 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2380 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
2381 .addReg(FrameReg)
2382 .addImm(Offset);
2383 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
2384 .addReg(TmpSReg)
2385 .addImm(0);
2386 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
2387 .addImm(0)
2388 .addReg(TmpSReg);
2389 } else {
2390 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
2391 .addReg(FrameReg)
2392 .addImm(Offset);
2393 }
2394
2395 if (!UseSGPR)
2396 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2397 .addReg(TmpSReg, RegState::Kill);
2398
2399 if (TmpSReg == FrameReg) {
2400 // Undo frame register modification.
 2401 if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
 2402   MachineBasicBlock::iterator I =
 2403       BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
2404 TmpSReg)
2405 .addReg(FrameReg)
2406 .addImm(-Offset);
2407 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
2408 .addReg(TmpSReg)
2409 .addImm(0);
2410 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
2411 TmpSReg)
2412 .addImm(0)
2413 .addReg(TmpSReg);
2414 } else {
2415 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
2416 FrameReg)
2417 .addReg(FrameReg)
2418 .addImm(-Offset);
2419 }
2420 }
2421
2422 return false;
2423 }
2424
2425 bool IsMUBUF = TII->isMUBUF(*MI);
2426
2427 if (!IsMUBUF && !MFI->isEntryFunction() && !MFI->isChainFunction()) {
2428 // Convert to a swizzled stack address by scaling by the wave size.
2429 // In an entry function/kernel the offset is already swizzled.
2430 bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
2431 bool LiveSCC =
2432 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2433 const TargetRegisterClass *RC = IsSALU && !LiveSCC
2434 ? &AMDGPU::SReg_32RegClass
2435 : &AMDGPU::VGPR_32RegClass;
2436 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2437 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2438 Register ResultReg =
2439 IsCopy ? MI->getOperand(0).getReg()
2440 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
2441
2442 int64_t Offset = FrameInfo.getObjectOffset(Index);
2443 if (Offset == 0) {
2444 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2445 : AMDGPU::V_LSHRREV_B32_e64;
2446 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
2447 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
2448 // For V_LSHRREV, the operands are reversed (the shift count goes
2449 // first).
2450 Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
2451 else
2452 Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
2453 if (IsSALU && !LiveSCC)
2454 Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
2455 if (IsSALU && LiveSCC) {
2456 Register NewDest = RS->scavengeRegisterBackwards(
2457 AMDGPU::SReg_32RegClass, Shift, false, 0);
2458 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
2459 NewDest)
2460 .addReg(ResultReg);
2461 ResultReg = NewDest;
2462 }
 2463 } else {
 2464 MachineInstrBuilder MIB;
 2465 if (!IsSALU) {
2466 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2467 nullptr) {
2468 // Reuse ResultReg in intermediate step.
2469 Register ScaledReg = ResultReg;
2470
2471 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
 2472 ScaledReg)
 2473     .addImm(ST.getWavefrontSizeLog2())
 2474     .addReg(FrameReg);
2475
2476 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2477
 2478 // TODO: Fold if use instruction is another add of a constant.
 2479 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
 2480 // FIXME: This can fail
2481 MIB.addImm(Offset);
2482 MIB.addReg(ScaledReg, RegState::Kill);
2483 if (!IsVOP2)
2484 MIB.addImm(0); // clamp bit
2485 } else {
2486 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
2487 "Need to reuse carry out register");
2488
2489 // Use scavenged unused carry out as offset register.
2490 Register ConstOffsetReg;
2491 if (!isWave32)
2492 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2493 else
2494 ConstOffsetReg = MIB.getReg(1);
2495
2496 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
2497 .addImm(Offset);
2498 MIB.addReg(ConstOffsetReg, RegState::Kill);
2499 MIB.addReg(ScaledReg, RegState::Kill);
2500 MIB.addImm(0); // clamp bit
2501 }
2502 }
2503 }
2504 if (!MIB || IsSALU) {
2505 // We have to produce a carry out, and there isn't a free SGPR pair
2506 // for it. We can keep the whole computation on the SALU to avoid
2507 // clobbering an additional register at the cost of an extra mov.
2508
2509 // We may have 1 free scratch SGPR even though a carry out is
2510 // unavailable. Only one additional mov is needed.
2511 Register TmpScaledReg = RS->scavengeRegisterBackwards(
2512 AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
2513 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2514
2515 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
 2516 .addReg(FrameReg)
 2517 .addImm(ST.getWavefrontSizeLog2());
 2518 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2519 .addReg(ScaledReg, RegState::Kill)
2520 .addImm(Offset);
2521 if (!IsSALU)
2522 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
2523 .addReg(ScaledReg, RegState::Kill);
2524 else
2525 ResultReg = ScaledReg;
2526
2527 // If there were truly no free SGPRs, we need to undo everything.
2528 if (!TmpScaledReg.isValid()) {
2529 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2530 .addReg(ScaledReg, RegState::Kill)
2531 .addImm(-Offset);
2532 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
 2533 .addReg(FrameReg)
 2534 .addImm(ST.getWavefrontSizeLog2());
 2535 }
2536 }
2537 }
2538
2539 // Don't introduce an extra copy if we're just materializing in a mov.
2540 if (IsCopy) {
2541 MI->eraseFromParent();
2542 return true;
2543 }
2544 FIOp.ChangeToRegister(ResultReg, false, false, true);
2545 return false;
2546 }
2547
2548 if (IsMUBUF) {
2549 // Disable offen so we don't need a 0 vgpr base.
2550 assert(static_cast<int>(FIOperandNum) ==
2551 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2552 AMDGPU::OpName::vaddr));
2553
2554 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2555 assert((SOffset.isImm() && SOffset.getImm() == 0));
2556
2557 if (FrameReg != AMDGPU::NoRegister)
2558 SOffset.ChangeToRegister(FrameReg, false);
2559
2560 int64_t Offset = FrameInfo.getObjectOffset(Index);
2561 int64_t OldImm
2562 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2563 int64_t NewOffset = OldImm + Offset;
2564
2565 if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
2566 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
2567 MI->eraseFromParent();
2568 return true;
2569 }
2570 }
2571
2572 // If the offset is simply too big, don't convert to a scratch wave offset
2573 // relative index.
2574
 2575 FIOp.ChangeToImmediate(Offset);
 2576 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2577 Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
2578 MI, false, 0);
2579 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2580 .addImm(Offset);
2581 FIOp.ChangeToRegister(TmpReg, false, false, true);
2582 }
2583 }
2584 }
2585 return false;
2586}
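// Worked example (sketch): in a non-entry function the frame offset is an
// unswizzled byte offset and must be divided by the wave size before use; with
// wave64, a frame object at byte offset 256 becomes 256 >> 6 == 4 after the
// V_LSHRREV_B32 / S_LSHR_B32 by getWavefrontSizeLog2() above, and any non-zero
// constant part is then folded in with a VALU or SALU add.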
2587
 2588StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
 2589 return AMDGPU::getRegisterName(Reg);
 2590 }
2591
2593 return getRegBitWidth(RC.getID());
2594}
2595
2596static const TargetRegisterClass *
 2597getAnyVGPRClassForBitWidth(unsigned BitWidth) {
 2598 if (BitWidth == 64)
2599 return &AMDGPU::VReg_64RegClass;
2600 if (BitWidth == 96)
2601 return &AMDGPU::VReg_96RegClass;
2602 if (BitWidth == 128)
2603 return &AMDGPU::VReg_128RegClass;
2604 if (BitWidth == 160)
2605 return &AMDGPU::VReg_160RegClass;
2606 if (BitWidth == 192)
2607 return &AMDGPU::VReg_192RegClass;
2608 if (BitWidth == 224)
2609 return &AMDGPU::VReg_224RegClass;
2610 if (BitWidth == 256)
2611 return &AMDGPU::VReg_256RegClass;
2612 if (BitWidth == 288)
2613 return &AMDGPU::VReg_288RegClass;
2614 if (BitWidth == 320)
2615 return &AMDGPU::VReg_320RegClass;
2616 if (BitWidth == 352)
2617 return &AMDGPU::VReg_352RegClass;
2618 if (BitWidth == 384)
2619 return &AMDGPU::VReg_384RegClass;
2620 if (BitWidth == 512)
2621 return &AMDGPU::VReg_512RegClass;
2622 if (BitWidth == 1024)
2623 return &AMDGPU::VReg_1024RegClass;
2624
2625 return nullptr;
2626}
2627
2628static const TargetRegisterClass *
 2629getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
 2630 if (BitWidth == 64)
2631 return &AMDGPU::VReg_64_Align2RegClass;
2632 if (BitWidth == 96)
2633 return &AMDGPU::VReg_96_Align2RegClass;
2634 if (BitWidth == 128)
2635 return &AMDGPU::VReg_128_Align2RegClass;
2636 if (BitWidth == 160)
2637 return &AMDGPU::VReg_160_Align2RegClass;
2638 if (BitWidth == 192)
2639 return &AMDGPU::VReg_192_Align2RegClass;
2640 if (BitWidth == 224)
2641 return &AMDGPU::VReg_224_Align2RegClass;
2642 if (BitWidth == 256)
2643 return &AMDGPU::VReg_256_Align2RegClass;
2644 if (BitWidth == 288)
2645 return &AMDGPU::VReg_288_Align2RegClass;
2646 if (BitWidth == 320)
2647 return &AMDGPU::VReg_320_Align2RegClass;
2648 if (BitWidth == 352)
2649 return &AMDGPU::VReg_352_Align2RegClass;
2650 if (BitWidth == 384)
2651 return &AMDGPU::VReg_384_Align2RegClass;
2652 if (BitWidth == 512)
2653 return &AMDGPU::VReg_512_Align2RegClass;
2654 if (BitWidth == 1024)
2655 return &AMDGPU::VReg_1024_Align2RegClass;
2656
2657 return nullptr;
2658}
2659
2660const TargetRegisterClass *
 2661SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
 2662 if (BitWidth == 1)
2663 return &AMDGPU::VReg_1RegClass;
2664 if (BitWidth == 16)
2665 return &AMDGPU::VGPR_LO16RegClass;
2666 if (BitWidth == 32)
 2667 return &AMDGPU::VGPR_32RegClass;
 2668 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
 2669                               : getAnyVGPRClassForBitWidth(BitWidth);
 2670}
2671
2672static const TargetRegisterClass *
 2673getAnyAGPRClassForBitWidth(unsigned BitWidth) {
 2674 if (BitWidth == 64)
2675 return &AMDGPU::AReg_64RegClass;
2676 if (BitWidth == 96)
2677 return &AMDGPU::AReg_96RegClass;
2678 if (BitWidth == 128)
2679 return &AMDGPU::AReg_128RegClass;
2680 if (BitWidth == 160)
2681 return &AMDGPU::AReg_160RegClass;
2682 if (BitWidth == 192)
2683 return &AMDGPU::AReg_192RegClass;
2684 if (BitWidth == 224)
2685 return &AMDGPU::AReg_224RegClass;
2686 if (BitWidth == 256)
2687 return &AMDGPU::AReg_256RegClass;
2688 if (BitWidth == 288)
2689 return &AMDGPU::AReg_288RegClass;
2690 if (BitWidth == 320)
2691 return &AMDGPU::AReg_320RegClass;
2692 if (BitWidth == 352)
2693 return &AMDGPU::AReg_352RegClass;
2694 if (BitWidth == 384)
2695 return &AMDGPU::AReg_384RegClass;
2696 if (BitWidth == 512)
2697 return &AMDGPU::AReg_512RegClass;
2698 if (BitWidth == 1024)
2699 return &AMDGPU::AReg_1024RegClass;
2700
2701 return nullptr;
2702}
2703
2704static const TargetRegisterClass *
 2705getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
 2706 if (BitWidth == 64)
2707 return &AMDGPU::AReg_64_Align2RegClass;
2708 if (BitWidth == 96)
2709 return &AMDGPU::AReg_96_Align2RegClass;
2710 if (BitWidth == 128)
2711 return &AMDGPU::AReg_128_Align2RegClass;
2712 if (BitWidth == 160)
2713 return &AMDGPU::AReg_160_Align2RegClass;
2714 if (BitWidth == 192)
2715 return &AMDGPU::AReg_192_Align2RegClass;
2716 if (BitWidth == 224)
2717 return &AMDGPU::AReg_224_Align2RegClass;
2718 if (BitWidth == 256)
2719 return &AMDGPU::AReg_256_Align2RegClass;
2720 if (BitWidth == 288)
2721 return &AMDGPU::AReg_288_Align2RegClass;
2722 if (BitWidth == 320)
2723 return &AMDGPU::AReg_320_Align2RegClass;
2724 if (BitWidth == 352)
2725 return &AMDGPU::AReg_352_Align2RegClass;
2726 if (BitWidth == 384)
2727 return &AMDGPU::AReg_384_Align2RegClass;
2728 if (BitWidth == 512)
2729 return &AMDGPU::AReg_512_Align2RegClass;
2730 if (BitWidth == 1024)
2731 return &AMDGPU::AReg_1024_Align2RegClass;
2732
2733 return nullptr;
2734}
2735
2736const TargetRegisterClass *
 2737SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
 2738 if (BitWidth == 16)
2739 return &AMDGPU::AGPR_LO16RegClass;
2740 if (BitWidth == 32)
 2741 return &AMDGPU::AGPR_32RegClass;
 2742 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
 2743                               : getAnyAGPRClassForBitWidth(BitWidth);
 2744}
2745
2746static const TargetRegisterClass *
 2747getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
 2748 if (BitWidth == 64)
2749 return &AMDGPU::AV_64RegClass;
2750 if (BitWidth == 96)
2751 return &AMDGPU::AV_96RegClass;
2752 if (BitWidth == 128)
2753 return &AMDGPU::AV_128RegClass;
2754 if (BitWidth == 160)
2755 return &AMDGPU::AV_160RegClass;
2756 if (BitWidth == 192)
2757 return &AMDGPU::AV_192RegClass;
2758 if (BitWidth == 224)
2759 return &AMDGPU::AV_224RegClass;
2760 if (BitWidth == 256)
2761 return &AMDGPU::AV_256RegClass;
2762 if (BitWidth == 288)
2763 return &AMDGPU::AV_288RegClass;
2764 if (BitWidth == 320)
2765 return &AMDGPU::AV_320RegClass;
2766 if (BitWidth == 352)
2767 return &AMDGPU::AV_352RegClass;
2768 if (BitWidth == 384)
2769 return &AMDGPU::AV_384RegClass;
2770 if (BitWidth == 512)
2771 return &AMDGPU::AV_512RegClass;
2772 if (BitWidth == 1024)
2773 return &AMDGPU::AV_1024RegClass;
2774
2775 return nullptr;
2776}
2777
2778static const TargetRegisterClass *
 2779getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
 2780 if (BitWidth == 64)
2781 return &AMDGPU::AV_64_Align2RegClass;
2782 if (BitWidth == 96)
2783 return &AMDGPU::AV_96_Align2RegClass;
2784 if (BitWidth == 128)
2785 return &AMDGPU::AV_128_Align2RegClass;
2786 if (BitWidth == 160)
2787 return &AMDGPU::AV_160_Align2RegClass;
2788 if (BitWidth == 192)
2789 return &AMDGPU::AV_192_Align2RegClass;
2790 if (BitWidth == 224)
2791 return &AMDGPU::AV_224_Align2RegClass;
2792 if (BitWidth == 256)
2793 return &AMDGPU::AV_256_Align2RegClass;
2794 if (BitWidth == 288)
2795 return &AMDGPU::AV_288_Align2RegClass;
2796 if (BitWidth == 320)
2797 return &AMDGPU::AV_320_Align2RegClass;
2798 if (BitWidth == 352)
2799 return &AMDGPU::AV_352_Align2RegClass;
2800 if (BitWidth == 384)
2801 return &AMDGPU::AV_384_Align2RegClass;
2802 if (BitWidth == 512)
2803 return &AMDGPU::AV_512_Align2RegClass;
2804 if (BitWidth == 1024)
2805 return &AMDGPU::AV_1024_Align2RegClass;
2806
2807 return nullptr;
2808}
2809
2810const TargetRegisterClass *
 2811SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
 2812 if (BitWidth == 16)
2813 return &AMDGPU::VGPR_LO16RegClass;
2814 if (BitWidth == 32)
2815 return &AMDGPU::AV_32RegClass;
 2816 return ST.needsAlignedVGPRs()
 2817            ? getAlignedVectorSuperClassForBitWidth(BitWidth)
 2818            : getAnyVectorSuperClassForBitWidth(BitWidth);
 2819}
2820
2821const TargetRegisterClass *
 2822SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
 2823 if (BitWidth == 16)
2824 return &AMDGPU::SGPR_LO16RegClass;
2825 if (BitWidth == 32)
2826 return &AMDGPU::SReg_32RegClass;
2827 if (BitWidth == 64)
2828 return &AMDGPU::SReg_64RegClass;
2829 if (BitWidth == 96)
2830 return &AMDGPU::SGPR_96RegClass;
2831 if (BitWidth == 128)
2832 return &AMDGPU::SGPR_128RegClass;
2833 if (BitWidth == 160)
2834 return &AMDGPU::SGPR_160RegClass;
2835 if (BitWidth == 192)
2836 return &AMDGPU::SGPR_192RegClass;
2837 if (BitWidth == 224)
2838 return &AMDGPU::SGPR_224RegClass;
2839 if (BitWidth == 256)
2840 return &AMDGPU::SGPR_256RegClass;
2841 if (BitWidth == 288)
2842 return &AMDGPU::SGPR_288RegClass;
2843 if (BitWidth == 320)
2844 return &AMDGPU::SGPR_320RegClass;
2845 if (BitWidth == 352)
2846 return &AMDGPU::SGPR_352RegClass;
2847 if (BitWidth == 384)
2848 return &AMDGPU::SGPR_384RegClass;
2849 if (BitWidth == 512)
2850 return &AMDGPU::SGPR_512RegClass;
2851 if (BitWidth == 1024)
2852 return &AMDGPU::SGPR_1024RegClass;
2853
2854 return nullptr;
2855}
2856
 2857bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
 2858                               Register Reg) const {
2859 const TargetRegisterClass *RC;
2860 if (Reg.isVirtual())
2861 RC = MRI.getRegClass(Reg);
2862 else
2863 RC = getPhysRegBaseClass(Reg);
2864 return RC ? isSGPRClass(RC) : false;
2865}
2866
2867const TargetRegisterClass *
 2868SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
 2869 unsigned Size = getRegSizeInBits(*SRC);
 2870 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
 2871 assert(VRC && "Invalid register class size");
2872 return VRC;
2873}
2874
2875const TargetRegisterClass *
 2876SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
 2877 unsigned Size = getRegSizeInBits(*SRC);
 2878 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
 2879 assert(ARC && "Invalid register class size");
2880 return ARC;
2881}
2882
2883const TargetRegisterClass *
 2884SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
 2885 unsigned Size = getRegSizeInBits(*VRC);
 2886 if (Size == 32)
 2887   return &AMDGPU::SGPR_32RegClass;
 2888 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
 2889 assert(SRC && "Invalid register class size");
2890 return SRC;
2891}
2892
2893const TargetRegisterClass *
 2894SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
 2895                                         const TargetRegisterClass *SubRC,
2896 unsigned SubIdx) const {
2897 // Ensure this subregister index is aligned in the super register.
2898 const TargetRegisterClass *MatchRC =
2899 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2900 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2901}
2902
2903bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
 2904 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
 2905     OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
 2906   return !ST.hasMFMAInlineLiteralBug();
2907
2908 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2909 OpType <= AMDGPU::OPERAND_SRC_LAST;
2910}
2911
 2912bool SIRegisterInfo::shouldRewriteCopySrc(
 2913                                          const TargetRegisterClass *DefRC,
2914 unsigned DefSubReg,
2915 const TargetRegisterClass *SrcRC,
2916 unsigned SrcSubReg) const {
2917 // We want to prefer the smallest register class possible, so we don't want to
2918 // stop and rewrite on anything that looks like a subregister
2919 // extract. Operations mostly don't care about the super register class, so we
2920 // only want to stop on the most basic of copies between the same register
2921 // class.
2922 //
2923 // e.g. if we have something like
2924 // %0 = ...
2925 // %1 = ...
2926 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2927 // %3 = COPY %2, sub0
2928 //
2929 // We want to look through the COPY to find:
2930 // => %3 = COPY %0
2931
2932 // Plain copy.
2933 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2934}
2935
2936bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2937 // TODO: 64-bit operands have extending behavior from 32-bit literal.
 2938 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
 2939        OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
 2940}
2941
 2942/// Returns the lowest register that is not used at any point in the function.
 2943/// If all registers are used, then this function will return
 2944/// AMDGPU::NoRegister. If \p ReserveHighestRegister = true, then return the
 2945/// highest unused register instead.
 2946MCRegister SIRegisterInfo::findUnusedRegister(
 2947    const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
 2948    const MachineFunction &MF, bool ReserveHighestRegister) const {
2949 if (ReserveHighestRegister) {
2950 for (MCRegister Reg : reverse(*RC))
2951 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2952 return Reg;
2953 } else {
2954 for (MCRegister Reg : *RC)
2955 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2956 return Reg;
2957 }
2958 return MCRegister();
2959}
2960
 2961bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
 2962                                  const RegisterBankInfo &RBI,
2963 Register Reg) const {
2964 auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
2965 if (!RB)
2966 return false;
2967
2968 return !RBI.isDivergentRegBank(RB);
2969}
2970
 2971ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
 2972                                                   unsigned EltSize) const {
2973 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
2974 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2975
2976 const unsigned RegDWORDs = RegBitWidth / 32;
2977 const unsigned EltDWORDs = EltSize / 4;
2978 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2979
2980 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2981 const unsigned NumParts = RegDWORDs / EltDWORDs;
2982
2983 return ArrayRef(Parts.data(), NumParts);
2984}
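// Example: for a 256-bit register class split into 64-bit elements,
// RegDWORDs == 8 and EltDWORDs == 2, so this returns the first 4 entries of
// RegSplitParts[1] -- the sub-register indices covering dword pairs 0-1, 2-3,
// 4-5 and 6-7 of the tuple.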
2985
 2986const TargetRegisterClass *
 2987SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
 2988                                  Register Reg) const {
2989 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
2990}
2991
2992const TargetRegisterClass *
2994 const MachineOperand &MO) const {
2995 const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
2996 return getSubRegisterClass(SrcRC, MO.getSubReg());
2997}
2998
 2999bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
 3000                            Register Reg) const {
3001 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
3002 // Registers without classes are unaddressable, SGPR-like registers.
3003 return RC && isVGPRClass(RC);
3004}
3005
3007 Register Reg) const {
3008 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
3009
3010 // Registers without classes are unaddressable, SGPR-like registers.
3011 return RC && isAGPRClass(RC);
3012}
3013
 3014bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
 3015                                    const TargetRegisterClass *SrcRC,
3016 unsigned SubReg,
3017 const TargetRegisterClass *DstRC,
3018 unsigned DstSubReg,
3019 const TargetRegisterClass *NewRC,
3020 LiveIntervals &LIS) const {
3021 unsigned SrcSize = getRegSizeInBits(*SrcRC);
3022 unsigned DstSize = getRegSizeInBits(*DstRC);
3023 unsigned NewSize = getRegSizeInBits(*NewRC);
3024
 3025 // Do not increase the size of registers beyond a dword; we would need to
 3026 // allocate adjacent registers and constrain regalloc more than needed.
3027
3028 // Always allow dword coalescing.
3029 if (SrcSize <= 32 || DstSize <= 32)
3030 return true;
3031
3032 return NewSize <= DstSize || NewSize <= SrcSize;
3033}
3034
 3035unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
 3036                                             MachineFunction &MF) const {
 3037 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 3038
3039 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
3040 MF.getFunction());
3041 switch (RC->getID()) {
3042 default:
3043 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
3044 case AMDGPU::VGPR_32RegClassID:
3045 case AMDGPU::VGPR_LO16RegClassID:
3046 case AMDGPU::VGPR_HI16RegClassID:
3047 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
3048 case AMDGPU::SGPR_32RegClassID:
3049 case AMDGPU::SGPR_LO16RegClassID:
3050 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
3051 }
3052}
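// Sketch: the limit is the per-wave register budget at the occupancy implied
// by the function's LDS usage, further clamped by any per-function maximum
// (hence the std::min of the two subtarget queries above).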
3053
 3054unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
 3055                                                unsigned Idx) const {
3056 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
3057 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
3058 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
3059 const_cast<MachineFunction &>(MF));
3060
3061 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
3062 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
3063 const_cast<MachineFunction &>(MF));
3064
3065 llvm_unreachable("Unexpected register pressure set!");
3066}
3067
3068const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
3069 static const int Empty[] = { -1 };
3070
3071 if (RegPressureIgnoredUnits[RegUnit])
3072 return Empty;
3073
3074 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3075}
3076
 3077MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
 3078 // Not a callee saved register.
3079 return AMDGPU::SGPR30_SGPR31;
3080}
3081
3082const TargetRegisterClass *
3084 const RegisterBank &RB) const {
3085 switch (RB.getID()) {
3086 case AMDGPU::VGPRRegBankID:
3088 std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));
3089 case AMDGPU::VCCRegBankID:
3090 assert(Size == 1);
3091 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3092 : &AMDGPU::SReg_64_XEXECRegClass;
3093 case AMDGPU::SGPRRegBankID:
3094 return getSGPRClassForBitWidth(std::max(32u, Size));
3095 case AMDGPU::AGPRRegBankID:
3096 return getAGPRClassForBitWidth(std::max(32u, Size));
3097 default:
3098 llvm_unreachable("unknown register bank");
3099 }
3100}
3101
3102const TargetRegisterClass *
3104 const MachineRegisterInfo &MRI) const {
3105 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
3106 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
3107 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
3108
3109 if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
3110 return getAllocatableClass(RC);
3111
3112 return nullptr;
3113}
3114
 3115MCRegister SIRegisterInfo::getVCC() const {
 3116 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3117}
3118
 3119MCRegister SIRegisterInfo::getExec() const {
 3120 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3121}
3122
 3123const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
 3124 // VGPR tuples have an alignment requirement on gfx90a variants.
3125 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3126 : &AMDGPU::VReg_64RegClass;
3127}
3128
3129const TargetRegisterClass *
3130SIRegisterInfo::getRegClass(unsigned RCID) const {
3131 switch ((int)RCID) {
3132 case AMDGPU::SReg_1RegClassID:
3133 return getBoolRC();
3134 case AMDGPU::SReg_1_XEXECRegClassID:
3135 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3136 : &AMDGPU::SReg_64_XEXECRegClass;
3137 case -1:
3138 return nullptr;
3139 default:
3140 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3141 }
3142}
3143
3144// Find reaching register definition
 3145MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
 3146                                              MachineInstr &Use,
 3147                                              MachineRegisterInfo &MRI,
 3148                                              LiveIntervals *LIS) const {
3149 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
3150 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
3151 SlotIndex DefIdx;
3152
3153 if (Reg.isVirtual()) {
3154 if (!LIS->hasInterval(Reg))
3155 return nullptr;
3156 LiveInterval &LI = LIS->getInterval(Reg);
3157 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
3158 : MRI.getMaxLaneMaskForVReg(Reg);
3159 VNInfo *V = nullptr;
3160 if (LI.hasSubRanges()) {
3161 for (auto &S : LI.subranges()) {
3162 if ((S.LaneMask & SubLanes) == SubLanes) {
3163 V = S.getVNInfoAt(UseIdx);
3164 break;
3165 }
3166 }
3167 } else {
3168 V = LI.getVNInfoAt(UseIdx);
3169 }
3170 if (!V)
3171 return nullptr;
3172 DefIdx = V->def;
3173 } else {
3174 // Find last def.
3175 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
3176 LiveRange &LR = LIS->getRegUnit(Unit);
3177 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
3178 if (!DefIdx.isValid() ||
3179 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
3180 LIS->getInstructionFromIndex(V->def)))
3181 DefIdx = V->def;
3182 } else {
3183 return nullptr;
3184 }
3185 }
3186 }
3187
3188 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
3189
3190 if (!Def || !MDT.dominates(Def, &Use))
3191 return nullptr;
3192
3193 assert(Def->modifiesRegister(Reg, this));
3194
3195 return Def;
3196}
3197
 3198MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
 3199 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3200
3201 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
3202 AMDGPU::SReg_32RegClass,
3203 AMDGPU::AGPR_32RegClass } ) {
3204 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3205 return Super;
3206 }
3207 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3208 &AMDGPU::VGPR_32RegClass)) {
3209 return Super;
3210 }
3211
3212 return AMDGPU::NoRegister;
3213}
3214
 3215bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
 3216 if (!ST.needsAlignedVGPRs())
3217 return true;
3218
3219 if (isVGPRClass(&RC))
3220 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
3221 if (isAGPRClass(&RC))
3222 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
3223 if (isVectorSuperClass(&RC))
3224 return RC.hasSuperClassEq(
3225 getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
3226
3227 return true;
3228}
3229
3230const TargetRegisterClass *
 3231SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
 3232 if (!RC || !ST.needsAlignedVGPRs())
3233 return RC;
3234
3235 unsigned Size = getRegSizeInBits(*RC);
3236 if (Size <= 32)
3237 return RC;
3238
 3239 if (isVGPRClass(RC))
 3240   return getAlignedVGPRClassForBitWidth(Size);
 3241 if (isAGPRClass(RC))
 3242   return getAlignedAGPRClassForBitWidth(Size);
 3243 if (isVectorSuperClass(RC))
 3244   return getAlignedVectorSuperClassForBitWidth(Size);
 3245
3246 return RC;
3247}
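// Note: on subtargets where needsAlignedVGPRs() is true (gfx90a-style), wide
// VGPR/AGPR/AV tuples must start at an even register; the *_Align2 classes
// returned above encode that constraint, while classes of 32 bits or less are
// returned unchanged.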
3248
 3249ArrayRef<MCPhysReg>
 3250SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
 3251 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
3252}
3253
 3254ArrayRef<MCPhysReg>
 3255SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
 3256 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
3257}
3258
 3259ArrayRef<MCPhysReg>
 3260SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
 3261 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
3262}
3263
3264unsigned
3266 unsigned SubReg) const {
3267 switch (RC->TSFlags & SIRCFlags::RegKindMask) {
3268 case SIRCFlags::HasSGPR:
3269 return std::min(128u, getSubRegIdxSize(SubReg));
3270 case SIRCFlags::HasAGPR:
3271 case SIRCFlags::HasVGPR:
3273 return std::min(32u, getSubRegIdxSize(SubReg));
3274 default:
3275 break;
3276 }
3277 return 0;
3278}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
bool hasGFX90AInsts() const
bool hasMAIInsts() const
Definition: GCNSubtarget.h:759
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:237
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
Definition: GCNSubtarget.h:615
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:241
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:605
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:804
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:776
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:74
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
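getMachineMemOperand, together with MachinePointerInfo::getFixedStack (listed further below), is how memory information gets attached to spill loads and stores. A minimal sketch for a 4-byte store to a frame index (the helper name and the fixed size are assumptions):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

// Sketch: build an MMO describing a 4-byte write to frame index FI.
static MachineMemOperand *makeSpillMMO(MachineFunction &MF, int FI) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 /*Size=*/4, MFI.getObjectAlign(FI));
}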
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
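The MachineInstrBuilder chain (addReg, addImm, addFrameIndex, cloneMemRefs) is the standard way to assemble a MachineInstr operand by operand. A hedged sketch, not tied to any particular opcode:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// Sketch: emit "Opcode DstReg, <FI>, #Offset" before MI, reusing MI's memrefs.
static MachineInstr *emitFrameOp(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI,
                                 const TargetInstrInfo &TII, unsigned Opcode,
                                 Register DstReg, int FI, int64_t Offset) {
  const DebugLoc &DL = MI->getDebugLoc();
  return BuildMI(MBB, MI, DL, TII.get(Opcode))
      .addReg(DstReg, RegState::Define) // destination register operand
      .addFrameIndex(FI)                // abstract stack slot
      .addImm(Offset)                   // immediate offset
      .cloneMemRefs(*MI)                // copy memory operands of MI
      .getInstr();
}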
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:543
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
Definition: MachineInstr.h:357
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
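Frame-index elimination ultimately rewrites a MachineOperand in place, either turning it into a plain immediate with ChangeToImmediate or pointing it at a concrete base register with ChangeToRegister. A sketch of both paths (inputs are hypothetical):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>

using namespace llvm;

// Sketch: rewrite the frame-index operand at index OpIdx of MI.
static void rewriteFrameIndexOperand(MachineInstr &MI, unsigned OpIdx,
                                     Register BaseReg, int64_t Offset) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  assert(MO.isFI() && "expected a frame-index operand");
  if (Offset == 0)
    MO.ChangeToRegister(BaseReg, /*isDef=*/false); // use the base register directly
  else
    MO.ChangeToImmediate(Offset);                  // fold the resolved offset
}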
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the pointer.
Definition: PointerUnion.h:118
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return true if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the place before To.
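scavengeRegisterBackwards is the entry point used to obtain a temporary register when none was kept free by the allocator. A sketch with hypothetical parameters; passing AllowSpill=false keeps the scavenger from emitting an emergency spill and lets it return an invalid Register instead:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Sketch: try to find a free register of class RC usable at MI.
static Register scavengeTmpReg(RegScavenger &RS, const TargetRegisterClass &RC,
                               MachineBasicBlock::iterator MI) {
  Register Tmp = RS.scavengeRegisterBackwards(RC, MI, /*RestoreAfter=*/false,
                                              /*SPAdj=*/0, /*AllowSpill=*/false);
  if (Tmp.isValid())
    RS.setRegUsed(Tmp); // make later queries see it as occupied
  return Tmp;
}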
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
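isDivergentRegBank and getID are the hooks behind isUniformReg further below: a value is treated as uniform when its register bank is not a divergent one. A one-line sketch:

#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"

using namespace llvm;

// Sketch: a register bank is uniform if the target does not mark it divergent
// (on AMDGPU the VGPR bank is the divergent one).
static bool isUniformBank(const RegisterBankInfo &RBI, const RegisterBank &RB) {
  return !RBI.isDivergentRegBank(&RB);
}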
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
Definition: SIInstrInfo.h:616
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:1241
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:516
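The static SIInstrInfo predicates above let the frame-index code tell MUBUF scratch accesses apart from flat-scratch ones and validate immediate offsets. An illustrative sketch (the real legality checks in this file also consult the subtarget):

#include "SIInstrInfo.h"

using namespace llvm;

// Sketch: can a scratch access MI fold Offset into its immediate field?
static bool canFoldScratchOffset(const MachineInstr &MI, int64_t Offset) {
  if (SIInstrInfo::isMUBUF(MI))
    return Offset >= 0 &&
           SIInstrInfo::isLegalMUBUFImmOffset(static_cast<unsigned>(Offset));
  if (SIInstrInfo::isFLATScratch(MI))
    return true; // assumption: flat-scratch offset range is checked elsewhere
  return false;
}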
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
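SIMachineFunctionInfo holds the per-function spill bookkeeping; getSGPRSpillToVirtualVGPRLanes maps an SGPR spill frame index to the VGPR/lane pairs that hold its dwords. A sketch that just prints the mapping (helper name is illustrative):

#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch: print the VGPR lane assignment for an SGPR spill frame index.
static void dumpSGPRSpillLanes(const SIMachineFunctionInfo &FuncInfo, int FI) {
  for (const SIRegisterInfo::SpilledReg &SR :
       FuncInfo.getSGPRSpillToVirtualVGPRLanes(FI))
    errs() << "  vgpr " << printReg(SR.VGPR) << " lane " << SR.Lane << '\n';
}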
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class SubRC with subregister index SubIdx.
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
static bool isChainScratchRegister(Register VGPR)
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this will only succeed if it manages to find a free VGPR lane to spill.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
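Many of the SIRegisterInfo helpers above are simple classification queries. A sketch that combines getRegClassForReg with the static class predicates to sort a register into SGPR/VGPR/AGPR buckets (enum and helper are illustrative):

#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

enum class GPRKind { SGPR, VGPR, AGPR, Unknown };

// Sketch: classify Reg using the register class the operand actually has.
static GPRKind classify(const SIRegisterInfo &TRI,
                        const MachineRegisterInfo &MRI, Register Reg) {
  const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, Reg);
  if (!RC)
    return GPRKind::Unknown;
  if (SIRegisterInfo::isSGPRClass(RC))
    return GPRKind::SGPR;
  if (SIRegisterInfo::isAGPRClass(RC))
    return GPRKind::AGPR;
  if (SIRegisterInfo::isVGPRClass(RC))
    return GPRKind::VGPR;
  return GPRKind::Unknown; // e.g. AV super classes fall through here
}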
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:133
SlotIndexes pass.
Definition: SlotIndexes.h:300
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:523
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replace a machine instr with a new one in the maps used by the register allocator.
Definition: SlotIndexes.h:580
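When instructions are inserted or replaced while SlotIndexes is preserved, the index maps have to be kept in sync with insertMachineInstrInMaps / replaceMachineInstrInMaps. A sketch that tolerates the analysis being absent:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SlotIndexes.h"

using namespace llvm;

// Sketch: register NewMI with SlotIndexes, replacing OldMI if one is given.
// Indexes may be null when the pass is not preserving the analysis.
static void updateIndexes(SlotIndexes *Indexes, MachineInstr *OldMI,
                          MachineInstr &NewMI) {
  if (!Indexes)
    return;
  if (OldMI)
    Indexes->replaceMachineInstrInMaps(*OldMI, NewMI);
  else
    Indexes->insertMachineInstrInMaps(NewMI);
}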
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal calling convention calls for.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:412
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
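getNamedOperandIdx and hasNamedOperand locate operands by name instead of by position, which keeps code independent of the exact operand order of each opcode. A sketch, assuming the usual AMDGPU backend headers are available and using the offset operand purely as an example:

#include "SIInstrInfo.h"

using namespace llvm;

// Sketch: fetch the immediate 'offset' operand of MI, or 0 if absent.
static int64_t getOffsetImm(const MachineInstr &MI) {
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
  if (Idx < 0)
    return 0; // opcode has no named 'offset' operand
  return MI.getOperand(Idx).getImm();
}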
@ OPERAND_REG_IMM_FIRST
Definition: SIDefines.h:237
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:246
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:243
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:244
@ OPERAND_REG_IMM_LAST
Definition: SIDefines.h:238
@ OPERAND_SRC_LAST
Definition: SIDefines.h:247
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:229
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:246
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:242
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1684
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition: MCRegister.h:21
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
@ HasSGPR
Definition: SIDefines.h:26
@ HasVGPR
Definition: SIDefines.h:24
@ RegKindMask
Definition: SIDefines.h:29
@ HasAGPR
Definition: SIDefines.h:25
unsigned getDefRegState(bool B)
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
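call_once with a static once_flag is the usual LLVM idiom for building shared lookup tables exactly once, even when several threads construct the pass. A self-contained sketch of the pattern (the table contents are placeholders, not the real tables built by this file):

#include "llvm/Support/Threading.h"
#include <array>

using namespace llvm;

static std::array<unsigned, 32> Table; // shared, lazily filled table
static llvm::once_flag TableInitFlag;

static void initTable() {
  for (unsigned I = 0; I != Table.size(); ++I)
    Table[I] = I * 4; // placeholder contents
}

// Sketch: initTable runs at most once across all callers and threads.
static unsigned lookup(unsigned Idx) {
  llvm::call_once(TableInitFlag, initTable);
  return Table[Idx];
}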
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:428
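alignDown and commonAlignment are small arithmetic helpers: the first rounds a value down to a multiple of an alignment, the second gives the alignment that still holds after adding an offset to an aligned base. A quick sketch:

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// Sketch: largest multiple of 4096 that does not exceed Offset
// (4096 is just an example granule).
static uint64_t largestAlignedChunk(uint64_t Offset) {
  return alignDown(Offset, 4096);
}

// Sketch: alignment guaranteed for BasePtr + Offset, given BasePtr's alignment.
static Align alignAfterOffset(Align BaseAlign, uint64_t Offset) {
  return commonAlignment(BaseAlign, Offset);
}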
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
MachineFunction & MF
MachineBasicBlock * MBB
const SIInstrInfo & TII
The llvm::once_flag structure.
Definition: Threading.h:68