1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "GCNSubtarget.h"
20#include "SIRegisterInfo.h"
26
27using namespace llvm;
28
29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
31
32static cl::opt<bool> EnableSpillSGPRToVGPR(
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling SGPRs to VGPRs"),
36 cl::init(true));
37
38std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
40
41// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
42// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
43// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
44// meaning index 7 in SubRegFromChannelTable.
45static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
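// For example, a 2-DWORD (64-bit) access starting at channel 2 looks up
// SubRegFromChannelTableWidthMap[2] == 2 and then SubRegFromChannelTable[1][2],
// which holds the sub2_sub3 subregister index.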
47
48namespace llvm {
49
50// A temporary struct to spill SGPRs.
51// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
52// just v_writelane and v_readlane.
53//
54// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
55// is saved to scratch (or the other way around for loads).
56// For this, a VGPR is required where the needed lanes can be clobbered. The
57// RegScavenger can provide a VGPR where currently active lanes can be
58// clobbered, but we still need to save inactive lanes.
59// The high-level steps are:
60// - Try to scavenge SGPR(s) to save exec
61// - Try to scavenge VGPR
62// - Save needed, all or inactive lanes of a TmpVGPR
63// - Spill/Restore SGPRs using TmpVGPR
64// - Restore TmpVGPR
65//
66// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
67// cannot scavenge temporary SGPRs to save exec, we use the following code:
68// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
69// s_not exec, exec
70// buffer_store_dword TmpVGPR ; save inactive lanes
71// s_not exec, exec
72struct SGPRSpillBuilder {
73 struct PerVGPRData {
74 unsigned PerVGPR;
75 unsigned NumVGPRs;
76 int64_t VGPRLanes;
77 };
78
79 // The SGPR to save
80 Register SuperReg;
81 MachineBasicBlock::iterator MI;
82 ArrayRef<int16_t> SplitParts;
83 unsigned NumSubRegs;
84 bool IsKill;
85 const DebugLoc &DL;
86
87 /* When spilling to stack */
88 // The SGPRs are written into this VGPR, which is then written to scratch
89 // (or vice versa for loads).
90 Register TmpVGPR = AMDGPU::NoRegister;
91 // Temporary spill slot to save TmpVGPR to.
92 int TmpVGPRIndex = 0;
93 // If TmpVGPR is live before the spill or if it is scavenged.
94 bool TmpVGPRLive = false;
95 // Scavenged SGPR to save EXEC.
96 Register SavedExecReg = AMDGPU::NoRegister;
97 // Stack index to write the SGPRs to.
98 int Index;
99 unsigned EltSize = 4;
100
101 RegScavenger *RS;
102 MachineBasicBlock *MBB;
103 MachineFunction &MF;
104 SIMachineFunctionInfo &MFI;
105 const SIInstrInfo &TII;
106 const SIRegisterInfo &TRI;
107 bool IsWave32;
108 Register ExecReg;
109 unsigned MovOpc;
110 unsigned NotOpc;
111
115 : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
116 MI->getOperand(0).isKill(), Index, RS) {}
117
118 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
119 bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
120 bool IsKill, int Index, RegScavenger *RS)
121 : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
122 Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
123 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
124 IsWave32(IsWave32) {
125 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
126 SplitParts = TRI.getRegSplitParts(RC, EltSize);
127 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
128
129 if (IsWave32) {
130 ExecReg = AMDGPU::EXEC_LO;
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
133 } else {
134 ExecReg = AMDGPU::EXEC;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
137 }
138
139 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
140 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
142 }
143
144 PerVGPRData getPerVGPRData() {
145 PerVGPRData Data;
146 Data.PerVGPR = IsWave32 ? 32 : 64;
147 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
148 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
149 return Data;
150 }
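// For example, spilling a 4-SGPR tuple in wave64 yields PerVGPR = 64,
// NumVGPRs = 1 and VGPRLanes = 0xf: only lanes 0-3 of the temporary VGPR
// are needed.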
151
152 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
153 // free.
154 // Writes these instructions if an SGPR can be scavenged:
155 // s_mov_b64 s[6:7], exec ; Save exec
156 // s_mov_b64 exec, 3 ; Wanted lanemask
157 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
158 //
159 // Writes these instructions if no SGPR can be scavenged:
160 // buffer_store_dword v0 ; Only if no free VGPR was found
161 // s_not_b64 exec, exec
162 // buffer_store_dword v0 ; Save inactive lanes
163 // ; exec stays inverted, it is flipped back in
164 // ; restore.
165 void prepare() {
166 // Scavenged temporary VGPR to use. It must be scavenged once for any number
167 // of spilled subregs.
168 // FIXME: The liveness analysis is limited and does not tell if a register
169 // is in use in lanes that are currently inactive. We can never be sure if
170 // a register is actually in use in another lane, so we need to save all
171 // used lanes of the chosen VGPR.
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
173 TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);
174
175 // Reserve temporary stack slot
176 TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
177 if (TmpVGPR) {
178 // Found a register that is dead in the currently active lanes; we only
179 // need to spill the inactive lanes.
180 TmpVGPRLive = false;
181 } else {
182 // Pick v0 because it doesn't make a difference.
183 TmpVGPR = AMDGPU::VGPR0;
184 TmpVGPRLive = true;
185 }
186
187 if (TmpVGPRLive) {
188 // We need to inform the scavenger that this index is already in use until
189 // we're done with the custom emergency spill.
190 RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR);
191 }
192
193 // We may end up recursively calling the scavenger, and don't want to re-use
194 // the same register.
195 RS->setRegUsed(TmpVGPR);
196
197 // Try to scavenge SGPRs to save exec
198 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
199 const TargetRegisterClass &RC =
200 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
202 SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false);
203
204 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
205
206 if (SavedExecReg) {
207 BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg); // Save exec
208 // Set exec to needed lanes
210 auto I =
211 BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
212 if (!TmpVGPRLive)
213 I.addReg(TmpVGPR, RegState::ImplicitDefine);
214 // Spill needed lanes
215 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
216 } else {
217 // The modify and restore of exec clobber SCC, which we would have to save
218 // and restore. FIXME: We probably would need to reserve a register for
219 // this.
220 if (RS->isRegUsed(AMDGPU::SCC))
221 MI->emitError("unhandled SGPR spill to memory");
222
223 // Spill active lanes
224 if (TmpVGPRLive)
225 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
226 /*IsKill*/ false);
227 // Spill inactive lanes
228 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
229 if (!TmpVGPRLive)
230 I.addReg(TmpVGPR, RegState::ImplicitDefine);
231 I->getOperand(2).setIsDead(); // Mark SCC as dead.
232 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
233 }
234 }
235
236 // Writes these instructions if an SGPR can be scavenged:
237 // buffer_load_dword v1 ; Reload scavenged VGPR from emergency slot
238 // s_waitcnt vmcnt(0) ; If a free VGPR was found
239 // s_mov_b64 exec, s[6:7] ; Restore exec
240 //
241 // Writes these instructions if no SGPR can be scavenged:
242 // buffer_load_dword v0 ; Restore inactive lanes
243 // s_waitcnt vmcnt(0) ; If a free VGPR was found
244 // s_not_b64 exec, exec
245 // buffer_load_dword v0 ; Only if no free VGPR was found
246 void restore() {
247 if (SavedExecReg) {
248 // Restore used lanes
249 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
250 /*IsKill*/ false);
251 // Restore exec
252 auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
253 .addReg(SavedExecReg, RegState::Kill);
254 // Add an implicit use of the load so it is not dead.
255 // FIXME This inserts an unnecessary waitcnt
256 if (!TmpVGPRLive) {
257 I.addReg(TmpVGPR, RegState::ImplicitKill);
258 }
259 } else {
260 // Restore inactive lanes
261 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
262 /*IsKill*/ false);
263 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
264 if (!TmpVGPRLive)
265 I.addReg(TmpVGPR, RegState::ImplicitKill);
266 I->getOperand(2).setIsDead(); // Mark SCC as dead.
267
268 // Restore active lanes
269 if (TmpVGPRLive)
270 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
271 }
272
273 // Inform the scavenger where we're releasing our custom scavenged register.
274 if (TmpVGPRLive) {
275 MachineBasicBlock::iterator RestorePt = std::prev(MI);
276 RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt);
277 }
278 }
279
280 // Write TmpVGPR to memory or read TmpVGPR from memory.
281 // Either using a single buffer_load/store if exec is set to the needed mask
282 // or using
283 // buffer_load
284 // s_not exec, exec
285 // buffer_load
286 // s_not exec, exec
287 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
288 if (SavedExecReg) {
289 // Spill needed lanes
290 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
291 } else {
292 // The modify and restore of exec clobber SCC, which we would have to save
293 // and restore. FIXME: We probably would need to reserve a register for
294 // this.
295 if (RS->isRegUsed(AMDGPU::SCC))
296 MI->emitError("unhandled SGPR spill to memory");
297
298 // Spill active lanes
299 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
300 /*IsKill*/ false);
301 // Spill inactive lanes
302 auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
303 Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
304 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
305 auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
306 Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
307 }
308 }
309
310 void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
311 assert(MBB->getParent() == &MF);
312 MI = NewMI;
313 MBB = NewMBB;
314 }
315};
316
317} // namespace llvm
318
319SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
320 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
321 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
322
323 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
324 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
325 (getSubRegIndexLaneMask(AMDGPU::lo16) |
326 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
327 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
328 "getNumCoveredRegs() will not work with generated subreg masks!");
329
330 RegPressureIgnoredUnits.resize(getNumRegUnits());
331 RegPressureIgnoredUnits.set(
332 *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this));
333 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
334 RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
335
336 // HACK: Until this is fully tablegen'd.
337 static llvm::once_flag InitializeRegSplitPartsFlag;
338
339 static auto InitializeRegSplitPartsOnce = [this]() {
340 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
341 unsigned Size = getSubRegIdxSize(Idx);
342 if (Size & 31)
343 continue;
344 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
345 unsigned Pos = getSubRegIdxOffset(Idx);
346 if (Pos % Size)
347 continue;
348 Pos /= Size;
349 if (Vec.empty()) {
350 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
351 Vec.resize(MaxNumParts);
352 }
353 Vec[Pos] = Idx;
354 }
355 };
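// After initialization, RegSplitParts[N - 1] lists the subreg indices that
// split a register into N-DWORD pieces; e.g. RegSplitParts[1] holds the
// 64-bit pieces {sub0_sub1, sub2_sub3, sub4_sub5, ...}.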
356
357 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
358
359 static auto InitializeSubRegFromChannelTableOnce = [this]() {
360 for (auto &Row : SubRegFromChannelTable)
361 Row.fill(AMDGPU::NoSubRegister);
362 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
363 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
364 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
366 Width = SubRegFromChannelTableWidthMap[Width];
367 if (Width == 0)
368 continue;
369 unsigned TableIdx = Width - 1;
370 assert(TableIdx < SubRegFromChannelTable.size());
371 assert(Offset < SubRegFromChannelTable[TableIdx].size());
372 SubRegFromChannelTable[TableIdx][Offset] = Idx;
373 }
374 };
375
376 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
377 llvm::call_once(InitializeSubRegFromChannelTableFlag,
378 InitializeSubRegFromChannelTableOnce);
379}
380
381void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
382 MCRegister Reg) const {
383 for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
384 Reserved.set(*R);
385}
386
387// Forced to be here by one .inc
388const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
389 const MachineFunction *MF) const {
390 CallingConv::ID CC = MF->getFunction().getCallingConv();
391 switch (CC) {
392 case CallingConv::C:
393 case CallingConv::Fast:
394 case CallingConv::Cold:
395 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
396 : CSR_AMDGPU_SaveList;
397 case CallingConv::AMDGPU_Gfx:
398 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
399 : CSR_AMDGPU_SI_Gfx_SaveList;
400 default: {
401 // Dummy to not crash RegisterClassInfo.
402 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
403 return &NoCalleeSavedReg;
404 }
405 }
406}
407
408const MCPhysReg *
409SIRegisterInfo::getCalleeSavedRegsViaCopy(MachineFunction *MF) const {
410 return nullptr;
411}
412
413const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
414 CallingConv::ID CC) const {
415 switch (CC) {
416 case CallingConv::C:
417 case CallingConv::Fast:
418 case CallingConv::Cold:
419 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
420 : CSR_AMDGPU_RegMask;
421 case CallingConv::AMDGPU_Gfx:
422 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
423 : CSR_AMDGPU_SI_Gfx_RegMask;
424 default:
425 return nullptr;
426 }
427}
428
429const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
430 return CSR_AMDGPU_NoRegs_RegMask;
431}
432
433const TargetRegisterClass *
434SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
435 const MachineFunction &MF) const {
436 // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
437 // equivalent AV class. If one were used here, the verifier would crash after
438 // RegBankSelect in the GISel flow, since the aligned regclasses are not fully
439 // available until instruction selection.
440 if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
441 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
442 return &AMDGPU::AV_32RegClass;
443 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
444 return &AMDGPU::AV_64RegClass;
445 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
446 RC == &AMDGPU::AReg_64_Align2RegClass)
447 return &AMDGPU::AV_64_Align2RegClass;
448 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
449 return &AMDGPU::AV_96RegClass;
450 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
451 RC == &AMDGPU::AReg_96_Align2RegClass)
452 return &AMDGPU::AV_96_Align2RegClass;
453 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
454 return &AMDGPU::AV_128RegClass;
455 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
456 RC == &AMDGPU::AReg_128_Align2RegClass)
457 return &AMDGPU::AV_128_Align2RegClass;
458 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
459 return &AMDGPU::AV_160RegClass;
460 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
461 RC == &AMDGPU::AReg_160_Align2RegClass)
462 return &AMDGPU::AV_160_Align2RegClass;
463 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
464 return &AMDGPU::AV_192RegClass;
465 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
466 RC == &AMDGPU::AReg_192_Align2RegClass)
467 return &AMDGPU::AV_192_Align2RegClass;
468 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
469 return &AMDGPU::AV_256RegClass;
470 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
471 RC == &AMDGPU::AReg_256_Align2RegClass)
472 return &AMDGPU::AV_256_Align2RegClass;
473 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
474 return &AMDGPU::AV_512RegClass;
475 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
476 RC == &AMDGPU::AReg_512_Align2RegClass)
477 return &AMDGPU::AV_512_Align2RegClass;
478 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
479 return &AMDGPU::AV_1024RegClass;
480 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
481 RC == &AMDGPU::AReg_1024_Align2RegClass)
482 return &AMDGPU::AV_1024_Align2RegClass;
483 }
484
485 return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
486}
487
488Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
489 const SIFrameLowering *TFI = ST.getFrameLowering();
490 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
491 // During ISel lowering we always reserve the stack pointer in entry
492 // functions, but never actually want to reference it when accessing our own
493 // frame. If we need a frame pointer we use it, but otherwise we can just use
494 // an immediate "0" which we represent by returning NoRegister.
495 if (FuncInfo->isEntryFunction()) {
496 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
497 }
498 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
499 : FuncInfo->getStackPtrOffsetReg();
500}
501
502bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
503 // When we need stack realignment, we can't reference off of the
504 // stack pointer, so we reserve a base pointer.
505 const MachineFrameInfo &MFI = MF.getFrameInfo();
506 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
507}
508
509Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
510
511const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
512 return AMDGPU_AllVGPRs_RegMask;
513}
514
515const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
516 return AMDGPU_AllAGPRs_RegMask;
517}
518
519const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
520 return AMDGPU_AllVectorRegs_RegMask;
521}
522
523const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
524 return AMDGPU_AllAllocatableSRegs_RegMask;
525}
526
527unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
528 unsigned NumRegs) {
529 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
530 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
531 assert(NumRegIndex && "Not implemented");
532 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
533 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
534}
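// For instance, getSubRegFromChannel(2, 2) returns the sub2_sub3 index and
// getSubRegFromChannel(4, 1) returns sub4.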
535
536MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
537 const MachineFunction &MF) const {
538 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
539 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
540 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
541}
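// E.g. with a 104-SGPR budget this returns s[100:103], the highest 4-aligned
// SGPR quad below the limit.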
542
543BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
544 BitVector Reserved(getNumRegs());
545 Reserved.set(AMDGPU::MODE);
546
547 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
548
549 // Reserve special purpose registers.
550 //
551 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
552 // this seems likely to result in bugs, so I'm marking them as reserved.
553 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
554 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
555
556 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
557 reserveRegisterTuples(Reserved, AMDGPU::M0);
558
559 // Reserve src_vccz, src_execz, src_scc.
560 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
561 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
562 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
563
564 // Reserve the memory aperture registers
565 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
566 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
567 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
568 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
569
570 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
571 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
572
573 // Reserve xnack_mask registers - support is not implemented in Codegen.
574 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
575
576 // Reserve lds_direct register - support is not implemented in Codegen.
577 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
578
579 // Reserve Trap Handler registers - support is not implemented in Codegen.
580 reserveRegisterTuples(Reserved, AMDGPU::TBA);
581 reserveRegisterTuples(Reserved, AMDGPU::TMA);
582 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
583 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
584 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
585 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
586 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
587 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
588 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
589 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
590
591 // Reserve null register - it shall never be allocated
592 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
593
594 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
595 // will result in bugs.
596 if (isWave32) {
597 Reserved.set(AMDGPU::VCC);
598 Reserved.set(AMDGPU::VCC_HI);
599 }
600
601 // Reserve SGPRs.
602 //
603 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
604 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
605 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
606 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
607 reserveRegisterTuples(Reserved, Reg);
608 }
609
610 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
611 if (ScratchRSrcReg != AMDGPU::NoRegister) {
612 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
613 // need to spill.
614 // TODO: May need to reserve a VGPR if doing LDS spilling.
615 reserveRegisterTuples(Reserved, ScratchRSrcReg);
616 }
617
618 // We have to assume the SP is needed in case there are calls in the function,
619 // which is detected after the function is lowered. If we aren't really going
620 // to need SP, don't bother reserving it.
621 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
622 if (StackPtrReg) {
623 reserveRegisterTuples(Reserved, StackPtrReg);
624 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
625 }
626
627 MCRegister FrameReg = MFI->getFrameOffsetReg();
628 if (FrameReg) {
629 reserveRegisterTuples(Reserved, FrameReg);
630 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
631 }
632
633 if (hasBasePointer(MF)) {
634 MCRegister BasePtrReg = getBaseRegister();
635 reserveRegisterTuples(Reserved, BasePtrReg);
636 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
637 }
638
639 // Reserve VGPRs/AGPRs.
640 //
641 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
642 unsigned MaxNumAGPRs = MaxNumVGPRs;
643 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
644
645 // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
646 // a wave may have up to 512 total vector registers combining together both
647 // VGPRs and AGPRs. Hence, in an entry function without calls and without
648 // AGPRs used within it, it is possible to use the whole vector register
649 // budget for VGPRs.
650 //
651 // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
652 // register file accordingly.
653 if (ST.hasGFX90AInsts()) {
654 if (MFI->usesAGPRs(MF)) {
655 MaxNumVGPRs /= 2;
656 MaxNumAGPRs = MaxNumVGPRs;
657 } else {
658 if (MaxNumVGPRs > TotalNumVGPRs) {
659 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
660 MaxNumVGPRs = TotalNumVGPRs;
661 } else
662 MaxNumAGPRs = 0;
663 }
664 }
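// E.g. with a 256-register vector budget on gfx90a: if the function uses
// AGPRs the split is 128 VGPRs / 128 AGPRs; otherwise all 256 registers are
// usable as VGPRs and no AGPRs are allocatable.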
665
666 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
667 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
668 reserveRegisterTuples(Reserved, Reg);
669 }
670
671 if (ST.hasMAIInsts()) {
672 for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
673 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
674 reserveRegisterTuples(Reserved, Reg);
675 }
676 } else {
677 // Reserve all the AGPRs if there are no instructions to use them.
678 for (MCRegister Reg : AMDGPU::AGPR_32RegClass)
679 reserveRegisterTuples(Reserved, Reg);
680 }
681
682 // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
683 // VGPR available at all times.
684 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
685 reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
686 }
687
688 for (Register Reg : MFI->getWWMReservedRegs())
689 reserveRegisterTuples(Reserved, Reg);
690
691 // FIXME: Stop using reserved registers for this.
692 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
693 reserveRegisterTuples(Reserved, Reg);
694
695 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
696 reserveRegisterTuples(Reserved, Reg);
697
698 for (auto Reg : MFI->getSGPRSpillVGPRs())
699 reserveRegisterTuples(Reserved, Reg);
700
701 return Reserved;
702}
703
704bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
705 MCRegister PhysReg) const {
706 return !MF.getRegInfo().isReserved(PhysReg);
707}
708
709bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
710 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
711 // On entry, the base address is 0, so it can't possibly need any more
712 // alignment.
713
714 // FIXME: Should be able to specify the entry frame alignment per calling
715 // convention instead.
716 if (Info->isEntryFunction())
717 return false;
718
719 return TargetRegisterInfo::shouldRealignStack(MF);
720}
721
722bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
723 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
724 if (Info->isEntryFunction()) {
725 const MachineFrameInfo &MFI = Fn.getFrameInfo();
726 return MFI.hasStackObjects() || MFI.hasCalls();
727 }
728
729 // May need scavenger for dealing with callee saved registers.
730 return true;
731}
732
733bool SIRegisterInfo::requiresFrameIndexScavenging(
734 const MachineFunction &MF) const {
735 // Do not use frame virtual registers. They used to be used for SGPRs, but
736 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
737 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
738 // spill.
739 return false;
740}
741
742bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
743 const MachineFunction &MF) const {
744 const MachineFrameInfo &MFI = MF.getFrameInfo();
745 return MFI.hasStackObjects();
746}
747
748bool SIRegisterInfo::requiresVirtualBaseRegisters(
749 const MachineFunction &) const {
750 // There are no special dedicated stack or frame pointers.
751 return true;
752}
753
754int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
756
757 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
758 AMDGPU::OpName::offset);
759 return MI->getOperand(OffIdx).getImm();
760}
761
762int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
763 int Idx) const {
764 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
765 return 0;
766
767 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
768 AMDGPU::OpName::vaddr) ||
769 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
770 AMDGPU::OpName::saddr))) &&
771 "Should never see frame index on non-address operand");
772
774}
775
776bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
777 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
778 return false;
779
780 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
781
783 return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
784
785 const SIInstrInfo *TII = ST.getInstrInfo();
786 return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
787 SIInstrFlags::FlatScratch);
788}
789
790Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
791 int FrameIdx,
792 int64_t Offset) const {
793 MachineBasicBlock::iterator Ins = MBB->begin();
794 DebugLoc DL; // Defaults to "unknown"
795
796 if (Ins != MBB->end())
797 DL = Ins->getDebugLoc();
798
799 MachineFunction *MF = MBB->getParent();
800 const SIInstrInfo *TII = ST.getInstrInfo();
801 MachineRegisterInfo &MRI = MF->getRegInfo();
802 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
803 : AMDGPU::V_MOV_B32_e32;
804
805 Register BaseReg = MRI.createVirtualRegister(
806 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
807 : &AMDGPU::VGPR_32RegClass);
808
809 if (Offset == 0) {
810 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
811 .addFrameIndex(FrameIdx);
812 return BaseReg;
813 }
814
815 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
816
817 Register FIReg = MRI.createVirtualRegister(
818 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
819 : &AMDGPU::VGPR_32RegClass);
820
821 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
822 .addImm(Offset);
823 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
824 .addFrameIndex(FrameIdx);
825
826 if (ST.enableFlatScratch() ) {
827 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
828 .addReg(OffsetReg, RegState::Kill)
829 .addReg(FIReg);
830 return BaseReg;
831 }
832
833 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
834 .addReg(OffsetReg, RegState::Kill)
835 .addReg(FIReg)
836 .addImm(0); // clamp bit
837
838 return BaseReg;
839}
840
841void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
842 int64_t Offset) const {
843 const SIInstrInfo *TII = ST.getInstrInfo();
844 bool IsFlat = TII->isFLATScratch(MI);
845
846#ifndef NDEBUG
847 // FIXME: Is it possible to be storing a frame index to itself?
848 bool SeenFI = false;
849 for (const MachineOperand &MO: MI.operands()) {
850 if (MO.isFI()) {
851 if (SeenFI)
852 llvm_unreachable("should not see multiple frame indices");
853
854 SeenFI = true;
855 }
856 }
857#endif
858
859 MachineOperand *FIOp =
860 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
861 : AMDGPU::OpName::vaddr);
862
863 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
864 int64_t NewOffset = OffsetOp->getImm() + Offset;
865
866 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
867 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
868
869 if (IsFlat) {
870 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
871 SIInstrFlags::FlatScratch) &&
872 "offset should be legal");
873 FIOp->ChangeToRegister(BaseReg, false);
874 OffsetOp->setImm(NewOffset);
875 return;
876 }
877
878#ifndef NDEBUG
879 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
880 assert(SOffset->isImm() && SOffset->getImm() == 0);
881#endif
882
883 assert(SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
884 "offset should be legal");
885
886 FIOp->ChangeToRegister(BaseReg, false);
887 OffsetOp->setImm(NewOffset);
888}
889
890bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
891 Register BaseReg,
892 int64_t Offset) const {
893 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
894 return false;
895
896 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
897
899 return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
900
901 const SIInstrInfo *TII = ST.getInstrInfo();
902 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
903 SIInstrFlags::FlatScratch);
904}
905
906const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
907 const MachineFunction &MF, unsigned Kind) const {
908 // This is inaccurate. It depends on the instruction and address space. The
909 // only place where we should hit this is for dealing with frame indexes /
910 // private accesses, so this is correct in that case.
911 return &AMDGPU::VGPR_32RegClass;
912}
913
914const TargetRegisterClass *
915SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
916 if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
917 return getEquivalentVGPRClass(RC);
918 if (RC == &AMDGPU::SCC_CLASSRegClass)
919 return getWaveMaskRegClass();
920
921 return RC;
922}
923
924static unsigned getNumSubRegsForSpillOp(unsigned Op) {
925
926 switch (Op) {
927 case AMDGPU::SI_SPILL_S1024_SAVE:
928 case AMDGPU::SI_SPILL_S1024_RESTORE:
929 case AMDGPU::SI_SPILL_V1024_SAVE:
930 case AMDGPU::SI_SPILL_V1024_RESTORE:
931 case AMDGPU::SI_SPILL_A1024_SAVE:
932 case AMDGPU::SI_SPILL_A1024_RESTORE:
933 case AMDGPU::SI_SPILL_AV1024_SAVE:
934 case AMDGPU::SI_SPILL_AV1024_RESTORE:
935 return 32;
936 case AMDGPU::SI_SPILL_S512_SAVE:
937 case AMDGPU::SI_SPILL_S512_RESTORE:
938 case AMDGPU::SI_SPILL_V512_SAVE:
939 case AMDGPU::SI_SPILL_V512_RESTORE:
940 case AMDGPU::SI_SPILL_A512_SAVE:
941 case AMDGPU::SI_SPILL_A512_RESTORE:
942 case AMDGPU::SI_SPILL_AV512_SAVE:
943 case AMDGPU::SI_SPILL_AV512_RESTORE:
944 return 16;
945 case AMDGPU::SI_SPILL_S384_SAVE:
946 case AMDGPU::SI_SPILL_S384_RESTORE:
947 case AMDGPU::SI_SPILL_V384_SAVE:
948 case AMDGPU::SI_SPILL_V384_RESTORE:
949 case AMDGPU::SI_SPILL_A384_SAVE:
950 case AMDGPU::SI_SPILL_A384_RESTORE:
951 case AMDGPU::SI_SPILL_AV384_SAVE:
952 case AMDGPU::SI_SPILL_AV384_RESTORE:
953 return 12;
954 case AMDGPU::SI_SPILL_S352_SAVE:
955 case AMDGPU::SI_SPILL_S352_RESTORE:
956 case AMDGPU::SI_SPILL_V352_SAVE:
957 case AMDGPU::SI_SPILL_V352_RESTORE:
958 case AMDGPU::SI_SPILL_A352_SAVE:
959 case AMDGPU::SI_SPILL_A352_RESTORE:
960 case AMDGPU::SI_SPILL_AV352_SAVE:
961 case AMDGPU::SI_SPILL_AV352_RESTORE:
962 return 11;
963 case AMDGPU::SI_SPILL_S320_SAVE:
964 case AMDGPU::SI_SPILL_S320_RESTORE:
965 case AMDGPU::SI_SPILL_V320_SAVE:
966 case AMDGPU::SI_SPILL_V320_RESTORE:
967 case AMDGPU::SI_SPILL_A320_SAVE:
968 case AMDGPU::SI_SPILL_A320_RESTORE:
969 case AMDGPU::SI_SPILL_AV320_SAVE:
970 case AMDGPU::SI_SPILL_AV320_RESTORE:
971 return 10;
972 case AMDGPU::SI_SPILL_S288_SAVE:
973 case AMDGPU::SI_SPILL_S288_RESTORE:
974 case AMDGPU::SI_SPILL_V288_SAVE:
975 case AMDGPU::SI_SPILL_V288_RESTORE:
976 case AMDGPU::SI_SPILL_A288_SAVE:
977 case AMDGPU::SI_SPILL_A288_RESTORE:
978 case AMDGPU::SI_SPILL_AV288_SAVE:
979 case AMDGPU::SI_SPILL_AV288_RESTORE:
980 return 9;
981 case AMDGPU::SI_SPILL_S256_SAVE:
982 case AMDGPU::SI_SPILL_S256_RESTORE:
983 case AMDGPU::SI_SPILL_V256_SAVE:
984 case AMDGPU::SI_SPILL_V256_RESTORE:
985 case AMDGPU::SI_SPILL_A256_SAVE:
986 case AMDGPU::SI_SPILL_A256_RESTORE:
987 case AMDGPU::SI_SPILL_AV256_SAVE:
988 case AMDGPU::SI_SPILL_AV256_RESTORE:
989 return 8;
990 case AMDGPU::SI_SPILL_S224_SAVE:
991 case AMDGPU::SI_SPILL_S224_RESTORE:
992 case AMDGPU::SI_SPILL_V224_SAVE:
993 case AMDGPU::SI_SPILL_V224_RESTORE:
994 case AMDGPU::SI_SPILL_A224_SAVE:
995 case AMDGPU::SI_SPILL_A224_RESTORE:
996 case AMDGPU::SI_SPILL_AV224_SAVE:
997 case AMDGPU::SI_SPILL_AV224_RESTORE:
998 return 7;
999 case AMDGPU::SI_SPILL_S192_SAVE:
1000 case AMDGPU::SI_SPILL_S192_RESTORE:
1001 case AMDGPU::SI_SPILL_V192_SAVE:
1002 case AMDGPU::SI_SPILL_V192_RESTORE:
1003 case AMDGPU::SI_SPILL_A192_SAVE:
1004 case AMDGPU::SI_SPILL_A192_RESTORE:
1005 case AMDGPU::SI_SPILL_AV192_SAVE:
1006 case AMDGPU::SI_SPILL_AV192_RESTORE:
1007 return 6;
1008 case AMDGPU::SI_SPILL_S160_SAVE:
1009 case AMDGPU::SI_SPILL_S160_RESTORE:
1010 case AMDGPU::SI_SPILL_V160_SAVE:
1011 case AMDGPU::SI_SPILL_V160_RESTORE:
1012 case AMDGPU::SI_SPILL_A160_SAVE:
1013 case AMDGPU::SI_SPILL_A160_RESTORE:
1014 case AMDGPU::SI_SPILL_AV160_SAVE:
1015 case AMDGPU::SI_SPILL_AV160_RESTORE:
1016 return 5;
1017 case AMDGPU::SI_SPILL_S128_SAVE:
1018 case AMDGPU::SI_SPILL_S128_RESTORE:
1019 case AMDGPU::SI_SPILL_V128_SAVE:
1020 case AMDGPU::SI_SPILL_V128_RESTORE:
1021 case AMDGPU::SI_SPILL_A128_SAVE:
1022 case AMDGPU::SI_SPILL_A128_RESTORE:
1023 case AMDGPU::SI_SPILL_AV128_SAVE:
1024 case AMDGPU::SI_SPILL_AV128_RESTORE:
1025 return 4;
1026 case AMDGPU::SI_SPILL_S96_SAVE:
1027 case AMDGPU::SI_SPILL_S96_RESTORE:
1028 case AMDGPU::SI_SPILL_V96_SAVE:
1029 case AMDGPU::SI_SPILL_V96_RESTORE:
1030 case AMDGPU::SI_SPILL_A96_SAVE:
1031 case AMDGPU::SI_SPILL_A96_RESTORE:
1032 case AMDGPU::SI_SPILL_AV96_SAVE:
1033 case AMDGPU::SI_SPILL_AV96_RESTORE:
1034 return 3;
1035 case AMDGPU::SI_SPILL_S64_SAVE:
1036 case AMDGPU::SI_SPILL_S64_RESTORE:
1037 case AMDGPU::SI_SPILL_V64_SAVE:
1038 case AMDGPU::SI_SPILL_V64_RESTORE:
1039 case AMDGPU::SI_SPILL_A64_SAVE:
1040 case AMDGPU::SI_SPILL_A64_RESTORE:
1041 case AMDGPU::SI_SPILL_AV64_SAVE:
1042 case AMDGPU::SI_SPILL_AV64_RESTORE:
1043 return 2;
1044 case AMDGPU::SI_SPILL_S32_SAVE:
1045 case AMDGPU::SI_SPILL_S32_RESTORE:
1046 case AMDGPU::SI_SPILL_V32_SAVE:
1047 case AMDGPU::SI_SPILL_V32_RESTORE:
1048 case AMDGPU::SI_SPILL_A32_SAVE:
1049 case AMDGPU::SI_SPILL_A32_RESTORE:
1050 case AMDGPU::SI_SPILL_AV32_SAVE:
1051 case AMDGPU::SI_SPILL_AV32_RESTORE:
1052 return 1;
1053 default: llvm_unreachable("Invalid spill opcode");
1054 }
1055}
1056
1057static int getOffsetMUBUFStore(unsigned Opc) {
1058 switch (Opc) {
1059 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1060 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1061 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1062 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1063 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1064 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1065 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1066 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1067 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1068 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1069 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1070 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1071 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1072 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1073 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1074 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1075 default:
1076 return -1;
1077 }
1078}
1079
1080static int getOffsetMUBUFLoad(unsigned Opc) {
1081 switch (Opc) {
1082 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1083 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1084 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1085 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1086 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1087 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1088 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1089 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1090 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1091 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1092 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1093 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1094 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1095 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1096 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1097 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1098 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1099 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1100 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1101 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1102 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1103 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1104 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1105 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1106 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1107 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1108 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1109 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1110 default:
1111 return -1;
1112 }
1113}
1114
1115static int getOffenMUBUFStore(unsigned Opc) {
1116 switch (Opc) {
1117 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1118 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1119 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1120 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1121 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1122 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1123 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1124 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1125 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1126 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1127 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1128 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1129 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1130 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1131 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1132 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1133 default:
1134 return -1;
1135 }
1136}
1137
1138static int getOffenMUBUFLoad(unsigned Opc) {
1139 switch (Opc) {
1140 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1141 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1142 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1143 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1144 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1145 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1146 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1147 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1148 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1149 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1150 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1151 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1152 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1153 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1154 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1155 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1156 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1157 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1158 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1159 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1160 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1161 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1162 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1163 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1164 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1165 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1166 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1167 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1168 default:
1169 return -1;
1170 }
1171}
1172
1173static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
1174 MachineBasicBlock &MBB,
1175 MachineBasicBlock::iterator MI,
1176 int Index, unsigned Lane,
1177 unsigned ValueReg, bool IsKill) {
1178 MachineFunction *MF = MBB.getParent();
1179 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1180 const SIInstrInfo *TII = ST.getInstrInfo();
1181
1182 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
1183
1184 if (Reg == AMDGPU::NoRegister)
1185 return MachineInstrBuilder();
1186
1187 bool IsStore = MI->mayStore();
1188 MachineRegisterInfo &MRI = MF->getRegInfo();
1189 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1190
1191 unsigned Dst = IsStore ? Reg : ValueReg;
1192 unsigned Src = IsStore ? ValueReg : Reg;
1193 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1194 DebugLoc DL = MI->getDebugLoc();
1195 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1196 // Spiller during regalloc may restore a spilled register to its superclass.
1197 // It could result in AGPR spills restored to VGPRs or the other way around,
1198 // leaving the src and dst with identical regclasses at this point. It just
1199 // needs a copy in such cases.
1200 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
1201 .addReg(Src, getKillRegState(IsKill));
1203 return CopyMIB;
1204 }
1205 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1206 : AMDGPU::V_ACCVGPR_READ_B32_e64;
1207
1208 auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
1209 .addReg(Src, getKillRegState(IsKill));
1211 return MIB;
1212}
1213
1214// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
1215// need to handle the case where an SGPR may need to be spilled while spilling.
1216static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
1217 MachineFrameInfo &MFI,
1218 MachineBasicBlock::iterator MI,
1219 int Index,
1220 int64_t Offset) {
1221 const SIInstrInfo *TII = ST.getInstrInfo();
1222 MachineBasicBlock *MBB = MI->getParent();
1223 const DebugLoc &DL = MI->getDebugLoc();
1224 bool IsStore = MI->mayStore();
1225
1226 unsigned Opc = MI->getOpcode();
1227 int LoadStoreOp = IsStore ?
1228 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
1229 if (LoadStoreOp == -1)
1230 return false;
1231
1232 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
1233 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
1234 return true;
1235
1236 MachineInstrBuilder NewMI =
1237 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
1238 .add(*Reg)
1239 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1240 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1241 .addImm(Offset)
1242 .addImm(0) // cpol
1243 .addImm(0) // swz
1244 .cloneMemRefs(*MI);
1245
1246 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
1247 AMDGPU::OpName::vdata_in);
1248 if (VDataIn)
1249 NewMI.add(*VDataIn);
1250 return true;
1251}
1252
1253static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
1254 unsigned LoadStoreOp,
1255 unsigned EltSize) {
1256 bool IsStore = TII->get(LoadStoreOp).mayStore();
1257 bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr);
1258 bool UseST =
1259 !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr);
1260
1261 switch (EltSize) {
1262 case 4:
1263 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1264 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1265 break;
1266 case 8:
1267 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1268 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1269 break;
1270 case 12:
1271 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1272 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1273 break;
1274 case 16:
1275 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1276 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1277 break;
1278 default:
1279 llvm_unreachable("Unexpected spill load/store size!");
1280 }
1281
1282 if (HasVAddr)
1283 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1284 else if (UseST)
1285 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1286
1287 return LoadStoreOp;
1288}
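// For example, a 16-byte element selects the DWORDX4 SADDR form above, which
// is then rewritten to the SV or ST addressing variant when the instruction
// has a VGPR address or no address register at all.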
1289
1290void SIRegisterInfo::buildSpillLoadStore(
1291 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
1292 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1293 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1294 RegScavenger *RS, LivePhysRegs *LiveRegs) const {
1295 assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
1296
1297 MachineFunction *MF = MBB.getParent();
1298 const SIInstrInfo *TII = ST.getInstrInfo();
1299 const MachineFrameInfo &MFI = MF->getFrameInfo();
1300 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1301
1302 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1303 bool IsStore = Desc->mayStore();
1304 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1305
1306 bool CanClobberSCC = false;
1307 bool Scavenged = false;
1308 MCRegister SOffset = ScratchOffsetReg;
1309
1310 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1311 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1312 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
1313 const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
1314
1315 // Always use 4 byte operations for AGPRs because we need to scavenge
1316 // a temporary VGPR.
1317 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1318 unsigned NumSubRegs = RegWidth / EltSize;
1319 unsigned Size = NumSubRegs * EltSize;
1320 unsigned RemSize = RegWidth - Size;
1321 unsigned NumRemSubRegs = RemSize ? 1 : 0;
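// E.g. spilling a 192-bit (24-byte) register via flat scratch: EltSize is 16,
// so NumSubRegs = 1 and RemSize = 8, giving one DWORDX4 access plus one
// DWORDX2 access for the remainder.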
1322 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1323 int64_t MaterializedOffset = Offset;
1324
1325 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1326 int64_t ScratchOffsetRegDelta = 0;
1327
1328 if (IsFlat && EltSize > 4) {
1329 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1330 Desc = &TII->get(LoadStoreOp);
1331 }
1332
1333 Align Alignment = MFI.getObjectAlign(Index);
1334 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1335
1336 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1337 "unexpected VGPR spill offset");
1338
1339 // Track a VGPR to use for a constant offset we need to materialize.
1340 Register TmpOffsetVGPR;
1341
1342 // Track a VGPR to use as an intermediate value.
1343 Register TmpIntermediateVGPR;
1344 bool UseVGPROffset = false;
1345
1346 // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
1347 // combination.
1348 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
1349 int64_t VOffset) {
1350 // We are using a VGPR offset
1351 if (IsFlat && SGPRBase) {
1352 // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free
1353 // SGPR, so perform the add as vector.
1354 // We don't need a base SGPR in the kernel.
1355
1356 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1357 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
1358 .addReg(SGPRBase)
1359 .addImm(VOffset)
1360 .addImm(0); // clamp
1361 } else {
1362 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1363 .addReg(SGPRBase);
1364 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
1365 .addImm(VOffset)
1366 .addReg(TmpOffsetVGPR);
1367 }
1368 } else {
1369 assert(TmpOffsetVGPR);
1370 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1371 .addImm(VOffset);
1372 }
1373 };
1374
1375 bool IsOffsetLegal =
1376 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1377 SIInstrFlags::FlatScratch)
1378 : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
1379 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1380 SOffset = MCRegister();
1381
1382 // We don't have access to the register scavenger if this function is called
1383 // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
1384 // TODO: Clobbering SCC is not necessary for scratch instructions in the
1385 // entry.
1386 if (RS) {
1387 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
1388
1389 // Piggy back on the liveness scan we just did to see if SCC is dead.
1390 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1391 } else if (LiveRegs) {
1392 CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
1393 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1394 if (LiveRegs->available(MF->getRegInfo(), Reg)) {
1395 SOffset = Reg;
1396 break;
1397 }
1398 }
1399 }
1400
1401 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1402 SOffset = Register();
1403
1404 if (!SOffset) {
1405 UseVGPROffset = true;
1406
1407 if (RS) {
1408 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1409 } else {
1410 assert(LiveRegs);
1411 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1412 if (LiveRegs->available(MF->getRegInfo(), Reg)) {
1413 TmpOffsetVGPR = Reg;
1414 break;
1415 }
1416 }
1417 }
1418
1419 assert(TmpOffsetVGPR);
1420 } else if (!SOffset && CanClobberSCC) {
1421 // There are no free SGPRs, and we are in the process of spilling
1422 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
1423 // on SI/CI and on VI it is true until we implement spilling using scalar
1424 // stores), we have no way to free up an SGPR. Our solution here is to
1425 // add the offset directly to the ScratchOffset or StackPtrOffset
1426 // register, and then subtract the offset after the spill to return the
1427 // register to its original value.
1428
1429 // TODO: If we don't have to do an emergency stack slot spill, converting
1430 // to use the VGPR offset is fewer instructions.
1431 if (!ScratchOffsetReg)
1432 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1433 SOffset = ScratchOffsetReg;
1434 ScratchOffsetRegDelta = Offset;
1435 } else {
1436 Scavenged = true;
1437 }
1438
1439 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1440 // we can simplify the adjustment of Offset here to just scale with
1441 // WavefrontSize.
1442 if (!IsFlat && !UseVGPROffset)
1443 Offset *= ST.getWavefrontSize();
1444
1445 if (!UseVGPROffset && !SOffset)
1446 report_fatal_error("could not scavenge SGPR to spill in entry function");
1447
1448 if (UseVGPROffset) {
1449 // We are using a VGPR offset
1450 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1451 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1452 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1453 } else {
1454 assert(Offset != 0);
1455 auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1456 .addReg(ScratchOffsetReg)
1457 .addImm(Offset);
1458 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1459 }
1460
1461 Offset = 0;
1462 }
1463
1464 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1465 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1466 && "Unexpected vaddr for flat scratch with a FI operand");
1467
1468 if (UseVGPROffset) {
1469 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1470 } else {
1472 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1473 }
1474
1475 Desc = &TII->get(LoadStoreOp);
1476 }
1477
1478 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1479 ++i, RegOffset += EltSize) {
1480 if (i == NumSubRegs) {
1481 EltSize = RemSize;
1482 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1483 }
1484 Desc = &TII->get(LoadStoreOp);
1485
1486 if (!IsFlat && UseVGPROffset) {
1487 int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
1488 : getOffenMUBUFLoad(LoadStoreOp);
1489 Desc = &TII->get(NewLoadStoreOp);
1490 }
1491
1492 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1493 // If we are spilling an AGPR beyond the range of the memory instruction
1494 // offset and need to use a VGPR offset, we ideally have at least 2
1495 // scratch VGPRs. If we don't have a second free VGPR without spilling,
1496 // recycle the VGPR used for the offset which requires resetting after
1497 // each subregister.
1498
1499 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1500 }
1501
1502 unsigned NumRegs = EltSize / 4;
1503 Register SubReg = e == 1
1504 ? ValueReg
1505 : Register(getSubReg(ValueReg,
1506 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1507
1508 unsigned SOffsetRegState = 0;
1509 unsigned SrcDstRegState = getDefRegState(!IsStore);
1510 const bool IsLastSubReg = i + 1 == e;
1511 const bool IsFirstSubReg = i == 0;
1512 if (IsLastSubReg) {
1513 SOffsetRegState |= getKillRegState(Scavenged);
1514 // The last implicit use carries the "Kill" flag.
1515 SrcDstRegState |= getKillRegState(IsKill);
1516 }
1517
1518 // Make sure the whole register is defined if there are undef components by
1519 // adding an implicit def of the super-reg on the first instruction.
1520 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1521 bool NeedSuperRegImpOperand = e > 1;
1522
1523 // Remaining element size to spill into memory after some parts of it
1524 // spilled into either AGPRs or VGPRs.
1525 unsigned RemEltSize = EltSize;
1526
1527 // AGPRs used to spill VGPRs, and vice versa, are allocated in reverse
1528 // order, starting from the last lane. If a register cannot be completely
1529 // spilled into another register, this ensures its alignment does not
1530 // change. For targets with a VGPR alignment requirement this is important
1531 // when flat scratch is used, as we might otherwise get a scratch_load or
1532 // scratch_store of an unaligned register.
1533 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1534 LaneE = RegOffset / 4;
1535 Lane >= LaneE; --Lane) {
1536 bool IsSubReg = e > 1 || EltSize > 4;
1537 Register Sub = IsSubReg
1538 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1539 : ValueReg;
1540 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1541 if (!MIB.getInstr())
1542 break;
1543 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1544 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1545 NeedSuperRegDef = false;
1546 }
1547 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1548 NeedSuperRegImpOperand = true;
1549 unsigned State = SrcDstRegState;
1550 if (!IsLastSubReg || (Lane != LaneE))
1551 State &= ~RegState::Kill;
1552 if (!IsFirstSubReg || (Lane != LaneS))
1553 State &= ~RegState::Define;
1554 MIB.addReg(ValueReg, RegState::Implicit | State);
1555 }
1556 RemEltSize -= 4;
1557 }
1558
1559 if (!RemEltSize) // Fully spilled into AGPRs.
1560 continue;
1561
1562 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1563 assert(IsFlat && EltSize > 4);
1564
1565 unsigned NumRegs = RemEltSize / 4;
1566 SubReg = Register(getSubReg(ValueReg,
1567 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1568 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1569 Desc = &TII->get(Opc);
1570 }
1571
1572 unsigned FinalReg = SubReg;
1573
1574 if (IsAGPR) {
1575 assert(EltSize == 4);
1576
1577 if (!TmpIntermediateVGPR) {
1578 TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
1579 assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
1580 }
1581 if (IsStore) {
1582 auto AccRead = BuildMI(MBB, MI, DL,
1583 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1584 TmpIntermediateVGPR)
1585 .addReg(SubReg, getKillRegState(IsKill));
1586 if (NeedSuperRegDef)
1587 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1589 }
1590 SubReg = TmpIntermediateVGPR;
1591 } else if (UseVGPROffset) {
1592 // FIXME: change to scavengeRegisterBackwards()
1593 if (!TmpOffsetVGPR) {
1594 TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1595 RS->setRegUsed(TmpOffsetVGPR);
1596 }
1597 }
1598
1599 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
1600 MachineMemOperand *NewMMO =
1601 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1602 commonAlignment(Alignment, RegOffset));
1603
1604 auto MIB =
1605 BuildMI(MBB, MI, DL, *Desc)
1606 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1607
1608 if (UseVGPROffset) {
1609 // For an AGPR spill, we reuse the same temp VGPR for the offset and the
1610 // intermediate accvgpr_write.
1611 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
1612 }
1613
1614 if (!IsFlat)
1615 MIB.addReg(FuncInfo->getScratchRSrcReg());
1616
1617 if (SOffset == AMDGPU::NoRegister) {
1618 if (!IsFlat) {
1619 if (UseVGPROffset && ScratchOffsetReg) {
1620 MIB.addReg(ScratchOffsetReg);
1621 } else {
1622 assert(FuncInfo->isEntryFunction());
1623 MIB.addImm(0);
1624 }
1625 }
1626 } else {
1627 MIB.addReg(SOffset, SOffsetRegState);
1628 }
1629 MIB.addImm(Offset + RegOffset)
1630 .addImm(0); // cpol
1631 if (!IsFlat)
1632 MIB.addImm(0); // swz
1633 MIB.addMemOperand(NewMMO);
1634
1635 if (!IsAGPR && NeedSuperRegDef)
1636 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1637
1638 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1639 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1640 FinalReg)
1641 .addReg(TmpIntermediateVGPR, RegState::Kill);
1643 }
1644
1645 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1646 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1647
1648 // The epilog restore of a wwm-scratch register can cause undesired
1649 // optimization during machine-cp post PrologEpilogInserter if the same
1650 // register was assigned for return value ABI lowering with a COPY
1651 // instruction. As given below, with the epilog reload, the earlier COPY
1652 // appeared to be dead during machine-cp.
1653 // ...
1654 // v0 in WWM operation, needs the WWM spill at prolog/epilog.
1655 // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0
1656 // ...
1657 // Epilog block:
1658 // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0
1659 // ...
1660 // WWM spill restore to preserve the inactive lanes of v0.
1661 // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1
1662 // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0
1663 // $exec = S_MOV_B64 killed $sgpr4_sgpr5
1664 // ...
1665 // SI_RETURN implicit $vgpr0
1666 // ...
1667 // To fix it, mark the same reg as a tied op for such restore instructions
1668 // so that it marks a usage for the preceding COPY.
1669 if (!IsStore && MI != MBB.end() && MI->isReturn() &&
1670 MI->readsRegister(SubReg, this)) {
1671 MIB.addReg(SubReg, RegState::Implicit);
1672 MIB->tieOperands(0, MIB->getNumOperands() - 1);
1673 }
1674 }
1675
1676 if (ScratchOffsetRegDelta != 0) {
1677 // Subtract the offset we added to the ScratchOffset register.
1678 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1679 .addReg(SOffset)
1680 .addImm(-ScratchOffsetRegDelta);
1681 }
1682}
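// Editorial note (not part of SIRegisterInfo.cpp): the function above splits a
// wide VGPR/AGPR spill into dword-sized memory operations. Below is a minimal
// sketch of the per-piece offset and alignment bookkeeping, assuming a 4-byte
// element size and that commonAlignment() picks the largest power of two
// dividing both the slot alignment and the piece's byte offset. All names in
// the sketch are illustrative.
#include <algorithm>
#include <cstdint>
#include <vector>

struct SpillPiece {
  unsigned SubRegIdx;  // which 32-bit sub-register of the tuple
  int64_t ByteOffset;  // offset of this piece within the stack slot
  uint64_t Align;      // alignment usable for this piece's memory operand
};

// Largest power of two dividing both SlotAlign and Offset (models the
// commonAlignment(Alignment, RegOffset) call used above).
static uint64_t commonAlign(uint64_t SlotAlign, int64_t Offset) {
  if (Offset == 0)
    return SlotAlign;
  return std::min<uint64_t>(SlotAlign, uint64_t(Offset & -Offset));
}

static std::vector<SpillPiece> splitSpill(unsigned RegSizeInBits,
                                          uint64_t SlotAlign) {
  std::vector<SpillPiece> Pieces;
  const unsigned EltSize = 4; // one dword per buffer/scratch instruction
  for (unsigned i = 0, e = RegSizeInBits / 32; i != e; ++i) {
    int64_t Off = int64_t(i) * EltSize;
    Pieces.push_back({i, Off, commonAlign(SlotAlign, Off)});
  }
  return Pieces; // e.g. a 128-bit spill with a 16-byte slot -> aligns 16,4,8,4
}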
1683
1684 void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1685 int Offset, bool IsLoad,
1686 bool IsKill) const {
1687 // Load/store VGPR
1688 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1690
1691 Register FrameReg =
1692 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1693 ? getBaseRegister()
1694 : getFrameRegister(SB.MF);
1695
1696 Align Alignment = FrameInfo.getObjectAlign(Index);
1697 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1698 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
1699 PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1700 SB.EltSize, Alignment);
1701
1702 if (IsLoad) {
1703 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1704 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1705 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
1706 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1707 } else {
1708 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1709 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1710 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
1711 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1712 // This only ever adds one VGPR spill
1713 SB.MFI.addToSpilledVGPRs(1);
1714 }
1715}
1716
1717 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
1718 RegScavenger *RS, SlotIndexes *Indexes,
1719 LiveIntervals *LIS, bool OnlyToVGPR) const {
1720 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1721
1722 ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index);
1723 bool SpillToVGPR = !VGPRSpills.empty();
1724 if (OnlyToVGPR && !SpillToVGPR)
1725 return false;
1726
1727 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1728 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1729
1730 if (SpillToVGPR) {
1731
1732 assert(SB.NumSubRegs == VGPRSpills.size() &&
1733 "Num of VGPR lanes should be equal to num of SGPRs spilled");
1734
1735 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1736 Register SubReg =
1737 SB.NumSubRegs == 1
1738 ? SB.SuperReg
1739 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1740 SpilledReg Spill = VGPRSpills[i];
1741
1742 bool IsFirstSubreg = i == 0;
1743 bool IsLastSubreg = i == SB.NumSubRegs - 1;
1744 bool UseKill = SB.IsKill && IsLastSubreg;
1745
1746
1747 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1748 // spill to this specific vgpr in the first basic block.
1749 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1750 SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
1751 .addReg(SubReg, getKillRegState(UseKill))
1752 .addImm(Spill.Lane)
1753 .addReg(Spill.VGPR);
1754 if (Indexes) {
1755 if (IsFirstSubreg)
1756 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1757 else
1758 Indexes->insertMachineInstrInMaps(*MIB);
1759 }
1760
1761 if (IsFirstSubreg && SB.NumSubRegs > 1) {
1762 // We may be spilling a super-register which is only partially defined,
1763 // and need to ensure later spills think the value is defined.
1764 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1765 }
1766
1767 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1768 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1769
1770 // FIXME: Since this spills to another register instead of an actual
1771 // frame index, we should delete the frame index when all references to
1772 // it are fixed.
1773 }
1774 } else {
1775 SB.prepare();
1776
1777 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1778 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1779
1780 // Per VGPR helper data
1781 auto PVD = SB.getPerVGPRData();
1782
1783 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1784 unsigned TmpVGPRFlags = RegState::Undef;
1785
1786 // Write sub registers into the VGPR
1787 for (unsigned i = Offset * PVD.PerVGPR,
1788 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1789 i < e; ++i) {
1790 Register SubReg =
1791 SB.NumSubRegs == 1
1792 ? SB.SuperReg
1793 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1794
1795 MachineInstrBuilder WriteLane =
1796 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1797 SB.TmpVGPR)
1798 .addReg(SubReg, SubKillState)
1799 .addImm(i % PVD.PerVGPR)
1800 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1801 TmpVGPRFlags = 0;
1802
1803 if (Indexes) {
1804 if (i == 0)
1805 Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
1806 else
1807 Indexes->insertMachineInstrInMaps(*WriteLane);
1808 }
1809
1810 // There could be undef components of a spilled super register.
1811 // TODO: Can we detect this and skip the spill?
1812 if (SB.NumSubRegs > 1) {
1813 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1814 unsigned SuperKillState = 0;
1815 if (i + 1 == SB.NumSubRegs)
1816 SuperKillState |= getKillRegState(SB.IsKill);
1817 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1818 }
1819 }
1820
1821 // Write out VGPR
1822 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1823 }
1824
1825 SB.restore();
1826 }
1827
1828 MI->eraseFromParent();
1829 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
1830
1831 if (LIS)
1832 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1833
1834 return true;
1835}
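// Editorial note (not part of SIRegisterInfo.cpp): when no VGPR lanes were
// pre-assigned, spillSGPR above packs the SGPR tuple into the lanes of one or
// more temporary VGPRs before writing them to scratch. A minimal sketch of the
// lane arithmetic, mirroring what SGPRSpillBuilder::getPerVGPRData() provides
// to the loops above; the helper names here are illustrative.
#include <cstdint>

struct PerVGPRLayout {
  unsigned PerVGPR;  // usable lanes per VGPR == wavefront size (32 or 64)
  unsigned NumVGPRs; // temporary VGPRs needed to hold every sub-register
  int64_t VGPRLanes; // mask of lanes a fully used VGPR occupies
};

static PerVGPRLayout computeLayout(unsigned NumSubRegs, bool IsWave32) {
  PerVGPRLayout L;
  L.PerVGPR = IsWave32 ? 32 : 64;
  L.NumVGPRs = (NumSubRegs + L.PerVGPR - 1) / L.PerVGPR; // ceil division
  L.VGPRLanes = L.PerVGPR == 64 ? ~0LL : (1LL << L.PerVGPR) - 1;
  return L;
}

// Sub-register i is written with V_WRITELANE into VGPR (i / PerVGPR),
// lane (i % PerVGPR) -- matching the `i % PVD.PerVGPR` immediate above.
static unsigned laneFor(unsigned i, const PerVGPRLayout &L) {
  return i % L.PerVGPR;
}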
1836
1837 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
1838 RegScavenger *RS, SlotIndexes *Indexes,
1839 LiveIntervals *LIS, bool OnlyToVGPR) const {
1840 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1841
1842 ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index);
1843 bool SpillToVGPR = !VGPRSpills.empty();
1844 if (OnlyToVGPR && !SpillToVGPR)
1845 return false;
1846
1847 if (SpillToVGPR) {
1848 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1849 Register SubReg =
1850 SB.NumSubRegs == 1
1851 ? SB.SuperReg
1852 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1853
1854 SpilledReg Spill = VGPRSpills[i];
1855 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
1856 SubReg)
1857 .addReg(Spill.VGPR)
1858 .addImm(Spill.Lane);
1859 if (SB.NumSubRegs > 1 && i == 0)
1860 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1861 if (Indexes) {
1862 if (i == e - 1)
1863 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1864 else
1865 Indexes->insertMachineInstrInMaps(*MIB);
1866 }
1867 }
1868 } else {
1869 SB.prepare();
1870
1871 // Per VGPR helper data
1872 auto PVD = SB.getPerVGPRData();
1873
1874 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1875 // Load in VGPR data
1876 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1877
1878 // Unpack lanes
1879 for (unsigned i = Offset * PVD.PerVGPR,
1880 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1881 i < e; ++i) {
1882 Register SubReg =
1883 SB.NumSubRegs == 1
1884 ? SB.SuperReg
1885 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1886
1887 bool LastSubReg = (i + 1 == e);
1888 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1889 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1890 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1891 .addImm(i);
1892 if (SB.NumSubRegs > 1 && i == 0)
1893 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1894 if (Indexes) {
1895 if (i == e - 1)
1896 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1897 else
1898 Indexes->insertMachineInstrInMaps(*MIB);
1899 }
1900 }
1901 }
1902
1903 SB.restore();
1904 }
1905
1906 MI->eraseFromParent();
1907
1908 if (LIS)
1909 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1910
1911 return true;
1912}
1913
1914 bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
1915 MachineBasicBlock &RestoreMBB,
1916 Register SGPR, RegScavenger *RS) const {
1917 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
1918 RS);
1919 SB.prepare();
1920 // Generate the spill of SGPR to SB.TmpVGPR.
1921 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1922 auto PVD = SB.getPerVGPRData();
1923 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1924 unsigned TmpVGPRFlags = RegState::Undef;
1925 // Write sub registers into the VGPR
1926 for (unsigned i = Offset * PVD.PerVGPR,
1927 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1928 i < e; ++i) {
1929 Register SubReg =
1930 SB.NumSubRegs == 1
1931 ? SB.SuperReg
1932 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1933
1934 MachineInstrBuilder WriteLane =
1935 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1936 SB.TmpVGPR)
1937 .addReg(SubReg, SubKillState)
1938 .addImm(i % PVD.PerVGPR)
1939 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1940 TmpVGPRFlags = 0;
1941 // There could be undef components of a spilled super register.
1942 // TODO: Can we detect this and skip the spill?
1943 if (SB.NumSubRegs > 1) {
1944 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1945 unsigned SuperKillState = 0;
1946 if (i + 1 == SB.NumSubRegs)
1947 SuperKillState |= getKillRegState(SB.IsKill);
1948 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1949 }
1950 }
1951 // Don't need to write VGPR out.
1952 }
1953
1954 // Restore clobbered registers in the specified restore block.
1955 MI = RestoreMBB.end();
1956 SB.setMI(&RestoreMBB, MI);
1957 // Generate the restore of SGPR from SB.TmpVGPR.
1958 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1959 // Don't need to load VGPR in.
1960 // Unpack lanes
1961 for (unsigned i = Offset * PVD.PerVGPR,
1962 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1963 i < e; ++i) {
1964 Register SubReg =
1965 SB.NumSubRegs == 1
1966 ? SB.SuperReg
1967 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1968 bool LastSubReg = (i + 1 == e);
1969 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
1970 SubReg)
1971 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1972 .addImm(i);
1973 if (SB.NumSubRegs > 1 && i == 0)
1974 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1975 }
1976 }
1977 SB.restore();
1978
1980 return false;
1981}
1982
1983/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1984/// a VGPR and the stack slot can be safely eliminated when all other users are
1985/// handled.
1986 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1987 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
1988 SlotIndexes *Indexes, LiveIntervals *LIS) const {
1989 switch (MI->getOpcode()) {
1990 case AMDGPU::SI_SPILL_S1024_SAVE:
1991 case AMDGPU::SI_SPILL_S512_SAVE:
1992 case AMDGPU::SI_SPILL_S384_SAVE:
1993 case AMDGPU::SI_SPILL_S352_SAVE:
1994 case AMDGPU::SI_SPILL_S320_SAVE:
1995 case AMDGPU::SI_SPILL_S288_SAVE:
1996 case AMDGPU::SI_SPILL_S256_SAVE:
1997 case AMDGPU::SI_SPILL_S224_SAVE:
1998 case AMDGPU::SI_SPILL_S192_SAVE:
1999 case AMDGPU::SI_SPILL_S160_SAVE:
2000 case AMDGPU::SI_SPILL_S128_SAVE:
2001 case AMDGPU::SI_SPILL_S96_SAVE:
2002 case AMDGPU::SI_SPILL_S64_SAVE:
2003 case AMDGPU::SI_SPILL_S32_SAVE:
2004 return spillSGPR(MI, FI, RS, Indexes, LIS, true);
2005 case AMDGPU::SI_SPILL_S1024_RESTORE:
2006 case AMDGPU::SI_SPILL_S512_RESTORE:
2007 case AMDGPU::SI_SPILL_S384_RESTORE:
2008 case AMDGPU::SI_SPILL_S352_RESTORE:
2009 case AMDGPU::SI_SPILL_S320_RESTORE:
2010 case AMDGPU::SI_SPILL_S288_RESTORE:
2011 case AMDGPU::SI_SPILL_S256_RESTORE:
2012 case AMDGPU::SI_SPILL_S224_RESTORE:
2013 case AMDGPU::SI_SPILL_S192_RESTORE:
2014 case AMDGPU::SI_SPILL_S160_RESTORE:
2015 case AMDGPU::SI_SPILL_S128_RESTORE:
2016 case AMDGPU::SI_SPILL_S96_RESTORE:
2017 case AMDGPU::SI_SPILL_S64_RESTORE:
2018 case AMDGPU::SI_SPILL_S32_RESTORE:
2019 return restoreSGPR(MI, FI, RS, Indexes, LIS, true);
2020 default:
2021 llvm_unreachable("not an SGPR spill instruction");
2022 }
2023}
2024
2025 bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
2026 int SPAdj, unsigned FIOperandNum,
2027 RegScavenger *RS) const {
2028 MachineFunction *MF = MI->getParent()->getParent();
2029 MachineBasicBlock *MBB = MI->getParent();
2030 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2031 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2032 const SIInstrInfo *TII = ST.getInstrInfo();
2033 DebugLoc DL = MI->getDebugLoc();
2034
2035 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2036
2037 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
2038 int Index = MI->getOperand(FIOperandNum).getIndex();
2039
2040 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
2041 ? getBaseRegister()
2042 : getFrameRegister(*MF);
2043
2044 switch (MI->getOpcode()) {
2045 // SGPR register spill
2046 case AMDGPU::SI_SPILL_S1024_SAVE:
2047 case AMDGPU::SI_SPILL_S512_SAVE:
2048 case AMDGPU::SI_SPILL_S384_SAVE:
2049 case AMDGPU::SI_SPILL_S352_SAVE:
2050 case AMDGPU::SI_SPILL_S320_SAVE:
2051 case AMDGPU::SI_SPILL_S288_SAVE:
2052 case AMDGPU::SI_SPILL_S256_SAVE:
2053 case AMDGPU::SI_SPILL_S224_SAVE:
2054 case AMDGPU::SI_SPILL_S192_SAVE:
2055 case AMDGPU::SI_SPILL_S160_SAVE:
2056 case AMDGPU::SI_SPILL_S128_SAVE:
2057 case AMDGPU::SI_SPILL_S96_SAVE:
2058 case AMDGPU::SI_SPILL_S64_SAVE:
2059 case AMDGPU::SI_SPILL_S32_SAVE: {
2060 return spillSGPR(MI, Index, RS);
2061 }
2062
2063 // SGPR register restore
2064 case AMDGPU::SI_SPILL_S1024_RESTORE:
2065 case AMDGPU::SI_SPILL_S512_RESTORE:
2066 case AMDGPU::SI_SPILL_S384_RESTORE:
2067 case AMDGPU::SI_SPILL_S352_RESTORE:
2068 case AMDGPU::SI_SPILL_S320_RESTORE:
2069 case AMDGPU::SI_SPILL_S288_RESTORE:
2070 case AMDGPU::SI_SPILL_S256_RESTORE:
2071 case AMDGPU::SI_SPILL_S224_RESTORE:
2072 case AMDGPU::SI_SPILL_S192_RESTORE:
2073 case AMDGPU::SI_SPILL_S160_RESTORE:
2074 case AMDGPU::SI_SPILL_S128_RESTORE:
2075 case AMDGPU::SI_SPILL_S96_RESTORE:
2076 case AMDGPU::SI_SPILL_S64_RESTORE:
2077 case AMDGPU::SI_SPILL_S32_RESTORE: {
2078 return restoreSGPR(MI, Index, RS);
2079 }
2080
2081 // VGPR register spill
2082 case AMDGPU::SI_SPILL_V1024_SAVE:
2083 case AMDGPU::SI_SPILL_V512_SAVE:
2084 case AMDGPU::SI_SPILL_V384_SAVE:
2085 case AMDGPU::SI_SPILL_V352_SAVE:
2086 case AMDGPU::SI_SPILL_V320_SAVE:
2087 case AMDGPU::SI_SPILL_V288_SAVE:
2088 case AMDGPU::SI_SPILL_V256_SAVE:
2089 case AMDGPU::SI_SPILL_V224_SAVE:
2090 case AMDGPU::SI_SPILL_V192_SAVE:
2091 case AMDGPU::SI_SPILL_V160_SAVE:
2092 case AMDGPU::SI_SPILL_V128_SAVE:
2093 case AMDGPU::SI_SPILL_V96_SAVE:
2094 case AMDGPU::SI_SPILL_V64_SAVE:
2095 case AMDGPU::SI_SPILL_V32_SAVE:
2096 case AMDGPU::SI_SPILL_A1024_SAVE:
2097 case AMDGPU::SI_SPILL_A512_SAVE:
2098 case AMDGPU::SI_SPILL_A384_SAVE:
2099 case AMDGPU::SI_SPILL_A352_SAVE:
2100 case AMDGPU::SI_SPILL_A320_SAVE:
2101 case AMDGPU::SI_SPILL_A288_SAVE:
2102 case AMDGPU::SI_SPILL_A256_SAVE:
2103 case AMDGPU::SI_SPILL_A224_SAVE:
2104 case AMDGPU::SI_SPILL_A192_SAVE:
2105 case AMDGPU::SI_SPILL_A160_SAVE:
2106 case AMDGPU::SI_SPILL_A128_SAVE:
2107 case AMDGPU::SI_SPILL_A96_SAVE:
2108 case AMDGPU::SI_SPILL_A64_SAVE:
2109 case AMDGPU::SI_SPILL_A32_SAVE:
2110 case AMDGPU::SI_SPILL_AV1024_SAVE:
2111 case AMDGPU::SI_SPILL_AV512_SAVE:
2112 case AMDGPU::SI_SPILL_AV384_SAVE:
2113 case AMDGPU::SI_SPILL_AV352_SAVE:
2114 case AMDGPU::SI_SPILL_AV320_SAVE:
2115 case AMDGPU::SI_SPILL_AV288_SAVE:
2116 case AMDGPU::SI_SPILL_AV256_SAVE:
2117 case AMDGPU::SI_SPILL_AV224_SAVE:
2118 case AMDGPU::SI_SPILL_AV192_SAVE:
2119 case AMDGPU::SI_SPILL_AV160_SAVE:
2120 case AMDGPU::SI_SPILL_AV128_SAVE:
2121 case AMDGPU::SI_SPILL_AV96_SAVE:
2122 case AMDGPU::SI_SPILL_AV64_SAVE:
2123 case AMDGPU::SI_SPILL_AV32_SAVE: {
2124 const MachineOperand *VData = TII->getNamedOperand(*MI,
2125 AMDGPU::OpName::vdata);
2126 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2127 MFI->getStackPtrOffsetReg());
2128
2129 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2130 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2131 auto *MBB = MI->getParent();
2132 buildSpillLoadStore(
2133 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2134 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2135 *MI->memoperands_begin(), RS);
2136 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
2137 MI->eraseFromParent();
2138 return true;
2139 }
2140 case AMDGPU::SI_SPILL_V32_RESTORE:
2141 case AMDGPU::SI_SPILL_V64_RESTORE:
2142 case AMDGPU::SI_SPILL_V96_RESTORE:
2143 case AMDGPU::SI_SPILL_V128_RESTORE:
2144 case AMDGPU::SI_SPILL_V160_RESTORE:
2145 case AMDGPU::SI_SPILL_V192_RESTORE:
2146 case AMDGPU::SI_SPILL_V224_RESTORE:
2147 case AMDGPU::SI_SPILL_V256_RESTORE:
2148 case AMDGPU::SI_SPILL_V288_RESTORE:
2149 case AMDGPU::SI_SPILL_V320_RESTORE:
2150 case AMDGPU::SI_SPILL_V352_RESTORE:
2151 case AMDGPU::SI_SPILL_V384_RESTORE:
2152 case AMDGPU::SI_SPILL_V512_RESTORE:
2153 case AMDGPU::SI_SPILL_V1024_RESTORE:
2154 case AMDGPU::SI_SPILL_A32_RESTORE:
2155 case AMDGPU::SI_SPILL_A64_RESTORE:
2156 case AMDGPU::SI_SPILL_A96_RESTORE:
2157 case AMDGPU::SI_SPILL_A128_RESTORE:
2158 case AMDGPU::SI_SPILL_A160_RESTORE:
2159 case AMDGPU::SI_SPILL_A192_RESTORE:
2160 case AMDGPU::SI_SPILL_A224_RESTORE:
2161 case AMDGPU::SI_SPILL_A256_RESTORE:
2162 case AMDGPU::SI_SPILL_A288_RESTORE:
2163 case AMDGPU::SI_SPILL_A320_RESTORE:
2164 case AMDGPU::SI_SPILL_A352_RESTORE:
2165 case AMDGPU::SI_SPILL_A384_RESTORE:
2166 case AMDGPU::SI_SPILL_A512_RESTORE:
2167 case AMDGPU::SI_SPILL_A1024_RESTORE:
2168 case AMDGPU::SI_SPILL_AV32_RESTORE:
2169 case AMDGPU::SI_SPILL_AV64_RESTORE:
2170 case AMDGPU::SI_SPILL_AV96_RESTORE:
2171 case AMDGPU::SI_SPILL_AV128_RESTORE:
2172 case AMDGPU::SI_SPILL_AV160_RESTORE:
2173 case AMDGPU::SI_SPILL_AV192_RESTORE:
2174 case AMDGPU::SI_SPILL_AV224_RESTORE:
2175 case AMDGPU::SI_SPILL_AV256_RESTORE:
2176 case AMDGPU::SI_SPILL_AV288_RESTORE:
2177 case AMDGPU::SI_SPILL_AV320_RESTORE:
2178 case AMDGPU::SI_SPILL_AV352_RESTORE:
2179 case AMDGPU::SI_SPILL_AV384_RESTORE:
2180 case AMDGPU::SI_SPILL_AV512_RESTORE:
2181 case AMDGPU::SI_SPILL_AV1024_RESTORE: {
2182 const MachineOperand *VData = TII->getNamedOperand(*MI,
2183 AMDGPU::OpName::vdata);
2184 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2185 MFI->getStackPtrOffsetReg());
2186
2187 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2188 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2189 auto *MBB = MI->getParent();
2190 buildSpillLoadStore(
2191 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2192 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2193 *MI->memoperands_begin(), RS);
2194 MI->eraseFromParent();
2195 return true;
2196 }
2197
2198 default: {
2199 // Other access to frame index
2200 const DebugLoc &DL = MI->getDebugLoc();
2201
2202 int64_t Offset = FrameInfo.getObjectOffset(Index);
2203 if (ST.enableFlatScratch()) {
2204 if (TII->isFLATScratch(*MI)) {
2205 assert((int16_t)FIOperandNum ==
2206 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2207 AMDGPU::OpName::saddr));
2208
2209 // The offset is always swizzled, just replace it
2210 if (FrameReg)
2211 FIOp.ChangeToRegister(FrameReg, false);
2212
2213 if (!Offset)
2214 return false;
2215
2216 MachineOperand *OffsetOp =
2217 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2218 int64_t NewOffset = Offset + OffsetOp->getImm();
2219 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
2220 SIInstrFlags::FlatScratch)) {
2221 OffsetOp->setImm(NewOffset);
2222 if (FrameReg)
2223 return false;
2224 Offset = 0;
2225 }
2226
2227 if (!Offset) {
2228 unsigned Opc = MI->getOpcode();
2229 int NewOpc = -1;
2230 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
2231 NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
2232 } else if (ST.hasFlatScratchSTMode()) {
2233 // On GFX10 we have ST mode to use no registers for an address.
2234 // Otherwise we need to materialize 0 into an SGPR.
2235 NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
2236 }
2237
2238 if (NewOpc != -1) {
2239 // removeOperand doesn't fixup tied operand indexes as it goes, so
2240 // it asserts. Untie vdst_in for now and retie them afterwards.
2241 int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
2242 AMDGPU::OpName::vdst_in);
2243 bool TiedVDst = VDstIn != -1 &&
2244 MI->getOperand(VDstIn).isReg() &&
2245 MI->getOperand(VDstIn).isTied();
2246 if (TiedVDst)
2247 MI->untieRegOperand(VDstIn);
2248
2249 MI->removeOperand(
2250 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2251
2252 if (TiedVDst) {
2253 int NewVDst =
2254 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2255 int NewVDstIn =
2256 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2257 assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2258 MI->tieOperands(NewVDst, NewVDstIn);
2259 }
2260 MI->setDesc(TII->get(NewOpc));
2261 return false;
2262 }
2263 }
2264 }
2265
2266 if (!FrameReg) {
2267 FIOp.ChangeToImmediate(Offset);
2268 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2269 return false;
2270 }
2271
2272 // We need to use a register here. Check if we can use an SGPR or need
2273 // a VGPR.
2274 FIOp.ChangeToRegister(AMDGPU::M0, false);
2275 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2276
2277 if (!Offset && FrameReg && UseSGPR) {
2278 FIOp.setReg(FrameReg);
2279 return false;
2280 }
2281
2282 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
2283 : &AMDGPU::VGPR_32RegClass;
2284
2285 Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
2286 FIOp.setReg(TmpReg);
2287 FIOp.setIsKill();
2288
2289 if ((!FrameReg || !Offset) && TmpReg) {
2290 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2291 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
2292 if (FrameReg)
2293 MIB.addReg(FrameReg);
2294 else
2295 MIB.addImm(Offset);
2296
2297 return false;
2298 }
2299
2300 bool NeedSaveSCC =
2301 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2302
2303 Register TmpSReg =
2304 UseSGPR ? TmpReg
2305 : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
2306 !UseSGPR);
2307
2308 // TODO: for flat scratch another attempt can be made with a VGPR index
2309 // if no SGPRs can be scavenged.
2310 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2311 report_fatal_error("Cannot scavenge register in FI elimination!");
2312
2313 if (!TmpSReg) {
2314 // Use frame register and restore it after.
2315 TmpSReg = FrameReg;
2316 FIOp.setReg(FrameReg);
2317 FIOp.setIsKill(false);
2318 }
2319
2320 if (NeedSaveSCC) {
2321 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2322 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
2323 .addReg(FrameReg)
2324 .addImm(Offset);
2325 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
2326 .addReg(TmpSReg)
2327 .addImm(0);
2328 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
2329 .addImm(0)
2330 .addReg(TmpSReg);
2331 } else {
2332 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
2333 .addReg(FrameReg)
2334 .addImm(Offset);
2335 }
2336
2337 if (!UseSGPR)
2338 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2339 .addReg(TmpSReg, RegState::Kill);
2340
2341 if (TmpSReg == FrameReg) {
2342 // Undo frame register modification.
2343 if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
2344 MachineBasicBlock::iterator I =
2345 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
2346 TmpSReg)
2347 .addReg(FrameReg)
2348 .addImm(-Offset);
2349 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
2350 .addReg(TmpSReg)
2351 .addImm(0);
2352 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
2353 TmpSReg)
2354 .addImm(0)
2355 .addReg(TmpSReg);
2356 } else {
2357 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
2358 FrameReg)
2359 .addReg(FrameReg)
2360 .addImm(-Offset);
2361 }
2362 }
2363
2364 return false;
2365 }
2366
2367 bool IsMUBUF = TII->isMUBUF(*MI);
2368
2369 if (!IsMUBUF && !MFI->isEntryFunction()) {
2370 // Convert to a swizzled stack address by scaling by the wave size.
2371 // In an entry function/kernel the offset is already swizzled.
2372 bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
2373 bool LiveSCC =
2374 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2375 const TargetRegisterClass *RC = IsSALU && !LiveSCC
2376 ? &AMDGPU::SReg_32RegClass
2377 : &AMDGPU::VGPR_32RegClass;
2378 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2379 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2380 Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
2381 : RS->scavengeRegister(RC, MI, 0);
2382
2383 int64_t Offset = FrameInfo.getObjectOffset(Index);
2384 if (Offset == 0) {
2385 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2386 : AMDGPU::V_LSHRREV_B32_e64;
2387 // XXX - This never happens because of emergency scavenging slot at 0?
2388 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg)
2389 .addImm(ST.getWavefrontSizeLog2())
2390 .addReg(FrameReg);
2391 if (IsSALU && !LiveSCC)
2392 Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
2393 if (IsSALU && LiveSCC) {
2394 Register NewDest =
2395 RS->scavengeRegister(&AMDGPU::SReg_32RegClass, Shift, 0);
2396 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
2397 NewDest)
2398 .addReg(ResultReg);
2399 ResultReg = NewDest;
2400 }
2401 } else {
2402 MachineInstrBuilder MIB;
2403 if (!IsSALU) {
2404 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2405 nullptr) {
2406 // Reuse ResultReg in intermediate step.
2407 Register ScaledReg = ResultReg;
2408
2409 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
2410 ScaledReg)
2411 .addImm(ST.getWavefrontSizeLog2())
2412 .addReg(FrameReg);
2413
2414 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2415
2416 // TODO: Fold if use instruction is another add of a constant.
2417 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
2418 // FIXME: This can fail
2419 MIB.addImm(Offset);
2420 MIB.addReg(ScaledReg, RegState::Kill);
2421 if (!IsVOP2)
2422 MIB.addImm(0); // clamp bit
2423 } else {
2424 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
2425 "Need to reuse carry out register");
2426
2427 // Use scavenged unused carry out as offset register.
2428 Register ConstOffsetReg;
2429 if (!isWave32)
2430 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2431 else
2432 ConstOffsetReg = MIB.getReg(1);
2433
2434 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
2435 .addImm(Offset);
2436 MIB.addReg(ConstOffsetReg, RegState::Kill);
2437 MIB.addReg(ScaledReg, RegState::Kill);
2438 MIB.addImm(0); // clamp bit
2439 }
2440 }
2441 }
2442 if (!MIB || IsSALU) {
2443 // We have to produce a carry out, and there isn't a free SGPR pair
2444 // for it. We can keep the whole computation on the SALU to avoid
2445 // clobbering an additional register at the cost of an extra mov.
2446
2447 // We may have 1 free scratch SGPR even though a carry out is
2448 // unavailable. Only one additional mov is needed.
2449 Register TmpScaledReg =
2450 RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
2451 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2452
2453 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
2454 .addReg(FrameReg)
2455 .addImm(ST.getWavefrontSizeLog2());
2456 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2457 .addReg(ScaledReg, RegState::Kill)
2458 .addImm(Offset);
2459 if (!IsSALU)
2460 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
2461 .addReg(ScaledReg, RegState::Kill);
2462 else
2463 ResultReg = ScaledReg;
2464
2465 // If there were truly no free SGPRs, we need to undo everything.
2466 if (!TmpScaledReg.isValid()) {
2467 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2468 .addReg(ScaledReg, RegState::Kill)
2469 .addImm(-Offset);
2470 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
2471 .addReg(FrameReg)
2472 .addImm(ST.getWavefrontSizeLog2());
2473 }
2474 }
2475 }
2476
2477 // Don't introduce an extra copy if we're just materializing in a mov.
2478 if (IsCopy) {
2479 MI->eraseFromParent();
2480 return true;
2481 }
2482 FIOp.ChangeToRegister(ResultReg, false, false, true);
2483 return false;
2484 }
2485
2486 if (IsMUBUF) {
2487 // Disable offen so we don't need a 0 vgpr base.
2488 assert(static_cast<int>(FIOperandNum) ==
2489 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2490 AMDGPU::OpName::vaddr));
2491
2492 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2493 assert((SOffset.isImm() && SOffset.getImm() == 0));
2494
2495 if (FrameReg != AMDGPU::NoRegister)
2496 SOffset.ChangeToRegister(FrameReg, false);
2497
2498 int64_t Offset = FrameInfo.getObjectOffset(Index);
2499 int64_t OldImm
2500 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2501 int64_t NewOffset = OldImm + Offset;
2502
2503 if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
2504 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
2505 MI->eraseFromParent();
2506 return true;
2507 }
2508 }
2509
2510 // If the offset is simply too big, don't convert to a scratch wave offset
2511 // relative index.
2512
2513 FIOp.ChangeToImmediate(Offset);
2514 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2515 Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
2516 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2517 .addImm(Offset);
2518 FIOp.ChangeToRegister(TmpReg, false, false, true);
2519 }
2520 }
2521 }
2522 return false;
2523}
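// Editorial note (not part of SIRegisterInfo.cpp): in a non-entry function the
// frame register holds an unswizzled, wave-scaled byte offset, so the default
// case above first shifts it right by log2(wavefront size) and then adds the
// object's frame offset. A minimal model of that address computation, with
// FrameRegVal standing in for the runtime value of the frame register:
#include <cstdint>

static uint32_t swizzledFrameAddress(uint32_t FrameRegVal, int32_t ObjectOffset,
                                     unsigned WavefrontSizeLog2) {
  // V_LSHRREV_B32 / S_LSHR_B32 step: divide out the wave size.
  uint32_t PerLaneBase = FrameRegVal >> WavefrontSizeLog2;
  // V_ADD / S_ADD_I32 step: add the compile-time object offset.
  return PerLaneBase + uint32_t(ObjectOffset);
}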
2524
2525 StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
2526 return AMDGPUInstPrinter::getRegisterName(Reg);
2527}
2528
2529static const TargetRegisterClass *
2530 getAnyVGPRClassForBitWidth(unsigned BitWidth) {
2531 if (BitWidth == 64)
2532 return &AMDGPU::VReg_64RegClass;
2533 if (BitWidth == 96)
2534 return &AMDGPU::VReg_96RegClass;
2535 if (BitWidth == 128)
2536 return &AMDGPU::VReg_128RegClass;
2537 if (BitWidth == 160)
2538 return &AMDGPU::VReg_160RegClass;
2539 if (BitWidth == 192)
2540 return &AMDGPU::VReg_192RegClass;
2541 if (BitWidth == 224)
2542 return &AMDGPU::VReg_224RegClass;
2543 if (BitWidth == 256)
2544 return &AMDGPU::VReg_256RegClass;
2545 if (BitWidth == 288)
2546 return &AMDGPU::VReg_288RegClass;
2547 if (BitWidth == 320)
2548 return &AMDGPU::VReg_320RegClass;
2549 if (BitWidth == 352)
2550 return &AMDGPU::VReg_352RegClass;
2551 if (BitWidth == 384)
2552 return &AMDGPU::VReg_384RegClass;
2553 if (BitWidth == 512)
2554 return &AMDGPU::VReg_512RegClass;
2555 if (BitWidth == 1024)
2556 return &AMDGPU::VReg_1024RegClass;
2557
2558 return nullptr;
2559}
2560
2561static const TargetRegisterClass *
2562 getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
2563 if (BitWidth == 64)
2564 return &AMDGPU::VReg_64_Align2RegClass;
2565 if (BitWidth == 96)
2566 return &AMDGPU::VReg_96_Align2RegClass;
2567 if (BitWidth == 128)
2568 return &AMDGPU::VReg_128_Align2RegClass;
2569 if (BitWidth == 160)
2570 return &AMDGPU::VReg_160_Align2RegClass;
2571 if (BitWidth == 192)
2572 return &AMDGPU::VReg_192_Align2RegClass;
2573 if (BitWidth == 224)
2574 return &AMDGPU::VReg_224_Align2RegClass;
2575 if (BitWidth == 256)
2576 return &AMDGPU::VReg_256_Align2RegClass;
2577 if (BitWidth == 288)
2578 return &AMDGPU::VReg_288_Align2RegClass;
2579 if (BitWidth == 320)
2580 return &AMDGPU::VReg_320_Align2RegClass;
2581 if (BitWidth == 352)
2582 return &AMDGPU::VReg_352_Align2RegClass;
2583 if (BitWidth == 384)
2584 return &AMDGPU::VReg_384_Align2RegClass;
2585 if (BitWidth == 512)
2586 return &AMDGPU::VReg_512_Align2RegClass;
2587 if (BitWidth == 1024)
2588 return &AMDGPU::VReg_1024_Align2RegClass;
2589
2590 return nullptr;
2591}
2592
2593const TargetRegisterClass *
2594 SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
2595 if (BitWidth == 1)
2596 return &AMDGPU::VReg_1RegClass;
2597 if (BitWidth == 16)
2598 return &AMDGPU::VGPR_LO16RegClass;
2599 if (BitWidth == 32)
2600 return &AMDGPU::VGPR_32RegClass;
2601 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
2602 : getAnyVGPRClassForBitWidth(BitWidth);
2603}
2604
2605static const TargetRegisterClass *
2606 getAnyAGPRClassForBitWidth(unsigned BitWidth) {
2607 if (BitWidth == 64)
2608 return &AMDGPU::AReg_64RegClass;
2609 if (BitWidth == 96)
2610 return &AMDGPU::AReg_96RegClass;
2611 if (BitWidth == 128)
2612 return &AMDGPU::AReg_128RegClass;
2613 if (BitWidth == 160)
2614 return &AMDGPU::AReg_160RegClass;
2615 if (BitWidth == 192)
2616 return &AMDGPU::AReg_192RegClass;
2617 if (BitWidth == 224)
2618 return &AMDGPU::AReg_224RegClass;
2619 if (BitWidth == 256)
2620 return &AMDGPU::AReg_256RegClass;
2621 if (BitWidth == 288)
2622 return &AMDGPU::AReg_288RegClass;
2623 if (BitWidth == 320)
2624 return &AMDGPU::AReg_320RegClass;
2625 if (BitWidth == 352)
2626 return &AMDGPU::AReg_352RegClass;
2627 if (BitWidth == 384)
2628 return &AMDGPU::AReg_384RegClass;
2629 if (BitWidth == 512)
2630 return &AMDGPU::AReg_512RegClass;
2631 if (BitWidth == 1024)
2632 return &AMDGPU::AReg_1024RegClass;
2633
2634 return nullptr;
2635}
2636
2637static const TargetRegisterClass *
2638 getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
2639 if (BitWidth == 64)
2640 return &AMDGPU::AReg_64_Align2RegClass;
2641 if (BitWidth == 96)
2642 return &AMDGPU::AReg_96_Align2RegClass;
2643 if (BitWidth == 128)
2644 return &AMDGPU::AReg_128_Align2RegClass;
2645 if (BitWidth == 160)
2646 return &AMDGPU::AReg_160_Align2RegClass;
2647 if (BitWidth == 192)
2648 return &AMDGPU::AReg_192_Align2RegClass;
2649 if (BitWidth == 224)
2650 return &AMDGPU::AReg_224_Align2RegClass;
2651 if (BitWidth == 256)
2652 return &AMDGPU::AReg_256_Align2RegClass;
2653 if (BitWidth == 288)
2654 return &AMDGPU::AReg_288_Align2RegClass;
2655 if (BitWidth == 320)
2656 return &AMDGPU::AReg_320_Align2RegClass;
2657 if (BitWidth == 352)
2658 return &AMDGPU::AReg_352_Align2RegClass;
2659 if (BitWidth == 384)
2660 return &AMDGPU::AReg_384_Align2RegClass;
2661 if (BitWidth == 512)
2662 return &AMDGPU::AReg_512_Align2RegClass;
2663 if (BitWidth == 1024)
2664 return &AMDGPU::AReg_1024_Align2RegClass;
2665
2666 return nullptr;
2667}
2668
2669const TargetRegisterClass *
2670 SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
2671 if (BitWidth == 16)
2672 return &AMDGPU::AGPR_LO16RegClass;
2673 if (BitWidth == 32)
2674 return &AMDGPU::AGPR_32RegClass;
2675 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
2676 : getAnyAGPRClassForBitWidth(BitWidth);
2677}
2678
2679static const TargetRegisterClass *
2680 getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
2681 if (BitWidth == 64)
2682 return &AMDGPU::AV_64RegClass;
2683 if (BitWidth == 96)
2684 return &AMDGPU::AV_96RegClass;
2685 if (BitWidth == 128)
2686 return &AMDGPU::AV_128RegClass;
2687 if (BitWidth == 160)
2688 return &AMDGPU::AV_160RegClass;
2689 if (BitWidth == 192)
2690 return &AMDGPU::AV_192RegClass;
2691 if (BitWidth == 224)
2692 return &AMDGPU::AV_224RegClass;
2693 if (BitWidth == 256)
2694 return &AMDGPU::AV_256RegClass;
2695 if (BitWidth == 288)
2696 return &AMDGPU::AV_288RegClass;
2697 if (BitWidth == 320)
2698 return &AMDGPU::AV_320RegClass;
2699 if (BitWidth == 352)
2700 return &AMDGPU::AV_352RegClass;
2701 if (BitWidth == 384)
2702 return &AMDGPU::AV_384RegClass;
2703 if (BitWidth == 512)
2704 return &AMDGPU::AV_512RegClass;
2705 if (BitWidth == 1024)
2706 return &AMDGPU::AV_1024RegClass;
2707
2708 return nullptr;
2709}
2710
2711static const TargetRegisterClass *
2712 getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
2713 if (BitWidth == 64)
2714 return &AMDGPU::AV_64_Align2RegClass;
2715 if (BitWidth == 96)
2716 return &AMDGPU::AV_96_Align2RegClass;
2717 if (BitWidth == 128)
2718 return &AMDGPU::AV_128_Align2RegClass;
2719 if (BitWidth == 160)
2720 return &AMDGPU::AV_160_Align2RegClass;
2721 if (BitWidth == 192)
2722 return &AMDGPU::AV_192_Align2RegClass;
2723 if (BitWidth == 224)
2724 return &AMDGPU::AV_224_Align2RegClass;
2725 if (BitWidth == 256)
2726 return &AMDGPU::AV_256_Align2RegClass;
2727 if (BitWidth == 288)
2728 return &AMDGPU::AV_288_Align2RegClass;
2729 if (BitWidth == 320)
2730 return &AMDGPU::AV_320_Align2RegClass;
2731 if (BitWidth == 352)
2732 return &AMDGPU::AV_352_Align2RegClass;
2733 if (BitWidth == 384)
2734 return &AMDGPU::AV_384_Align2RegClass;
2735 if (BitWidth == 512)
2736 return &AMDGPU::AV_512_Align2RegClass;
2737 if (BitWidth == 1024)
2738 return &AMDGPU::AV_1024_Align2RegClass;
2739
2740 return nullptr;
2741}
2742
2743const TargetRegisterClass *
2744 SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
2745 if (BitWidth == 16)
2746 return &AMDGPU::VGPR_LO16RegClass;
2747 if (BitWidth == 32)
2748 return &AMDGPU::AV_32RegClass;
2749 return ST.needsAlignedVGPRs()
2750 ? getAlignedVectorSuperClassForBitWidth(BitWidth)
2751 : getAnyVectorSuperClassForBitWidth(BitWidth);
2752}
2753
2754const TargetRegisterClass *
2755 SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
2756 if (BitWidth == 16)
2757 return &AMDGPU::SGPR_LO16RegClass;
2758 if (BitWidth == 32)
2759 return &AMDGPU::SReg_32RegClass;
2760 if (BitWidth == 64)
2761 return &AMDGPU::SReg_64RegClass;
2762 if (BitWidth == 96)
2763 return &AMDGPU::SGPR_96RegClass;
2764 if (BitWidth == 128)
2765 return &AMDGPU::SGPR_128RegClass;
2766 if (BitWidth == 160)
2767 return &AMDGPU::SGPR_160RegClass;
2768 if (BitWidth == 192)
2769 return &AMDGPU::SGPR_192RegClass;
2770 if (BitWidth == 224)
2771 return &AMDGPU::SGPR_224RegClass;
2772 if (BitWidth == 256)
2773 return &AMDGPU::SGPR_256RegClass;
2774 if (BitWidth == 288)
2775 return &AMDGPU::SGPR_288RegClass;
2776 if (BitWidth == 320)
2777 return &AMDGPU::SGPR_320RegClass;
2778 if (BitWidth == 352)
2779 return &AMDGPU::SGPR_352RegClass;
2780 if (BitWidth == 384)
2781 return &AMDGPU::SGPR_384RegClass;
2782 if (BitWidth == 512)
2783 return &AMDGPU::SGPR_512RegClass;
2784 if (BitWidth == 1024)
2785 return &AMDGPU::SGPR_1024RegClass;
2786
2787 return nullptr;
2788}
2789
2790 bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2791 Register Reg) const {
2792 const TargetRegisterClass *RC;
2793 if (Reg.isVirtual())
2794 RC = MRI.getRegClass(Reg);
2795 else
2796 RC = getPhysRegBaseClass(Reg);
2797 return RC ? isSGPRClass(RC) : false;
2798}
2799
2800const TargetRegisterClass *
2801 SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2802 unsigned Size = getRegSizeInBits(*SRC);
2803 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2804 assert(VRC && "Invalid register class size");
2805 return VRC;
2806}
2807
2808const TargetRegisterClass *
2809 SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2810 unsigned Size = getRegSizeInBits(*SRC);
2811 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2812 assert(ARC && "Invalid register class size");
2813 return ARC;
2814}
2815
2816const TargetRegisterClass *
2817 SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2818 unsigned Size = getRegSizeInBits(*VRC);
2819 if (Size == 32)
2820 return &AMDGPU::SGPR_32RegClass;
2821 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2822 assert(SRC && "Invalid register class size");
2823 return SRC;
2824}
2825
2826const TargetRegisterClass *
2827 SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2828 const TargetRegisterClass *SubRC,
2829 unsigned SubIdx) const {
2830 // Ensure this subregister index is aligned in the super register.
2831 const TargetRegisterClass *MatchRC =
2832 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2833 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2834}
2835
2836bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2837 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2838 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
2839 return !ST.hasMFMAInlineLiteralBug();
2840
2841 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2842 OpType <= AMDGPU::OPERAND_SRC_LAST;
2843}
2844
2845 bool SIRegisterInfo::shouldRewriteCopySrc(
2846 const TargetRegisterClass *DefRC,
2847 unsigned DefSubReg,
2848 const TargetRegisterClass *SrcRC,
2849 unsigned SrcSubReg) const {
2850 // We want to prefer the smallest register class possible, so we don't want to
2851 // stop and rewrite on anything that looks like a subregister
2852 // extract. Operations mostly don't care about the super register class, so we
2853 // only want to stop on the most basic of copies between the same register
2854 // class.
2855 //
2856 // e.g. if we have something like
2857 // %0 = ...
2858 // %1 = ...
2859 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2860 // %3 = COPY %2, sub0
2861 //
2862 // We want to look through the COPY to find:
2863 // => %3 = COPY %0
2864
2865 // Plain copy.
2866 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2867}
2868
2869bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2870 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2871 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2872 OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2873}
2874
2875 /// Returns the lowest register that is not used at any point in the function.
2876 /// If all registers are used, this function returns
2877 /// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, the highest unused
2878 /// register is returned instead.
2879 MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
2880 const TargetRegisterClass *RC,
2881 const MachineFunction &MF,
2882 bool ReserveHighestVGPR) const {
2883 if (ReserveHighestVGPR) {
2884 for (MCRegister Reg : reverse(*RC))
2885 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2886 return Reg;
2887 } else {
2888 for (MCRegister Reg : *RC)
2889 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2890 return Reg;
2891 }
2892 return MCRegister();
2893}
2894
2895 bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
2896 const RegisterBankInfo &RBI,
2897 Register Reg) const {
2898 auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
2899 if (!RB)
2900 return false;
2901
2902 return !RBI.isDivergentRegBank(RB);
2903}
2904
2905 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2906 unsigned EltSize) const {
2907 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
2908 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2909
2910 const unsigned RegDWORDs = RegBitWidth / 32;
2911 const unsigned EltDWORDs = EltSize / 4;
2912 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2913
2914 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2915 const unsigned NumParts = RegDWORDs / EltDWORDs;
2916
2917 return ArrayRef(Parts.data(), NumParts);
2918}
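// Editorial note (not part of SIRegisterInfo.cpp): the table lookup above
// returns one sub-register index per EltSize-byte piece of the register. A
// minimal sketch of the index arithmetic, assuming RegBitWidth and EltSize are
// already valid for the register class being split:
#include <cassert>

static unsigned numSplitParts(unsigned RegBitWidth, unsigned EltSize) {
  assert(RegBitWidth >= 32 && RegBitWidth % 32 == 0 && EltSize % 4 == 0);
  const unsigned RegDWORDs = RegBitWidth / 32; // 32-bit channels in the reg
  const unsigned EltDWORDs = EltSize / 4;      // channels covered per piece
  // RegSplitParts[EltDWORDs - 1] holds the sub-register indices; only the
  // first RegDWORDs / EltDWORDs of them apply to this register.
  return RegDWORDs / EltDWORDs; // e.g. a 256-bit register in 64-bit pieces -> 4
}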
2919
2920 const TargetRegisterClass *
2921 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
2922 Register Reg) const {
2923 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
2924}
2925
2926const TargetRegisterClass *
2928 const MachineOperand &MO) const {
2929 const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
2930 return getSubRegisterClass(SrcRC, MO.getSubReg());
2931}
2932
2934 Register Reg) const {
2935 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2936 // Registers without classes are unaddressable, SGPR-like registers.
2937 return RC && isVGPRClass(RC);
2938}
2939
2941 Register Reg) const {
2942 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2943
2944 // Registers without classes are unaddressable, SGPR-like registers.
2945 return RC && isAGPRClass(RC);
2946}
2947
2949 const TargetRegisterClass *SrcRC,
2950 unsigned SubReg,
2951 const TargetRegisterClass *DstRC,
2952 unsigned DstSubReg,
2953 const TargetRegisterClass *NewRC,
2954 LiveIntervals &LIS) const {
2955 unsigned SrcSize = getRegSizeInBits(*SrcRC);
2956 unsigned DstSize = getRegSizeInBits(*DstRC);
2957 unsigned NewSize = getRegSizeInBits(*NewRC);
2958
2959 // Do not increase the size of registers beyond a dword; we would need to
2960 // allocate adjacent registers and constrain regalloc more than needed.
2961
2962 // Always allow dword coalescing.
2963 if (SrcSize <= 32 || DstSize <= 32)
2964 return true;
2965
2966 return NewSize <= DstSize || NewSize <= SrcSize;
2967}
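// Editorial note (not part of SIRegisterInfo.cpp): the coalescing policy above
// keeps register allocation from being over-constrained by wide tuples. A
// minimal restatement of the decision, using only the three sizes involved:
static bool allowCoalesce(unsigned SrcSize, unsigned DstSize, unsigned NewSize) {
  // Dword (or smaller) coalescing is always allowed.
  if (SrcSize <= 32 || DstSize <= 32)
    return true;
  // Otherwise, never let the coalesced register grow beyond both inputs.
  return NewSize <= DstSize || NewSize <= SrcSize;
}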
2968
2969 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
2970 MachineFunction &MF) const {
2971 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2972
2973 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
2974 MF.getFunction());
2975 switch (RC->getID()) {
2976 default:
2977 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
2978 case AMDGPU::VGPR_32RegClassID:
2979 case AMDGPU::VGPR_LO16RegClassID:
2980 case AMDGPU::VGPR_HI16RegClassID:
2981 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
2982 case AMDGPU::SGPR_32RegClassID:
2983 case AMDGPU::SGPR_LO16RegClassID:
2984 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
2985 }
2986}
2987
2988 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
2989 unsigned Idx) const {
2990 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
2991 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
2992 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
2993 const_cast<MachineFunction &>(MF));
2994
2995 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
2996 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
2997 const_cast<MachineFunction &>(MF));
2998
2999 llvm_unreachable("Unexpected register pressure set!");
3000}
3001
3002const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
3003 static const int Empty[] = { -1 };
3004
3005 if (RegPressureIgnoredUnits[RegUnit])
3006 return Empty;
3007
3008 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3009}
3010
3011 MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
3012 // Not a callee saved register.
3013 return AMDGPU::SGPR30_SGPR31;
3014}
3015
3016const TargetRegisterClass *
3017 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
3018 const RegisterBank &RB) const {
3019 switch (RB.getID()) {
3020 case AMDGPU::VGPRRegBankID:
3021 return getVGPRClassForBitWidth(std::max(32u, Size));
3022 case AMDGPU::VCCRegBankID:
3023 assert(Size == 1);
3024 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3025 : &AMDGPU::SReg_64_XEXECRegClass;
3026 case AMDGPU::SGPRRegBankID:
3027 return getSGPRClassForBitWidth(std::max(32u, Size));
3028 case AMDGPU::AGPRRegBankID:
3029 return getAGPRClassForBitWidth(std::max(32u, Size));
3030 default:
3031 llvm_unreachable("unknown register bank");
3032 }
3033}
3034
3035const TargetRegisterClass *
3036 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
3037 const MachineRegisterInfo &MRI) const {
3038 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
3039 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
3040 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
3041
3042 if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
3043 return getAllocatableClass(RC);
3044
3045 return nullptr;
3046}
3047
3048 Register SIRegisterInfo::getVCC() const {
3049 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3050}
3051
3052 Register SIRegisterInfo::getExec() const {
3053 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3054}
3055
3056 const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
3057 // VGPR tuples have an alignment requirement on gfx90a variants.
3058 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3059 : &AMDGPU::VReg_64RegClass;
3060}
3061
3062const TargetRegisterClass *
3063SIRegisterInfo::getRegClass(unsigned RCID) const {
3064 switch ((int)RCID) {
3065 case AMDGPU::SReg_1RegClassID:
3066 return getBoolRC();
3067 case AMDGPU::SReg_1_XEXECRegClassID:
3068 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3069 : &AMDGPU::SReg_64_XEXECRegClass;
3070 case -1:
3071 return nullptr;
3072 default:
3073 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3074 }
3075}
3076
3077// Find reaching register definition
3078 MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
3079 MachineInstr &Use,
3080 MachineRegisterInfo &MRI,
3081 LiveIntervals *LIS) const {
3082 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
3083 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
3084 SlotIndex DefIdx;
3085
3086 if (Reg.isVirtual()) {
3087 if (!LIS->hasInterval(Reg))
3088 return nullptr;
3089 LiveInterval &LI = LIS->getInterval(Reg);
3090 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
3091 : MRI.getMaxLaneMaskForVReg(Reg);
3092 VNInfo *V = nullptr;
3093 if (LI.hasSubRanges()) {
3094 for (auto &S : LI.subranges()) {
3095 if ((S.LaneMask & SubLanes) == SubLanes) {
3096 V = S.getVNInfoAt(UseIdx);
3097 break;
3098 }
3099 }
3100 } else {
3101 V = LI.getVNInfoAt(UseIdx);
3102 }
3103 if (!V)
3104 return nullptr;
3105 DefIdx = V->def;
3106 } else {
3107 // Find last def.
3108 for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid();
3109 ++Units) {
3110 LiveRange &LR = LIS->getRegUnit(*Units);
3111 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
3112 if (!DefIdx.isValid() ||
3113 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
3114 LIS->getInstructionFromIndex(V->def)))
3115 DefIdx = V->def;
3116 } else {
3117 return nullptr;
3118 }
3119 }
3120 }
3121
3122 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
3123
3124 if (!Def || !MDT.dominates(Def, &Use))
3125 return nullptr;
3126
3127 assert(Def->modifiesRegister(Reg, this));
3128
3129 return Def;
3130}
3131
3132 MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
3133 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3134
3135 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
3136 AMDGPU::SReg_32RegClass,
3137 AMDGPU::AGPR_32RegClass } ) {
3138 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3139 return Super;
3140 }
3141 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3142 &AMDGPU::VGPR_32RegClass)) {
3143 return Super;
3144 }
3145
3146 return AMDGPU::NoRegister;
3147}
3148
3149 bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
3150 if (!ST.needsAlignedVGPRs())
3151 return true;
3152
3153 if (isVGPRClass(&RC))
3154 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
3155 if (isAGPRClass(&RC))
3156 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
3157 if (isVectorSuperClass(&RC))
3158 return RC.hasSuperClassEq(
3159 getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
3160
3161 return true;
3162}
3163
3164const TargetRegisterClass *
3165 SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
3166 if (!RC || !ST.needsAlignedVGPRs())
3167 return RC;
3168
3169 unsigned Size = getRegSizeInBits(*RC);
3170 if (Size <= 32)
3171 return RC;
3172
3173 if (isVGPRClass(RC))
3174 return getAlignedVGPRClassForBitWidth(Size);
3175 if (isAGPRClass(RC))
3176 return getAlignedAGPRClassForBitWidth(Size);
3177 if (isVectorSuperClass(RC))
3178 return getAlignedVectorSuperClassForBitWidth(Size);
3179
3180 return RC;
3181}
3182
3185 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
3186}
3187
3190 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
3191}
3192
3195 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
3196}
3197
3198unsigned
3200 unsigned SubReg) const {
3201 switch (RC->TSFlags & SIRCFlags::RegKindMask) {
3202 case SIRCFlags::HasSGPR:
3203 return std::min(128u, getSubRegIdxSize(SubReg));
3204 case SIRCFlags::HasAGPR:
3205 case SIRCFlags::HasVGPR:
3207 return std::min(32u, getSubRegIdxSize(SubReg));
3208 default:
3209 break;
3210 }
3211 return 0;
3212}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:237
bool hasGFX90AInsts() const
bool hasMAIInsts() const
Definition: GCNSubtarget.h:754
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:998
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:228
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
Definition: GCNSubtarget.h:610
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:232
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:600
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:803
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:775
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:50
bool contains(MCPhysReg Reg) const
Returns true if register Reg is contained in the set.
Definition: LivePhysRegs.h:107
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:67
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
Definition: MachineInstr.h:342
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
Definition: PointerUnion.h:118
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available and do the appropriate bookkeeping.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
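A hedged sketch of the scavenger interface above, as a prologue/epilogue-style pass might use it to obtain a temporary register near an insertion point; the decision not to allow spilling is illustrative:

  #include "llvm/CodeGen/RegisterScavenging.h"
  #include "llvm/CodeGen/TargetRegisterInfo.h"

  using namespace llvm;

  static Register getScratchReg(RegScavenger &RS,
                                const TargetRegisterClass &RC,
                                MachineBasicBlock::iterator InsertPt) {
    // Scan backwards from InsertPt for a free register of RC; with
    // AllowSpill = false this returns an invalid Register instead of evicting.
    Register Tmp = RS.scavengeRegisterBackwards(RC, InsertPt,
                                                /*RestoreAfter=*/false,
                                                /*SPAdj=*/0,
                                                /*AllowSpill=*/false);
    if (Tmp.isValid())
      RS.setRegUsed(Tmp); // later isRegUsed() queries will now see it
    return Tmp;
  }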
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isValid() const
Definition: Register.h:126
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
Definition: SIInstrInfo.h:570
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:1165
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:494
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
ArrayRef< Register > getSGPRSpillVGPRs() const
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if the spill can be lowered entirely to VGPR lanes, without touching scratch memory.
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LivePhysRegs *LiveRegs=nullptr) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr) const
Special case of eliminateFrameIndex.
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false) const
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
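As one concrete use of the SIRegisterInfo queries above, a small sketch (assuming the target-private SIRegisterInfo.h header, as included by this file) that picks the vector class of matching width for a scalar class:

  #include "SIRegisterInfo.h" // target-private header, as used by this file

  using namespace llvm;

  static const TargetRegisterClass *
  vectorEquivalent(const SIRegisterInfo &TRI, const TargetRegisterClass *RC) {
    // SGPR classes map to the VGPR class of the same bit width; vector
    // classes (VGPR/AGPR) are returned unchanged.
    if (SIRegisterInfo::isSGPRClass(RC))
      return TRI.getEquivalentVGPRClass(RC);
    return RC;
  }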
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:152
SlotIndexes pass.
Definition: SlotIndexes.h:319
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:540
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by register allocat...
Definition: SlotIndexes.h:597
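A minimal sketch of keeping a preserved SlotIndexes analysis consistent when an instruction is rewritten, using the two maintenance hooks above; the null check reflects that the analysis may simply not be available:

  #include "llvm/CodeGen/SlotIndexes.h"

  using namespace llvm;

  static void maintainIndexes(SlotIndexes *Indexes, MachineInstr *OldMI,
                              MachineInstr &NewMI) {
    if (!Indexes)
      return; // analysis not preserved; nothing to update
    if (OldMI)
      Indexes->replaceMachineInstrInMaps(*OldMI, NewMI); // inherit OldMI's index
    else
      Indexes->insertMachineInstrInMaps(NewMI);          // brand-new instruction
  }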
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
const MCRegisterClass * MC
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
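A short sketch of the class-relation predicates above; the helper name is hypothetical, and the comment notes the equivalent way of asking the same containment question from the other side:

  #include "llvm/CodeGen/TargetRegisterInfo.h"

  using namespace llvm;

  static bool fitsInClass(const TargetRegisterClass *RC,
                          const TargetRegisterClass *ContainerRC) {
    // Every register of RC is also a register of ContainerRC exactly when
    // ContainerRC is a super-class of (or equal to) RC; equivalently,
    // ContainerRC->hasSubClassEq(RC) expresses the same condition.
    return RC->hasSuperClassEq(ContainerRC);
  }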
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same...
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal cal...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:394
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
@ OPERAND_REG_IMM_FIRST
Definition: SIDefines.h:213
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:222
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:219
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:220
@ OPERAND_REG_IMM_LAST
Definition: SIDefines.h:214
@ OPERAND_SRC_LAST
Definition: SIDefines.h:223
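A hedged sketch of the named-operand lookup above, assuming the target-private SIInstrInfo.h header that declares AMDGPU::getNamedOperandIdx and a NamedIdx such as AMDGPU::OpName::offset:

  #include "SIInstrInfo.h" // target-private; declares AMDGPU::getNamedOperandIdx
  #include "llvm/CodeGen/MachineInstr.h"
  #include <optional>

  using namespace llvm;

  static std::optional<int64_t> getNamedImm(const MachineInstr &MI,
                                            uint16_t NamedIdx) {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NamedIdx);
    if (Idx < 0)
      return std::nullopt; // this opcode has no operand with that name
    const MachineOperand &MO = MI.getOperand(Idx);
    if (!MO.isImm())
      return std::nullopt; // present, but not an immediate
    return MO.getImm();
  }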
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:229
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1777
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
@ HasSGPR
Definition: SIDefines.h:26
@ HasVGPR
Definition: SIDefines.h:24
@ RegKindMask
Definition: SIDefines.h:29
@ HasAGPR
Definition: SIDefines.h:25
unsigned getDefRegState(bool B)
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
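llvm::call_once is how lazily built, shared lookup tables (like the static spill tables in this file) are initialized exactly once; a self-contained sketch with a hypothetical table:

  #include "llvm/Support/Threading.h"
  #include <cstdint>
  #include <vector>

  using namespace llvm;

  static llvm::once_flag TableInitFlag;
  static std::vector<int16_t> ExampleTable; // hypothetical shared table

  static void initExampleTable() {
    for (int16_t I = 0; I < 32; ++I)
      ExampleTable.push_back(I);
  }

  static const std::vector<int16_t> &getExampleTable() {
    // The initializer runs exactly once, even if several threads race here;
    // every caller then sees the fully built table.
    llvm::call_once(TableInitFlag, initExampleTable);
    return ExampleTable;
  }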
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:533
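A tiny worked example of the two helpers above, in the spirit of splitting a scratch offset into an encodable aligned part plus a remainder; the concrete numbers are only illustrative:

  #include "llvm/Support/Alignment.h"
  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  using namespace llvm;

  static void offsetSplitExample() {
    uint64_t Offset = 5003;
    // Largest multiple of 4 that is <= 5003; the remainder (3) would be
    // folded into the instruction's own immediate by the caller.
    uint64_t Aligned = alignDown(Offset, 4);
    assert(Aligned == 5000);

    // commonAlignment: when the base is 16-byte aligned, the alignment still
    // guaranteed at Base + 8 is 8 bytes.
    assert(commonAlignment(Align(16), 8) == Align(8));
    (void)Aligned;
  }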
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
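Finally, a sketch of getFixedStack and getWithOffset as they are typically combined when one frame object is accessed in 4-byte pieces; the per-lane layout is an assumption for illustration:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineMemOperand.h"
  #include "llvm/Support/Alignment.h"

  using namespace llvm;

  static MachineMemOperand *makeLaneMMO(MachineFunction &MF, int FI,
                                        unsigned Lane) {
    MachinePointerInfo Base = MachinePointerInfo::getFixedStack(MF, FI);
    // Describe a 4-byte store at Base + 4 * Lane within the same frame object.
    return MF.getMachineMemOperand(Base.getWithOffset(Lane * 4),
                                   MachineMemOperand::MOStore, 4, Align(4));
  }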
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
MachineFunction & MF
MachineBasicBlock * MBB
const SIInstrInfo & TII
The llvm::once_flag structure.
Definition: Threading.h:68