LLVM 22.0.0git
AMDGPUWaitSGPRHazards.cpp
//===- AMDGPUWaitSGPRHazards.cpp - Insert waits for SGPR read hazards -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_wait_alu instructions to mitigate SGPR read hazards on GFX12.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUWaitSGPRHazards.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/SetVector.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-wait-sgpr-hazards"

static cl::opt<bool> GlobalEnableSGPRHazardWaits(
    "amdgpu-sgpr-hazard-wait", cl::init(true), cl::Hidden,
    cl::desc("Enable required s_wait_alu on SGPR hazards"));

static cl::opt<bool> GlobalCullSGPRHazardsOnFunctionBoundary(
    "amdgpu-sgpr-hazard-boundary-cull", cl::init(false), cl::Hidden,
    cl::desc("Cull hazards on function boundaries"));

static cl::opt<bool>
    GlobalCullSGPRHazardsAtMemWait("amdgpu-sgpr-hazard-mem-wait-cull",
                                   cl::init(false), cl::Hidden,
                                   cl::desc("Cull hazards on memory waits"));

static cl::opt<unsigned> GlobalCullSGPRHazardsMemWaitThreshold(
    "amdgpu-sgpr-hazard-mem-wait-cull-threshold", cl::init(8), cl::Hidden,
    cl::desc("Number of tracked SGPRs before initiating hazard cull on memory "
             "wait"));

namespace {

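// Scans each function for GFX12 VALU-reads-SGPR hazards and inserts the
// S_WAITCNT_DEPCTR (s_wait_alu) instructions required to resolve them.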
class AMDGPUWaitSGPRHazards {
public:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  const MachineRegisterInfo *MRI;
  unsigned DsNopCount;

  bool EnableSGPRHazardWaits;
  bool CullSGPRHazardsOnFunctionBoundary;
  bool CullSGPRHazardsAtMemWait;
  unsigned CullSGPRHazardsMemWaitThreshold;

  AMDGPUWaitSGPRHazards() {}

  // Return the numeric ID 0-127 for a given SGPR.
  static std::optional<unsigned> sgprNumber(Register Reg,
                                            const SIRegisterInfo &TRI) {
    switch (Reg) {
    case AMDGPU::M0:
    case AMDGPU::EXEC:
    case AMDGPU::EXEC_LO:
    case AMDGPU::EXEC_HI:
    case AMDGPU::SGPR_NULL:
    case AMDGPU::SGPR_NULL64:
      return {};
    default:
      break;
    }
    unsigned RegN = TRI.getHWRegIndex(Reg);
    if (RegN > 127)
      return {};
    return RegN;
  }

  static inline bool isVCC(Register Reg) {
    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI;
  }

  // Adjust global offsets for instructions bundled with S_GETPC_B64 after
  // insertion of a new instruction.
  static void updateGetPCBundle(MachineInstr *NewMI) {
    if (!NewMI->isBundled())
      return;

    // Find start of bundle.
    auto I = NewMI->getIterator();
    while (I->isBundledWithPred())
      I--;
    if (I->isBundle())
      I++;

    // Bail if this is not an S_GETPC bundle.
    if (I->getOpcode() != AMDGPU::S_GETPC_B64)
      return;

    // Update offsets of any references in the bundle.
    const unsigned NewBytes = 4;
    assert(NewMI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
           "Unexpected instruction insertion in bundle");
    auto NextMI = std::next(NewMI->getIterator());
    auto End = NewMI->getParent()->end();
    while (NextMI != End && NextMI->isBundledWithPred()) {
      for (auto &Operand : NextMI->operands()) {
        if (Operand.isGlobal())
          Operand.setOffset(Operand.getOffset() + NewBytes);
      }
      NextMI++;
    }
  }

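  // Dataflow state: which SGPR pairs have ever been read by a VALU, and which
  // SGPRs still have uncommitted writes a later read would need to wait on.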
  struct HazardState {
    static constexpr unsigned None = 0;
    static constexpr unsigned SALU = (1 << 0);
    static constexpr unsigned VALU = (1 << 1);

    std::bitset<64> Tracked;      // SGPR banks ever read by VALU
    std::bitset<128> SALUHazards; // SGPRs with uncommitted values from SALU
    std::bitset<128> VALUHazards; // SGPRs with uncommitted values from VALU
    unsigned VCCHazard = None;    // Source of current VCC writes
    bool ActiveFlat = false;      // Has unwaited flat instructions

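    // Union in the state from a predecessor block; returns true if this
    // changed the state, which keeps the block on the dataflow worklist.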
    bool merge(const HazardState &RHS) {
      HazardState Orig(*this);
      *this |= RHS;
      return (*this != Orig);
    }

    bool operator==(const HazardState &RHS) const {
      return Tracked == RHS.Tracked && SALUHazards == RHS.SALUHazards &&
             VALUHazards == RHS.VALUHazards && VCCHazard == RHS.VCCHazard &&
             ActiveFlat == RHS.ActiveFlat;
    }

    bool operator!=(const HazardState &RHS) const { return !(*this == RHS); }

    void operator|=(const HazardState &RHS) {
      Tracked |= RHS.Tracked;
      SALUHazards |= RHS.SALUHazards;
      VALUHazards |= RHS.VALUHazards;
      VCCHazard |= RHS.VCCHazard;
      ActiveFlat |= RHS.ActiveFlat;
    }
  };

  struct BlockHazardState {
    HazardState In;
    HazardState Out;
  };

  DenseMap<const MachineBasicBlock *, BlockHazardState> BlockState;

  static constexpr unsigned WAVE32_NOPS = 4;
  static constexpr unsigned WAVE64_NOPS = 8;

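  // Emit a run of DS_NOP instructions; a full run (DsNopCount, which depends
  // on wave size) is treated by the scan as clearing all tracked SGPRs.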
  void insertHazardCull(MachineBasicBlock &MBB,
                        MachineBasicBlock::instr_iterator &MI) {
    assert(!MI->isBundled());
    unsigned Count = DsNopCount;
    while (Count--)
      BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
  }

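  // Combine two S_WAITCNT_DEPCTR masks by taking, for every DEPCTR field, the
  // smaller (i.e. stricter) of the two encoded wait values.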
  unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
    unsigned Mask = 0xffff;
    Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
                       AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
    return Mask;
  }

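  // If the previous (non-debug) instruction is already an S_WAITCNT_DEPCTR,
  // fold the new wait requirements into it instead of emitting a second wait.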
  bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
                                unsigned Mask) {
    auto MBB = MI->getParent();
    if (MI == MBB->instr_begin())
      return false;

    auto It = prev_nodbg(MI, MBB->instr_begin());
    if (It->getOpcode() != AMDGPU::S_WAITCNT_DEPCTR)
      return false;

    It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm()));
    return true;
  }

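  // Scan a single block. With Emit=false this only computes the outgoing
  // hazard state for the dataflow iteration; with Emit=true it also inserts
  // the required S_WAITCNT_DEPCTR / DS_NOP instructions.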
  bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
    enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };

    HazardState State = BlockState[&MBB].In;
    SmallSet<Register, 8> SeenRegs;
    bool Emitted = false;
    unsigned DsNops = 0;

    for (MachineBasicBlock::instr_iterator MI = MBB.instr_begin(),
                                           E = MBB.instr_end();
         MI != E; ++MI) {
      if (MI->isMetaInstruction())
        continue;

      // Clear tracked SGPRs if sufficient DS_NOPs occur
      if (MI->getOpcode() == AMDGPU::DS_NOP) {
        if (++DsNops >= DsNopCount)
          State.Tracked.reset();
        continue;
      }
      DsNops = 0;

      // Snoop FLAT instructions to avoid adding culls before scratch/lds loads.
      // Culls could be disproportionate in cost to load time.
      if (SIInstrInfo::isFLAT(*MI) && !SIInstrInfo::isFLATGlobal(*MI))
        State.ActiveFlat = true;

      // SMEM or VMEM clears hazards
      // FIXME: adapt to add FLAT without VALU (so !isLDSDMA())?
      if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) ||
          SIInstrInfo::isSMRD(*MI)) {
        State.VCCHazard = HazardState::None;
        State.SALUHazards.reset();
        State.VALUHazards.reset();
        continue;
      }

      // Existing S_WAITALU can clear hazards
      if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
        unsigned int Mask = MI->getOperand(0).getImm();
        if (AMDGPU::DepCtr::decodeFieldVaVcc(Mask) == 0)
          State.VCCHazard &= ~HazardState::VALU;
        if (AMDGPU::DepCtr::decodeFieldSaSdst(Mask) == 0) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
        }
        if (AMDGPU::DepCtr::decodeFieldVaSdst(Mask) == 0)
          State.VALUHazards.reset();
        continue;
      }

      // Snoop counter waits to insert culls
      if (CullSGPRHazardsAtMemWait &&
          (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_SAMPLECNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_BVHCNT) &&
          (MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0) &&
          (State.Tracked.count() >= CullSGPRHazardsMemWaitThreshold)) {
        if (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT && State.ActiveFlat) {
          State.ActiveFlat = false;
        } else {
          State.Tracked.reset();
          if (Emit)
            insertHazardCull(MBB, MI);
          continue;
        }
      }

      // Process only VALUs and SALUs
      bool IsVALU = SIInstrInfo::isVALU(*MI);
      bool IsSALU = SIInstrInfo::isSALU(*MI);
      if (!IsVALU && !IsSALU)
        continue;

      unsigned Wait = 0;

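      // Examine one SGPR operand: as a use, accumulate the waits it requires;
      // as a def, record a new pending hazard for it.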
      auto processOperand = [&](const MachineOperand &Op, bool IsUse) {
        if (!Op.isReg())
          return;
        Register Reg = Op.getReg();
        assert(!Op.getSubReg());
        if (!TRI->isSGPRReg(*MRI, Reg))
          return;

        // Only visit each register once
        if (!SeenRegs.insert(Reg).second)
          return;

        auto RegNumber = sgprNumber(Reg, *TRI);
        if (!RegNumber)
          return;

        // Track SGPRs by pair -- numeric ID of a 64b SGPR pair.
        // i.e. SGPR0 = SGPR0_SGPR1 = 0, SGPR3 = SGPR2_SGPR3 = 1, etc
        unsigned RegN = *RegNumber;
        unsigned PairN = (RegN >> 1) & 0x3f;

        // Read/write of untracked register is safe; but must record any new
        // reads.
        if (!State.Tracked[PairN]) {
          if (IsVALU && IsUse)
            State.Tracked.set(PairN);
          return;
        }

        uint8_t SGPRCount =
            AMDGPU::getRegBitWidth(*TRI->getRegClassForReg(*MRI, Reg)) / 32;

        if (IsUse) {
          // SALU reading SGPR clears VALU hazards
          if (IsSALU) {
            if (isVCC(Reg)) {
              if (State.VCCHazard & HazardState::VALU)
                State.VCCHazard = HazardState::None;
            } else {
              State.VALUHazards.reset();
            }
          }
          // Compute required waits
          for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
            Wait |= State.SALUHazards[RegN + RegIdx] ? WA_SALU : 0;
            Wait |= IsVALU && State.VALUHazards[RegN + RegIdx] ? WA_VALU : 0;
          }
          if (isVCC(Reg) && State.VCCHazard) {
            // Note: it's possible for both SALU and VALU to exist if VCC
            // was updated differently by merged predecessors.
            if (State.VCCHazard & HazardState::SALU)
              Wait |= WA_SALU;
            if (State.VCCHazard & HazardState::VALU)
              Wait |= WA_VCC;
          }
        } else {
          // Update hazards
          if (isVCC(Reg)) {
            State.VCCHazard = IsSALU ? HazardState::SALU : HazardState::VALU;
          } else {
            for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
              if (IsSALU)
                State.SALUHazards.set(RegN + RegIdx);
              else
                State.VALUHazards.set(RegN + RegIdx);
            }
          }
        }
      };

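      // A call, return, or indirect branch transfers control elsewhere, so any
      // outstanding hazards must be resolved before it executes.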
      const bool IsSetPC =
          (MI->isCall() || MI->isReturn() || MI->isIndirectBranch()) &&
          MI->getOpcode() != AMDGPU::S_ENDPGM &&
          MI->getOpcode() != AMDGPU::S_ENDPGM_SAVED;

      // Only consider implicit VCC specified by instruction descriptor.
      const bool HasImplicitVCC =
          llvm::any_of(MI->getDesc().implicit_uses(), isVCC) ||
          llvm::any_of(MI->getDesc().implicit_defs(), isVCC);

      if (IsSetPC) {
        // All SGPR writes before a call/return must be flushed as the
        // callee/caller will not see the hazard chain.
        if (State.VCCHazard & HazardState::VALU)
          Wait |= WA_VCC;
        if (State.SALUHazards.any() || (State.VCCHazard & HazardState::SALU))
          Wait |= WA_SALU;
        if (State.VALUHazards.any())
          Wait |= WA_VALU;
        if (CullSGPRHazardsOnFunctionBoundary && State.Tracked.any()) {
          State.Tracked.reset();
          if (Emit)
            insertHazardCull(MBB, MI);
        }
      } else {
        // Process uses to determine required wait.
        SeenRegs.clear();
        for (const MachineOperand &Op : MI->all_uses()) {
          if (Op.isImplicit() &&
              (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
            continue;
          processOperand(Op, true);
        }
      }

      // Apply wait
      if (Wait) {
        unsigned Mask = 0xffff;
        if (Wait & WA_VCC) {
          State.VCCHazard &= ~HazardState::VALU;
          Mask = AMDGPU::DepCtr::encodeFieldVaVcc(Mask, 0);
        }
        if (Wait & WA_SALU) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
          Mask = AMDGPU::DepCtr::encodeFieldSaSdst(Mask, 0);
        }
        if (Wait & WA_VALU) {
          State.VALUHazards.reset();
          Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
        }
        if (Emit) {
          if (!mergeConsecutiveWaitAlus(MI, Mask)) {
            auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                                 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
                             .addImm(Mask);
            updateGetPCBundle(NewMI);
          }
          Emitted = true;
        }
      }

      // On return from a call the SGPR state is unknown, so treat all SGPRs
      // as potential hazards.
      if (MI->isCall() && !CullSGPRHazardsOnFunctionBoundary)
        State.Tracked.set();

      // Update hazards based on defs.
      SeenRegs.clear();
      for (const MachineOperand &Op : MI->all_defs()) {
        if (Op.isImplicit() &&
            (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
          continue;
        processOperand(Op, false);
      }
    }

    BlockHazardState &BS = BlockState[&MBB];
    bool Changed = State != BS.Out;
    if (Emit) {
      assert(!Changed && "Hazard state should not change on emit pass");
      return Emitted;
    }
    if (Changed)
      BS.Out = State;
    return Changed;
  }

  bool run(MachineFunction &MF) {
    const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasVALUReadSGPRHazard())
      return false;

    // Parse settings
    EnableSGPRHazardWaits = GlobalEnableSGPRHazardWaits;
    CullSGPRHazardsOnFunctionBoundary = GlobalCullSGPRHazardsOnFunctionBoundary;
    CullSGPRHazardsAtMemWait = GlobalCullSGPRHazardsAtMemWait;
    CullSGPRHazardsMemWaitThreshold = GlobalCullSGPRHazardsMemWaitThreshold;

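    // Function attributes may override the defaults when the corresponding
    // command-line option was not given explicitly.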
    if (!GlobalEnableSGPRHazardWaits.getNumOccurrences())
      EnableSGPRHazardWaits = MF.getFunction().getFnAttributeAsParsedInteger(
          "amdgpu-sgpr-hazard-wait", EnableSGPRHazardWaits);
    if (!GlobalCullSGPRHazardsOnFunctionBoundary.getNumOccurrences())
      CullSGPRHazardsOnFunctionBoundary =
          MF.getFunction().hasFnAttribute("amdgpu-sgpr-hazard-boundary-cull");
    if (!GlobalCullSGPRHazardsAtMemWait.getNumOccurrences())
      CullSGPRHazardsAtMemWait =
          MF.getFunction().hasFnAttribute("amdgpu-sgpr-hazard-mem-wait-cull");
    if (!GlobalCullSGPRHazardsMemWaitThreshold.getNumOccurrences())
      CullSGPRHazardsMemWaitThreshold =
          MF.getFunction().getFnAttributeAsParsedInteger(
              "amdgpu-sgpr-hazard-mem-wait-cull-threshold",
              CullSGPRHazardsMemWaitThreshold);

    // Bail if disabled
    if (!EnableSGPRHazardWaits)
      return false;

    TII = ST.getInstrInfo();
    TRI = ST.getRegisterInfo();
    MRI = &MF.getRegInfo();
    DsNopCount = ST.isWave64() ? WAVE64_NOPS : WAVE32_NOPS;

    auto CallingConv = MF.getFunction().getCallingConv();
    if (!AMDGPU::isEntryFunctionCC(CallingConv) &&
        !CullSGPRHazardsOnFunctionBoundary) {
      // Callee must consider all SGPRs as tracked.
      LLVM_DEBUG(dbgs() << "Is called function, track all SGPRs.\n");
      MachineBasicBlock &EntryBlock = MF.front();
      BlockState[&EntryBlock].In.Tracked.set();
    }

    // Calculate the hazard state for each basic block.
    // Iterate until a fixed point is reached.
    // Fixed point is guaranteed as merge function only ever increases
    // the hazard set, and all backedges will cause a merge.
    //
    // Note: we have to take care of the entry block as this technically
    // has an edge from outside the function. Failure to treat this as
    // a merge could prevent fixed point being reached.
    SetVector<MachineBasicBlock *> Worklist;
    for (auto &MBB : reverse(MF))
      Worklist.insert(&MBB);
    while (!Worklist.empty()) {
      auto &MBB = *Worklist.pop_back_val();
      bool Changed = runOnMachineBasicBlock(MBB, false);
      if (Changed) {
        // Note: take a copy of state here in case it is reallocated by map
        HazardState NewState = BlockState[&MBB].Out;
        // Propagate to all successor blocks
        for (auto Succ : MBB.successors()) {
          // We only need to merge hazards at CFG merge points.
          auto &SuccState = BlockState[Succ];
          if (Succ->getSinglePredecessor() && !Succ->isEntryBlock()) {
            if (SuccState.In != NewState) {
              SuccState.In = NewState;
              Worklist.insert(Succ);
            }
          } else if (SuccState.In.merge(NewState)) {
            Worklist.insert(Succ);
          }
        }
      }
    }

    LLVM_DEBUG(dbgs() << "Emit s_wait_alu instructions\n");

    // Final pass to emit wait instructions.
    bool Changed = false;
    for (auto &MBB : MF)
      Changed |= runOnMachineBasicBlock(MBB, true);

    BlockState.clear();
    return Changed;
  }
};

class AMDGPUWaitSGPRHazardsLegacy : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUWaitSGPRHazardsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    return AMDGPUWaitSGPRHazards().run(MF);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // namespace

char AMDGPUWaitSGPRHazardsLegacy::ID = 0;

char &llvm::AMDGPUWaitSGPRHazardsLegacyID = AMDGPUWaitSGPRHazardsLegacy::ID;

INITIALIZE_PASS(AMDGPUWaitSGPRHazardsLegacy, DEBUG_TYPE,
                "AMDGPU Insert waits for SGPR read hazards", false, false)

PreservedAnalyses
AMDGPUWaitSGPRHazardsPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  if (AMDGPUWaitSGPRHazards().run(MF))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}