AMDGPUWaitSGPRHazards.cpp
//===- AMDGPUWaitSGPRHazards.cpp - Insert waits for SGPR read hazards ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_wait_alu instructions to mitigate SGPR read hazards on GFX12.
//
//===----------------------------------------------------------------------===//

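// Pass overview (summarizing the implementation below):
//  - Once a VALU has read an SGPR, later SALU/VALU writes to that SGPR are
//    tracked as hazards; a subsequent read of a hazardous SGPR must first
//    execute an s_wait_alu (S_WAITCNT_DEPCTR).
//  - A forward dataflow over a per-block HazardState runs to a fixed point,
//    then a second sweep over the blocks emits the required waits.
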
#include "AMDGPUWaitSGPRHazards.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/SetVector.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-wait-sgpr-hazards"

static cl::opt<bool> GlobalEnableSGPRHazardWaits(
    "amdgpu-sgpr-hazard-wait", cl::init(true), cl::Hidden,
    cl::desc("Enable required s_wait_alu on SGPR hazards"));

static cl::opt<bool> GlobalCullSGPRHazardsOnFunctionBoundary(
    "amdgpu-sgpr-hazard-boundary-cull", cl::init(false), cl::Hidden,
    cl::desc("Cull hazards on function boundaries"));

static cl::opt<bool>
    GlobalCullSGPRHazardsAtMemWait("amdgpu-sgpr-hazard-mem-wait-cull",
                                   cl::init(false), cl::Hidden,
                                   cl::desc("Cull hazards on memory waits"));

static cl::opt<unsigned> GlobalCullSGPRHazardsMemWaitThreshold(
    "amdgpu-sgpr-hazard-mem-wait-cull-threshold", cl::init(8), cl::Hidden,
    cl::desc("Number of tracked SGPRs before initiating hazard cull on memory "
             "wait"));

namespace {

class AMDGPUWaitSGPRHazards {
public:
  const GCNSubtarget *ST;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  const MachineRegisterInfo *MRI;
  unsigned DsNopCount;

  bool EnableSGPRHazardWaits;
  bool CullSGPRHazardsOnFunctionBoundary;
  bool CullSGPRHazardsAtMemWait;
  unsigned CullSGPRHazardsMemWaitThreshold;

  AMDGPUWaitSGPRHazards() = default;

  // Return the numeric ID 0-127 for a given SGPR.
  static std::optional<unsigned> sgprNumber(Register Reg,
                                            const SIRegisterInfo &TRI) {
    switch (Reg) {
    case AMDGPU::M0:
    case AMDGPU::EXEC:
    case AMDGPU::EXEC_LO:
    case AMDGPU::EXEC_HI:
    case AMDGPU::SGPR_NULL:
    case AMDGPU::SGPR_NULL64:
      return {};
    default:
      break;
    }
    unsigned RegN = TRI.getHWRegIndex(Reg);
    if (RegN > 127)
      return {};
    return RegN;
  }
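  // Example: sgprNumber(AMDGPU::SGPR4, TRI) yields 4, while M0, EXEC*, and
  // the null registers yield std::nullopt and are never tracked.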

  static inline bool isVCC(Register Reg) {
    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI;
  }

  // Adjust global offsets for instructions bundled with S_GETPC_B64 after
  // insertion of a new instruction.
  static void updateGetPCBundle(MachineInstr *NewMI) {
    if (!NewMI->isBundled())
      return;

    // Find start of bundle.
    auto I = NewMI->getIterator();
    while (I->isBundledWithPred())
      I--;
    if (I->isBundle())
      I++;

    // Bail if this is not an S_GETPC bundle.
    if (I->getOpcode() != AMDGPU::S_GETPC_B64)
      return;

    // Update offsets of any references in the bundle.
    const unsigned NewBytes = 4;
    assert(NewMI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
           "Unexpected instruction insertion in bundle");
    auto NextMI = std::next(NewMI->getIterator());
    auto End = NewMI->getParent()->end();
    while (NextMI != End && NextMI->isBundledWithPred()) {
      for (auto &Operand : NextMI->operands()) {
        if (Operand.isGlobal())
          Operand.setOffset(Operand.getOffset() + NewBytes);
      }
      NextMI++;
    }
  }
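  // Illustrative example (hypothetical symbol `sym`): in a bundle such as
  //   s_getpc_b64 s[0:1]
  //   s_add_u32  s0, s0, sym@rel32@lo+4
  //   s_addc_u32 s1, s1, sym@rel32@hi+12
  // inserting the 4-byte S_WAITCNT_DEPCTR after s_getpc_b64 pushes the adds
  // further from the captured PC, so every global operand following the
  // insertion point needs its offset increased by 4.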

  struct HazardState {
    static constexpr unsigned None = 0;
    static constexpr unsigned SALU = (1 << 0);
    static constexpr unsigned VALU = (1 << 1);

    std::bitset<64> Tracked;      // SGPR banks ever read by VALU
    std::bitset<128> SALUHazards; // SGPRs with uncommitted values from SALU
    std::bitset<128> VALUHazards; // SGPRs with uncommitted values from VALU
    unsigned VCCHazard = None;    // Source of current VCC writes
    bool ActiveFlat = false;      // Has unwaited flat instructions

    bool merge(const HazardState &RHS) {
      HazardState Orig(*this);
      *this |= RHS;
      return (*this != Orig);
    }

    bool operator==(const HazardState &RHS) const {
      return Tracked == RHS.Tracked && SALUHazards == RHS.SALUHazards &&
             VALUHazards == RHS.VALUHazards && VCCHazard == RHS.VCCHazard &&
             ActiveFlat == RHS.ActiveFlat;
    }

    bool operator!=(const HazardState &RHS) const { return !(*this == RHS); }

    void operator|=(const HazardState &RHS) {
      Tracked |= RHS.Tracked;
      SALUHazards |= RHS.SALUHazards;
      VALUHazards |= RHS.VALUHazards;
      VCCHazard |= RHS.VCCHazard;
      ActiveFlat |= RHS.ActiveFlat;
    }
  };
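  // merge() is monotonic (operator|= only ever sets bits), which is what
  // guarantees the fixed-point iteration in run() terminates.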

  struct BlockHazardState {
    HazardState In;
    HazardState Out;
  };

  DenseMap<const MachineBasicBlock *, BlockHazardState> BlockState;

  static constexpr unsigned WAVE32_NOPS = 4;
  static constexpr unsigned WAVE64_NOPS = 8;

  void insertHazardCull(MachineBasicBlock &MBB,
                        MachineBasicBlock::instr_iterator MI) {
    assert(!MI->isBundled());
    unsigned Count = DsNopCount;
    while (Count--)
      BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
  }
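  // A hazard "cull" emits DsNopCount DS_NOPs (4 on wave32, 8 on wave64); the
  // scan in runOnMachineBasicBlock treats that many consecutive DS_NOPs as
  // clearing all Tracked SGPRs, resetting hazard tracking wholesale.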

  unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
    unsigned Mask = AMDGPU::DepCtr::getDefaultDepCtrEncoding(*ST);
    Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
    const AMDGPU::IsaVersion &Version = AMDGPU::getIsaVersion(ST->getCPU());
    Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
        Mask,
        std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1, Version),
                 AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2, Version)),
        Version);
    Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
    return Mask;
  }
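  // Each depctr field means "wait until that counter is <= N", so merging two
  // masks takes the per-field minimum: the stricter of the two waits.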

  bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
                                unsigned Mask) {
    auto MBB = MI->getParent();
    if (MI == MBB->instr_begin())
      return false;

    auto It = prev_nodbg(MI, MBB->instr_begin());
    if (It->getOpcode() != AMDGPU::S_WAITCNT_DEPCTR)
      return false;

    It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm()));
    return true;
  }

  bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
    enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };

    HazardState State = BlockState[&MBB].In;
    SmallSet<Register, 8> SeenRegs;
    bool Emitted = false;
    unsigned DsNops = 0;

    for (MachineBasicBlock::instr_iterator MI = MBB.instr_begin(),
                                           E = MBB.instr_end();
         MI != E; ++MI) {
      if (MI->isMetaInstruction())
        continue;

      // Clear tracked SGPRs if sufficient DS_NOPs occur
      if (MI->getOpcode() == AMDGPU::DS_NOP) {
        if (++DsNops >= DsNopCount)
          State.Tracked.reset();
        continue;
      }
      DsNops = 0;

      // Snoop FLAT instructions to avoid adding culls before scratch/lds
      // loads. Culls could be disproportionate in cost to load time.
      if (SIInstrInfo::isFLAT(*MI) && !SIInstrInfo::isFLATGlobal(*MI))
        State.ActiveFlat = true;

      // SMEM or VMEM clears hazards
      // FIXME: adapt to add FLAT without VALU (so !isLDSDMA())?
      if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) ||
          SIInstrInfo::isSMRD(*MI)) {
        State.VCCHazard = HazardState::None;
        State.SALUHazards.reset();
        State.VALUHazards.reset();
        continue;
      }

      // Existing S_WAITALU can clear hazards
      if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
        unsigned int Mask = MI->getOperand(0).getImm();
        if (AMDGPU::DepCtr::decodeFieldVaVcc(Mask) == 0)
          State.VCCHazard &= ~HazardState::VALU;
        if (AMDGPU::DepCtr::decodeFieldSaSdst(Mask) == 0) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
        }
        if (AMDGPU::DepCtr::decodeFieldVaSdst(Mask) == 0)
          State.VALUHazards.reset();
        continue;
      }

      // Snoop counter waits to insert culls
      if (CullSGPRHazardsAtMemWait &&
          (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_SAMPLECNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_BVHCNT) &&
          (MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0) &&
          (State.Tracked.count() >= CullSGPRHazardsMemWaitThreshold)) {
        if (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT && State.ActiveFlat) {
          State.ActiveFlat = false;
        } else {
          State.Tracked.reset();
          if (Emit)
            insertHazardCull(MBB, MI);
          continue;
        }
      }

      // Process only VALUs and SALUs
      bool IsVALU = SIInstrInfo::isVALU(*MI);
      bool IsSALU = SIInstrInfo::isSALU(*MI);
      if (!IsVALU && !IsSALU)
        continue;

      unsigned Wait = 0;

      auto processOperand = [&](const MachineOperand &Op, bool IsUse) {
        if (!Op.isReg())
          return;
        Register Reg = Op.getReg();
        assert(!Op.getSubReg());
        if (!TRI->isSGPRReg(*MRI, Reg))
          return;

        // Only visit each register once
        if (!SeenRegs.insert(Reg).second)
          return;

        auto RegNumber = sgprNumber(Reg, *TRI);
        if (!RegNumber)
          return;

        // Track SGPRs by pair -- numeric ID of a 64-bit SGPR pair,
        // i.e. SGPR0 = SGPR0_SGPR1 = 0, SGPR3 = SGPR2_SGPR3 = 1, etc.
        unsigned RegN = *RegNumber;
        unsigned PairN = (RegN >> 1) & 0x3f;

        // Read/write of an untracked register is safe, but any new VALU
        // reads must be recorded.
        if (!State.Tracked[PairN]) {
          if (IsVALU && IsUse)
            State.Tracked.set(PairN);
          return;
        }

        uint8_t SGPRCount =
            AMDGPU::getRegBitWidth(*TRI->getRegClassForReg(*MRI, Reg)) / 32;

        if (IsUse) {
          // SALU reading SGPR clears VALU hazards
          if (IsSALU) {
            if (isVCC(Reg)) {
              if (State.VCCHazard & HazardState::VALU)
                State.VCCHazard = HazardState::None;
            } else {
              State.VALUHazards.reset();
            }
          }
          // Compute required waits
          for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
            Wait |= State.SALUHazards[RegN + RegIdx] ? WA_SALU : 0;
            Wait |= IsVALU && State.VALUHazards[RegN + RegIdx] ? WA_VALU : 0;
          }
          if (isVCC(Reg) && State.VCCHazard) {
            // Note: it's possible for both SALU and VALU hazards to exist
            // if VCC was updated differently by merged predecessors.
            if (State.VCCHazard & HazardState::SALU)
              Wait |= WA_SALU;
            if (State.VCCHazard & HazardState::VALU)
              Wait |= WA_VCC;
          }
        } else {
          // Update hazards
          if (isVCC(Reg)) {
            State.VCCHazard = IsSALU ? HazardState::SALU : HazardState::VALU;
          } else {
            for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
              if (IsSALU)
                State.SALUHazards.set(RegN + RegIdx);
              else
                State.VALUHazards.set(RegN + RegIdx);
            }
          }
        }
      };
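      // Pair-tracking example (for processOperand above): a VALU read of
      // SGPR7 sets Tracked bit 3 (the SGPR6_SGPR7 bank), so a later
      // SALU/VALU write to either SGPR6 or SGPR7 sets per-register hazard
      // bits that force a wait on the next read.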

      const bool IsSetPC =
          (MI->isCall() || MI->isReturn() || MI->isIndirectBranch()) &&
          MI->getOpcode() != AMDGPU::S_ENDPGM &&
          MI->getOpcode() != AMDGPU::S_ENDPGM_SAVED;

      // Only consider implicit VCC specified by instruction descriptor.
      const bool HasImplicitVCC =
          llvm::any_of(MI->getDesc().implicit_uses(), isVCC) ||
          llvm::any_of(MI->getDesc().implicit_defs(), isVCC);

      if (IsSetPC) {
        // All SGPR writes before a call/return must be flushed as the
        // callee/caller will not see the hazard chain.
        if (State.VCCHazard & HazardState::VALU)
          Wait |= WA_VCC;
        if (State.SALUHazards.any() || (State.VCCHazard & HazardState::SALU))
          Wait |= WA_SALU;
        if (State.VALUHazards.any())
          Wait |= WA_VALU;
        if (CullSGPRHazardsOnFunctionBoundary && State.Tracked.any()) {
          State.Tracked.reset();
          if (Emit)
            insertHazardCull(MBB, MI);
        }
      } else {
        // Process uses to determine required wait.
        SeenRegs.clear();
        for (const MachineOperand &Op : MI->all_uses()) {
          if (Op.isImplicit() &&
              (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
            continue;
          processOperand(Op, true);
        }
      }

      // Apply wait
      if (Wait) {
        unsigned Mask = AMDGPU::DepCtr::getDefaultDepCtrEncoding(*ST);
        if (Wait & WA_VCC) {
          State.VCCHazard &= ~HazardState::VALU;
          Mask = AMDGPU::DepCtr::encodeFieldVaVcc(Mask, 0);
        }
        if (Wait & WA_SALU) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
          Mask = AMDGPU::DepCtr::encodeFieldSaSdst(Mask, 0);
        }
        if (Wait & WA_VALU) {
          State.VALUHazards.reset();
          Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
        }
        if (Emit) {
          if (!mergeConsecutiveWaitAlus(MI, Mask)) {
            auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                                 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
                             .addImm(Mask);
            updateGetPCBundle(NewMI);
          }
          Emitted = true;
        }
      }
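      // Illustrative GFX12 sequence (assumed assembly spelling):
      //   v_mov_b32 v0, s4              ; VALU read: s[4:5] becomes Tracked
      //   s_mov_b32 s4, 0               ; SALU write to tracked s4: hazard
      //   s_wait_alu depctr_sa_sdst(0)  ; inserted before the next read
      //   s_add_u32 s5, s4, 1           ; read of s4 is now safe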

      // On return from a call SGPR state is unknown, so treat all SGPRs as
      // potential hazards.
      if (MI->isCall() && !CullSGPRHazardsOnFunctionBoundary)
        State.Tracked.set();

      // Update hazards based on defs.
      SeenRegs.clear();
      for (const MachineOperand &Op : MI->all_defs()) {
        if (Op.isImplicit() &&
            (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
          continue;
        processOperand(Op, false);
      }
    }

    BlockHazardState &BS = BlockState[&MBB];
    bool Changed = State != BS.Out;
    if (Emit) {
      assert(!Changed && "Hazard state should not change on emit pass");
      return Emitted;
    }
    if (Changed)
      BS.Out = State;
    return Changed;
  }
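  // Note: runOnMachineBasicBlock (above) serves two phases. With Emit=false
  // it is the dataflow transfer function and reports whether the block's Out
  // state changed; with Emit=true it replays the converged state and inserts
  // the actual instructions.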

  bool run(MachineFunction &MF) {
    ST = &MF.getSubtarget<GCNSubtarget>();
    if (!ST->hasVALUReadSGPRHazard())
      return false;

    // Parse settings
    EnableSGPRHazardWaits = GlobalEnableSGPRHazardWaits;
    CullSGPRHazardsOnFunctionBoundary = GlobalCullSGPRHazardsOnFunctionBoundary;
    CullSGPRHazardsAtMemWait = GlobalCullSGPRHazardsAtMemWait;
    CullSGPRHazardsMemWaitThreshold = GlobalCullSGPRHazardsMemWaitThreshold;

    if (!GlobalEnableSGPRHazardWaits.getNumOccurrences())
      EnableSGPRHazardWaits = MF.getFunction().getFnAttributeAsParsedInteger(
          "amdgpu-sgpr-hazard-wait", EnableSGPRHazardWaits);
    if (!GlobalCullSGPRHazardsOnFunctionBoundary.getNumOccurrences())
      CullSGPRHazardsOnFunctionBoundary =
          MF.getFunction().hasFnAttribute("amdgpu-sgpr-hazard-boundary-cull");
    if (!GlobalCullSGPRHazardsAtMemWait.getNumOccurrences())
      CullSGPRHazardsAtMemWait =
          MF.getFunction().hasFnAttribute("amdgpu-sgpr-hazard-mem-wait-cull");
    if (!GlobalCullSGPRHazardsMemWaitThreshold.getNumOccurrences())
      CullSGPRHazardsMemWaitThreshold =
          MF.getFunction().getFnAttributeAsParsedInteger(
              "amdgpu-sgpr-hazard-mem-wait-cull-threshold",
              CullSGPRHazardsMemWaitThreshold);

    // Bail if disabled
    if (!EnableSGPRHazardWaits)
      return false;

    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();
    MRI = &MF.getRegInfo();
    DsNopCount = ST->isWave64() ? WAVE64_NOPS : WAVE32_NOPS;

    auto CallingConv = MF.getFunction().getCallingConv();
    if (!AMDGPU::isEntryFunctionCC(CallingConv) &&
        !CullSGPRHazardsOnFunctionBoundary) {
      // Callee must consider all SGPRs as tracked.
      LLVM_DEBUG(dbgs() << "Is called function, track all SGPRs.\n");
      MachineBasicBlock &EntryBlock = MF.front();
      BlockState[&EntryBlock].In.Tracked.set();
    }

    // Calculate the hazard state for each basic block.
    // Iterate until a fixed point is reached.
    // Fixed point is guaranteed as the merge function only ever increases
    // the hazard set, and all backedges will cause a merge.
    //
    // Note: we have to take care of the entry block as this technically
    // has an edge from outside the function. Failure to treat this as
    // a merge could prevent the fixed point being reached.
    SetVector<MachineBasicBlock *> Worklist;
    for (auto &MBB : reverse(MF))
      Worklist.insert(&MBB);
    while (!Worklist.empty()) {
      auto &MBB = *Worklist.pop_back_val();
      bool Changed = runOnMachineBasicBlock(MBB, false);
      if (Changed) {
        // Note: take a copy of state here in case it is reallocated by map
        HazardState NewState = BlockState[&MBB].Out;
        // Propagate to all successor blocks
        for (auto Succ : MBB.successors()) {
          // We only need to merge hazards at CFG merge points.
          auto &SuccState = BlockState[Succ];
          if (Succ->getSinglePredecessor() && !Succ->isEntryBlock()) {
            if (SuccState.In != NewState) {
              SuccState.In = NewState;
              Worklist.insert(Succ);
            }
          } else if (SuccState.In.merge(NewState)) {
            Worklist.insert(Succ);
          }
        }
      }
    }
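    // Ordering assumption: seeding the worklist in reverse and popping from
    // the back visits blocks in roughly program order first, which tends to
    // converge quickly on reducible CFGs.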

    LLVM_DEBUG(dbgs() << "Emit s_wait_alu instructions\n");

    // Final pass to emit wait instructions.
    bool Changed = false;
    for (auto &MBB : MF)
      Changed |= runOnMachineBasicBlock(MBB, true);

    BlockState.clear();
    return Changed;
  }
};

class AMDGPUWaitSGPRHazardsLegacy : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUWaitSGPRHazardsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    return AMDGPUWaitSGPRHazards().run(MF);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // namespace

char AMDGPUWaitSGPRHazardsLegacy::ID = 0;

char &llvm::AMDGPUWaitSGPRHazardsLegacyID = AMDGPUWaitSGPRHazardsLegacy::ID;

INITIALIZE_PASS(AMDGPUWaitSGPRHazardsLegacy, DEBUG_TYPE,
                "AMDGPU Insert waits for SGPR read hazards", false, false)

PreservedAnalyses
AMDGPUWaitSGPRHazardsPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  if (AMDGPUWaitSGPRHazards().run(MF))
    return getMachineFunctionPassPreservedAnalyses();
  return PreservedAnalyses::all();
}
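// Both pass-manager entry points share the AMDGPUWaitSGPRHazards
// implementation above; only instructions are inserted, never CFG edges, so
// the CFG is preserved (see setPreservesCFG in the legacy pass).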