LLVM 20.0.0git
SIOptimizeVGPRLiveRange.cpp
Go to the documentation of this file.
1//===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11/// structures and waterfall loops.
12///
13/// When we do structurization, we usually transform an if-else into two
14/// successive if-then (with a flow block to do predicate inversion). Consider a
15/// simple case after structurization: A divergent value %a was defined before
16/// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17/// bb.if:
18/// %a = ...
19/// ...
20/// bb.then:
21/// ... = op %a
22/// ... // %a can be dead here
23/// bb.flow:
24/// ...
25/// bb.else:
26/// ... = %a
27/// ...
28/// bb.endif
29///
30/// As register allocator has no idea of the thread-control-flow, it will just
31/// assume %a would be alive in the whole range of bb.then because of a later
32/// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33/// to exec mask. For this if-else case, the lanes active in bb.then will be
34/// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35/// after the last use in bb.then until the end of the block. The reason is
36/// the instructions in bb.then will only overwrite lanes that will never be
37/// accessed in bb.else.
38///
39/// This pass aims to tell register allocator that %a is in-fact dead,
40/// through inserting a phi-node in bb.flow saying that %a is undef when coming
41/// from bb.then, and then replace the uses in the bb.else with the result of
42/// newly inserted phi.
43///
44/// Two key conditions must be met to ensure correctness:
45/// 1.) The def-point should be in the same loop-level as if-else-endif to make
46/// sure the second loop iteration still get correct data.
47/// 2.) There should be no further uses after the IF-ELSE region.
48///
49///
50/// Waterfall loops get inserted around instructions that use divergent values
51/// but can only be executed with a uniform value. For example an indirect call
52/// to a divergent address:
53/// bb.start:
54/// %a = ...
55/// %fun = ...
56/// ...
57/// bb.loop:
58/// call %fun (%a)
59/// ... // %a can be dead here
60/// loop %bb.loop
61///
62/// The loop block is executed multiple times, but it is run exactly once for
63/// each active lane. Similar to the if-else case, the register allocator
64/// assumes that %a is live throughout the loop as it is used again in the next
65/// iteration. If %a is a VGPR that is unused after the loop, it does not need
66/// to be live after its last use in the loop block. By inserting a phi-node at
67/// the start of bb.loop that is undef when coming from bb.loop, the register
68/// allocation knows that the value of %a does not need to be preserved through
69/// iterations of the loop.
70///
71//
72//===----------------------------------------------------------------------===//
73
75#include "AMDGPU.h"
76#include "GCNSubtarget.h"
83
84using namespace llvm;
85
86#define DEBUG_TYPE "si-opt-vgpr-liverange"
87
88namespace {
89
90class SIOptimizeVGPRLiveRange {
91private:
92 const SIRegisterInfo *TRI = nullptr;
93 const SIInstrInfo *TII = nullptr;
94 LiveVariables *LV = nullptr;
95 MachineDominatorTree *MDT = nullptr;
96 const MachineLoopInfo *Loops = nullptr;
97 MachineRegisterInfo *MRI = nullptr;
98
99public:
100 SIOptimizeVGPRLiveRange(LiveVariables *LV, MachineDominatorTree *MDT,
102 : LV(LV), MDT(MDT), Loops(Loops) {}
103 bool run(MachineFunction &MF);
104
105 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
106
107 void collectElseRegionBlocks(MachineBasicBlock *Flow,
108 MachineBasicBlock *Endif,
110
111 void
112 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
113 MachineBasicBlock *Endif,
115 SmallVectorImpl<Register> &CandidateRegs) const;
116
117 void collectWaterfallCandidateRegisters(
118 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
119 SmallSetVector<Register, 16> &CandidateRegs,
121 SmallVectorImpl<MachineInstr *> &Instructions) const;
122
123 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
125
126 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
127 MachineBasicBlock *Flow) const;
128
129 void updateLiveRangeInElseRegion(
131 MachineBasicBlock *Endif,
133
134 void
135 optimizeLiveRange(Register Reg, MachineBasicBlock *If,
138
139 void optimizeWaterfallLiveRange(
140 Register Reg, MachineBasicBlock *LoopHeader,
142 SmallVectorImpl<MachineInstr *> &Instructions) const;
143};
144
145class SIOptimizeVGPRLiveRangeLegacy : public MachineFunctionPass {
146public:
147 static char ID;
148
149 SIOptimizeVGPRLiveRangeLegacy() : MachineFunctionPass(ID) {}
150
151 bool runOnMachineFunction(MachineFunction &MF) override;
152
153 StringRef getPassName() const override {
154 return "SI Optimize VGPR LiveRange";
155 }
156
157 void getAnalysisUsage(AnalysisUsage &AU) const override {
158 AU.setPreservesCFG();
166 }
167
170 MachineFunctionProperties::Property::IsSSA);
171 }
172
175 MachineFunctionProperties::Property::NoPHIs);
176 }
177};
178
179} // end anonymous namespace
180
181// Check whether the MBB is a else flow block and get the branching target which
182// is the Endif block
184SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
185 for (auto &BR : MBB->terminators()) {
186 if (BR.getOpcode() == AMDGPU::SI_ELSE)
187 return BR.getOperand(2).getMBB();
188 }
189 return nullptr;
190}
191
192void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
195 assert(Flow != Endif);
196
198 unsigned Cur = 0;
199 while (MBB) {
200 for (auto *Pred : MBB->predecessors()) {
201 if (Pred != Flow)
202 Blocks.insert(Pred);
203 }
204
205 if (Cur < Blocks.size())
206 MBB = Blocks[Cur++];
207 else
208 MBB = nullptr;
209 }
210
211 LLVM_DEBUG({
212 dbgs() << "Found Else blocks: ";
213 for (auto *MBB : Blocks)
214 dbgs() << printMBBReference(*MBB) << ' ';
215 dbgs() << '\n';
216 });
217}
218
219/// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
220void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
223 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
224 if (UseMI.getParent() == MBB && !UseMI.isPHI())
225 Uses.push_back(&UseMI);
226 }
227}
228
229/// Collect the killed registers in the ELSE region which are not alive through
230/// the whole THEN region.
231void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
234 SmallVectorImpl<Register> &CandidateRegs) const {
235
236 SmallSet<Register, 8> KillsInElse;
237
238 for (auto *Else : ElseBlocks) {
239 for (auto &MI : Else->instrs()) {
240 if (MI.isDebugInstr())
241 continue;
242
243 for (auto &MO : MI.operands()) {
244 if (!MO.isReg() || !MO.getReg() || MO.isDef())
245 continue;
246
247 Register MOReg = MO.getReg();
248 // We can only optimize AGPR/VGPR virtual register
249 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
250 continue;
251
252 if (MO.readsReg()) {
253 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
255 // Make sure two conditions are met:
256 // a.) the value is defined before/in the IF block
257 // b.) should be defined in the same loop-level.
258 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
259 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
260 // Check if the register is live into the endif block. If not,
261 // consider it killed in the else region.
262 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
263 if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
264 KillsInElse.insert(MOReg);
265 } else {
266 LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
267 << " as Live in Endif\n");
268 }
269 }
270 }
271 }
272 }
273 }
274
275 // Check the phis in the Endif, looking for value coming from the ELSE
276 // region. Make sure the phi-use is the last use.
277 for (auto &MI : Endif->phis()) {
278 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
279 auto &MO = MI.getOperand(Idx);
280 auto *Pred = MI.getOperand(Idx + 1).getMBB();
281 if (Pred == Flow)
282 continue;
283 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
284
285 if (!MO.isReg() || !MO.getReg() || MO.isUndef())
286 continue;
287
288 Register Reg = MO.getReg();
289 if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
290 continue;
291
292 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
293
294 if (VI.isLiveIn(*Endif, Reg, *MRI)) {
295 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
296 << " as Live in Endif\n");
297 continue;
298 }
299 // Make sure two conditions are met:
300 // a.) the value is defined before/in the IF block
301 // b.) should be defined in the same loop-level.
302 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
303 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
304 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
305 KillsInElse.insert(Reg);
306 }
307 }
308
309 auto IsLiveThroughThen = [&](Register Reg) {
310 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
311 ++I) {
312 if (!I->readsReg())
313 continue;
314 auto *UseMI = I->getParent();
315 auto *UseMBB = UseMI->getParent();
316 if (UseMBB == Flow || UseMBB == Endif) {
317 if (!UseMI->isPHI())
318 return true;
319
320 auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
321 // The register is live through the path If->Flow or Flow->Endif.
322 // we should not optimize for such cases.
323 if ((UseMBB == Flow && IncomingMBB != If) ||
324 (UseMBB == Endif && IncomingMBB == Flow))
325 return true;
326 }
327 }
328 return false;
329 };
330
331 for (auto Reg : KillsInElse) {
332 if (!IsLiveThroughThen(Reg))
333 CandidateRegs.push_back(Reg);
334 }
335}
336
337/// Collect the registers used in the waterfall loop block that are defined
338/// before.
339void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
340 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
341 SmallSetVector<Register, 16> &CandidateRegs,
343 SmallVectorImpl<MachineInstr *> &Instructions) const {
344
345 // Collect loop instructions, potentially spanning multiple blocks
346 auto *MBB = LoopHeader;
347 for (;;) {
348 Blocks.insert(MBB);
349 for (auto &MI : *MBB) {
350 if (MI.isDebugInstr())
351 continue;
352 Instructions.push_back(&MI);
353 }
354 if (MBB == LoopEnd)
355 break;
356
357 if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
358 (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
359 LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
360 return;
361 }
362
363 MBB = *MBB->succ_begin();
364 }
365
366 for (auto *I : Instructions) {
367 auto &MI = *I;
368
369 for (auto &MO : MI.all_uses()) {
370 if (!MO.getReg())
371 continue;
372
373 Register MOReg = MO.getReg();
374 // We can only optimize AGPR/VGPR virtual register
375 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
376 continue;
377
378 if (MO.readsReg()) {
379 MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
380 // Make sure the value is defined before the LOOP block
381 if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
382 // If the variable is used after the loop, the register coalescer will
383 // merge the newly created register and remove the phi node again.
384 // Just do nothing in that case.
385 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
386 bool IsUsed = false;
387 for (auto *Succ : LoopEnd->successors()) {
388 if (!Blocks.contains(Succ) &&
389 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
390 IsUsed = true;
391 break;
392 }
393 }
394 if (!IsUsed) {
395 LLVM_DEBUG(dbgs() << "Found candidate reg: "
396 << printReg(MOReg, TRI, 0, MRI) << '\n');
397 CandidateRegs.insert(MOReg);
398 } else {
399 LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
400 << printReg(MOReg, TRI, 0, MRI) << '\n');
401 }
402 }
403 }
404 }
405 }
406}
407
408// Re-calculate the liveness of \p Reg in the THEN-region
409void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
413
414 // Collect all successors until we see the flow block, where we should
415 // reconverge.
416 while (!WorkList.empty()) {
417 auto *MBB = WorkList.pop_back_val();
418 for (auto *Succ : MBB->successors()) {
419 if (Succ != Flow && Blocks.insert(Succ))
420 WorkList.push_back(Succ);
421 }
422 }
423
424 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
425 for (MachineBasicBlock *MBB : Blocks) {
426 // Clear Live bit, as we will recalculate afterwards
427 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
428 << '\n');
429 OldVarInfo.AliveBlocks.reset(MBB->getNumber());
430 }
431
433
434 // Get the blocks the Reg should be alive through
435 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
436 ++I) {
437 auto *UseMI = I->getParent();
438 if (UseMI->isPHI() && I->readsReg()) {
439 if (Blocks.contains(UseMI->getParent()))
440 PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
441 }
442 }
443
444 for (MachineBasicBlock *MBB : Blocks) {
446 // PHI instructions has been processed before.
447 findNonPHIUsesInBlock(Reg, MBB, Uses);
448
449 if (Uses.size() == 1) {
450 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
451 << printMBBReference(*MBB) << '\n');
452 LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
453 } else if (Uses.size() > 1) {
454 // Process the instructions in-order
455 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
456 << printMBBReference(*MBB) << '\n');
457 for (MachineInstr &MI : *MBB) {
459 LV->HandleVirtRegUse(Reg, MBB, MI);
460 }
461 }
462
463 // Mark Reg alive through the block if this is a PHI incoming block
464 if (PHIIncoming.contains(MBB))
465 LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
466 MBB);
467 }
468
469 // Set the isKilled flag if we get new Kills in the THEN region.
470 for (auto *MI : OldVarInfo.Kills) {
471 if (Blocks.contains(MI->getParent()))
472 MI->addRegisterKilled(Reg, TRI);
473 }
474}
475
476void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
478 MachineBasicBlock *Endif,
479 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
480 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
481 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
482
483 // Transfer aliveBlocks from Reg to NewReg
484 for (auto *MBB : ElseBlocks) {
485 unsigned BBNum = MBB->getNumber();
486 if (OldVarInfo.AliveBlocks.test(BBNum)) {
487 NewVarInfo.AliveBlocks.set(BBNum);
488 LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
489 << '\n');
490 OldVarInfo.AliveBlocks.reset(BBNum);
491 }
492 }
493
494 // Transfer the possible Kills in ElseBlocks from Reg to NewReg
495 auto I = OldVarInfo.Kills.begin();
496 while (I != OldVarInfo.Kills.end()) {
497 if (ElseBlocks.contains((*I)->getParent())) {
498 NewVarInfo.Kills.push_back(*I);
499 I = OldVarInfo.Kills.erase(I);
500 } else {
501 ++I;
502 }
503 }
504}
505
506void SIOptimizeVGPRLiveRange::optimizeLiveRange(
508 MachineBasicBlock *Endif,
509 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
510 // Insert a new PHI, marking the value from the THEN region being
511 // undef.
512 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
513 const auto *RC = MRI->getRegClass(Reg);
514 Register NewReg = MRI->createVirtualRegister(RC);
515 Register UndefReg = MRI->createVirtualRegister(RC);
516 MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
517 TII->get(TargetOpcode::PHI), NewReg);
518 for (auto *Pred : Flow->predecessors()) {
519 if (Pred == If)
520 PHI.addReg(Reg).addMBB(Pred);
521 else
522 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
523 }
524
525 // Replace all uses in the ELSE region or the PHIs in ENDIF block
526 // Use early increment range because setReg() will update the linked list.
527 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
528 auto *UseMI = O.getParent();
529 auto *UseBlock = UseMI->getParent();
530 // Replace uses in Endif block
531 if (UseBlock == Endif) {
532 if (UseMI->isPHI())
533 O.setReg(NewReg);
534 else if (UseMI->isDebugInstr())
535 continue;
536 else {
537 // DetectDeadLanes may mark register uses as undef without removing
538 // them, in which case a non-phi instruction using the original register
539 // may exist in the Endif block even though the register is not live
540 // into it.
541 assert(!O.readsReg());
542 }
543 continue;
544 }
545
546 // Replace uses in Else region
547 if (ElseBlocks.contains(UseBlock))
548 O.setReg(NewReg);
549 }
550
551 // The optimized Reg is not alive through Flow blocks anymore.
552 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
553 OldVarInfo.AliveBlocks.reset(Flow->getNumber());
554
555 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
556 updateLiveRangeInThenRegion(Reg, If, Flow);
557}
558
559void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
560 Register Reg, MachineBasicBlock *LoopHeader,
562 SmallVectorImpl<MachineInstr *> &Instructions) const {
563 // Insert a new PHI, marking the value from the last loop iteration undef.
564 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
565 const auto *RC = MRI->getRegClass(Reg);
566 Register NewReg = MRI->createVirtualRegister(RC);
567 Register UndefReg = MRI->createVirtualRegister(RC);
568
569 // Replace all uses in the LOOP region
570 // Use early increment range because setReg() will update the linked list.
571 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
572 auto *UseMI = O.getParent();
573 auto *UseBlock = UseMI->getParent();
574 // Replace uses in Loop blocks
575 if (Blocks.contains(UseBlock))
576 O.setReg(NewReg);
577 }
578
580 BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
581 TII->get(TargetOpcode::PHI), NewReg);
582 for (auto *Pred : LoopHeader->predecessors()) {
583 if (Blocks.contains(Pred))
584 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
585 else
586 PHI.addReg(Reg).addMBB(Pred);
587 }
588
589 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
590 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
591
592 // Find last use and mark as kill
593 MachineInstr *Kill = nullptr;
594 for (auto *MI : reverse(Instructions)) {
595 if (MI->readsRegister(NewReg, TRI)) {
596 MI->addRegisterKilled(NewReg, TRI);
597 NewVarInfo.Kills.push_back(MI);
598 Kill = MI;
599 break;
600 }
601 }
602 assert(Kill && "Failed to find last usage of register in loop");
603
604 MachineBasicBlock *KillBlock = Kill->getParent();
605 bool PostKillBlock = false;
606 for (auto *Block : Blocks) {
607 auto BBNum = Block->getNumber();
608
609 // collectWaterfallCandidateRegisters only collects registers that are dead
610 // after the loop. So we know that the old reg is no longer live throughout
611 // the waterfall loop.
612 OldVarInfo.AliveBlocks.reset(BBNum);
613
614 // The new register is live up to (and including) the block that kills it.
615 PostKillBlock |= (Block == KillBlock);
616 if (PostKillBlock) {
617 NewVarInfo.AliveBlocks.reset(BBNum);
618 } else if (Block != LoopHeader) {
619 NewVarInfo.AliveBlocks.set(BBNum);
620 }
621 }
622}
623
624char SIOptimizeVGPRLiveRangeLegacy::ID = 0;
625
626INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE,
627 "SI Optimize VGPR LiveRange", false, false)
631INITIALIZE_PASS_END(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE,
633
634char &llvm::SIOptimizeVGPRLiveRangeLegacyID = SIOptimizeVGPRLiveRangeLegacy::ID;
635
637 return new SIOptimizeVGPRLiveRangeLegacy();
638}
639
640bool SIOptimizeVGPRLiveRangeLegacy::runOnMachineFunction(MachineFunction &MF) {
641 if (skipFunction(MF.getFunction()))
642 return false;
643
644 LiveVariables *LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
646 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
647 MachineLoopInfo *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
648 return SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF);
649}
650
654 MFPropsModifier _(*this, MF);
658
659 bool Changed = SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF);
660 if (!Changed)
661 return PreservedAnalyses::all();
662
664 PA.preserve<LiveVariablesAnalysis>();
665 PA.preserve<DominatorTreeAnalysis>();
666 PA.preserve<MachineLoopAnalysis>();
667 PA.preserveSet<CFGAnalyses>();
668 return PA;
669}
670
671bool SIOptimizeVGPRLiveRange::run(MachineFunction &MF) {
672 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
673 TII = ST.getInstrInfo();
674 TRI = &TII->getRegisterInfo();
675 MRI = &MF.getRegInfo();
676
677 bool MadeChange = false;
678
679 // TODO: we need to think about the order of visiting the blocks to get
680 // optimal result for nesting if-else cases.
681 for (MachineBasicBlock &MBB : MF) {
682 for (auto &MI : MBB.terminators()) {
683 // Detect the if-else blocks
684 if (MI.getOpcode() == AMDGPU::SI_IF) {
685 MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
686 auto *Endif = getElseTarget(IfTarget);
687 if (!Endif)
688 continue;
689
690 // Skip unexpected control flow.
691 if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
692 continue;
693
695 SmallVector<Register> CandidateRegs;
696
697 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
698 << printMBBReference(MBB) << ' '
699 << printMBBReference(*IfTarget) << ' '
700 << printMBBReference(*Endif) << '\n');
701
702 // Collect all the blocks in the ELSE region
703 collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
704
705 // Collect the registers can be optimized
706 collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
707 CandidateRegs);
708 MadeChange |= !CandidateRegs.empty();
709 // Now we are safe to optimize.
710 for (auto Reg : CandidateRegs)
711 optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
712 } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
713 auto *LoopHeader = MI.getOperand(0).getMBB();
714 auto *LoopEnd = &MBB;
715
716 LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
717 << printMBBReference(*LoopHeader) << '\n');
718
719 SmallSetVector<Register, 16> CandidateRegs;
722
723 collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
724 Blocks, Instructions);
725 MadeChange |= !CandidateRegs.empty();
726 // Now we are safe to optimize.
727 for (auto Reg : CandidateRegs)
728 optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
729 }
730 }
731 }
732
733 return MadeChange;
734}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
Provides AMDGPU specific target descriptions.
Rewrite undef for PHI
MachineBasicBlock & MBB
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Hexagon Hardware Loops
#define _
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
Remove Loads Into Fake Uses
Annotate SI Control Flow
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define DEBUG_TYPE
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
A debug info location.
Definition: DebugLoc.h:33
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
An RAII based helper class to modify MachineFunctionProperties when running pass.
unsigned pred_size() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
unsigned succ_size() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual MachineFunctionProperties getClearedProperties() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
bool isDebugInstr() const
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
Analysis pass that exposes the MachineLoopInfo for a machine function.
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
MachineBasicBlock * getMBB() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
void set(unsigned Idx)
bool test(unsigned Idx) const
void reset(unsigned Idx)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1118
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
char & SIOptimizeVGPRLiveRangeLegacyID
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createSIOptimizeVGPRLiveRangeLegacyPass()
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
VarInfo - This represents the regions where a virtual register is live in the program.
Definition: LiveVariables.h:78
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...
Definition: LiveVariables.h:88
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
Definition: LiveVariables.h:83
bool isLiveIn(const MachineBasicBlock &MBB, Register Reg, MachineRegisterInfo &MRI)
isLiveIn - Is Reg live in to MBB? This means that Reg is live through MBB, or it is killed in MBB.