Line data Source code
1 : //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
11 : /// Computations in WWM can overwrite values in inactive channels for
12 : /// variables that the register allocator thinks are dead. This pass adds fake
13 : /// uses of those variables to their def(s) to make sure that they aren't
14 : /// overwritten.
15 : ///
16 : /// As an example, consider this snippet:
17 : /// %vgpr0 = V_MOV_B32_e32 0.0
18 : /// if (...) {
19 : /// %vgpr1 = ...
20 : /// %vgpr2 = WWM killed %vgpr1
21 : /// ... = killed %vgpr2
22 : /// %vgpr0 = V_MOV_B32_e32 1.0
23 : /// }
24 : /// ... = %vgpr0
25 : ///
26 : /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
27 : /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
28 : /// writing %vgpr1 would only write to channels that would be clobbered by the
29 : /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
30 : /// it would clobber even the inactive channels for which the if-condition is
31 : /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
32 : /// of %vgpr0 to its def to make sure they aren't allocated to the
33 : /// same register.
34 : ///
35 : /// In general, we need to figure out what registers might have their inactive
36 : /// channels which are eventually used accidentally clobbered by a WWM
37 : /// instruction. We do that by spotting three separate cases of registers:
38 : ///
39 : /// 1. A "then phi": the value resulting from phi elimination of a phi node at
40 : /// the end of an if..endif. If there is WWM code in the "then", then we
41 : /// make the def at the end of the "then" branch a partial def by adding an
42 : /// implicit use of the register.
43 : ///
44 : /// 2. A "loop exit register": a value written inside a loop but used outside the
45 : /// loop, where there is WWM code inside the loop (the case in the example
46 : /// above). We add an implicit_def of the register in the loop pre-header,
47 : /// and make the original def a partial def by adding an implicit use of the
48 : /// register.
49 : ///
50 : /// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
51 : /// in a loop header. If there is WWM code inside the loop, then we make all
52 : /// defs inside the loop partial defs by adding an implicit use of the
53 : /// register on each one.
54 : ///
55 : /// Note that we do not need to consider an if..else..endif phi. We only need to
56 : /// consider non-uniform control flow, and control flow structurization would
57 : /// have transformed a non-uniform if..else..endif into two if..endifs.
58 : ///
59 : /// The analysis to detect these cases relies on a property of the MIR
60 : /// arising from this pass running straight after PHIElimination and before any
61 : /// coalescing: that any virtual register with more than one definition must be
62 : /// the new register added to lower a phi node by PHIElimination.
63 : ///
64 : /// FIXME: We should detect whether a register in one of the above categories is
65 : /// already live at the WWM code before deciding to add the implicit uses to
66 : /// synthesize its liveness.
67 : ///
68 : /// FIXME: I believe this whole scheme may be flawed due to the possibility of
69 : /// the register allocator doing live interval splitting.
70 : ///
71 : //===----------------------------------------------------------------------===//
72 :
73 : #include "AMDGPU.h"
74 : #include "AMDGPUSubtarget.h"
75 : #include "SIInstrInfo.h"
76 : #include "SIRegisterInfo.h"
77 : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
78 : #include "llvm/ADT/DepthFirstIterator.h"
79 : #include "llvm/ADT/SparseBitVector.h"
80 : #include "llvm/CodeGen/LiveIntervals.h"
81 : #include "llvm/CodeGen/MachineDominators.h"
82 : #include "llvm/CodeGen/MachineFunctionPass.h"
83 : #include "llvm/CodeGen/MachineLoopInfo.h"
84 : #include "llvm/CodeGen/Passes.h"
85 : #include "llvm/CodeGen/TargetRegisterInfo.h"
86 :
87 : using namespace llvm;
88 :
89 : #define DEBUG_TYPE "si-fix-wwm-liveness"
90 :
91 : namespace {
92 :
93 : class SIFixWWMLiveness : public MachineFunctionPass {
94 : private:
95 : MachineDominatorTree *DomTree;
96 : MachineLoopInfo *LoopInfo;
97 : LiveIntervals *LIS = nullptr;
98 : const SIInstrInfo *TII;
99 : const SIRegisterInfo *TRI;
100 : MachineRegisterInfo *MRI;
101 :
102 : std::vector<MachineInstr *> WWMs;
103 : std::vector<MachineOperand *> ThenDefs;
104 : std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
105 : std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
106 :
107 : public:
108 : static char ID;
109 :
110 1954 : SIFixWWMLiveness() : MachineFunctionPass(ID) {
111 1954 : initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
112 1954 : }
113 :
114 : bool runOnMachineFunction(MachineFunction &MF) override;
115 :
116 1954 : StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
117 :
118 1954 : void getAnalysisUsage(AnalysisUsage &AU) const override {
119 1954 : AU.addRequiredID(MachineDominatorsID);
120 1954 : AU.addRequiredID(MachineLoopInfoID);
121 : // Should preserve the same set that TwoAddressInstructions does.
122 : AU.addPreserved<SlotIndexes>();
123 : AU.addPreserved<LiveIntervals>();
124 1954 : AU.addPreservedID(LiveVariablesID);
125 : AU.addPreservedID(MachineLoopInfoID);
126 : AU.addPreservedID(MachineDominatorsID);
127 1954 : AU.setPreservesCFG();
128 1954 : MachineFunctionPass::getAnalysisUsage(AU);
129 1954 : }
130 :
131 : private:
132 : void processDef(MachineOperand &DefOpnd);
133 : bool processThenDef(MachineOperand *DefOpnd);
134 : bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
135 : bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
136 : };
137 :
138 : } // End anonymous namespace.
139 :
140 85105 : INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
141 : "SI fix WWM liveness", false, false)
142 85105 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
143 85105 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
144 200978 : INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
145 : "SI fix WWM liveness", false, false)
146 :
147 : char SIFixWWMLiveness::ID = 0;
148 :
149 : char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
150 :
151 0 : FunctionPass *llvm::createSIFixWWMLivenessPass() {
152 0 : return new SIFixWWMLiveness();
153 : }
154 :
155 19731 : bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
156 : LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
157 : bool Modified = false;
158 :
159 : // This doesn't actually need LiveIntervals, but we can preserve them.
160 19731 : LIS = getAnalysisIfAvailable<LiveIntervals>();
161 :
162 19731 : const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
163 :
164 19731 : TII = ST.getInstrInfo();
165 19731 : TRI = &TII->getRegisterInfo();
166 19731 : MRI = &MF.getRegInfo();
167 :
168 19731 : DomTree = &getAnalysis<MachineDominatorTree>();
169 19731 : LoopInfo = &getAnalysis<MachineLoopInfo>();
170 :
171 : // Scan the function to find the WWM sections and the candidate registers for
172 : // having liveness modified.
173 42093 : for (MachineBasicBlock &MBB : MF) {
174 451474 : for (MachineInstr &MI : MBB) {
175 858224 : if (MI.getOpcode() == AMDGPU::EXIT_WWM)
176 48 : WWMs.push_back(&MI);
177 : else {
178 811756 : for (MachineOperand &DefOpnd : MI.defs()) {
179 382692 : if (DefOpnd.isReg()) {
180 382692 : unsigned Reg = DefOpnd.getReg();
181 382692 : if (TRI->isVGPR(*MRI, Reg))
182 198573 : processDef(DefOpnd);
183 : }
184 : }
185 : }
186 : }
187 : }
188 19731 : if (!WWMs.empty()) {
189 : // Synthesize liveness over WWM sections as required.
190 69 : for (auto ThenDef : ThenDefs)
191 29 : Modified |= processThenDef(ThenDef);
192 43 : for (auto LoopExitDef : LoopExitDefs)
193 3 : Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
194 43 : for (auto LoopPhiDef : LoopPhiDefs)
195 3 : Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
196 : }
197 :
198 : WWMs.clear();
199 : ThenDefs.clear();
200 : LoopExitDefs.clear();
201 : LoopPhiDefs.clear();
202 :
203 19731 : return Modified;
204 : }
205 :
206 : // During the function scan, process an operand that defines a VGPR.
207 : // This categorizes the register and puts it in the appropriate list for later
208 : // use when processing a WWM section.
209 198573 : void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
210 198573 : unsigned Reg = DefOpnd.getReg();
211 : // Get all the defining instructions. For convenience, make Defs[0] the def
212 : // we are on now.
213 : SmallVector<const MachineInstr *, 4> Defs;
214 198573 : Defs.push_back(DefOpnd.getParent());
215 601571 : for (auto &MI : MRI->def_instructions(Reg)) {
216 204425 : if (&MI != DefOpnd.getParent())
217 5852 : Defs.push_back(&MI);
218 : }
219 : // Check whether this def dominates all the others. If not, ignore this def.
220 : // Either it is going to be processed when the scan encounters its other def
221 : // that dominates all defs, or there is no def that dominates all others.
222 : // The latter case is an eliminated phi from an if..else..endif or similar,
223 : // which must be for uniform control flow so can be ignored.
224 : // Because this pass runs shortly after PHIElimination, we assume that any
225 : // multi-def register is a lowered phi, and thus has each def in a separate
226 : // basic block.
227 400026 : for (unsigned I = 1; I != Defs.size(); ++I) {
228 17469 : if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
229 : return;
230 : }
231 : // Check for the case of an if..endif lowered phi: It has two defs, one
232 : // dominates the other, and there is a single use in a successor of the
233 : // dominant def.
234 : // Later we will spot any WWM code inside
235 : // the "then" clause and turn the second def into a partial def so its
236 : // liveness goes through the WWM code in the "then" clause.
237 195630 : if (Defs.size() == 2) {
238 2834 : auto DomDefBlock = Defs[0]->getParent();
239 2834 : if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
240 1581 : auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
241 3144 : for (auto Succ : DomDefBlock->successors()) {
242 3144 : if (Succ == UseBlock) {
243 : LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
244 1581 : ThenDefs.push_back(&DefOpnd);
245 : return;
246 : }
247 : }
248 : }
249 : }
250 : // Check for the case of a non-lowered-phi register (single def) that exits
251 : // a loop, that is, it has a use that is outside a loop that the def is
252 : // inside. We find the outermost loop that the def is inside but a use is
253 : // outside. Later we will spot any WWM code inside that loop and then make
254 : // the def a partial def so its liveness goes round the loop and through the
255 : // WWM code.
256 194049 : if (Defs.size() == 1) {
257 385550 : auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
258 4317 : if (!Loop)
259 188458 : return;
260 : bool IsLoopExit = false;
261 10074 : for (auto &Use : MRI->use_instructions(Reg)) {
262 5757 : auto UseBlock = Use.getParent();
263 5757 : if (Loop->contains(UseBlock))
264 : continue;
265 : IsLoopExit = true;
266 1166 : while (auto Parent = Loop->getParentLoop()) {
267 31 : if (Parent->contains(UseBlock))
268 : break;
269 : Loop = Parent;
270 : }
271 : }
272 4317 : if (!IsLoopExit)
273 : return;
274 : LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
275 : << " is a loop exit reg with loop header at "
276 : << "bb." << Loop->getHeader()->getNumber() << "\n");
277 1128 : LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
278 : &DefOpnd, Loop));
279 1128 : return;
280 : }
281 : // Check for the case of a lowered single-preheader-loop phi, that is, a
282 : // multi-def register where the dominating def is in the loop pre-header and
283 : // all other defs are in backedges. Later we will spot any WWM code inside
284 : // that loop and then make the backedge defs partial defs so the liveness
285 : // goes through the WWM code.
286 : // Note that we are ignoring multi-preheader loops on the basis that the
287 : // structurizer does not allow that for non-uniform loops.
288 : // There must be a single use in the loop header.
289 1274 : if (!MRI->hasOneUse(Reg))
290 : return;
291 1216 : auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
292 1216 : auto Loop = LoopInfo->getLoopFor(UseBlock);
293 1213 : if (!Loop || Loop->getHeader() != UseBlock
294 2426 : || Loop->contains(Defs[0]->getParent())) {
295 : LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
296 : << " is multi-def but single use not in loop header\n");
297 3 : return;
298 : }
299 2429 : for (unsigned I = 1; I != Defs.size(); ++I) {
300 1216 : if (!Loop->contains(Defs[I]->getParent()))
301 : return;
302 : }
303 : LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
304 : << " is a loop phi reg with loop header at "
305 : << "bb." << Loop->getHeader()->getNumber() << "\n");
306 1213 : LoopPhiDefs.push_back(
307 1213 : std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
308 : }
309 :
310 : // Process a then phi def: It has two defs, one dominates the other, and there
311 : // is a single use in a successor of the dominant def. Here we spot any WWM
312 : // code inside the "then" clause and turn the second def into a partial def so
313 : // its liveness goes through the WWM code in the "then" clause.
314 29 : bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
315 : LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
316 58 : if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
317 : // Ignore if dominating def is undef.
318 : LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n");
319 : return false;
320 : }
321 9 : unsigned Reg = DefOpnd->getReg();
322 : // Get the use block, which is the endif block.
323 9 : auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
324 : // Check whether there is WWM code inside the then branch. The WWM code must
325 : // be dominated by the if but not dominated by the endif.
326 : bool ContainsWWM = false;
327 9 : for (auto WWM : WWMs) {
328 9 : if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
329 18 : && !DomTree->dominates(UseBlock, WWM->getParent())) {
330 : LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
331 : ContainsWWM = true;
332 : break;
333 : }
334 : }
335 9 : if (!ContainsWWM)
336 : return false;
337 : // Get the other def.
338 : MachineInstr *OtherDef = nullptr;
339 36 : for (auto &MI : MRI->def_instructions(Reg)) {
340 18 : if (&MI != DefOpnd->getParent())
341 : OtherDef = &MI;
342 : }
343 : // Make it a partial def.
344 9 : OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
345 : LLVM_DEBUG(dbgs() << *OtherDef);
346 9 : return true;
347 : }
348 :
349 : // Process a loop exit def, that is, a register with a single use in a loop
350 : // that has a use outside the loop. Here we spot any WWM code inside that loop
351 : // and then make the def a partial def so its liveness goes round the loop and
352 : // through the WWM code.
353 3 : bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
354 : MachineLoop *Loop) {
355 : LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
356 : // Check whether there is WWM code inside the loop.
357 : bool ContainsWWM = false;
358 6 : for (auto WWM : WWMs) {
359 6 : if (Loop->contains(WWM->getParent())) {
360 : LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
361 : ContainsWWM = true;
362 : break;
363 : }
364 : }
365 3 : if (!ContainsWWM)
366 : return false;
367 3 : unsigned Reg = DefOpnd->getReg();
368 : // Add a new implicit_def in loop preheader(s).
369 9 : for (auto Pred : Loop->getHeader()->predecessors()) {
370 6 : if (!Loop->contains(Pred)) {
371 3 : auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
372 6 : TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
373 : LLVM_DEBUG(dbgs() << *ImplicitDef);
374 : (void)ImplicitDef;
375 : }
376 : }
377 : // Make the original def partial.
378 6 : DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
379 : Reg, false, /*isImp=*/true));
380 : LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
381 3 : return true;
382 : }
383 :
384 : // Process a loop phi def, that is, a multi-def register where the dominating
385 : // def is in the loop pre-header and all other defs are in backedges. Here we
386 : // spot any WWM code inside that loop and then make the backedge defs partial
387 : // defs so the liveness goes through the WWM code.
388 3 : bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
389 : MachineLoop *Loop) {
390 : LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
391 : // Check whether there is WWM code inside the loop.
392 : bool ContainsWWM = false;
393 6 : for (auto WWM : WWMs) {
394 6 : if (Loop->contains(WWM->getParent())) {
395 : LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
396 : ContainsWWM = true;
397 : break;
398 : }
399 : }
400 3 : if (!ContainsWWM)
401 : return false;
402 3 : unsigned Reg = DefOpnd->getReg();
403 : // Remove kill mark from uses.
404 9 : for (auto &Use : MRI->use_operands(Reg))
405 : Use.setIsKill(false);
406 : // Make all defs except the dominating one partial defs.
407 : SmallVector<MachineInstr *, 4> Defs;
408 12 : for (auto &Def : MRI->def_instructions(Reg))
409 6 : Defs.push_back(&Def);
410 9 : for (auto Def : Defs) {
411 6 : if (DefOpnd->getParent() == Def)
412 : continue;
413 3 : Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
414 : LLVM_DEBUG(dbgs() << *Def);
415 : }
416 : return true;
417 : }
418 :
|