LLVM  9.0.0svn
SIFixWWMLiveness.cpp
Go to the documentation of this file.
1 //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Computations in WWM can overwrite values in inactive channels for
11 /// variables that the register allocator thinks are dead. This pass adds fake
12 /// uses of those variables to their def(s) to make sure that they aren't
13 /// overwritten.
14 ///
15 /// As an example, consider this snippet:
16 /// %vgpr0 = V_MOV_B32_e32 0.0
17 /// if (...) {
18 /// %vgpr1 = ...
19 /// %vgpr2 = WWM killed %vgpr1
20 /// ... = killed %vgpr2
21 /// %vgpr0 = V_MOV_B32_e32 1.0
22 /// }
23 /// ... = %vgpr0
24 ///
25 /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
26 /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
27 /// writing %vgpr1 would only write to channels that would be clobbered by the
28 /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
29 /// it would clobber even the inactive channels for which the if-condition is
30 /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
31 /// of %vgpr0 to its def to make sure they aren't allocated to the
32 /// same register.
33 ///
34 /// In general, we need to figure out what registers might have their inactive
35 /// channels which are eventually used accidentally clobbered by a WWM
36 /// instruction. We do that by spotting three separate cases of registers:
37 ///
38 /// 1. A "then phi": the value resulting from phi elimination of a phi node at
39 /// the end of an if..endif. If there is WWM code in the "then", then we
40 /// make the def at the end of the "then" branch a partial def by adding an
41 /// implicit use of the register.
42 ///
43 /// 2. A "loop exit register": a value written inside a loop but used outside the
44 /// loop, where there is WWM code inside the loop (the case in the example
45 /// above). We add an implicit_def of the register in the loop pre-header,
46 /// and make the original def a partial def by adding an implicit use of the
47 /// register.
48 ///
49 /// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
50 /// in a loop header. If there is WWM code inside the loop, then we make all
51 /// defs inside the loop partial defs by adding an implicit use of the
52 /// register on each one.
53 ///
54 /// Note that we do not need to consider an if..else..endif phi. We only need to
55 /// consider non-uniform control flow, and control flow structurization would
56 /// have transformed a non-uniform if..else..endif into two if..endifs.
57 ///
58 /// The analysis to detect these cases relies on a property of the MIR
59 /// arising from this pass running straight after PHIElimination and before any
60 /// coalescing: that any virtual register with more than one definition must be
61 /// the new register added to lower a phi node by PHIElimination.
62 ///
63 /// FIXME: We should detect whether a register in one of the above categories is
64 /// already live at the WWM code before deciding to add the implicit uses to
65 /// synthesize its liveness.
66 ///
67 /// FIXME: I believe this whole scheme may be flawed due to the possibility of
68 /// the register allocator doing live interval splitting.
69 ///
70 //===----------------------------------------------------------------------===//
71 
72 #include "AMDGPU.h"
73 #include "AMDGPUSubtarget.h"
74 #include "SIInstrInfo.h"
75 #include "SIRegisterInfo.h"
83 #include "llvm/CodeGen/Passes.h"
85 
86 using namespace llvm;
87 
88 #define DEBUG_TYPE "si-fix-wwm-liveness"
89 
90 namespace {
91 
92 class SIFixWWMLiveness : public MachineFunctionPass {
93 private:
94  MachineDominatorTree *DomTree;
96  LiveIntervals *LIS = nullptr;
97  const SIInstrInfo *TII;
98  const SIRegisterInfo *TRI;
100 
101  std::vector<MachineInstr *> WWMs;
102  std::vector<MachineOperand *> ThenDefs;
103  std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
104  std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
105 
106 public:
107  static char ID;
108 
109  SIFixWWMLiveness() : MachineFunctionPass(ID) {
111  }
112 
113  bool runOnMachineFunction(MachineFunction &MF) override;
114 
115  StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
116 
117  void getAnalysisUsage(AnalysisUsage &AU) const override {
120  // Should preserve the same set that TwoAddressInstructions does.
126  AU.setPreservesCFG();
128  }
129 
130 private:
131  void processDef(MachineOperand &DefOpnd);
132  bool processThenDef(MachineOperand *DefOpnd);
133  bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
134  bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
135 };
136 
137 } // End anonymous namespace.
138 
139 INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
140  "SI fix WWM liveness", false, false)
143 INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
144  "SI fix WWM liveness", false, false)
145 
146 char SIFixWWMLiveness::ID = 0;
147 
148 char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
149 
151  return new SIFixWWMLiveness();
152 }
153 
154 bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
155  LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
156  bool Modified = false;
157 
158  // This doesn't actually need LiveIntervals, but we can preserve them.
159  LIS = getAnalysisIfAvailable<LiveIntervals>();
160 
161  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
162 
163  TII = ST.getInstrInfo();
164  TRI = &TII->getRegisterInfo();
165  MRI = &MF.getRegInfo();
166 
167  DomTree = &getAnalysis<MachineDominatorTree>();
168  LoopInfo = &getAnalysis<MachineLoopInfo>();
169 
170  // Scan the function to find the WWM sections and the candidate registers for
171  // having liveness modified.
172  for (MachineBasicBlock &MBB : MF) {
173  for (MachineInstr &MI : MBB) {
174  if (MI.getOpcode() == AMDGPU::EXIT_WWM)
175  WWMs.push_back(&MI);
176  else {
177  for (MachineOperand &DefOpnd : MI.defs()) {
178  if (DefOpnd.isReg()) {
179  unsigned Reg = DefOpnd.getReg();
180  if (TRI->isVGPR(*MRI, Reg))
181  processDef(DefOpnd);
182  }
183  }
184  }
185  }
186  }
187  if (!WWMs.empty()) {
188  // Synthesize liveness over WWM sections as required.
189  for (auto ThenDef : ThenDefs)
190  Modified |= processThenDef(ThenDef);
191  for (auto LoopExitDef : LoopExitDefs)
192  Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
193  for (auto LoopPhiDef : LoopPhiDefs)
194  Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
195  }
196 
197  WWMs.clear();
198  ThenDefs.clear();
199  LoopExitDefs.clear();
200  LoopPhiDefs.clear();
201 
202  return Modified;
203 }
204 
205 // During the function scan, process an operand that defines a VGPR.
206 // This categorizes the register and puts it in the appropriate list for later
207 // use when processing a WWM section.
208 void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
209  unsigned Reg = DefOpnd.getReg();
210  // Get all the defining instructions. For convenience, make Defs[0] the def
211  // we are on now.
213  Defs.push_back(DefOpnd.getParent());
214  for (auto &MI : MRI->def_instructions(Reg)) {
215  if (&MI != DefOpnd.getParent())
216  Defs.push_back(&MI);
217  }
218  // Check whether this def dominates all the others. If not, ignore this def.
219  // Either it is going to be processed when the scan encounters its other def
220  // that dominates all defs, or there is no def that dominates all others.
221  // The latter case is an eliminated phi from an if..else..endif or similar,
222  // which must be for uniform control flow so can be ignored.
223  // Because this pass runs shortly after PHIElimination, we assume that any
224  // multi-def register is a lowered phi, and thus has each def in a separate
225  // basic block.
226  for (unsigned I = 1; I != Defs.size(); ++I) {
227  if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
228  return;
229  }
230  // Check for the case of an if..endif lowered phi: It has two defs, one
231  // dominates the other, and there is a single use in a successor of the
232  // dominant def.
233  // Later we will spot any WWM code inside
234  // the "then" clause and turn the second def into a partial def so its
235  // liveness goes through the WWM code in the "then" clause.
236  if (Defs.size() == 2) {
237  auto DomDefBlock = Defs[0]->getParent();
238  if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
239  auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
240  for (auto Succ : DomDefBlock->successors()) {
241  if (Succ == UseBlock) {
242  LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
243  ThenDefs.push_back(&DefOpnd);
244  return;
245  }
246  }
247  }
248  }
249  // Check for the case of a non-lowered-phi register (single def) that exits
250  // a loop, that is, it has a use that is outside a loop that the def is
251  // inside. We find the outermost loop that the def is inside but a use is
252  // outside. Later we will spot any WWM code inside that loop and then make
253  // the def a partial def so its liveness goes round the loop and through the
254  // WWM code.
255  if (Defs.size() == 1) {
256  auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
257  if (!Loop)
258  return;
259  bool IsLoopExit = false;
260  for (auto &Use : MRI->use_instructions(Reg)) {
261  auto UseBlock = Use.getParent();
262  if (Loop->contains(UseBlock))
263  continue;
264  IsLoopExit = true;
265  while (auto Parent = Loop->getParentLoop()) {
266  if (Parent->contains(UseBlock))
267  break;
268  Loop = Parent;
269  }
270  }
271  if (!IsLoopExit)
272  return;
273  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
274  << " is a loop exit reg with loop header at "
275  << "bb." << Loop->getHeader()->getNumber() << "\n");
276  LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
277  &DefOpnd, Loop));
278  return;
279  }
280  // Check for the case of a lowered single-preheader-loop phi, that is, a
281  // multi-def register where the dominating def is in the loop pre-header and
282  // all other defs are in backedges. Later we will spot any WWM code inside
283  // that loop and then make the backedge defs partial defs so the liveness
284  // goes through the WWM code.
285  // Note that we are ignoring multi-preheader loops on the basis that the
286  // structurizer does not allow that for non-uniform loops.
287  // There must be a single use in the loop header.
288  if (!MRI->hasOneUse(Reg))
289  return;
290  auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
291  auto Loop = LoopInfo->getLoopFor(UseBlock);
292  if (!Loop || Loop->getHeader() != UseBlock
293  || Loop->contains(Defs[0]->getParent())) {
294  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
295  << " is multi-def but single use not in loop header\n");
296  return;
297  }
298  for (unsigned I = 1; I != Defs.size(); ++I) {
299  if (!Loop->contains(Defs[I]->getParent()))
300  return;
301  }
302  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
303  << " is a loop phi reg with loop header at "
304  << "bb." << Loop->getHeader()->getNumber() << "\n");
305  LoopPhiDefs.push_back(
306  std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
307 }
308 
309 // Process a then phi def: It has two defs, one dominates the other, and there
310 // is a single use in a successor of the dominant def. Here we spot any WWM
311 // code inside the "then" clause and turn the second def into a partial def so
312 // its liveness goes through the WWM code in the "then" clause.
313 bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
314  LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
315  if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
316  // Ignore if dominating def is undef.
317  LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n");
318  return false;
319  }
320  unsigned Reg = DefOpnd->getReg();
321  // Get the use block, which is the endif block.
322  auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
323  // Check whether there is WWM code inside the then branch. The WWM code must
324  // be dominated by the if but not dominated by the endif.
325  bool ContainsWWM = false;
326  for (auto WWM : WWMs) {
327  if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
328  && !DomTree->dominates(UseBlock, WWM->getParent())) {
329  LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
330  ContainsWWM = true;
331  break;
332  }
333  }
334  if (!ContainsWWM)
335  return false;
336  // Get the other def.
337  MachineInstr *OtherDef = nullptr;
338  for (auto &MI : MRI->def_instructions(Reg)) {
339  if (&MI != DefOpnd->getParent())
340  OtherDef = &MI;
341  }
342  // Make it a partial def.
343  OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
344  LLVM_DEBUG(dbgs() << *OtherDef);
345  return true;
346 }
347 
348 // Process a loop exit def, that is, a register with a single use in a loop
349 // that has a use outside the loop. Here we spot any WWM code inside that loop
350 // and then make the def a partial def so its liveness goes round the loop and
351 // through the WWM code.
352 bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
353  MachineLoop *Loop) {
354  LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
355  // Check whether there is WWM code inside the loop.
356  bool ContainsWWM = false;
357  for (auto WWM : WWMs) {
358  if (Loop->contains(WWM->getParent())) {
359  LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
360  ContainsWWM = true;
361  break;
362  }
363  }
364  if (!ContainsWWM)
365  return false;
366  unsigned Reg = DefOpnd->getReg();
367  // Add a new implicit_def in loop preheader(s).
368  for (auto Pred : Loop->getHeader()->predecessors()) {
369  if (!Loop->contains(Pred)) {
370  auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
371  TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
372  LLVM_DEBUG(dbgs() << *ImplicitDef);
373  (void)ImplicitDef;
374  }
375  }
376  // Make the original def partial.
378  Reg, false, /*isImp=*/true));
379  LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
380  return true;
381 }
382 
383 // Process a loop phi def, that is, a multi-def register where the dominating
384 // def is in the loop pre-header and all other defs are in backedges. Here we
385 // spot any WWM code inside that loop and then make the backedge defs partial
386 // defs so the liveness goes through the WWM code.
387 bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
388  MachineLoop *Loop) {
389  LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
390  // Check whether there is WWM code inside the loop.
391  bool ContainsWWM = false;
392  for (auto WWM : WWMs) {
393  if (Loop->contains(WWM->getParent())) {
394  LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
395  ContainsWWM = true;
396  break;
397  }
398  }
399  if (!ContainsWWM)
400  return false;
401  unsigned Reg = DefOpnd->getReg();
402  // Remove kill mark from uses.
403  for (auto &Use : MRI->use_operands(Reg))
404  Use.setIsKill(false);
405  // Make all defs except the dominating one partial defs.
407  for (auto &Def : MRI->def_instructions(Reg))
408  Defs.push_back(&Def);
409  for (auto Def : Defs) {
410  if (DefOpnd->getParent() == Def)
411  continue;
412  Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
413  LLVM_DEBUG(dbgs() << *Def);
414  }
415  return true;
416 }
417 
Interface definition for SIRegisterInfo.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define DEBUG_TYPE
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
unsigned getReg() const
getReg - Returns the register number.
unsigned Reg
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:689
const HexagonInstrInfo * TII
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
BlockT * getHeader() const
Definition: LoopInfo.h:99
SI fix WWM liveness
SlotIndexes pass.
Definition: SlotIndexes.h:328
AnalysisUsage & addPreservedID(const void *ID)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Represent the analysis usage information of a pass.
INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE, "SI fix WWM liveness", false, false) INITIALIZE_PASS_END(SIFixWWMLiveness
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
iterator_range< pred_iterator > predecessors()
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
char & SIFixWWMLivenessID
FunctionPass * createSIFixWWMLivenessPass()
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
arm execution domain fix
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
void initializeSIFixWWMLivenessPass(PassRegistry &)
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:298
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:839
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
LoopT * getParentLoop() const
Definition: LoopInfo.h:100
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
#define I(x, y, z)
Definition: MD5.cpp:58
static const Function * getParent(const Value *V)
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
#define LLVM_DEBUG(X)
Definition: Debug.h:122
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const SIRegisterInfo * getRegisterInfo() const override