LLVM  9.0.0svn
SILowerControlFlow.cpp
Go to the documentation of this file.
1 //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass lowers the pseudo control flow instructions to real
11 /// machine instructions.
12 ///
13 /// All control flow is handled using predicated instructions and
14 /// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
15 /// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
16 /// by writing to the 64-bit EXEC register (each bit corresponds to a
17 /// single vector ALU). Typically, for predicates, a vector ALU will write
18 /// to its bit of the VCC register (like EXEC, VCC is 64 bits, one for each
19 /// Vector ALU) and then the ScalarALU will AND the VCC register with the
20 /// EXEC to update the predicates.
21 ///
22 /// For example:
23 /// %vcc = V_CMP_GT_F32 %vgpr1, %vgpr2
24 /// %sgpr0 = SI_IF %vcc
25 /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0
26 /// %sgpr0 = SI_ELSE %sgpr0
27 /// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0
28 /// SI_END_CF %sgpr0
29 ///
30 /// becomes:
31 ///
32 /// %sgpr0 = S_AND_SAVEEXEC_B64 %vcc // Save and update the exec mask
33 /// %sgpr0 = S_XOR_B64 %sgpr0, %exec // Clear live bits from saved exec mask
34 /// S_CBRANCH_EXECZ label0 // This instruction is an optional
35 /// // optimization which allows us to
36 /// // branch if all the bits of
37 /// // EXEC are zero.
38 /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch
39 ///
40 /// label0:
41 /// %sgpr0 = S_OR_SAVEEXEC_B64 %exec // Restore the exec mask for the Then block
42 /// %exec = S_XOR_B64 %sgpr0, %exec // Clear live bits from saved exec mask
43 /// S_CBRANCH_EXECZ label1 // Use our branch optimization
44 /// // instruction again.
45 /// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0 // Do the THEN block
46 /// label1:
47 /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits
48 //===----------------------------------------------------------------------===//
49 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include <cassert>
#include <iterator>
#include <vector>
72 
73 using namespace llvm;
74 
75 #define DEBUG_TYPE "si-lower-control-flow"
76 
77 namespace {
78 
79 class SILowerControlFlow : public MachineFunctionPass {
80 private:
81  const SIRegisterInfo *TRI = nullptr;
82  const SIInstrInfo *TII = nullptr;
83  MachineRegisterInfo *MRI = nullptr;
84  LiveIntervals *LIS = nullptr;
85  MachineDominatorTree *DT = nullptr;
86  MachineLoopInfo *MLI = nullptr;
87 
88 
89  void emitIf(MachineInstr &MI);
90  void emitElse(MachineInstr &MI);
91  void emitIfBreak(MachineInstr &MI);
92  void emitLoop(MachineInstr &MI);
93  void emitEndCf(MachineInstr &MI);
94 
95  void findMaskOperands(MachineInstr &MI, unsigned OpNo,
97 
98  void combineMasks(MachineInstr &MI);
99 
100 public:
101  static char ID;
102 
103  SILowerControlFlow() : MachineFunctionPass(ID) {}
104 
105  bool runOnMachineFunction(MachineFunction &MF) override;
106 
107  StringRef getPassName() const override {
108  return "SI Lower control flow pseudo instructions";
109  }
110 
111  void getAnalysisUsage(AnalysisUsage &AU) const override {
112  // Should preserve the same set that TwoAddressInstructions does.
118 
120  }
121 };
122 
123 } // end anonymous namespace
124 
125 char SILowerControlFlow::ID = 0;
126 
127 INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE,
128  "SI lower control flow", false, false)
129 
130 static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
131  MachineOperand &ImpDefSCC = MI.getOperand(3);
132  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
133 
134  ImpDefSCC.setIsDead(IsDead);
135 }
136 
138 
139 static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
140  const SIInstrInfo *TII) {
141  unsigned SaveExecReg = MI.getOperand(0).getReg();
142  auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
143 
144  if (U == MRI->use_instr_nodbg_end() ||
145  std::next(U) != MRI->use_instr_nodbg_end() ||
146  U->getOpcode() != AMDGPU::SI_END_CF)
147  return false;
148 
149  // Check for SI_KILL_*_TERMINATOR on path from if to endif.
150  // if there is any such terminator simplififcations are not safe.
151  auto SMBB = MI.getParent();
152  auto EMBB = U->getParent();
154  SmallVector<MachineBasicBlock*, 4> Worklist(SMBB->succ_begin(),
155  SMBB->succ_end());
156 
157  while (!Worklist.empty()) {
158  MachineBasicBlock *MBB = Worklist.pop_back_val();
159 
160  if (MBB == EMBB || !Visited.insert(MBB).second)
161  continue;
162  for(auto &Term : MBB->terminators())
163  if (TII->isKillTerminator(Term.getOpcode()))
164  return false;
165 
166  Worklist.append(MBB->succ_begin(), MBB->succ_end());
167  }
168 
169  return true;
170 }
171 
172 void SILowerControlFlow::emitIf(MachineInstr &MI) {
173  MachineBasicBlock &MBB = *MI.getParent();
174  const DebugLoc &DL = MI.getDebugLoc();
176 
177  MachineOperand &SaveExec = MI.getOperand(0);
178  MachineOperand &Cond = MI.getOperand(1);
179  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
180  Cond.getSubReg() == AMDGPU::NoSubRegister);
181 
182  unsigned SaveExecReg = SaveExec.getReg();
183 
184  MachineOperand &ImpDefSCC = MI.getOperand(4);
185  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
186 
187  // If there is only one use of save exec register and that use is SI_END_CF,
188  // we can optimize SI_IF by returning the full saved exec mask instead of
189  // just cleared bits.
190  bool SimpleIf = isSimpleIf(MI, MRI, TII);
191 
192  // Add an implicit def of exec to discourage scheduling VALU after this which
193  // will interfere with trying to form s_and_saveexec_b64 later.
194  unsigned CopyReg = SimpleIf ? SaveExecReg
195  : MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
196  MachineInstr *CopyExec =
197  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
198  .addReg(AMDGPU::EXEC)
199  .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);
200 
201  unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
202 
203  MachineInstr *And =
204  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
205  .addReg(CopyReg)
206  .add(Cond);
207 
208  setImpSCCDefDead(*And, true);
209 
210  MachineInstr *Xor = nullptr;
211  if (!SimpleIf) {
212  Xor =
213  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
214  .addReg(Tmp)
215  .addReg(CopyReg);
216  setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
217  }
218 
219  // Use a copy that is a terminator to get correct spill code placement it with
220  // fast regalloc.
221  MachineInstr *SetExec =
222  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
223  .addReg(Tmp, RegState::Kill);
224 
225  // Insert a pseudo terminator to help keep the verifier happy. This will also
226  // be used later when inserting skips.
227  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
228  .add(MI.getOperand(2));
229 
230  if (!LIS) {
231  MI.eraseFromParent();
232  return;
233  }
234 
235  LIS->InsertMachineInstrInMaps(*CopyExec);
236 
237  // Replace with and so we don't need to fix the live interval for condition
238  // register.
239  LIS->ReplaceMachineInstrInMaps(MI, *And);
240 
241  if (!SimpleIf)
242  LIS->InsertMachineInstrInMaps(*Xor);
243  LIS->InsertMachineInstrInMaps(*SetExec);
244  LIS->InsertMachineInstrInMaps(*NewBr);
245 
246  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
247  MI.eraseFromParent();
248 
249  // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
250  // hard to add another def here but I'm not sure how to correctly update the
251  // valno.
252  LIS->removeInterval(SaveExecReg);
253  LIS->createAndComputeVirtRegInterval(SaveExecReg);
254  LIS->createAndComputeVirtRegInterval(Tmp);
255  if (!SimpleIf)
256  LIS->createAndComputeVirtRegInterval(CopyReg);
257 }
258 
259 void SILowerControlFlow::emitElse(MachineInstr &MI) {
260  MachineBasicBlock &MBB = *MI.getParent();
261  const DebugLoc &DL = MI.getDebugLoc();
262 
263  unsigned DstReg = MI.getOperand(0).getReg();
264  assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
265 
266  bool ExecModified = MI.getOperand(3).getImm() != 0;
267  MachineBasicBlock::iterator Start = MBB.begin();
268 
269  // We are running before TwoAddressInstructions, and si_else's operands are
270  // tied. In order to correctly tie the registers, split this into a copy of
271  // the src like it does.
272  unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
273  MachineInstr *CopyExec =
274  BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
275  .add(MI.getOperand(1)); // Saved EXEC
276 
277  // This must be inserted before phis and any spill code inserted before the
278  // else.
279  unsigned SaveReg = ExecModified ?
280  MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass) : DstReg;
281  MachineInstr *OrSaveExec =
282  BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), SaveReg)
283  .addReg(CopyReg);
284 
285  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
286 
287  MachineBasicBlock::iterator ElsePt(MI);
288 
289  if (ExecModified) {
290  MachineInstr *And =
291  BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
292  .addReg(AMDGPU::EXEC)
293  .addReg(SaveReg);
294 
295  if (LIS)
296  LIS->InsertMachineInstrInMaps(*And);
297  }
298 
299  MachineInstr *Xor =
300  BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
301  .addReg(AMDGPU::EXEC)
302  .addReg(DstReg);
303 
305  BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
306  .addMBB(DestBB);
307 
308  if (!LIS) {
309  MI.eraseFromParent();
310  return;
311  }
312 
313  LIS->RemoveMachineInstrFromMaps(MI);
314  MI.eraseFromParent();
315 
316  LIS->InsertMachineInstrInMaps(*CopyExec);
317  LIS->InsertMachineInstrInMaps(*OrSaveExec);
318 
319  LIS->InsertMachineInstrInMaps(*Xor);
320  LIS->InsertMachineInstrInMaps(*Branch);
321 
322  // src reg is tied to dst reg.
323  LIS->removeInterval(DstReg);
324  LIS->createAndComputeVirtRegInterval(DstReg);
325  LIS->createAndComputeVirtRegInterval(CopyReg);
326  if (ExecModified)
327  LIS->createAndComputeVirtRegInterval(SaveReg);
328 
329  // Let this be recomputed.
330  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
331 }
332 
333 void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
334  MachineBasicBlock &MBB = *MI.getParent();
335  const DebugLoc &DL = MI.getDebugLoc();
336  auto Dst = MI.getOperand(0).getReg();
337 
338  // Skip ANDing with exec if the break condition is already masked by exec
339  // because it is a V_CMP in the same basic block. (We know the break
340  // condition operand was an i1 in IR, so if it is a VALU instruction it must
341  // be one with a carry-out.)
342  bool SkipAnding = false;
343  if (MI.getOperand(1).isReg()) {
344  if (MachineInstr *Def = MRI->getUniqueVRegDef(MI.getOperand(1).getReg())) {
345  SkipAnding = Def->getParent() == MI.getParent()
347  }
348  }
349 
350  // AND the break condition operand with exec, then OR that into the "loop
351  // exit" mask.
352  MachineInstr *And = nullptr, *Or = nullptr;
353  if (!SkipAnding) {
354  And = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
355  .addReg(AMDGPU::EXEC)
356  .add(MI.getOperand(1));
357  Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
358  .addReg(Dst)
359  .add(MI.getOperand(2));
360  } else
361  Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
362  .add(MI.getOperand(1))
363  .add(MI.getOperand(2));
364 
365  if (LIS) {
366  if (And)
367  LIS->InsertMachineInstrInMaps(*And);
368  LIS->ReplaceMachineInstrInMaps(MI, *Or);
369  }
370 
371  MI.eraseFromParent();
372 }
373 
374 void SILowerControlFlow::emitLoop(MachineInstr &MI) {
375  MachineBasicBlock &MBB = *MI.getParent();
376  const DebugLoc &DL = MI.getDebugLoc();
377 
378  MachineInstr *AndN2 =
379  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
380  .addReg(AMDGPU::EXEC)
381  .add(MI.getOperand(0));
382 
384  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
385  .add(MI.getOperand(1));
386 
387  if (LIS) {
388  LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
389  LIS->InsertMachineInstrInMaps(*Branch);
390  }
391 
392  MI.eraseFromParent();
393 }
394 
395 // Insert \p Inst (which modifies exec) at \p InsPt in \p MBB, such that \p MBB
396 // is split as necessary to keep the exec modification in its own block.
398  MachineInstr &MI,
399  MachineInstr *NewMI,
401  LiveIntervals *LIS,
402  MachineLoopInfo *MLI) {
403  assert(NewMI->isTerminator());
404 
406  if (std::next(MI.getIterator()) == MBB.end()) {
407  // Don't bother with a new block.
408  MBB.insert(InsPt, NewMI);
409  if (LIS)
410  LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
411  MI.eraseFromParent();
412  return &MBB;
413  }
414 
415  MachineFunction *MF = MBB.getParent();
416  MachineBasicBlock *SplitMBB
418 
419  MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
420 
421  // FIXME: This is working around a MachineDominatorTree API defect.
422  //
423  // If a previous pass split a critical edge, it may not have been applied to
424  // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
425  // the CFG of the given block. Make sure to call a dominator tree method that
426  // will flush this cache before touching the successors of the block.
427  MachineDomTreeNode *NodeMBB = nullptr;
428  if (DT)
429  NodeMBB = DT->getNode(&MBB);
430 
431  // Move everything to the new block, except the end_cf pseudo.
432  SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());
433 
434  SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
435  MBB.addSuccessor(SplitMBB, BranchProbability::getOne());
436 
437  MBB.insert(MBB.end(), NewMI);
438 
439  if (DT) {
440  std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
441  DT->addNewBlock(SplitMBB, &MBB);
442 
443  // Reparent all of the children to the new block body.
444  auto *SplitNode = DT->getNode(SplitMBB);
445  for (auto *Child : Children)
446  DT->changeImmediateDominator(Child, SplitNode);
447  }
448 
449  if (MLI) {
450  if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
451  Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
452  }
453 
454  if (LIS) {
455  LIS->insertMBBInMaps(SplitMBB);
456  LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
457  }
458 
459  // All live-ins are forwarded.
460  for (auto &LiveIn : MBB.liveins())
461  SplitMBB->addLiveIn(LiveIn);
462 
463  MI.eraseFromParent();
464  return SplitMBB;
465 }
466 
467 void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
468  MachineBasicBlock &MBB = *MI.getParent();
469  const DebugLoc &DL = MI.getDebugLoc();
470 
471  MachineBasicBlock::iterator InsPt = MBB.begin();
472 
473  // First, move the instruction. It's unnecessarily difficult to update
474  // LiveIntervals when there's a change in control flow, so move the
475  // instruction before changing the blocks.
476  MBB.splice(InsPt, &MBB, MI.getIterator());
477  if (LIS)
478  LIS->handleMove(MI);
479 
480  MachineFunction *MF = MBB.getParent();
481 
482  // Create instruction without inserting it yet.
483  MachineInstr *NewMI
484  = BuildMI(*MF, DL, TII->get(AMDGPU::S_OR_B64_term), AMDGPU::EXEC)
485  .addReg(AMDGPU::EXEC)
486  .add(MI.getOperand(0));
487  insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
488 }
489 
490 // Returns replace operands for a logical operation, either single result
491 // for exec or two operands if source was another equivalent operation.
492 void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
493  SmallVectorImpl<MachineOperand> &Src) const {
494  MachineOperand &Op = MI.getOperand(OpNo);
495  if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
496  Src.push_back(Op);
497  return;
498  }
499 
500  MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
501  if (!Def || Def->getParent() != MI.getParent() ||
502  !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))
503  return;
504 
505  // Make sure we do not modify exec between def and use.
506  // A copy with implcitly defined exec inserted earlier is an exclusion, it
507  // does not really modify exec.
508  for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
509  if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
510  !(I->isCopy() && I->getOperand(0).getReg() != AMDGPU::EXEC))
511  return;
512 
513  for (const auto &SrcOp : Def->explicit_operands())
514  if (SrcOp.isReg() && SrcOp.isUse() &&
516  SrcOp.getReg() == AMDGPU::EXEC))
517  Src.push_back(SrcOp);
518 }
519 
520 // Search and combine pairs of equivalent instructions, like
521 // S_AND_B64 x, (S_AND_B64 x, y) => S_AND_B64 x, y
522 // S_OR_B64 x, (S_OR_B64 x, y) => S_OR_B64 x, y
523 // One of the operands is exec mask.
524 void SILowerControlFlow::combineMasks(MachineInstr &MI) {
525  assert(MI.getNumExplicitOperands() == 3);
527  unsigned OpToReplace = 1;
528  findMaskOperands(MI, 1, Ops);
529  if (Ops.size() == 1) OpToReplace = 2; // First operand can be exec or its copy
530  findMaskOperands(MI, 2, Ops);
531  if (Ops.size() != 3) return;
532 
533  unsigned UniqueOpndIdx;
534  if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
535  else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
536  else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
537  else return;
538 
539  unsigned Reg = MI.getOperand(OpToReplace).getReg();
540  MI.RemoveOperand(OpToReplace);
541  MI.addOperand(Ops[UniqueOpndIdx]);
542  if (MRI->use_empty(Reg))
543  MRI->getUniqueVRegDef(Reg)->eraseFromParent();
544 }
545 
546 bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
547  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
548  TII = ST.getInstrInfo();
549  TRI = &TII->getRegisterInfo();
550 
551  // This doesn't actually need LiveIntervals, but we can preserve them.
552  LIS = getAnalysisIfAvailable<LiveIntervals>();
553  DT = getAnalysisIfAvailable<MachineDominatorTree>();
554  MLI = getAnalysisIfAvailable<MachineLoopInfo>();
555 
556  MRI = &MF.getRegInfo();
557 
559  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
560  BI != BE; BI = NextBB) {
561  NextBB = std::next(BI);
562  MachineBasicBlock *MBB = &*BI;
563 
564  MachineBasicBlock::iterator I, Next, Last;
565 
566  for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
567  Next = std::next(I);
568  MachineInstr &MI = *I;
569 
570  switch (MI.getOpcode()) {
571  case AMDGPU::SI_IF:
572  emitIf(MI);
573  break;
574 
575  case AMDGPU::SI_ELSE:
576  emitElse(MI);
577  break;
578 
579  case AMDGPU::SI_IF_BREAK:
580  emitIfBreak(MI);
581  break;
582 
583  case AMDGPU::SI_LOOP:
584  emitLoop(MI);
585  break;
586 
587  case AMDGPU::SI_END_CF: {
588  MachineInstr *NextMI = nullptr;
589 
590  if (Next != MBB->end())
591  NextMI = &*Next;
592 
593  emitEndCf(MI);
594 
595  if (NextMI) {
596  MBB = NextMI->getParent();
597  Next = NextMI->getIterator();
598  Last = MBB->end();
599  }
600 
601  NextBB = std::next(MBB->getIterator());
602  BE = MF.end();
603  break;
604  }
605  case AMDGPU::S_AND_B64:
606  case AMDGPU::S_OR_B64:
607  // Cleanup bit manipulations on exec mask
608  combineMasks(MI);
609  Last = I;
610  continue;
611 
612  default:
613  Last = I;
614  continue;
615  }
616 
617  // Replay newly inserted code to combine masks
618  Next = (Last == MBB->end()) ? MBB->begin() : Last;
619  }
620  }
621 
622  return true;
623 }
const MachineInstrBuilder & add(const MachineOperand &MO) const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AMDGPU specific subclass of TargetSubtarget.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool IsDead
MachineBasicBlock * getMBB() const
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator_range< mop_iterator > explicit_operands()
Definition: MachineInstr.h:464
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
unsigned getSubReg() const
INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE, "SI lower control flow", false, false) static void setImpSCCDefDead(MachineInstr &MI
const SIInstrInfo * getInstrInfo() const override
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
void setIsDead(bool Val=true)
static BranchProbability getOne()
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:648
iterator_range< iterator > terminators()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:326
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void insertMBBInMaps(MachineBasicBlock *MBB)
bool isFullCopy() const
SlotIndexes pass.
Definition: SlotIndexes.h:328
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:250
Base class for the actual dominator tree node.
const std::vector< DomTreeNodeBase * > & getChildren() const
AnalysisUsage & addPreservedID(const void *ID)
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned getReg() const
#define DEBUG_TYPE
Represent the analysis usage information of a pass.
self_iterator getIterator()
Definition: ilist_node.h:81
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
Iterator for intrusive lists based on ilist_node.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
int64_t getImm() const
char & SILowerControlFlowID
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI, const SIInstrInfo *TII)
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Interface definition for SIInstrInfo.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
#define I(x, y, z)
Definition: MD5.cpp:58
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static MachineBasicBlock * insertInstWithExecFallthrough(MachineBasicBlock &MBB, MachineInstr &MI, MachineInstr *NewMI, MachineDominatorTree *DT, LiveIntervals *LIS, MachineLoopInfo *MLI)
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
iterator_range< livein_iterator > liveins() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
static use_instr_nodbg_iterator use_instr_nodbg_end()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
static bool isKillTerminator(unsigned Opcode)
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
const SIRegisterInfo * getRegisterInfo() const override