SILowerControlFlow.cpp (LLVM 15.0.0git)
1 //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass lowers the pseudo control flow instructions to real
11 /// machine instructions.
12 ///
13 /// All control flow is handled using predicated instructions and
14 /// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
15 /// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
16 /// by writing to the 64-bit EXEC register (each bit corresponds to a
17 /// single vector ALU). Typically, for predicates, a vector ALU will write
18 /// to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one bit
19 /// per Vector ALU) and then the Scalar ALU will AND the VCC register with
20 /// EXEC to update the predicates.
21 ///
22 /// For example:
23 /// %vcc = V_CMP_GT_F32 %vgpr1, %vgpr2
24 /// %sgpr0 = SI_IF %vcc
25 /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0
26 /// %sgpr0 = SI_ELSE %sgpr0
27 /// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0
28 /// SI_END_CF %sgpr0
29 ///
30 /// becomes:
31 ///
32 /// %sgpr0 = S_AND_SAVEEXEC_B64 %vcc // Save and update the exec mask
33 /// %sgpr0 = S_XOR_B64 %sgpr0, %exec // Clear live bits from saved exec mask
34 /// S_CBRANCH_EXECZ label0 // This instruction is an optional
35 /// // optimization which allows us to
36 /// // branch if all the bits of
37 /// // EXEC are zero.
38 /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the THEN block of the branch
39 ///
40 /// label0:
41 /// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then
42 /// // block
43 /// %exec = S_XOR_B64 %sgpr0, %exec // Update the exec mask
44 /// S_CBRANCH_EXECZ label1 // Use our branch optimization
45 /// // instruction again.
46 /// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0 // Do the ELSE block
47 /// label1:
48 /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits
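/// On wave32 subtargets the same lowering is emitted against the 32-bit
/// EXEC_LO register with the corresponding *_B32 opcodes; see the opcode
/// selection at the top of runOnMachineFunction() below.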
49 //===----------------------------------------------------------------------===//
50 
51 #include "AMDGPU.h"
52 #include "GCNSubtarget.h"
53 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
54 #include "llvm/ADT/SmallSet.h"
55 #include "llvm/CodeGen/LiveIntervals.h"
56 #include "llvm/CodeGen/LiveVariables.h"
57 #include "llvm/CodeGen/MachineDominators.h"
58 #include "llvm/CodeGen/MachineFunctionPass.h"
59 #include "llvm/Target/TargetMachine.h"
60 
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "si-lower-control-flow"
64 
65 static cl::opt<bool>
66 RemoveRedundantEndcf("amdgpu-remove-redundant-endcf",
67  cl::init(true), cl::ReallyHidden);
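// This flag only enables the cleanup in optimizeEndCf() below (via
// EnableOptimizeEndCf); the control-flow lowering itself is unaffected.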
68 
69 namespace {
70 
71 class SILowerControlFlow : public MachineFunctionPass {
72 private:
73  const SIRegisterInfo *TRI = nullptr;
74  const SIInstrInfo *TII = nullptr;
75  LiveIntervals *LIS = nullptr;
76  LiveVariables *LV = nullptr;
77  MachineDominatorTree *MDT = nullptr;
78  MachineRegisterInfo *MRI = nullptr;
79  SetVector<MachineInstr*> LoweredEndCf;
80  DenseSet<Register> LoweredIf;
81  SmallSet<MachineBasicBlock *, 4> KillBlocks;
82 
83  const TargetRegisterClass *BoolRC = nullptr;
84  unsigned AndOpc;
85  unsigned OrOpc;
86  unsigned XorOpc;
87  unsigned MovTermOpc;
88  unsigned Andn2TermOpc;
89  unsigned XorTermrOpc;
90  unsigned OrTermrOpc;
91  unsigned OrSaveExecOpc;
92  unsigned Exec;
93 
94  bool EnableOptimizeEndCf = false;
95 
96  bool hasKill(const MachineBasicBlock *Begin, const MachineBasicBlock *End);
97 
98  void emitIf(MachineInstr &MI);
99  void emitElse(MachineInstr &MI);
100  void emitIfBreak(MachineInstr &MI);
101  void emitLoop(MachineInstr &MI);
102 
103  MachineBasicBlock *emitEndCf(MachineInstr &MI);
104 
105  void lowerInitExec(MachineBasicBlock *MBB, MachineInstr &MI);
106 
107  void findMaskOperands(MachineInstr &MI, unsigned OpNo,
108                        SmallVectorImpl<MachineOperand> &Src) const;
109 
110  void combineMasks(MachineInstr &MI);
111 
112  bool removeMBBifRedundant(MachineBasicBlock &MBB);
113 
114  MachineBasicBlock *process(MachineInstr &MI);
115 
116  // Skip to the next instruction, ignoring debug instructions, and trivial
117  // block boundaries (blocks that have one (typically fallthrough) successor,
118  // and the successor has one predecessor).
119  MachineBasicBlock::iterator
120  skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB,
121  MachineBasicBlock::iterator It) const;
122 
123  /// Find the insertion point for a new conditional branch.
124  MachineBasicBlock::iterator
125  skipToUncondBrOrEnd(MachineBasicBlock &MBB,
126                      MachineBasicBlock::iterator I) const {
127  assert(I->isTerminator());
128 
129  // FIXME: What if we had multiple pre-existing conditional branches?
130  MachineBasicBlock::iterator End = MBB.end();
131  while (I != End && !I->isUnconditionalBranch())
132  ++I;
133  return I;
134  }
135 
136  // Remove redundant SI_END_CF instructions.
137  void optimizeEndCf();
138 
139 public:
140  static char ID;
141 
142  SILowerControlFlow() : MachineFunctionPass(ID) {}
143 
144  bool runOnMachineFunction(MachineFunction &MF) override;
145 
146  StringRef getPassName() const override {
147  return "SI Lower control flow pseudo instructions";
148  }
149 
150  void getAnalysisUsage(AnalysisUsage &AU) const override {
151  // Should preserve the same set that TwoAddressInstructions does.
152  AU.addPreserved<MachineDominatorTree>();
153  AU.addPreserved<SlotIndexes>();
154  AU.addPreserved<LiveIntervals>();
155  AU.addPreservedID(LiveVariablesID);
156  MachineFunctionPass::getAnalysisUsage(AU);
157  }
158 };
159 
160 } // end anonymous namespace
161 
162 char SILowerControlFlow::ID = 0;
163 
164 INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE,
165  "SI lower control flow", false, false)
166 
167 static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
168  MachineOperand &ImpDefSCC = MI.getOperand(3);
169  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
170 
171  ImpDefSCC.setIsDead(IsDead);
172 }
173 
174 char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
175 
176 bool SILowerControlFlow::hasKill(const MachineBasicBlock *Begin,
177  const MachineBasicBlock *End) {
178  DenseSet<const MachineBasicBlock *> Visited;
179  SmallVector<MachineBasicBlock *, 4> Worklist(Begin->successors());
180 
181  while (!Worklist.empty()) {
182  MachineBasicBlock *MBB = Worklist.pop_back_val();
183 
184  if (MBB == End || !Visited.insert(MBB).second)
185  continue;
186  if (KillBlocks.contains(MBB))
187  return true;
188 
189  Worklist.append(MBB->succ_begin(), MBB->succ_end());
190  }
191 
192  return false;
193 }
194 
195 static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) {
196  Register SaveExecReg = MI.getOperand(0).getReg();
197  auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
198 
199  if (U == MRI->use_instr_nodbg_end() ||
200  std::next(U) != MRI->use_instr_nodbg_end() ||
201  U->getOpcode() != AMDGPU::SI_END_CF)
202  return false;
203 
204  return true;
205 }
206 
207 void SILowerControlFlow::emitIf(MachineInstr &MI) {
208  MachineBasicBlock &MBB = *MI.getParent();
209  const DebugLoc &DL = MI.getDebugLoc();
210  MachineBasicBlock::iterator I(&MI);
211  Register SaveExecReg = MI.getOperand(0).getReg();
212  MachineOperand& Cond = MI.getOperand(1);
213  assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
214 
215  MachineOperand &ImpDefSCC = MI.getOperand(4);
216  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
217 
218  // If there is only one use of save exec register and that use is SI_END_CF,
219  // we can optimize SI_IF by returning the full saved exec mask instead of
220  // just cleared bits.
221  bool SimpleIf = isSimpleIf(MI, MRI);
222 
223  if (SimpleIf) {
224  // Check for SI_KILL_*_TERMINATOR on path from if to endif.
225  // If there is any such terminator, the simplification is not safe.
226  auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg);
227  SimpleIf = !hasKill(MI.getParent(), UseMI->getParent());
228  }
229 
230  // Add an implicit def of exec to discourage scheduling VALU after this which
231  // will interfere with trying to form s_and_saveexec_b64 later.
232  Register CopyReg = SimpleIf ? SaveExecReg
233  : MRI->createVirtualRegister(BoolRC);
234  MachineInstr *CopyExec =
235  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
236  .addReg(Exec)
237  .addReg(Exec, RegState::ImplicitDefine);
238  LoweredIf.insert(CopyReg);
239 
240  Register Tmp = MRI->createVirtualRegister(BoolRC);
241 
242  MachineInstr *And =
243  BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp)
244  .addReg(CopyReg)
245  .add(Cond);
246  if (LV)
247  LV->replaceKillInstruction(Cond.getReg(), MI, *And);
248 
249  setImpSCCDefDead(*And, true);
250 
251  MachineInstr *Xor = nullptr;
252  if (!SimpleIf) {
253  Xor =
254  BuildMI(MBB, I, DL, TII->get(XorOpc), SaveExecReg)
255  .addReg(Tmp)
256  .addReg(CopyReg);
257  setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
258  }
259 
260  // Use a copy that is a terminator to get correct spill code placement with
261  // the fast register allocator.
262  MachineInstr *SetExec =
263  BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
264  .addReg(Tmp, RegState::Kill);
265  if (LV)
266  LV->getVarInfo(Tmp).Kills.push_back(SetExec);
267 
268  // Skip ahead to the unconditional branch in case there are other terminators
269  // present.
270  I = skipToUncondBrOrEnd(MBB, I);
271 
272  // Insert the S_CBRANCH_EXECZ instruction which will be optimized later
273  // during SIRemoveShortExecBranches.
274  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
275  .add(MI.getOperand(2));
276 
277  if (!LIS) {
278  MI.eraseFromParent();
279  return;
280  }
281 
282  LIS->InsertMachineInstrInMaps(*CopyExec);
283 
284  // Replace with and so we don't need to fix the live interval for condition
285  // register.
286  LIS->ReplaceMachineInstrInMaps(MI, *And);
287 
288  if (!SimpleIf)
289  LIS->InsertMachineInstrInMaps(*Xor);
290  LIS->InsertMachineInstrInMaps(*SetExec);
291  LIS->InsertMachineInstrInMaps(*NewBr);
292 
293  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
294  MI.eraseFromParent();
295 
296  // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
297  // hard to add another def here but I'm not sure how to correctly update the
298  // valno.
299  LIS->removeInterval(SaveExecReg);
300  LIS->createAndComputeVirtRegInterval(SaveExecReg);
302  if (!SimpleIf)
303  LIS->createAndComputeVirtRegInterval(CopyReg);
304 }
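// Illustrative summary (wave64 opcodes, simple-if case): the code above turns
//   %sgpr = SI_IF %cond, %bb.target
// into roughly
//   %sgpr = COPY $exec                       ; saved mask, read back by SI_END_CF
//   %tmp  = S_AND_B64 %sgpr, %cond           ; lanes that execute the then-block
//   $exec = S_MOV_B64_term killed %tmp
//   S_CBRANCH_EXECZ %bb.target
// In the non-simple case an extra S_XOR_B64 stores the just-disabled lanes in
// the SI_IF result instead of the plain exec copy.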
305 
306 void SILowerControlFlow::emitElse(MachineInstr &MI) {
307  MachineBasicBlock &MBB = *MI.getParent();
308  const DebugLoc &DL = MI.getDebugLoc();
309 
310  Register DstReg = MI.getOperand(0).getReg();
311 
312  MachineBasicBlock::iterator Start = MBB.begin();
313 
314  // This must be inserted before phis and any spill code inserted before the
315  // else.
316  Register SaveReg = MRI->createVirtualRegister(BoolRC);
317  MachineInstr *OrSaveExec =
318  BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
319  .add(MI.getOperand(1)); // Saved EXEC
320  if (LV)
321  LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec);
322 
323  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
324 
325  MachineBasicBlock::iterator ElsePt(MI);
326 
327  // This accounts for any modification of the EXEC mask within the block and
328  // can be optimized out pre-RA when not required.
329  MachineInstr *And = BuildMI(MBB, ElsePt, DL, TII->get(AndOpc), DstReg)
330  .addReg(Exec)
331  .addReg(SaveReg);
332 
333  if (LIS)
334  LIS->InsertMachineInstrInMaps(*And);
335 
336  MachineInstr *Xor =
337  BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
338  .addReg(Exec)
339  .addReg(DstReg);
340 
341  // Skip ahead to the unconditional branch in case there are other terminators
342  // present.
343  ElsePt = skipToUncondBrOrEnd(MBB, ElsePt);
344 
345  MachineInstr *Branch =
346      BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
347  .addMBB(DestBB);
348 
349  if (!LIS) {
350  MI.eraseFromParent();
351  return;
352  }
353 
354  LIS->RemoveMachineInstrFromMaps(MI);
355  MI.eraseFromParent();
356 
357  LIS->InsertMachineInstrInMaps(*OrSaveExec);
358 
359  LIS->InsertMachineInstrInMaps(*Xor);
360  LIS->InsertMachineInstrInMaps(*Branch);
361 
362  LIS->removeInterval(DstReg);
363  LIS->createAndComputeVirtRegInterval(DstReg);
364  LIS->createAndComputeVirtRegInterval(SaveReg);
365 
366  // Let this be recomputed.
367  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
368 }
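// Illustrative summary (wave64 opcodes): SI_ELSE %src, %bb.endif becomes
//   %save = S_OR_SAVEEXEC_B64 %src         ; %save = exec, exec |= %src
//   %dst  = S_AND_B64 $exec, %save         ; accounts for exec edits in between
//   $exec = S_XOR_B64_term $exec, %dst     ; update exec so the else-side lanes run
//   S_CBRANCH_EXECZ %bb.endif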
369 
370 void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
371  MachineBasicBlock &MBB = *MI.getParent();
372  const DebugLoc &DL = MI.getDebugLoc();
373  auto Dst = MI.getOperand(0).getReg();
374 
375  // Skip ANDing with exec if the break condition is already masked by exec
376  // because it is a V_CMP in the same basic block. (We know the break
377  // condition operand was an i1 in IR, so if it is a VALU instruction it must
378  // be one with a carry-out.)
379  bool SkipAnding = false;
380  if (MI.getOperand(1).isReg()) {
381  if (MachineInstr *Def = MRI->getUniqueVRegDef(MI.getOperand(1).getReg())) {
382  SkipAnding = Def->getParent() == MI.getParent()
383               && TII->isVALU(*Def);
384  }
385  }
386 
387  // AND the break condition operand with exec, then OR that into the "loop
388  // exit" mask.
389  MachineInstr *And = nullptr, *Or = nullptr;
390  if (!SkipAnding) {
391  Register AndReg = MRI->createVirtualRegister(BoolRC);
392  And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
393  .addReg(Exec)
394  .add(MI.getOperand(1));
395  if (LV)
396  LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *And);
397  Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
398  .addReg(AndReg)
399  .add(MI.getOperand(2));
400  if (LIS)
401  LIS->createAndComputeVirtRegInterval(AndReg);
402  } else {
403  Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
404  .add(MI.getOperand(1))
405  .add(MI.getOperand(2));
406  if (LV)
407  LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *Or);
408  }
409  if (LV)
410  LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or);
411 
412  if (LIS) {
413  if (And)
414  LIS->InsertMachineInstrInMaps(*And);
415  LIS->ReplaceMachineInstrInMaps(MI, *Or);
416  }
417 
418  MI.eraseFromParent();
419 }
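// Illustrative summary: SI_IF_BREAK %cond, %mask is lowered to
//   %dst = S_OR_B64 (S_AND_B64 $exec, %cond), %mask
// and the AND is skipped when %cond is a VALU compare from this block, since
// such a result is already limited to the active lanes.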
420 
421 void SILowerControlFlow::emitLoop(MachineInstr &MI) {
422  MachineBasicBlock &MBB = *MI.getParent();
423  const DebugLoc &DL = MI.getDebugLoc();
424 
425  MachineInstr *AndN2 =
426  BuildMI(MBB, &MI, DL, TII->get(Andn2TermOpc), Exec)
427  .addReg(Exec)
428  .add(MI.getOperand(0));
429 
430  auto BranchPt = skipToUncondBrOrEnd(MBB, MI.getIterator());
431  MachineInstr *Branch =
432      BuildMI(MBB, BranchPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
433  .add(MI.getOperand(1));
434 
435  if (LIS) {
436  LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
437  LIS->InsertMachineInstrInMaps(*Branch);
438  }
439 
440  MI.eraseFromParent();
441 }
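// Illustrative summary: SI_LOOP %mask, %bb.header is lowered to
//   $exec = S_ANDN2_B64_term $exec, %mask
//   S_CBRANCH_EXECNZ %bb.header
// so lanes recorded in the break mask drop out, and the wave takes the
// backedge while any lane is still active.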
442 
443 MachineBasicBlock::iterator
444 SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
445     MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
446 
447  SmallSet<const MachineBasicBlock *, 4> Visited;
448  MachineBasicBlock *B = &MBB;
449  do {
450  if (!Visited.insert(B).second)
451  return MBB.end();
452 
453  auto E = B->end();
454  for ( ; It != E; ++It) {
455  if (TII->mayReadEXEC(*MRI, *It))
456  break;
457  }
458 
459  if (It != E)
460  return It;
461 
462  if (B->succ_size() != 1)
463  return MBB.end();
464 
465  // If there is one trivial successor, advance to the next block.
466  MachineBasicBlock *Succ = *B->succ_begin();
467 
468  It = Succ->begin();
469  B = Succ;
470  } while (true);
471 }
472 
473 MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
474  MachineBasicBlock &MBB = *MI.getParent();
475  const DebugLoc &DL = MI.getDebugLoc();
476 
477  MachineBasicBlock::iterator InsPt = MBB.begin();
478 
479  // If we have instructions that aren't prolog instructions, split the block
480  // and emit a terminator instruction. This ensures correct spill placement.
481  // FIXME: We should unconditionally split the block here.
482  bool NeedBlockSplit = false;
483  Register DataReg = MI.getOperand(0).getReg();
484  for (MachineBasicBlock::iterator I = InsPt, E = MI.getIterator();
485  I != E; ++I) {
486  if (I->modifiesRegister(DataReg, TRI)) {
487  NeedBlockSplit = true;
488  break;
489  }
490  }
491 
492  unsigned Opcode = OrOpc;
493  MachineBasicBlock *SplitBB = &MBB;
494  if (NeedBlockSplit) {
495  SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS);
496  if (MDT && SplitBB != &MBB) {
497  MachineDomTreeNode *MBBNode = (*MDT)[&MBB];
498  SmallVector<MachineDomTreeNode *> Children(MBBNode->begin(),
499  MBBNode->end());
500  MachineDomTreeNode *SplitBBNode = MDT->addNewBlock(SplitBB, &MBB);
501  for (MachineDomTreeNode *Child : Children)
502  MDT->changeImmediateDominator(Child, SplitBBNode);
503  }
504  Opcode = OrTermrOpc;
505  InsPt = MI;
506  }
507 
508  MachineInstr *NewMI =
509  BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
510  .addReg(Exec)
511  .add(MI.getOperand(0));
512  if (LV) {
513  LV->replaceKillInstruction(DataReg, MI, *NewMI);
514 
515  if (SplitBB != &MBB) {
516  // Track the set of registers defined in the split block so we don't
517  // accidentally add the original block to AliveBlocks.
518  DenseSet<Register> SplitDefs;
519  for (MachineInstr &X : *SplitBB) {
520  for (MachineOperand &Op : X.operands()) {
521  if (Op.isReg() && Op.isDef() && Op.getReg().isVirtual())
522  SplitDefs.insert(Op.getReg());
523  }
524  }
525 
526  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
527    Register Reg = Register::index2VirtReg(i);
528    LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
529 
530  if (VI.AliveBlocks.test(MBB.getNumber()))
531  VI.AliveBlocks.set(SplitBB->getNumber());
532  else {
533  for (MachineInstr *Kill : VI.Kills) {
534  if (Kill->getParent() == SplitBB && !SplitDefs.contains(Reg))
535  VI.AliveBlocks.set(MBB.getNumber());
536  }
537  }
538  }
539  }
540  }
541 
542  LoweredEndCf.insert(NewMI);
543 
544  if (LIS)
545  LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
546 
547  MI.eraseFromParent();
548 
549  if (LIS)
550  LIS->handleMove(*NewMI);
551  return SplitBB;
552 }
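// Illustrative summary: SI_END_CF %mask becomes a single restore,
//   $exec = S_OR_B64 $exec, %mask   (the _term form if the block was split),
// re-enabling the lanes that were disabled at the matching SI_IF or SI_ELSE.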
553 
554 // Returns the replacement operands for a logical operation: either a single
555 // result (exec) or two operands if the source was another equivalent operation.
556 void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
557  SmallVectorImpl<MachineOperand> &Src) const {
558  MachineOperand &Op = MI.getOperand(OpNo);
559  if (!Op.isReg() || !Op.getReg().isVirtual()) {
560  Src.push_back(Op);
561  return;
562  }
563 
564  MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
565  if (!Def || Def->getParent() != MI.getParent() ||
566  !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))
567  return;
568 
569  // Make sure we do not modify exec between def and use.
570  // A copy with implicitly defined exec inserted earlier is an exclusion, it
571  // does not really modify exec.
572  for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
573  if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
574  !(I->isCopy() && I->getOperand(0).getReg() != Exec))
575  return;
576 
577  for (const auto &SrcOp : Def->explicit_operands())
578  if (SrcOp.isReg() && SrcOp.isUse() &&
579  (SrcOp.getReg().isVirtual() || SrcOp.getReg() == Exec))
580  Src.push_back(SrcOp);
581 }
582 
583 // Search and combine pairs of equivalent instructions, like
584 // S_AND_B64 x, (S_AND_B64 x, y) => S_AND_B64 x, y
585 // S_OR_B64 x, (S_OR_B64 x, y) => S_OR_B64 x, y
586 // One of the operands is exec mask.
587 void SILowerControlFlow::combineMasks(MachineInstr &MI) {
588  assert(MI.getNumExplicitOperands() == 3);
589  SmallVector<MachineOperand, 4> Ops;
590  unsigned OpToReplace = 1;
591  findMaskOperands(MI, 1, Ops);
592  if (Ops.size() == 1) OpToReplace = 2; // First operand can be exec or its copy
593  findMaskOperands(MI, 2, Ops);
594  if (Ops.size() != 3) return;
595 
596  unsigned UniqueOpndIdx;
597  if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
598  else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
599  else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
600  else return;
601 
602  Register Reg = MI.getOperand(OpToReplace).getReg();
603  MI.removeOperand(OpToReplace);
604  MI.addOperand(Ops[UniqueOpndIdx]);
605  if (MRI->use_empty(Reg))
606    MRI->getUniqueVRegDef(Reg)->eraseFromParent();
607 }
608 
609 void SILowerControlFlow::optimizeEndCf() {
610  // If the only instruction immediately following this END_CF is another
611  // END_CF in the only successor, we can avoid emitting the exec mask restore here.
612  if (!EnableOptimizeEndCf)
613  return;
614 
615  for (MachineInstr *MI : reverse(LoweredEndCf)) {
616  MachineBasicBlock &MBB = *MI->getParent();
617  auto Next =
618  skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
619  if (Next == MBB.end() || !LoweredEndCf.count(&*Next))
620  continue;
621  // Only skip inner END_CF if outer ENDCF belongs to SI_IF.
622  // If that belongs to SI_ELSE then saved mask has an inverted value.
623  Register SavedExec
624  = TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
625  assert(SavedExec.isVirtual() && "Expected saved exec to be src1!");
626 
627  const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
628  if (Def && LoweredIf.count(SavedExec)) {
629  LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
630  if (LIS)
631    LIS->RemoveMachineInstrFromMaps(*MI);
632  Register Reg;
633  if (LV)
634  Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
635  MI->eraseFromParent();
636  if (LV)
637    LV->recomputeForSingleDefVirtReg(Reg);
638  removeMBBifRedundant(MBB);
639  }
640  }
641 }
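// Example of the redundancy removed above (wave64):
//   $exec = S_OR_B64 $exec, %inner_saved   ; lowered inner END_CF -> erased
//   $exec = S_OR_B64 $exec, %outer_saved   ; lowered outer END_CF of an SI_IF
// Restoring the outer SI_IF mask already re-enables every lane the inner
// restore would, so the first OR can be dropped.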
642 
643 MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
644  MachineBasicBlock &MBB = *MI.getParent();
645  MachineBasicBlock::iterator I(MI);
646  MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;
647 
648  MachineBasicBlock *SplitBB = &MBB;
649 
650  switch (MI.getOpcode()) {
651  case AMDGPU::SI_IF:
652  emitIf(MI);
653  break;
654 
655  case AMDGPU::SI_ELSE:
656  emitElse(MI);
657  break;
658 
659  case AMDGPU::SI_IF_BREAK:
660  emitIfBreak(MI);
661  break;
662 
663  case AMDGPU::SI_LOOP:
664  emitLoop(MI);
665  break;
666 
667  case AMDGPU::SI_WATERFALL_LOOP:
668  MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ));
669  break;
670 
671  case AMDGPU::SI_END_CF:
672  SplitBB = emitEndCf(MI);
673  break;
674 
675  default:
676  assert(false && "Attempt to process unsupported instruction");
677  break;
678  }
679 
680  MachineBasicBlock::iterator Next;
681  for (I = Prev ? Prev->getIterator() : MBB.begin(); I != MBB.end(); I = Next) {
682  Next = std::next(I);
683  MachineInstr &MaskMI = *I;
684  switch (MaskMI.getOpcode()) {
685  case AMDGPU::S_AND_B64:
686  case AMDGPU::S_OR_B64:
687  case AMDGPU::S_AND_B32:
688  case AMDGPU::S_OR_B32:
689  // Cleanup bit manipulations on exec mask
690  combineMasks(MaskMI);
691  break;
692  default:
693  I = MBB.end();
694  break;
695  }
696  }
697 
698  return SplitBB;
699 }
700 
701 void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
702  MachineInstr &MI) {
703  MachineFunction &MF = *MBB->getParent();
704  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
705  bool IsWave32 = ST.isWave32();
706 
707  if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
708  // This should be before all vector instructions.
709  BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
710  TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
711  .addImm(MI.getOperand(0).getImm());
712  if (LIS)
713    LIS->RemoveMachineInstrFromMaps(MI);
714  MI.eraseFromParent();
715  return;
716  }
717 
718  // Extract the thread count from an SGPR input and set EXEC accordingly.
719  // Since BFM can't shift by 64, handle that case with CMP + CMOV.
720  //
721  // S_BFE_U32 count, input, {shift, 7}
722  // S_BFM_B64 exec, count, 0
723  // S_CMP_EQ_U32 count, 64
724  // S_CMOV_B64 exec, -1
725  Register InputReg = MI.getOperand(0).getReg();
726  MachineInstr *FirstMI = &*MBB->begin();
727  if (InputReg.isVirtual()) {
728  MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
729  assert(DefInstr && DefInstr->isCopy());
730  if (DefInstr->getParent() == MBB) {
731  if (DefInstr != FirstMI) {
732  // If the `InputReg` is defined in the current block, we also need to
733  // move that instruction to the beginning of the block.
734  DefInstr->removeFromParent();
735  MBB->insert(FirstMI, DefInstr);
736  if (LIS)
737  LIS->handleMove(*DefInstr);
738  } else {
739  // If the first instruction is the definition, then move the pointer after it.
740  FirstMI = &*std::next(FirstMI->getIterator());
741  }
742  }
743  }
744 
745  // Insert instruction sequence at block beginning (before vector operations).
746  const DebugLoc DL = MI.getDebugLoc();
747  const unsigned WavefrontSize = ST.getWavefrontSize();
748  const unsigned Mask = (WavefrontSize << 1) - 1;
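  // Note: (WavefrontSize << 1) - 1 keeps enough bits to encode any count from
  // 0 to WavefrontSize inclusive (e.g. 7 bits for wave64), matching the 7-bit
  // field width (0x70000) folded into the S_BFE_U32 operand below.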
749  Register CountReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
750  auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
751  .addReg(InputReg)
752  .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
753  if (LV)
754  LV->recomputeForSingleDefVirtReg(InputReg);
755  auto BfmMI =
756  BuildMI(*MBB, FirstMI, DL,
757  TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
758  .addReg(CountReg)
759  .addImm(0);
760  auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
761  .addReg(CountReg, RegState::Kill)
762  .addImm(WavefrontSize);
763  if (LV)
764  LV->getVarInfo(CountReg).Kills.push_back(CmpMI);
765  auto CmovMI =
766  BuildMI(*MBB, FirstMI, DL,
767  TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
768  Exec)
769  .addImm(-1);
770 
771  if (!LIS) {
772  MI.eraseFromParent();
773  return;
774  }
775 
776  LIS->RemoveMachineInstrFromMaps(MI);
777  MI.eraseFromParent();
778 
779  LIS->InsertMachineInstrInMaps(*BfeMI);
780  LIS->InsertMachineInstrInMaps(*BfmMI);
781  LIS->InsertMachineInstrInMaps(*CmpMI);
782  LIS->InsertMachineInstrInMaps(*CmovMI);
783 
784  LIS->removeInterval(InputReg);
785  LIS->createAndComputeVirtRegInterval(InputReg);
786  LIS->createAndComputeVirtRegInterval(CountReg);
787 }
788 
789 bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
790  for (auto &I : MBB.instrs()) {
791  if (!I.isDebugInstr() && !I.isUnconditionalBranch())
792  return false;
793  }
794 
795  assert(MBB.succ_size() == 1 && "MBB has more than one successor");
796 
797  MachineBasicBlock *Succ = *MBB.succ_begin();
798  MachineBasicBlock *FallThrough = nullptr;
799 
800  while (!MBB.predecessors().empty()) {
801    MachineBasicBlock *P = *MBB.pred_begin();
802    if (P->getFallThrough() == &MBB)
803  FallThrough = P;
804  P->ReplaceUsesOfBlockWith(&MBB, Succ);
805  }
806  MBB.removeSuccessor(Succ);
807  if (LIS) {
808  for (auto &I : MBB.instrs())
809    LIS->RemoveMachineInstrFromMaps(I);
810  }
811  if (MDT) {
812  // If Succ, the single successor of MBB, is dominated by MBB, MDT needs
813  // updating by changing Succ's idom to the one of MBB; otherwise, MBB must
814  // be a leaf node in MDT and could be erased directly.
815  if (MDT->dominates(&MBB, Succ))
816  MDT->changeImmediateDominator(MDT->getNode(Succ),
817  MDT->getNode(&MBB)->getIDom());
818  MDT->eraseNode(&MBB);
819  }
820  MBB.clear();
821  MBB.eraseFromParent();
822  if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
823  if (!Succ->canFallThrough()) {
824  MachineFunction *MF = FallThrough->getParent();
825  MachineFunction::iterator FallThroughPos(FallThrough);
826  MF->splice(std::next(FallThroughPos), Succ);
827  } else
828  BuildMI(*FallThrough, FallThrough->end(),
829  FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
830  .addMBB(Succ);
831  }
832 
833  return true;
834 }
835 
836 bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
837  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
838  TII = ST.getInstrInfo();
839  TRI = &TII->getRegisterInfo();
840  EnableOptimizeEndCf =
841      RemoveRedundantEndcf && MF.getTarget().getOptLevel() > CodeGenOpt::None;
842 
843  // This doesn't actually need LiveIntervals, but we can preserve them.
844  LIS = getAnalysisIfAvailable<LiveIntervals>();
845  // This doesn't actually need LiveVariables, but we can preserve them.
846  LV = getAnalysisIfAvailable<LiveVariables>();
847  MDT = getAnalysisIfAvailable<MachineDominatorTree>();
848  MRI = &MF.getRegInfo();
849  BoolRC = TRI->getBoolRC();
850 
851  if (ST.isWave32()) {
852  AndOpc = AMDGPU::S_AND_B32;
853  OrOpc = AMDGPU::S_OR_B32;
854  XorOpc = AMDGPU::S_XOR_B32;
855  MovTermOpc = AMDGPU::S_MOV_B32_term;
856  Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
857  XorTermrOpc = AMDGPU::S_XOR_B32_term;
858  OrTermrOpc = AMDGPU::S_OR_B32_term;
859  OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
860  Exec = AMDGPU::EXEC_LO;
861  } else {
862  AndOpc = AMDGPU::S_AND_B64;
863  OrOpc = AMDGPU::S_OR_B64;
864  XorOpc = AMDGPU::S_XOR_B64;
865  MovTermOpc = AMDGPU::S_MOV_B64_term;
866  Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
867  XorTermrOpc = AMDGPU::S_XOR_B64_term;
868  OrTermrOpc = AMDGPU::S_OR_B64_term;
869  OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
870  Exec = AMDGPU::EXEC;
871  }
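  // All mask manipulation below goes through these cached opcodes and the
  // Exec alias, so the rest of the pass is wave-size agnostic.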
872 
873  // Compute set of blocks with kills
874  const bool CanDemote =
875      MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS;
876  for (auto &MBB : MF) {
877  bool IsKillBlock = false;
878  for (auto &Term : MBB.terminators()) {
879  if (TII->isKillTerminator(Term.getOpcode())) {
880  KillBlocks.insert(&MBB);
881  IsKillBlock = true;
882  break;
883  }
884  }
885  if (CanDemote && !IsKillBlock) {
886  for (auto &MI : MBB) {
887  if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
888  KillBlocks.insert(&MBB);
889  break;
890  }
891  }
892  }
893  }
894 
895  bool Changed = false;
896  MachineFunction::iterator NextBB;
897  for (MachineFunction::iterator BI = MF.begin();
898  BI != MF.end(); BI = NextBB) {
899  NextBB = std::next(BI);
900  MachineBasicBlock *MBB = &*BI;
901 
902  MachineBasicBlock::iterator I, Next, E;
903  E = MBB->end();
904  for (I = MBB->begin(); I != E; I = Next) {
905  Next = std::next(I);
906  MachineInstr &MI = *I;
907  MachineBasicBlock *SplitMBB = MBB;
908 
909  switch (MI.getOpcode()) {
910  case AMDGPU::SI_IF:
911  case AMDGPU::SI_ELSE:
912  case AMDGPU::SI_IF_BREAK:
913  case AMDGPU::SI_WATERFALL_LOOP:
914  case AMDGPU::SI_LOOP:
915  case AMDGPU::SI_END_CF:
916  SplitMBB = process(MI);
917  Changed = true;
918  break;
919 
920  // FIXME: find a better place for this
921  case AMDGPU::SI_INIT_EXEC:
922  case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
923  lowerInitExec(MBB, MI);
924  if (LIS)
925  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
926  Changed = true;
927  break;
928 
929  default:
930  break;
931  }
932 
933  if (SplitMBB != MBB) {
934  MBB = Next->getParent();
935  E = MBB->end();
936  }
937  }
938  }
939 
940  optimizeEndCf();
941 
942  LoweredEndCf.clear();
943  LoweredIf.clear();
944  KillBlocks.clear();
945 
946  return Changed;
947 }
Definition: TargetTransformInfo.h:38