LLVM  10.0.0svn
ModuloSchedule.cpp
Go to the documentation of this file.
1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "pipeliner"
22 using namespace llvm;
23 
25  for (MachineInstr *MI : ScheduledInstrs)
26  OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
27 }
28 
29 //===----------------------------------------------------------------------===//
30 // ModuloScheduleExpander implementation
31 //===----------------------------------------------------------------------===//
32 
33 /// Return the register values for the operands of a Phi instruction.
34 /// This function assume the instruction is a Phi.
36  unsigned &InitVal, unsigned &LoopVal) {
37  assert(Phi.isPHI() && "Expecting a Phi.");
38 
39  InitVal = 0;
40  LoopVal = 0;
41  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
42  if (Phi.getOperand(i + 1).getMBB() != Loop)
43  InitVal = Phi.getOperand(i).getReg();
44  else
45  LoopVal = Phi.getOperand(i).getReg();
46 
47  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
48 }
49 
50 /// Return the Phi register value that comes from the incoming block.
51 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
52  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
53  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
54  return Phi.getOperand(i).getReg();
55  return 0;
56 }
57 
58 /// Return the Phi register value that comes the loop block.
59 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
60  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
61  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
62  return Phi.getOperand(i).getReg();
63  return 0;
64 }
65 
67  BB = Schedule.getLoop()->getTopBlock();
68  Preheader = *BB->pred_begin();
69  if (Preheader == BB)
70  Preheader = *std::next(BB->pred_begin());
71 
72  // Iterate over the definitions in each instruction, and compute the
73  // stage difference for each use. Keep the maximum value.
74  for (MachineInstr *MI : Schedule.getInstructions()) {
75  int DefStage = Schedule.getStage(MI);
76  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
77  MachineOperand &Op = MI->getOperand(i);
78  if (!Op.isReg() || !Op.isDef())
79  continue;
80 
81  Register Reg = Op.getReg();
82  unsigned MaxDiff = 0;
83  bool PhiIsSwapped = false;
84  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
85  EI = MRI.use_end();
86  UI != EI; ++UI) {
87  MachineOperand &UseOp = *UI;
88  MachineInstr *UseMI = UseOp.getParent();
89  int UseStage = Schedule.getStage(UseMI);
90  unsigned Diff = 0;
91  if (UseStage != -1 && UseStage >= DefStage)
92  Diff = UseStage - DefStage;
93  if (MI->isPHI()) {
94  if (isLoopCarried(*MI))
95  ++Diff;
96  else
97  PhiIsSwapped = true;
98  }
99  MaxDiff = std::max(Diff, MaxDiff);
100  }
101  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
102  }
103  }
104 
105  generatePipelinedLoop();
106 }
107 
108 void ModuloScheduleExpander::generatePipelinedLoop() {
110  assert(LoopInfo && "Must be able to analyze loop!");
111 
112  // Create a new basic block for the kernel and add it to the CFG.
113  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
114 
115  unsigned MaxStageCount = Schedule.getNumStages() - 1;
116 
117  // Remember the registers that are used in different stages. The index is
118  // the iteration, or stage, that the instruction is scheduled in. This is
119  // a map between register names in the original block and the names created
120  // in each stage of the pipelined loop.
121  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
122  InstrMapTy InstrMap;
123 
125 
126  // Generate the prolog instructions that set up the pipeline.
127  generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
128  MF.insert(BB->getIterator(), KernelBB);
129 
130  // Rearrange the instructions to generate the new, pipelined loop,
131  // and update register names as needed.
132  for (MachineInstr *CI : Schedule.getInstructions()) {
133  if (CI->isPHI())
134  continue;
135  unsigned StageNum = Schedule.getStage(CI);
136  MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
137  updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
138  KernelBB->push_back(NewMI);
139  InstrMap[NewMI] = CI;
140  }
141 
142  // Copy any terminator instructions to the new kernel, and update
143  // names as needed.
144  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
145  E = BB->instr_end();
146  I != E; ++I) {
147  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
148  updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
149  KernelBB->push_back(NewMI);
150  InstrMap[NewMI] = &*I;
151  }
152 
153  NewKernel = KernelBB;
154  KernelBB->transferSuccessors(BB);
155  KernelBB->replaceSuccessor(BB, KernelBB);
156 
157  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
158  InstrMap, MaxStageCount, MaxStageCount, false);
159  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
160  MaxStageCount, MaxStageCount, false);
161 
162  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
163 
165  // Generate the epilog instructions to complete the pipeline.
166  generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
167 
168  // We need this step because the register allocation doesn't handle some
169  // situations well, so we insert copies to help out.
170  splitLifetimes(KernelBB, EpilogBBs);
171 
172  // Remove dead instructions due to loop induction variables.
173  removeDeadInstructions(KernelBB, EpilogBBs);
174 
175  // Add branches between prolog and epilog blocks.
176  addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
177 
178  delete[] VRMap;
179 }
180 
182  // Remove the original loop since it's no longer referenced.
183  for (auto &I : *BB)
184  LIS.RemoveMachineInstrFromMaps(I);
185  BB->clear();
186  BB->eraseFromParent();
187 }
188 
189 /// Generate the pipeline prolog code.
190 void ModuloScheduleExpander::generateProlog(unsigned LastStage,
191  MachineBasicBlock *KernelBB,
192  ValueMapTy *VRMap,
193  MBBVectorTy &PrologBBs) {
194  MachineBasicBlock *PredBB = Preheader;
195  InstrMapTy InstrMap;
196 
197  // Generate a basic block for each stage, not including the last stage,
198  // which will be generated in the kernel. Each basic block may contain
199  // instructions from multiple stages/iterations.
200  for (unsigned i = 0; i < LastStage; ++i) {
201  // Create and insert the prolog basic block prior to the original loop
202  // basic block. The original loop is removed later.
203  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
204  PrologBBs.push_back(NewBB);
205  MF.insert(BB->getIterator(), NewBB);
206  NewBB->transferSuccessors(PredBB);
207  PredBB->addSuccessor(NewBB);
208  PredBB = NewBB;
209 
210  // Generate instructions for each appropriate stage. Process instructions
211  // in original program order.
212  for (int StageNum = i; StageNum >= 0; --StageNum) {
213  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
214  BBE = BB->getFirstTerminator();
215  BBI != BBE; ++BBI) {
216  if (Schedule.getStage(&*BBI) == StageNum) {
217  if (BBI->isPHI())
218  continue;
219  MachineInstr *NewMI =
220  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
221  updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
222  NewBB->push_back(NewMI);
223  InstrMap[NewMI] = &*BBI;
224  }
225  }
226  }
227  rewritePhiValues(NewBB, i, VRMap, InstrMap);
228  LLVM_DEBUG({
229  dbgs() << "prolog:\n";
230  NewBB->dump();
231  });
232  }
233 
234  PredBB->replaceSuccessor(BB, KernelBB);
235 
236  // Check if we need to remove the branch from the preheader to the original
237  // loop, and replace it with a branch to the new loop.
238  unsigned numBranches = TII->removeBranch(*Preheader);
239  if (numBranches) {
241  TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
242  }
243 }
244 
245 /// Generate the pipeline epilog code. The epilog code finishes the iterations
246 /// that were started in either the prolog or the kernel. We create a basic
247 /// block for each stage that needs to complete.
248 void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
249  MachineBasicBlock *KernelBB,
250  ValueMapTy *VRMap,
251  MBBVectorTy &EpilogBBs,
252  MBBVectorTy &PrologBBs) {
253  // We need to change the branch from the kernel to the first epilog block, so
254  // this call to analyze branch uses the kernel rather than the original BB.
255  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
257  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
258  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
259  if (checkBranch)
260  return;
261 
262  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
263  if (*LoopExitI == KernelBB)
264  ++LoopExitI;
265  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
266  MachineBasicBlock *LoopExitBB = *LoopExitI;
267 
268  MachineBasicBlock *PredBB = KernelBB;
269  MachineBasicBlock *EpilogStart = LoopExitBB;
270  InstrMapTy InstrMap;
271 
272  // Generate a basic block for each stage, not including the last stage,
273  // which was generated for the kernel. Each basic block may contain
274  // instructions from multiple stages/iterations.
275  int EpilogStage = LastStage + 1;
276  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
277  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
278  EpilogBBs.push_back(NewBB);
279  MF.insert(BB->getIterator(), NewBB);
280 
281  PredBB->replaceSuccessor(LoopExitBB, NewBB);
282  NewBB->addSuccessor(LoopExitBB);
283 
284  if (EpilogStart == LoopExitBB)
285  EpilogStart = NewBB;
286 
287  // Add instructions to the epilog depending on the current block.
288  // Process instructions in original program order.
289  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
290  for (auto &BBI : *BB) {
291  if (BBI.isPHI())
292  continue;
293  MachineInstr *In = &BBI;
294  if ((unsigned)Schedule.getStage(In) == StageNum) {
295  // Instructions with memoperands in the epilog are updated with
296  // conservative values.
297  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
298  updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
299  NewBB->push_back(NewMI);
300  InstrMap[NewMI] = In;
301  }
302  }
303  }
304  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
305  InstrMap, LastStage, EpilogStage, i == 1);
306  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
307  LastStage, EpilogStage, i == 1);
308  PredBB = NewBB;
309 
310  LLVM_DEBUG({
311  dbgs() << "epilog:\n";
312  NewBB->dump();
313  });
314  }
315 
316  // Fix any Phi nodes in the loop exit block.
317  LoopExitBB->replacePhiUsesWith(BB, PredBB);
318 
319  // Create a branch to the new epilog from the kernel.
320  // Remove the original branch and add a new branch to the epilog.
321  TII->removeBranch(*KernelBB);
322  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
323  // Add a branch to the loop exit.
324  if (EpilogBBs.size() > 0) {
325  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
327  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
328  }
329 }
330 
331 /// Replace all uses of FromReg that appear outside the specified
332 /// basic block with ToReg.
333 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
334  MachineBasicBlock *MBB,
336  LiveIntervals &LIS) {
337  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
338  E = MRI.use_end();
339  I != E;) {
340  MachineOperand &O = *I;
341  ++I;
342  if (O.getParent()->getParent() != MBB)
343  O.setReg(ToReg);
344  }
345  if (!LIS.hasInterval(ToReg))
346  LIS.createEmptyInterval(ToReg);
347 }
348 
349 /// Return true if the register has a use that occurs outside the
350 /// specified loop.
351 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
354  E = MRI.use_end();
355  I != E; ++I)
356  if (I->getParent()->getParent() != BB)
357  return true;
358  return false;
359 }
360 
361 /// Generate Phis for the specific block in the generated pipelined code.
362 /// This function looks at the Phis from the original code to guide the
363 /// creation of new Phis.
364 void ModuloScheduleExpander::generateExistingPhis(
366  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
367  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
368  // Compute the stage number for the initial value of the Phi, which
369  // comes from the prolog. The prolog to use depends on to which kernel/
370  // epilog that we're adding the Phi.
371  unsigned PrologStage = 0;
372  unsigned PrevStage = 0;
373  bool InKernel = (LastStageNum == CurStageNum);
374  if (InKernel) {
375  PrologStage = LastStageNum - 1;
376  PrevStage = CurStageNum;
377  } else {
378  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
379  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
380  }
381 
382  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
383  BBE = BB->getFirstNonPHI();
384  BBI != BBE; ++BBI) {
385  Register Def = BBI->getOperand(0).getReg();
386 
387  unsigned InitVal = 0;
388  unsigned LoopVal = 0;
389  getPhiRegs(*BBI, BB, InitVal, LoopVal);
390 
391  unsigned PhiOp1 = 0;
392  // The Phi value from the loop body typically is defined in the loop, but
393  // not always. So, we need to check if the value is defined in the loop.
394  unsigned PhiOp2 = LoopVal;
395  if (VRMap[LastStageNum].count(LoopVal))
396  PhiOp2 = VRMap[LastStageNum][LoopVal];
397 
398  int StageScheduled = Schedule.getStage(&*BBI);
399  int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
400  unsigned NumStages = getStagesForReg(Def, CurStageNum);
401  if (NumStages == 0) {
402  // We don't need to generate a Phi anymore, but we need to rename any uses
403  // of the Phi value.
404  unsigned NewReg = VRMap[PrevStage][LoopVal];
405  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
406  InitVal, NewReg);
407  if (VRMap[CurStageNum].count(LoopVal))
408  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
409  }
410  // Adjust the number of Phis needed depending on the number of prologs left,
411  // and the distance from where the Phi is first scheduled. The number of
412  // Phis cannot exceed the number of prolog stages. Each stage can
413  // potentially define two values.
414  unsigned MaxPhis = PrologStage + 2;
415  if (!InKernel && (int)PrologStage <= LoopValStage)
416  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
417  unsigned NumPhis = std::min(NumStages, MaxPhis);
418 
419  unsigned NewReg = 0;
420  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
421  // In the epilog, we may need to look back one stage to get the correct
422  // Phi name because the epilog and prolog blocks execute the same stage.
423  // The correct name is from the previous block only when the Phi has
424  // been completely scheduled prior to the epilog, and Phi value is not
425  // needed in multiple stages.
426  int StageDiff = 0;
427  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
428  NumPhis == 1)
429  StageDiff = 1;
430  // Adjust the computations below when the phi and the loop definition
431  // are scheduled in different stages.
432  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
433  StageDiff = StageScheduled - LoopValStage;
434  for (unsigned np = 0; np < NumPhis; ++np) {
435  // If the Phi hasn't been scheduled, then use the initial Phi operand
436  // value. Otherwise, use the scheduled version of the instruction. This
437  // is a little complicated when a Phi references another Phi.
438  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
439  PhiOp1 = InitVal;
440  // Check if the Phi has already been scheduled in a prolog stage.
441  else if (PrologStage >= AccessStage + StageDiff + np &&
442  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
443  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
444  // Check if the Phi has already been scheduled, but the loop instruction
445  // is either another Phi, or doesn't occur in the loop.
446  else if (PrologStage >= AccessStage + StageDiff + np) {
447  // If the Phi references another Phi, we need to examine the other
448  // Phi to get the correct value.
449  PhiOp1 = LoopVal;
450  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
451  int Indirects = 1;
452  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
453  int PhiStage = Schedule.getStage(InstOp1);
454  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
455  PhiOp1 = getInitPhiReg(*InstOp1, BB);
456  else
457  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
458  InstOp1 = MRI.getVRegDef(PhiOp1);
459  int PhiOpStage = Schedule.getStage(InstOp1);
460  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
461  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
462  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
463  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
464  break;
465  }
466  ++Indirects;
467  }
468  } else
469  PhiOp1 = InitVal;
470  // If this references a generated Phi in the kernel, get the Phi operand
471  // from the incoming block.
472  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
473  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
474  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
475 
476  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
477  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
478  // In the epilog, a map lookup is needed to get the value from the kernel,
479  // or previous epilog block. How is does this depends on if the
480  // instruction is scheduled in the previous block.
481  if (!InKernel) {
482  int StageDiffAdj = 0;
483  if (LoopValStage != -1 && StageScheduled > LoopValStage)
484  StageDiffAdj = StageScheduled - LoopValStage;
485  // Use the loop value defined in the kernel, unless the kernel
486  // contains the last definition of the Phi.
487  if (np == 0 && PrevStage == LastStageNum &&
488  (StageScheduled != 0 || LoopValStage != 0) &&
489  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
490  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
491  // Use the value defined by the Phi. We add one because we switch
492  // from looking at the loop value to the Phi definition.
493  else if (np > 0 && PrevStage == LastStageNum &&
494  VRMap[PrevStage - np + 1].count(Def))
495  PhiOp2 = VRMap[PrevStage - np + 1][Def];
496  // Use the loop value defined in the kernel.
497  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
498  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
499  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
500  // Use the value defined by the Phi, unless we're generating the first
501  // epilog and the Phi refers to a Phi in a different stage.
502  else if (VRMap[PrevStage - np].count(Def) &&
503  (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
504  (LoopValStage == StageScheduled)))
505  PhiOp2 = VRMap[PrevStage - np][Def];
506  }
507 
508  // Check if we can reuse an existing Phi. This occurs when a Phi
509  // references another Phi, and the other Phi is scheduled in an
510  // earlier stage. We can try to reuse an existing Phi up until the last
511  // stage of the current Phi.
512  if (LoopDefIsPhi) {
513  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
514  int LVNumStages = getStagesForPhi(LoopVal);
515  int StageDiff = (StageScheduled - LoopValStage);
516  LVNumStages -= StageDiff;
517  // Make sure the loop value Phi has been processed already.
518  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
519  NewReg = PhiOp2;
520  unsigned ReuseStage = CurStageNum;
521  if (isLoopCarried(*PhiInst))
522  ReuseStage -= LVNumStages;
523  // Check if the Phi to reuse has been generated yet. If not, then
524  // there is nothing to reuse.
525  if (VRMap[ReuseStage - np].count(LoopVal)) {
526  NewReg = VRMap[ReuseStage - np][LoopVal];
527 
528  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
529  Def, NewReg);
530  // Update the map with the new Phi name.
531  VRMap[CurStageNum - np][Def] = NewReg;
532  PhiOp2 = NewReg;
533  if (VRMap[LastStageNum - np - 1].count(LoopVal))
534  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
535 
536  if (IsLast && np == NumPhis - 1)
537  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
538  continue;
539  }
540  }
541  }
542  if (InKernel && StageDiff > 0 &&
543  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
544  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
545  }
546 
547  const TargetRegisterClass *RC = MRI.getRegClass(Def);
548  NewReg = MRI.createVirtualRegister(RC);
549 
550  MachineInstrBuilder NewPhi =
551  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
552  TII->get(TargetOpcode::PHI), NewReg);
553  NewPhi.addReg(PhiOp1).addMBB(BB1);
554  NewPhi.addReg(PhiOp2).addMBB(BB2);
555  if (np == 0)
556  InstrMap[NewPhi] = &*BBI;
557 
558  // We define the Phis after creating the new pipelined code, so
559  // we need to rename the Phi values in scheduled instructions.
560 
561  unsigned PrevReg = 0;
562  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
563  PrevReg = VRMap[PrevStage - np][LoopVal];
564  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
565  NewReg, PrevReg);
566  // If the Phi has been scheduled, use the new name for rewriting.
567  if (VRMap[CurStageNum - np].count(Def)) {
568  unsigned R = VRMap[CurStageNum - np][Def];
569  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
570  NewReg);
571  }
572 
573  // Check if we need to rename any uses that occurs after the loop. The
574  // register to replace depends on whether the Phi is scheduled in the
575  // epilog.
576  if (IsLast && np == NumPhis - 1)
577  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
578 
579  // In the kernel, a dependent Phi uses the value from this Phi.
580  if (InKernel)
581  PhiOp2 = NewReg;
582 
583  // Update the map with the new Phi name.
584  VRMap[CurStageNum - np][Def] = NewReg;
585  }
586 
587  while (NumPhis++ < NumStages) {
588  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
589  NewReg, 0);
590  }
591 
592  // Check if we need to rename a Phi that has been eliminated due to
593  // scheduling.
594  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
595  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
596  }
597 }
598 
599 /// Generate Phis for the specified block in the generated pipelined code.
600 /// These are new Phis needed because the definition is scheduled after the
601 /// use in the pipelined sequence.
602 void ModuloScheduleExpander::generatePhis(
604  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
605  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
606  // Compute the stage number that contains the initial Phi value, and
607  // the Phi from the previous stage.
608  unsigned PrologStage = 0;
609  unsigned PrevStage = 0;
610  unsigned StageDiff = CurStageNum - LastStageNum;
611  bool InKernel = (StageDiff == 0);
612  if (InKernel) {
613  PrologStage = LastStageNum - 1;
614  PrevStage = CurStageNum;
615  } else {
616  PrologStage = LastStageNum - StageDiff;
617  PrevStage = LastStageNum + StageDiff - 1;
618  }
619 
620  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
621  BBE = BB->instr_end();
622  BBI != BBE; ++BBI) {
623  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
624  MachineOperand &MO = BBI->getOperand(i);
625  if (!MO.isReg() || !MO.isDef() ||
627  continue;
628 
629  int StageScheduled = Schedule.getStage(&*BBI);
630  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
631  Register Def = MO.getReg();
632  unsigned NumPhis = getStagesForReg(Def, CurStageNum);
633  // An instruction scheduled in stage 0 and is used after the loop
634  // requires a phi in the epilog for the last definition from either
635  // the kernel or prolog.
636  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
637  hasUseAfterLoop(Def, BB, MRI))
638  NumPhis = 1;
639  if (!InKernel && (unsigned)StageScheduled > PrologStage)
640  continue;
641 
642  unsigned PhiOp2 = VRMap[PrevStage][Def];
643  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
644  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
645  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
646  // The number of Phis can't exceed the number of prolog stages. The
647  // prolog stage number is zero based.
648  if (NumPhis > PrologStage + 1 - StageScheduled)
649  NumPhis = PrologStage + 1 - StageScheduled;
650  for (unsigned np = 0; np < NumPhis; ++np) {
651  unsigned PhiOp1 = VRMap[PrologStage][Def];
652  if (np <= PrologStage)
653  PhiOp1 = VRMap[PrologStage - np][Def];
654  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
655  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
656  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
657  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
658  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
659  }
660  if (!InKernel)
661  PhiOp2 = VRMap[PrevStage - np][Def];
662 
663  const TargetRegisterClass *RC = MRI.getRegClass(Def);
664  Register NewReg = MRI.createVirtualRegister(RC);
665 
666  MachineInstrBuilder NewPhi =
667  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
668  TII->get(TargetOpcode::PHI), NewReg);
669  NewPhi.addReg(PhiOp1).addMBB(BB1);
670  NewPhi.addReg(PhiOp2).addMBB(BB2);
671  if (np == 0)
672  InstrMap[NewPhi] = &*BBI;
673 
674  // Rewrite uses and update the map. The actions depend upon whether
675  // we generating code for the kernel or epilog blocks.
676  if (InKernel) {
677  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
678  NewReg);
679  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
680  NewReg);
681 
682  PhiOp2 = NewReg;
683  VRMap[PrevStage - np - 1][Def] = NewReg;
684  } else {
685  VRMap[CurStageNum - np][Def] = NewReg;
686  if (np == NumPhis - 1)
687  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
688  NewReg);
689  }
690  if (IsLast && np == NumPhis - 1)
691  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
692  }
693  }
694  }
695 }
696 
697 /// Remove instructions that generate values with no uses.
698 /// Typically, these are induction variable operations that generate values
699 /// used in the loop itself. A dead instruction has a definition with
700 /// no uses, or uses that occur in the original loop only.
701 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
702  MBBVectorTy &EpilogBBs) {
703  // For each epilog block, check that the value defined by each instruction
704  // is used. If not, delete it.
705  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
706  MBE = EpilogBBs.rend();
707  MBB != MBE; ++MBB)
708  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
709  ME = (*MBB)->instr_rend();
710  MI != ME;) {
711  // From DeadMachineInstructionElem. Don't delete inline assembly.
712  if (MI->isInlineAsm()) {
713  ++MI;
714  continue;
715  }
716  bool SawStore = false;
717  // Check if it's safe to remove the instruction due to side effects.
718  // We can, and want to, remove Phis here.
719  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
720  ++MI;
721  continue;
722  }
723  bool used = true;
724  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
725  MOE = MI->operands_end();
726  MOI != MOE; ++MOI) {
727  if (!MOI->isReg() || !MOI->isDef())
728  continue;
729  Register reg = MOI->getReg();
730  // Assume physical registers are used, unless they are marked dead.
731  if (Register::isPhysicalRegister(reg)) {
732  used = !MOI->isDead();
733  if (used)
734  break;
735  continue;
736  }
737  unsigned realUses = 0;
738  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
739  EI = MRI.use_end();
740  UI != EI; ++UI) {
741  // Check if there are any uses that occur only in the original
742  // loop. If so, that's not a real use.
743  if (UI->getParent()->getParent() != BB) {
744  realUses++;
745  used = true;
746  break;
747  }
748  }
749  if (realUses > 0)
750  break;
751  used = false;
752  }
753  if (!used) {
754  LIS.RemoveMachineInstrFromMaps(*MI);
755  MI++->eraseFromParent();
756  continue;
757  }
758  ++MI;
759  }
760  // In the kernel block, check if we can remove a Phi that generates a value
761  // used in an instruction removed in the epilog block.
762  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
763  BBE = KernelBB->getFirstNonPHI();
764  BBI != BBE;) {
765  MachineInstr *MI = &*BBI;
766  ++BBI;
767  Register reg = MI->getOperand(0).getReg();
768  if (MRI.use_begin(reg) == MRI.use_end()) {
769  LIS.RemoveMachineInstrFromMaps(*MI);
770  MI->eraseFromParent();
771  }
772  }
773 }
774 
775 /// For loop carried definitions, we split the lifetime of a virtual register
776 /// that has uses past the definition in the next iteration. A copy with a new
777 /// virtual register is inserted before the definition, which helps with
778 /// generating a better register assignment.
779 ///
780 /// v1 = phi(a, v2) v1 = phi(a, v2)
781 /// v2 = phi(b, v3) v2 = phi(b, v3)
782 /// v3 = .. v4 = copy v1
783 /// .. = V1 v3 = ..
784 /// .. = v4
785 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
786  MBBVectorTy &EpilogBBs) {
787  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
788  for (auto &PHI : KernelBB->phis()) {
789  Register Def = PHI.getOperand(0).getReg();
790  // Check for any Phi definition that used as an operand of another Phi
791  // in the same block.
792  for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
793  E = MRI.use_instr_end();
794  I != E; ++I) {
795  if (I->isPHI() && I->getParent() == KernelBB) {
796  // Get the loop carried definition.
797  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
798  if (!LCDef)
799  continue;
800  MachineInstr *MI = MRI.getVRegDef(LCDef);
801  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
802  continue;
803  // Search through the rest of the block looking for uses of the Phi
804  // definition. If one occurs, then split the lifetime.
805  unsigned SplitReg = 0;
806  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
807  KernelBB->instr_end()))
808  if (BBJ.readsRegister(Def)) {
809  // We split the lifetime when we find the first use.
810  if (SplitReg == 0) {
811  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
812  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
813  TII->get(TargetOpcode::COPY), SplitReg)
814  .addReg(Def);
815  }
816  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
817  }
818  if (!SplitReg)
819  continue;
820  // Search through each of the epilog blocks for any uses to be renamed.
821  for (auto &Epilog : EpilogBBs)
822  for (auto &I : *Epilog)
823  if (I.readsRegister(Def))
824  I.substituteRegister(Def, SplitReg, 0, *TRI);
825  break;
826  }
827  }
828  }
829 }
830 
831 /// Remove the incoming block from the Phis in a basic block.
832 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
833  for (MachineInstr &MI : *BB) {
834  if (!MI.isPHI())
835  break;
836  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
837  if (MI.getOperand(i + 1).getMBB() == Incoming) {
838  MI.RemoveOperand(i + 1);
839  MI.RemoveOperand(i);
840  break;
841  }
842  }
843 }
844 
845 /// Create branches from each prolog basic block to the appropriate epilog
846 /// block. These edges are needed if the loop ends before reaching the
847 /// kernel.
848 void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
849  MBBVectorTy &PrologBBs,
850  MachineBasicBlock *KernelBB,
851  MBBVectorTy &EpilogBBs,
852  ValueMapTy *VRMap) {
853  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
854  MachineBasicBlock *LastPro = KernelBB;
855  MachineBasicBlock *LastEpi = KernelBB;
856 
857  // Start from the blocks connected to the kernel and work "out"
858  // to the first prolog and the last epilog blocks.
860  unsigned MaxIter = PrologBBs.size() - 1;
861  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
862  // Add branches to the prolog that go to the corresponding
863  // epilog, and the fall-thru prolog/kernel block.
864  MachineBasicBlock *Prolog = PrologBBs[j];
865  MachineBasicBlock *Epilog = EpilogBBs[i];
866 
868  Optional<bool> StaticallyGreater =
869  LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
870  unsigned numAdded = 0;
871  if (!StaticallyGreater.hasValue()) {
872  Prolog->addSuccessor(Epilog);
873  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
874  } else if (*StaticallyGreater == false) {
875  Prolog->addSuccessor(Epilog);
876  Prolog->removeSuccessor(LastPro);
877  LastEpi->removeSuccessor(Epilog);
878  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
879  removePhis(Epilog, LastEpi);
880  // Remove the blocks that are no longer referenced.
881  if (LastPro != LastEpi) {
882  LastEpi->clear();
883  LastEpi->eraseFromParent();
884  }
885  if (LastPro == KernelBB) {
886  LoopInfo->disposed();
887  NewKernel = nullptr;
888  }
889  LastPro->clear();
890  LastPro->eraseFromParent();
891  } else {
892  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
893  removePhis(Epilog, Prolog);
894  }
895  LastPro = Prolog;
896  LastEpi = Epilog;
898  E = Prolog->instr_rend();
899  I != E && numAdded > 0; ++I, --numAdded)
900  updateInstruction(&*I, false, j, 0, VRMap);
901  }
902 
903  if (NewKernel) {
904  LoopInfo->setPreheader(PrologBBs[MaxIter]);
905  LoopInfo->adjustTripCount(-(MaxIter + 1));
906  }
907 }
908 
909 /// Return true if we can compute the amount the instruction changes
910 /// during each iteration. Set Delta to the amount of the change.
911 bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
912  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
913  const MachineOperand *BaseOp;
914  int64_t Offset;
915  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
916  return false;
917 
918  if (!BaseOp->isReg())
919  return false;
920 
921  Register BaseReg = BaseOp->getReg();
922 
923  MachineRegisterInfo &MRI = MF.getRegInfo();
924  // Check if there is a Phi. If so, get the definition in the loop.
925  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
926  if (BaseDef && BaseDef->isPHI()) {
927  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
928  BaseDef = MRI.getVRegDef(BaseReg);
929  }
930  if (!BaseDef)
931  return false;
932 
933  int D = 0;
934  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
935  return false;
936 
937  Delta = D;
938  return true;
939 }
940 
941 /// Update the memory operand with a new offset when the pipeliner
942 /// generates a new copy of the instruction that refers to a
943 /// different memory location.
944 void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
945  MachineInstr &OldMI,
946  unsigned Num) {
947  if (Num == 0)
948  return;
949  // If the instruction has memory operands, then adjust the offset
950  // when the instruction appears in different stages.
951  if (NewMI.memoperands_empty())
952  return;
954  for (MachineMemOperand *MMO : NewMI.memoperands()) {
955  // TODO: Figure out whether isAtomic is really necessary (see D57601).
956  if (MMO->isVolatile() || MMO->isAtomic() ||
957  (MMO->isInvariant() && MMO->isDereferenceable()) ||
958  (!MMO->getValue())) {
959  NewMMOs.push_back(MMO);
960  continue;
961  }
962  unsigned Delta;
963  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
964  int64_t AdjOffset = Delta * Num;
965  NewMMOs.push_back(
966  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
967  } else {
968  NewMMOs.push_back(
969  MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
970  }
971  }
972  NewMI.setMemRefs(MF, NewMMOs);
973 }
974 
975 /// Clone the instruction for the new pipelined loop and update the
976 /// memory operands, if needed.
977 MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
978  unsigned CurStageNum,
979  unsigned InstStageNum) {
980  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
981  // Check for tied operands in inline asm instructions. This should be handled
982  // elsewhere, but I'm not sure of the best solution.
983  if (OldMI->isInlineAsm())
984  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
985  const auto &MO = OldMI->getOperand(i);
986  if (MO.isReg() && MO.isUse())
987  break;
988  unsigned UseIdx;
989  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
990  NewMI->tieOperands(i, UseIdx);
991  }
992  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
993  return NewMI;
994 }
995 
996 /// Clone the instruction for the new pipelined loop. If needed, this
997 /// function updates the instruction using the values saved in the
998 /// InstrChanges structure.
999 MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1000  MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1001  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1002  auto It = InstrChanges.find(OldMI);
1003  if (It != InstrChanges.end()) {
1004  std::pair<unsigned, int64_t> RegAndOffset = It->second;
1005  unsigned BasePos, OffsetPos;
1006  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1007  return nullptr;
1008  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1009  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1010  if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1011  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1012  NewMI->getOperand(OffsetPos).setImm(NewOffset);
1013  }
1014  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1015  return NewMI;
1016 }
1017 
1018 /// Update the machine instruction with new virtual registers. This
1019 /// function may change the defintions and/or uses.
1020 void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1021  bool LastDef,
1022  unsigned CurStageNum,
1023  unsigned InstrStageNum,
1024  ValueMapTy *VRMap) {
1025  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
1026  MachineOperand &MO = NewMI->getOperand(i);
1027  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
1028  continue;
1029  Register reg = MO.getReg();
1030  if (MO.isDef()) {
1031  // Create a new virtual register for the definition.
1032  const TargetRegisterClass *RC = MRI.getRegClass(reg);
1033  Register NewReg = MRI.createVirtualRegister(RC);
1034  MO.setReg(NewReg);
1035  VRMap[CurStageNum][reg] = NewReg;
1036  if (LastDef)
1037  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1038  } else if (MO.isUse()) {
1039  MachineInstr *Def = MRI.getVRegDef(reg);
1040  // Compute the stage that contains the last definition for instruction.
1041  int DefStageNum = Schedule.getStage(Def);
1042  unsigned StageNum = CurStageNum;
1043  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1044  // Compute the difference in stages between the defintion and the use.
1045  unsigned StageDiff = (InstrStageNum - DefStageNum);
1046  // Make an adjustment to get the last definition.
1047  StageNum -= StageDiff;
1048  }
1049  if (VRMap[StageNum].count(reg))
1050  MO.setReg(VRMap[StageNum][reg]);
1051  }
1052  }
1053 }
1054 
1055 /// Return the instruction in the loop that defines the register.
1056 /// If the definition is a Phi, then follow the Phi operand to
1057 /// the instruction in the loop.
1058 MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1060  MachineInstr *Def = MRI.getVRegDef(Reg);
1061  while (Def->isPHI()) {
1062  if (!Visited.insert(Def).second)
1063  break;
1064  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1065  if (Def->getOperand(i + 1).getMBB() == BB) {
1066  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1067  break;
1068  }
1069  }
1070  return Def;
1071 }
1072 
1073 /// Return the new name for the value from the previous stage.
1074 unsigned ModuloScheduleExpander::getPrevMapVal(
1075  unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1076  ValueMapTy *VRMap, MachineBasicBlock *BB) {
1077  unsigned PrevVal = 0;
1078  if (StageNum > PhiStage) {
1079  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1080  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1081  // The name is defined in the previous stage.
1082  PrevVal = VRMap[StageNum - 1][LoopVal];
1083  else if (VRMap[StageNum].count(LoopVal))
1084  // The previous name is defined in the current stage when the instruction
1085  // order is swapped.
1086  PrevVal = VRMap[StageNum][LoopVal];
1087  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1088  // The loop value hasn't yet been scheduled.
1089  PrevVal = LoopVal;
1090  else if (StageNum == PhiStage + 1)
1091  // The loop value is another phi, which has not been scheduled.
1092  PrevVal = getInitPhiReg(*LoopInst, BB);
1093  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1094  // The loop value is another phi, which has been scheduled.
1095  PrevVal =
1096  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1097  LoopStage, VRMap, BB);
1098  }
1099  return PrevVal;
1100 }
1101 
1102 /// Rewrite the Phi values in the specified block to use the mappings
1103 /// from the initial operand. Once the Phi is scheduled, we switch
1104 /// to using the loop value instead of the Phi value, so those names
1105 /// do not need to be rewritten.
1106 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1107  unsigned StageNum,
1108  ValueMapTy *VRMap,
1109  InstrMapTy &InstrMap) {
1110  for (auto &PHI : BB->phis()) {
1111  unsigned InitVal = 0;
1112  unsigned LoopVal = 0;
1113  getPhiRegs(PHI, BB, InitVal, LoopVal);
1114  Register PhiDef = PHI.getOperand(0).getReg();
1115 
1116  unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1117  unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1118  unsigned NumPhis = getStagesForPhi(PhiDef);
1119  if (NumPhis > StageNum)
1120  NumPhis = StageNum;
1121  for (unsigned np = 0; np <= NumPhis; ++np) {
1122  unsigned NewVal =
1123  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1124  if (!NewVal)
1125  NewVal = InitVal;
1126  rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1127  NewVal);
1128  }
1129  }
1130 }
1131 
1132 /// Rewrite a previously scheduled instruction to use the register value
1133 /// from the new instruction. Make sure the instruction occurs in the
1134 /// basic block, and we don't change the uses in the new instruction.
1135 void ModuloScheduleExpander::rewriteScheduledInstr(
1136  MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1137  unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1138  unsigned PrevReg) {
1139  bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1140  int StagePhi = Schedule.getStage(Phi) + PhiNum;
1141  // Rewrite uses that have been scheduled already to use the new
1142  // Phi register.
1143  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
1144  EI = MRI.use_end();
1145  UI != EI;) {
1146  MachineOperand &UseOp = *UI;
1147  MachineInstr *UseMI = UseOp.getParent();
1148  ++UI;
1149  if (UseMI->getParent() != BB)
1150  continue;
1151  if (UseMI->isPHI()) {
1152  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1153  continue;
1154  if (getLoopPhiReg(*UseMI, BB) != OldReg)
1155  continue;
1156  }
1157  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1158  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1159  MachineInstr *OrigMI = OrigInstr->second;
1160  int StageSched = Schedule.getStage(OrigMI);
1161  int CycleSched = Schedule.getCycle(OrigMI);
1162  unsigned ReplaceReg = 0;
1163  // This is the stage for the scheduled instruction.
1164  if (StagePhi == StageSched && Phi->isPHI()) {
1165  int CyclePhi = Schedule.getCycle(Phi);
1166  if (PrevReg && InProlog)
1167  ReplaceReg = PrevReg;
1168  else if (PrevReg && !isLoopCarried(*Phi) &&
1169  (CyclePhi <= CycleSched || OrigMI->isPHI()))
1170  ReplaceReg = PrevReg;
1171  else
1172  ReplaceReg = NewReg;
1173  }
1174  // The scheduled instruction occurs before the scheduled Phi, and the
1175  // Phi is not loop carried.
1176  if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1177  ReplaceReg = NewReg;
1178  if (StagePhi > StageSched && Phi->isPHI())
1179  ReplaceReg = NewReg;
1180  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1181  ReplaceReg = NewReg;
1182  if (ReplaceReg) {
1183  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1184  UseOp.setReg(ReplaceReg);
1185  }
1186  }
1187 }
1188 
1189 bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1190  if (!Phi.isPHI())
1191  return false;
1192  unsigned DefCycle = Schedule.getCycle(&Phi);
1193  int DefStage = Schedule.getStage(&Phi);
1194 
1195  unsigned InitVal = 0;
1196  unsigned LoopVal = 0;
1197  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1198  MachineInstr *Use = MRI.getVRegDef(LoopVal);
1199  if (!Use || Use->isPHI())
1200  return true;
1201  unsigned LoopCycle = Schedule.getCycle(Use);
1202  int LoopStage = Schedule.getStage(Use);
1203  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1204 }
1205 
1206 //===----------------------------------------------------------------------===//
1207 // PeelingModuloScheduleExpander implementation
1208 //===----------------------------------------------------------------------===//
1209 // This is a reimplementation of ModuloScheduleExpander that works by creating
1210 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1211 //===----------------------------------------------------------------------===//
1212 
1213 namespace {
1214 // Remove any dead phis in MBB. Dead phis either have only one block as input
1215 // (in which case they are the identity) or have no uses.
1216 void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1217  LiveIntervals *LIS) {
1218  bool Changed = true;
1219  while (Changed) {
1220  Changed = false;
1221  for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
1222  MachineInstr &MI = *I++;
1223  assert(MI.isPHI());
1224  if (MRI.use_empty(MI.getOperand(0).getReg())) {
1225  if (LIS)
1226  LIS->RemoveMachineInstrFromMaps(MI);
1227  MI.eraseFromParent();
1228  Changed = true;
1229  } else if (MI.getNumExplicitOperands() == 3) {
1230  MRI.constrainRegClass(MI.getOperand(1).getReg(),
1231  MRI.getRegClass(MI.getOperand(0).getReg()));
1232  MRI.replaceRegWith(MI.getOperand(0).getReg(),
1233  MI.getOperand(1).getReg());
1234  if (LIS)
1235  LIS->RemoveMachineInstrFromMaps(MI);
1236  MI.eraseFromParent();
1237  Changed = true;
1238  }
1239  }
1240  }
1241 }
1242 
1243 /// Rewrites the kernel block in-place to adhere to the given schedule.
1244 /// KernelRewriter holds all of the state required to perform the rewriting.
1245 class KernelRewriter {
1246  ModuloSchedule &S;
1247  MachineBasicBlock *BB;
1248  MachineBasicBlock *PreheaderBB, *ExitBB;
1250  const TargetInstrInfo *TII;
1251  LiveIntervals *LIS;
1252 
1253  // Map from register class to canonical undef register for that class.
1255  // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1256  // this map is only used when InitReg is non-undef.
1258  // Map from LoopReg to phi register where the InitReg is undef.
1259  DenseMap<Register, Register> UndefPhis;
1260 
1261  // Reg is used by MI. Return the new register MI should use to adhere to the
1262  // schedule. Insert phis as necessary.
1263  Register remapUse(Register Reg, MachineInstr &MI);
1264  // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1265  // If InitReg is not given it is chosen arbitrarily. It will either be undef
1266  // or will be chosen so as to share another phi.
1267  Register phi(Register LoopReg, Optional<Register> InitReg = {},
1268  const TargetRegisterClass *RC = nullptr);
1269  // Create an undef register of the given register class.
1270  Register undef(const TargetRegisterClass *RC);
1271 
1272 public:
1273  KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1274  LiveIntervals *LIS = nullptr);
1275  void rewrite();
1276 };
1277 } // namespace
1278 
1279 KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1280  LiveIntervals *LIS)
1281  : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()),
1282  ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1283  TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1284  PreheaderBB = *BB->pred_begin();
1285  if (PreheaderBB == BB)
1286  PreheaderBB = *std::next(BB->pred_begin());
1287 }
1288 
1289 void KernelRewriter::rewrite() {
1290  // Rearrange the loop to be in schedule order. Note that the schedule may
1291  // contain instructions that are not owned by the loop block (InstrChanges and
1292  // friends), so we gracefully handle unowned instructions and delete any
1293  // instructions that weren't in the schedule.
1294  auto InsertPt = BB->getFirstTerminator();
1295  MachineInstr *FirstMI = nullptr;
1296  for (MachineInstr *MI : S.getInstructions()) {
1297  if (MI->isPHI())
1298  continue;
1299  if (MI->getParent())
1300  MI->removeFromParent();
1301  BB->insert(InsertPt, MI);
1302  if (!FirstMI)
1303  FirstMI = MI;
1304  }
1305  assert(FirstMI && "Failed to find first MI in schedule");
1306 
1307  // At this point all of the scheduled instructions are between FirstMI
1308  // and the end of the block. Kill from the first non-phi to FirstMI.
1309  for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1310  if (LIS)
1312  (I++)->eraseFromParent();
1313  }
1314 
1315  // Now remap every instruction in the loop.
1316  for (MachineInstr &MI : *BB) {
1317  if (MI.isPHI() || MI.isTerminator())
1318  continue;
1319  for (MachineOperand &MO : MI.uses()) {
1320  if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1321  continue;
1322  Register Reg = remapUse(MO.getReg(), MI);
1323  MO.setReg(Reg);
1324  }
1325  }
1326  EliminateDeadPhis(BB, MRI, LIS);
1327 
1328  // Ensure a phi exists for all instructions that are either referenced by
1329  // an illegal phi or by an instruction outside the loop. This allows us to
1330  // treat remaps of these values the same as "normal" values that come from
1331  // loop-carried phis.
1332  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1333  if (MI->isPHI()) {
1334  Register R = MI->getOperand(0).getReg();
1335  phi(R);
1336  continue;
1337  }
1338 
1339  for (MachineOperand &Def : MI->defs()) {
1340  for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1341  if (MI.getParent() != BB) {
1342  phi(Def.getReg());
1343  break;
1344  }
1345  }
1346  }
1347  }
1348 }
1349 
1350 Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1351  MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1352  if (!Producer)
1353  return Reg;
1354 
1355  int ConsumerStage = S.getStage(&MI);
1356  if (!Producer->isPHI()) {
1357  // Non-phi producers are simple to remap. Insert as many phis as the
1358  // difference between the consumer and producer stages.
1359  if (Producer->getParent() != BB)
1360  // Producer was not inside the loop. Use the register as-is.
1361  return Reg;
1362  int ProducerStage = S.getStage(Producer);
1363  assert(ConsumerStage != -1 &&
1364  "In-loop consumer should always be scheduled!");
1365  assert(ConsumerStage >= ProducerStage);
1366  unsigned StageDiff = ConsumerStage - ProducerStage;
1367 
1368  for (unsigned I = 0; I < StageDiff; ++I)
1369  Reg = phi(Reg);
1370  return Reg;
1371  }
1372 
1373  // First, dive through the phi chain to find the defaults for the generated
1374  // phis.
1375  SmallVector<Optional<Register>, 4> Defaults;
1376  Register LoopReg = Reg;
1377  auto LoopProducer = Producer;
1378  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1379  LoopReg = getLoopPhiReg(*LoopProducer, BB);
1380  Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1381  LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1382  assert(LoopProducer);
1383  }
1384  int LoopProducerStage = S.getStage(LoopProducer);
1385 
1386  Optional<Register> IllegalPhiDefault;
1387 
1388  if (LoopProducerStage == -1) {
1389  // Do nothing.
1390  } else if (LoopProducerStage > ConsumerStage) {
1391  // This schedule is only representable if ProducerStage == ConsumerStage+1.
1392  // In addition, Consumer's cycle must be scheduled after Producer in the
1393  // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1394  // functions.
1395 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1396  int LoopProducerCycle = S.getCycle(LoopProducer);
1397  int ConsumerCycle = S.getCycle(&MI);
1398 #endif
1399  assert(LoopProducerCycle <= ConsumerCycle);
1400  assert(LoopProducerStage == ConsumerStage + 1);
1401  // Peel off the first phi from Defaults and insert a phi between producer
1402  // and consumer. This phi will not be at the front of the block so we
1403  // consider it illegal. It will only exist during the rewrite process; it
1404  // needs to exist while we peel off prologs because these could take the
1405  // default value. After that we can replace all uses with the loop producer
1406  // value.
1407  IllegalPhiDefault = Defaults.front();
1408  Defaults.erase(Defaults.begin());
1409  } else {
1410  assert(ConsumerStage >= LoopProducerStage);
1411  int StageDiff = ConsumerStage - LoopProducerStage;
1412  if (StageDiff > 0) {
1413  LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1414  << " to " << (Defaults.size() + StageDiff) << "\n");
1415  // If we need more phis than we have defaults for, pad out with undefs for
1416  // the earliest phis, which are at the end of the defaults chain (the
1417  // chain is in reverse order).
1418  Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
1419  ? Optional<Register>()
1420  : Defaults.back());
1421  }
1422  }
1423 
1424  // Now we know the number of stages to jump back, insert the phi chain.
1425  auto DefaultI = Defaults.rbegin();
1426  while (DefaultI != Defaults.rend())
1427  LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1428 
1429  if (IllegalPhiDefault.hasValue()) {
1430  // The consumer optionally consumes LoopProducer in the same iteration
1431  // (because the producer is scheduled at an earlier cycle than the consumer)
1432  // or the initial value. To facilitate this we create an illegal block here
1433  // by embedding a phi in the middle of the block. We will fix this up
1434  // immediately prior to pruning.
1435  auto RC = MRI.getRegClass(Reg);
1436  Register R = MRI.createVirtualRegister(RC);
1437  BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1438  .addReg(IllegalPhiDefault.getValue())
1439  .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1440  .addReg(LoopReg)
1441  .addMBB(BB); // Block choice is arbitrary and has no effect.
1442  return R;
1443  }
1444 
1445  return LoopReg;
1446 }
1447 
1449  const TargetRegisterClass *RC) {
1450  // If the init register is not undef, try and find an existing phi.
1451  if (InitReg.hasValue()) {
1452  auto I = Phis.find({LoopReg, InitReg.getValue()});
1453  if (I != Phis.end())
1454  return I->second;
1455  } else {
1456  for (auto &KV : Phis) {
1457  if (KV.first.first == LoopReg)
1458  return KV.second;
1459  }
1460  }
1461 
1462  // InitReg is either undef or no existing phi takes InitReg as input. Try and
1463  // find a phi that takes undef as input.
1464  auto I = UndefPhis.find(LoopReg);
1465  if (I != UndefPhis.end()) {
1466  Register R = I->second;
1467  if (!InitReg.hasValue())
1468  // Found a phi taking undef as input, and this input is undef so return
1469  // without any more changes.
1470  return R;
1471  // Found a phi taking undef as input, so rewrite it to take InitReg.
1472  MachineInstr *MI = MRI.getVRegDef(R);
1473  MI->getOperand(1).setReg(InitReg.getValue());
1474  Phis.insert({{LoopReg, InitReg.getValue()}, R});
1475  MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
1476  UndefPhis.erase(I);
1477  return R;
1478  }
1479 
1480  // Failed to find any existing phi to reuse, so create a new one.
1481  if (!RC)
1482  RC = MRI.getRegClass(LoopReg);
1483  Register R = MRI.createVirtualRegister(RC);
1484  if (InitReg.hasValue())
1485  MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1486  BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1487  .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
1488  .addMBB(PreheaderBB)
1489  .addReg(LoopReg)
1490  .addMBB(BB);
1491  if (!InitReg.hasValue())
1492  UndefPhis[LoopReg] = R;
1493  else
1494  Phis[{LoopReg, *InitReg}] = R;
1495  return R;
1496 }
1497 
1498 Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1499  Register &R = Undefs[RC];
1500  if (R == 0) {
1501  // Create an IMPLICIT_DEF that defines this register if we need it.
1502  // All uses of this should be removed by the time we have finished unrolling
1503  // prologs and epilogs.
1504  R = MRI.createVirtualRegister(RC);
1505  auto *InsertBB = &PreheaderBB->getParent()->front();
1506  BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1507  TII->get(TargetOpcode::IMPLICIT_DEF), R);
1508  }
1509  return R;
1510 }
1511 
1512 namespace {
1513 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1514 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1515 /// through and defaults for these phis.
1516 class KernelOperandInfo {
1517  MachineBasicBlock *BB;
1519  SmallVector<Register, 4> PhiDefaults;
1522 
1523 public:
1524  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1525  const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1526  : MRI(MRI) {
1527  Source = MO;
1528  BB = MO->getParent()->getParent();
1529  while (isRegInLoop(MO)) {
1530  MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1531  if (MI->isFullCopy()) {
1532  MO = &MI->getOperand(1);
1533  continue;
1534  }
1535  if (!MI->isPHI())
1536  break;
1537  // If this is an illegal phi, don't count it in distance.
1538  if (IllegalPhis.count(MI)) {
1539  MO = &MI->getOperand(3);
1540  continue;
1541  }
1542 
1543  Register Default = getInitPhiReg(*MI, BB);
1544  MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1545  : &MI->getOperand(3);
1546  PhiDefaults.push_back(Default);
1547  }
1548  Target = MO;
1549  }
1550 
1551  bool operator==(const KernelOperandInfo &Other) const {
1552  return PhiDefaults.size() == Other.PhiDefaults.size();
1553  }
1554 
1555  void print(raw_ostream &OS) const {
1556  OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1557  << *Source->getParent();
1558  }
1559 
1560 private:
1561  bool isRegInLoop(MachineOperand *MO) {
1562  return MO->isReg() && MO->getReg().isVirtual() &&
1563  MRI.getVRegDef(MO->getReg())->getParent() == BB;
1564  }
1565 };
1566 } // namespace
1567 
1569 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1570  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1571  if (LPD == LPD_Front)
1572  PeeledFront.push_back(NewBB);
1573  else
1574  PeeledBack.push_front(NewBB);
1575  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1576  ++I, ++NI) {
1577  CanonicalMIs[&*I] = &*I;
1578  CanonicalMIs[&*NI] = &*I;
1579  BlockMIs[{NewBB, &*I}] = &*NI;
1580  BlockMIs[{BB, &*I}] = &*I;
1581  }
1582  return NewBB;
1583 }
1584 
1585 void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1586  BitVector LS(Schedule.getNumStages(), true);
1587  BitVector AS(Schedule.getNumStages(), true);
1588  LiveStages[BB] = LS;
1589  AvailableStages[BB] = AS;
1590 
1591  // Peel out the prologs.
1592  LS.reset();
1593  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1594  LS[I] = 1;
1595  Prologs.push_back(peelKernel(LPD_Front));
1596  LiveStages[Prologs.back()] = LS;
1597  AvailableStages[Prologs.back()] = LS;
1598  }
1599 
1600  // Create a block that will end up as the new loop exiting block (dominated by
1601  // all prologs and epilogs). It will only contain PHIs, in the same order as
1602  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1603  // that the exiting block is a (sub) clone of BB. This in turn gives us the
1604  // property that any value deffed in BB but used outside of BB is used by a
1605  // PHI in the exiting block.
1606  MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
1607 
1608  // Push out the epilogs, again in reverse order.
1609  // We can't assume anything about the minumum loop trip count at this point,
1610  // so emit a fairly complex epilog:
1611  // K[0, 1, 2] // Kernel runs stages 0, 1, 2
1612  // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0].
1613  // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
1614  //
1615  // This creates a single-successor single-predecessor sequence of blocks for
1616  // each epilog, which are kept this way for simplicity at this stage and
1617  // cleaned up by the optimizer later.
1618  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1619  Epilogs.push_back(nullptr);
1620  for (int J = Schedule.getNumStages() - 1; J >= I; --J) {
1621  LS.reset();
1622  LS[J] = 1;
1623  Epilogs.back() = peelKernel(LPD_Back);
1624  LiveStages[Epilogs.back()] = LS;
1625  AvailableStages[Epilogs.back()] = AS;
1626  }
1627  }
1628 
1629  // Now we've defined all the prolog and epilog blocks as a fallthrough
1630  // sequence, add the edges that will be followed if the loop trip count is
1631  // lower than the number of stages (connecting prologs directly with epilogs).
1632  auto PI = Prologs.begin();
1633  auto EI = Epilogs.begin();
1634  assert(Prologs.size() == Epilogs.size());
1635  for (; PI != Prologs.end(); ++PI, ++EI) {
1636  MachineBasicBlock *Pred = *(*EI)->pred_begin();
1637  (*PI)->addSuccessor(*EI);
1638  for (MachineInstr &MI : (*EI)->phis()) {
1639  Register Reg = MI.getOperand(1).getReg();
1640  MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1641  if (Use && Use->getParent() == Pred)
1642  Reg = getEquivalentRegisterIn(Reg, *PI);
1643  MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1645  }
1646  }
1647 
1648  // Create a list of all blocks in order.
1650  llvm::copy(PeeledFront, std::back_inserter(Blocks));
1651  Blocks.push_back(BB);
1652  llvm::copy(PeeledBack, std::back_inserter(Blocks));
1653 
1654  // Iterate in reverse order over all instructions, remapping as we go.
1655  for (MachineBasicBlock *B : reverse(Blocks)) {
1656  for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
1657  I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1658  MachineInstr *MI = &*I++;
1659  rewriteUsesOf(MI);
1660  }
1661  }
1662  // Now all remapping has been done, we're free to optimize the generated code.
1663  for (MachineBasicBlock *B : reverse(Blocks))
1664  EliminateDeadPhis(B, MRI, LIS);
1665  EliminateDeadPhis(ExitingBB, MRI, LIS);
1666 }
1667 
1668 MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1669  MachineFunction &MF = *BB->getParent();
1670  MachineBasicBlock *Exit = *BB->succ_begin();
1671  if (Exit == BB)
1672  Exit = *std::next(BB->succ_begin());
1673 
1675  MF.insert(std::next(BB->getIterator()), NewBB);
1676 
1677  // Clone all phis in BB into NewBB and rewrite.
1678  for (MachineInstr &MI : BB->phis()) {
1679  auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1680  Register OldR = MI.getOperand(3).getReg();
1681  Register R = MRI.createVirtualRegister(RC);
1683  for (MachineInstr &Use : MRI.use_instructions(OldR))
1684  if (Use.getParent() != BB)
1685  Uses.push_back(&Use);
1686  for (MachineInstr *Use : Uses)
1687  Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1688  *MRI.getTargetRegisterInfo());
1689  MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1690  .addReg(OldR)
1691  .addMBB(BB);
1692  BlockMIs[{NewBB, &MI}] = NI;
1693  CanonicalMIs[NI] = &MI;
1694  }
1695  BB->replaceSuccessor(Exit, NewBB);
1696  Exit->replacePhiUsesWith(BB, NewBB);
1697  NewBB->addSuccessor(Exit);
1698 
1699  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1701  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1702  (void)CanAnalyzeBr;
1703  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1704  TII->removeBranch(*BB);
1705  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1706  Cond, DebugLoc());
1707  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1708  return NewBB;
1709 }
1710 
1711 Register
1712 PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
1713  MachineBasicBlock *BB) {
1714  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1715  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1716  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1717 }
1718 
1719 void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
1720  if (MI->isPHI()) {
1721  // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1722  // and it is produced by this block.
1723  Register PhiR = MI->getOperand(0).getReg();
1724  Register R = MI->getOperand(3).getReg();
1725  int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1726  if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1727  R = MI->getOperand(1).getReg();
1728  MRI.setRegClass(R, MRI.getRegClass(PhiR));
1729  MRI.replaceRegWith(PhiR, R);
1730  if (LIS)
1731  LIS->RemoveMachineInstrFromMaps(*MI);
1732  MI->eraseFromParent();
1733  return;
1734  }
1735 
1736  int Stage = getStage(MI);
1737  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1738  LiveStages[MI->getParent()].test(Stage))
1739  // Instruction is live, no rewriting to do.
1740  return;
1741 
1742  for (MachineOperand &DefMO : MI->defs()) {
1744  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1745  // Only PHIs can use values from this block by construction.
1746  // Match with the equivalent PHI in B.
1747  assert(UseMI.isPHI());
1748  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1749  MI->getParent());
1750  Subs.emplace_back(&UseMI, Reg);
1751  }
1752  for (auto &Sub : Subs)
1753  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1754  *MRI.getTargetRegisterInfo());
1755  }
1756  if (LIS)
1757  LIS->RemoveMachineInstrFromMaps(*MI);
1758  MI->eraseFromParent();
1759 }
1760 
1761 void PeelingModuloScheduleExpander::fixupBranches() {
1762  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info =
1764  assert(Info);
1765 
1766  // Work outwards from the kernel.
1767  bool KernelDisposed = false;
1768  int TC = Schedule.getNumStages() - 1;
1769  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1770  ++PI, ++EI, --TC) {
1771  MachineBasicBlock *Prolog = *PI;
1772  MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1773  MachineBasicBlock *Epilog = *EI;
1775  TII->removeBranch(*Prolog);
1776  Optional<bool> StaticallyGreater =
1777  Info->createTripCountGreaterCondition(TC, *Prolog, Cond);
1778  if (!StaticallyGreater.hasValue()) {
1779  LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1780  // Dynamically branch based on Cond.
1781  TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1782  } else if (*StaticallyGreater == false) {
1783  LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1784  // Prolog never falls through; branch to epilog and orphan interior
1785  // blocks. Leave it to unreachable-block-elim to clean up.
1786  Prolog->removeSuccessor(Fallthrough);
1787  for (MachineInstr &P : Fallthrough->phis()) {
1788  P.RemoveOperand(2);
1789  P.RemoveOperand(1);
1790  }
1791  TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
1792  KernelDisposed = true;
1793  } else {
1794  LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1795  // Prolog always falls through; remove incoming values in epilog.
1796  Prolog->removeSuccessor(Epilog);
1797  for (MachineInstr &P : Epilog->phis()) {
1798  P.RemoveOperand(4);
1799  P.RemoveOperand(3);
1800  }
1801  }
1802  }
1803 
1804  if (!KernelDisposed) {
1805  Info->adjustTripCount(-(Schedule.getNumStages() - 1));
1806  Info->setPreheader(Prologs.back());
1807  } else {
1808  Info->disposed();
1809  }
1810 }
1811 
1813  KernelRewriter KR(*Schedule.getLoop(), Schedule);
1814  KR.rewrite();
1815 }
1816 
1818  BB = Schedule.getLoop()->getTopBlock();
1819  Preheader = Schedule.getLoop()->getLoopPreheader();
1820  LLVM_DEBUG(Schedule.dump());
1821 
1822  rewriteKernel();
1823  peelPrologAndEpilogs();
1824  fixupBranches();
1825 }
1826 
1828  BB = Schedule.getLoop()->getTopBlock();
1829  Preheader = Schedule.getLoop()->getLoopPreheader();
1830 
1831  // Dump the schedule before we invalidate and remap all its instructions.
1832  // Stash it in a string so we can print it if we found an error.
1833  std::string ScheduleDump;
1834  raw_string_ostream OS(ScheduleDump);
1835  Schedule.print(OS);
1836  OS.flush();
1837 
1838  // First, run the normal ModuleScheduleExpander. We don't support any
1839  // InstrChanges.
1840  assert(LIS && "Requires LiveIntervals!");
1841  ModuloScheduleExpander MSE(MF, Schedule, *LIS,
1843  MSE.expand();
1844  MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
1845  if (!ExpandedKernel) {
1846  // The expander optimized away the kernel. We can't do any useful checking.
1847  MSE.cleanup();
1848  return;
1849  }
1850  // Before running the KernelRewriter, re-add BB into the CFG.
1851  Preheader->addSuccessor(BB);
1852 
1853  // Now run the new expansion algorithm.
1854  KernelRewriter KR(*Schedule.getLoop(), Schedule);
1855  KR.rewrite();
1856  peelPrologAndEpilogs();
1857 
1858  // Collect all illegal phis that the new algorithm created. We'll give these
1859  // to KernelOperandInfo.
1860  SmallPtrSet<MachineInstr *, 4> IllegalPhis;
1861  for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
1862  if (NI->isPHI())
1863  IllegalPhis.insert(&*NI);
1864  }
1865 
1866  // Co-iterate across both kernels. We expect them to be identical apart from
1867  // phis and full COPYs (we look through both).
1869  auto OI = ExpandedKernel->begin();
1870  auto NI = BB->begin();
1871  for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
1872  while (OI->isPHI() || OI->isFullCopy())
1873  ++OI;
1874  while (NI->isPHI() || NI->isFullCopy())
1875  ++NI;
1876  assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
1877  // Analyze every operand separately.
1878  for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
1879  OOpI != OI->operands_end(); ++OOpI, ++NOpI)
1880  KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
1881  KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
1882  }
1883 
1884  bool Failed = false;
1885  for (auto &OldAndNew : KOIs) {
1886  if (OldAndNew.first == OldAndNew.second)
1887  continue;
1888  Failed = true;
1889  errs() << "Modulo kernel validation error: [\n";
1890  errs() << " [golden] ";
1891  OldAndNew.first.print(errs());
1892  errs() << " ";
1893  OldAndNew.second.print(errs());
1894  errs() << "]\n";
1895  }
1896 
1897  if (Failed) {
1898  errs() << "Golden reference kernel:\n";
1899  ExpandedKernel->print(errs());
1900  errs() << "New kernel:\n";
1901  BB->print(errs());
1902  errs() << ScheduleDump;
1904  "Modulo kernel validation (-pipeliner-experimental-cg) failed");
1905  }
1906 
1907  // Cleanup by removing BB from the CFG again as the original
1908  // ModuloScheduleExpander intended.
1909  Preheader->removeSuccessor(BB);
1910  MSE.cleanup();
1911 }
1912 
1913 //===----------------------------------------------------------------------===//
1914 // ModuloScheduleTestPass implementation
1915 //===----------------------------------------------------------------------===//
1916 // This pass constructs a ModuloSchedule from its module and runs
1917 // ModuloScheduleExpander.
1918 //
1919 // The module is expected to contain a single-block analyzable loop.
1920 // The total order of instructions is taken from the loop as-is.
1921 // Instructions are expected to be annotated with a PostInstrSymbol.
1922 // This PostInstrSymbol must have the following format:
1923 // "Stage=%d Cycle=%d".
1924 //===----------------------------------------------------------------------===//
1925 
1926 namespace {
1927 class ModuloScheduleTest : public MachineFunctionPass {
1928 public:
1929  static char ID;
1930 
1931  ModuloScheduleTest() : MachineFunctionPass(ID) {
1933  }
1934 
1935  bool runOnMachineFunction(MachineFunction &MF) override;
1936  void runOnLoop(MachineFunction &MF, MachineLoop &L);
1937 
1938  void getAnalysisUsage(AnalysisUsage &AU) const override {
1940  AU.addRequired<LiveIntervals>();
1942  }
1943 };
1944 } // namespace
1945 
1946 char ModuloScheduleTest::ID = 0;
1947 
1948 INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
1949  "Modulo Schedule test pass", false, false)
1952 INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
1953  "Modulo Schedule test pass", false, false)
1954 
1955 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
1956  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
1957  for (auto *L : MLI) {
1958  if (L->getTopBlock() != L->getBottomBlock())
1959  continue;
1960  runOnLoop(MF, *L);
1961  return false;
1962  }
1963  return false;
1964 }
1965 
1966 static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
1967  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
1968  std::pair<StringRef, StringRef> StageTokenAndValue =
1969  getToken(StageAndCycle.first, "-");
1970  std::pair<StringRef, StringRef> CycleTokenAndValue =
1971  getToken(StageAndCycle.second, "-");
1972  if (StageTokenAndValue.first != "Stage" ||
1973  CycleTokenAndValue.first != "_Cycle") {
1975  "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
1976  return;
1977  }
1978 
1979  StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
1980  CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
1981 
1982  dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
1983 }
1984 
1985 void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
1986  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
1987  MachineBasicBlock *BB = L.getTopBlock();
1988  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
1989 
1990  DenseMap<MachineInstr *, int> Cycle, Stage;
1991  std::vector<MachineInstr *> Instrs;
1992  for (MachineInstr &MI : *BB) {
1993  if (MI.isTerminator())
1994  continue;
1995  Instrs.push_back(&MI);
1996  if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
1997  dbgs() << "Parsing post-instr symbol for " << MI;
1998  parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
1999  }
2000  }
2001 
2002  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2003  std::move(Stage));
2005  MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2006  MSE.expand();
2007  MSE.cleanup();
2008 }
2009 
2010 //===----------------------------------------------------------------------===//
2011 // ModuloScheduleTestAnnotater implementation
2012 //===----------------------------------------------------------------------===//
2013 
2015  for (MachineInstr *MI : S.getInstructions()) {
2017  raw_svector_ostream OS(SV);
2018  OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2019  MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2020  MI->setPostInstrSymbol(MF, Sym);
2021  }
2022 }
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
instr_iterator instr_begin()
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:509
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Register getReg(unsigned Idx) const
Get the register for the operand index.
Represents a schedule for a single-block loop.
void push_back(const T &Elt)
Definition: SmallVector.h:211
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:384
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:63
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned Reg
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found...
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
bool isInlineAsm() const
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:953
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:530
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", "Modulo Schedule test pass", false, false) INITIALIZE_PASS_END(ModuloScheduleTest
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Peel the first iteration of the loop.
bool isPHI() const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
constexpr double phi
Definition: MathExtras.h:71
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:413
void print(raw_ostream &OS)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
MachineBasicBlock * getBottomBlock()
Return the "bottom" block in the loop, which is the last block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:672
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:261
std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \\\)
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
void initializeModuloScheduleTestPass(PassRegistry &)
bool isFullCopy() const
bool hasInterval(Register Reg) const
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void expand()
Performs the actual expansion.
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:89
void setReg(Register Reg)
Change the register this operand corresponds to.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
virtual const TargetInstrInfo * getInstrInfo() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1231
TargetInstrInfo - Interface to description of machine instruction set.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state. ...
MCContext & getContext() const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
#define P(N)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
bool getIncrementValue(const MachineInstr &MI, int &Value) const override
If the instruction is an increment of a constant value, return the amount.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:498
constexpr double e
Definition: MathExtras.h:57
void setImm(int64_t immVal)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
self_iterator getIterator()
Definition: ilist_node.h:81
void cleanup()
Performs final cleanup after expansion.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:75
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
MCSymbol * getPostInstrSymbol() const
Helper to extract a post-instruction symbol if one has been added.
Definition: MachineInstr.h:584
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rbegin()
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:147
modulo schedule test
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
modulo schedule Modulo Schedule test pass
StringRef str()
Return a StringRef for the vector contents.
Definition: raw_ostream.h:555
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Peel the last iteration of the loop.
int64_t getImm() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
reverse_instr_iterator instr_rend()
Target - Wrapper for Target specific information.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool use_empty(unsigned RegNo) const
use_empty - Return true if there are no instructions using the specified register.
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Representation of each machine instruction.
Definition: MachineInstr.h:63
bool hasValue() const
Definition: Optional.h:259
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:129
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
static bool rewrite(Function &F)
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
MachineInstr * removeFromParent()
Unlink &#39;this&#39; from the containing basic block, and return it without deleting it. ...
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool memoperands_empty() const
Return true if we don&#39;t have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:563
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand *> MemRefs)
Assign this MachineInstr&#39;s memory reference descriptor list.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
bool getMemOperandWithOffset(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
static const Function * getParent(const Value *V)
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1975
Register getReg() const
getReg - Returns the register number.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1217
void annotate()
Performs the annotation.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
std::vector< MachineBasicBlock * >::iterator succ_iterator
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
void resize(size_type N)
Definition: SmallVector.h:344