LLVM  17.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/InitializePasses.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/Support/Debug.h"
21 
22 #define DEBUG_TYPE "pipeliner"
23 using namespace llvm;
24 
26  for (MachineInstr *MI : ScheduledInstrs)
27  OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
28 }
29 
30 //===----------------------------------------------------------------------===//
31 // ModuloScheduleExpander implementation
32 //===----------------------------------------------------------------------===//
33 
34 /// Return the register values for the operands of a Phi instruction.
35 /// This function assume the instruction is a Phi.
37  unsigned &InitVal, unsigned &LoopVal) {
38  assert(Phi.isPHI() && "Expecting a Phi.");
39 
40  InitVal = 0;
41  LoopVal = 0;
42  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
43  if (Phi.getOperand(i + 1).getMBB() != Loop)
44  InitVal = Phi.getOperand(i).getReg();
45  else
46  LoopVal = Phi.getOperand(i).getReg();
47 
48  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
49 }
50 
51 /// Return the Phi register value that comes from the incoming block.
52 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
53  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
54  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
55  return Phi.getOperand(i).getReg();
56  return 0;
57 }
58 
59 /// Return the Phi register value that comes the loop block.
60 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
61  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
62  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
63  return Phi.getOperand(i).getReg();
64  return 0;
65 }
66 
68  BB = Schedule.getLoop()->getTopBlock();
69  Preheader = *BB->pred_begin();
70  if (Preheader == BB)
71  Preheader = *std::next(BB->pred_begin());
72 
73  // Iterate over the definitions in each instruction, and compute the
74  // stage difference for each use. Keep the maximum value.
75  for (MachineInstr *MI : Schedule.getInstructions()) {
76  int DefStage = Schedule.getStage(MI);
77  for (const MachineOperand &Op : MI->operands()) {
78  if (!Op.isReg() || !Op.isDef())
79  continue;
80 
81  Register Reg = Op.getReg();
82  unsigned MaxDiff = 0;
83  bool PhiIsSwapped = false;
84  for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
85  MachineInstr *UseMI = UseOp.getParent();
86  int UseStage = Schedule.getStage(UseMI);
87  unsigned Diff = 0;
88  if (UseStage != -1 && UseStage >= DefStage)
89  Diff = UseStage - DefStage;
90  if (MI->isPHI()) {
91  if (isLoopCarried(*MI))
92  ++Diff;
93  else
94  PhiIsSwapped = true;
95  }
96  MaxDiff = std::max(Diff, MaxDiff);
97  }
98  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
99  }
100  }
101 
102  generatePipelinedLoop();
103 }
104 
105 void ModuloScheduleExpander::generatePipelinedLoop() {
107  assert(LoopInfo && "Must be able to analyze loop!");
108 
109  // Create a new basic block for the kernel and add it to the CFG.
111 
112  unsigned MaxStageCount = Schedule.getNumStages() - 1;
113 
114  // Remember the registers that are used in different stages. The index is
115  // the iteration, or stage, that the instruction is scheduled in. This is
116  // a map between register names in the original block and the names created
117  // in each stage of the pipelined loop.
118  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
119 
120  // The renaming destination by Phis for the registers across stages.
121  // This map is updated during Phis generation to point to the most recent
122  // renaming destination.
123  ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
124 
125  InstrMapTy InstrMap;
126 
128 
129  // Generate the prolog instructions that set up the pipeline.
130  generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
131  MF.insert(BB->getIterator(), KernelBB);
132 
133  // Rearrange the instructions to generate the new, pipelined loop,
134  // and update register names as needed.
135  for (MachineInstr *CI : Schedule.getInstructions()) {
136  if (CI->isPHI())
137  continue;
138  unsigned StageNum = Schedule.getStage(CI);
139  MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
140  updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
141  KernelBB->push_back(NewMI);
142  InstrMap[NewMI] = CI;
143  }
144 
145  // Copy any terminator instructions to the new kernel, and update
146  // names as needed.
147  for (MachineInstr &MI : BB->terminators()) {
148  MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
149  updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
150  KernelBB->push_back(NewMI);
151  InstrMap[NewMI] = &MI;
152  }
153 
154  NewKernel = KernelBB;
155  KernelBB->transferSuccessors(BB);
156  KernelBB->replaceSuccessor(BB, KernelBB);
157 
158  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
159  InstrMap, MaxStageCount, MaxStageCount, false);
160  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
161  InstrMap, MaxStageCount, MaxStageCount, false);
162 
163  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
164 
166  // Generate the epilog instructions to complete the pipeline.
167  generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
168  PrologBBs);
169 
170  // We need this step because the register allocation doesn't handle some
171  // situations well, so we insert copies to help out.
172  splitLifetimes(KernelBB, EpilogBBs);
173 
174  // Remove dead instructions due to loop induction variables.
175  removeDeadInstructions(KernelBB, EpilogBBs);
176 
177  // Add branches between prolog and epilog blocks.
178  addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
179 
180  delete[] VRMap;
181  delete[] VRMapPhi;
182 }
183 
185  // Remove the original loop since it's no longer referenced.
186  for (auto &I : *BB)
188  BB->clear();
189  BB->eraseFromParent();
190 }
191 
192 /// Generate the pipeline prolog code.
193 void ModuloScheduleExpander::generateProlog(unsigned LastStage,
194  MachineBasicBlock *KernelBB,
195  ValueMapTy *VRMap,
196  MBBVectorTy &PrologBBs) {
197  MachineBasicBlock *PredBB = Preheader;
198  InstrMapTy InstrMap;
199 
200  // Generate a basic block for each stage, not including the last stage,
201  // which will be generated in the kernel. Each basic block may contain
202  // instructions from multiple stages/iterations.
203  for (unsigned i = 0; i < LastStage; ++i) {
204  // Create and insert the prolog basic block prior to the original loop
205  // basic block. The original loop is removed later.
207  PrologBBs.push_back(NewBB);
208  MF.insert(BB->getIterator(), NewBB);
209  NewBB->transferSuccessors(PredBB);
210  PredBB->addSuccessor(NewBB);
211  PredBB = NewBB;
212 
213  // Generate instructions for each appropriate stage. Process instructions
214  // in original program order.
215  for (int StageNum = i; StageNum >= 0; --StageNum) {
216  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
217  BBE = BB->getFirstTerminator();
218  BBI != BBE; ++BBI) {
219  if (Schedule.getStage(&*BBI) == StageNum) {
220  if (BBI->isPHI())
221  continue;
222  MachineInstr *NewMI =
223  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
224  updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
225  NewBB->push_back(NewMI);
226  InstrMap[NewMI] = &*BBI;
227  }
228  }
229  }
230  rewritePhiValues(NewBB, i, VRMap, InstrMap);
231  LLVM_DEBUG({
232  dbgs() << "prolog:\n";
233  NewBB->dump();
234  });
235  }
236 
237  PredBB->replaceSuccessor(BB, KernelBB);
238 
239  // Check if we need to remove the branch from the preheader to the original
240  // loop, and replace it with a branch to the new loop.
241  unsigned numBranches = TII->removeBranch(*Preheader);
242  if (numBranches) {
244  TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
245  }
246 }
247 
248 /// Generate the pipeline epilog code. The epilog code finishes the iterations
249 /// that were started in either the prolog or the kernel. We create a basic
250 /// block for each stage that needs to complete.
251 void ModuloScheduleExpander::generateEpilog(
252  unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
253  ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
254  MBBVectorTy &PrologBBs) {
255  // We need to change the branch from the kernel to the first epilog block, so
256  // this call to analyze branch uses the kernel rather than the original BB.
257  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
259  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
260  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
261  if (checkBranch)
262  return;
263 
264  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
265  if (*LoopExitI == KernelBB)
266  ++LoopExitI;
267  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
268  MachineBasicBlock *LoopExitBB = *LoopExitI;
269 
270  MachineBasicBlock *PredBB = KernelBB;
271  MachineBasicBlock *EpilogStart = LoopExitBB;
272  InstrMapTy InstrMap;
273 
274  // Generate a basic block for each stage, not including the last stage,
275  // which was generated for the kernel. Each basic block may contain
276  // instructions from multiple stages/iterations.
277  int EpilogStage = LastStage + 1;
278  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
280  EpilogBBs.push_back(NewBB);
281  MF.insert(BB->getIterator(), NewBB);
282 
283  PredBB->replaceSuccessor(LoopExitBB, NewBB);
284  NewBB->addSuccessor(LoopExitBB);
285 
286  if (EpilogStart == LoopExitBB)
287  EpilogStart = NewBB;
288 
289  // Add instructions to the epilog depending on the current block.
290  // Process instructions in original program order.
291  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
292  for (auto &BBI : *BB) {
293  if (BBI.isPHI())
294  continue;
295  MachineInstr *In = &BBI;
296  if ((unsigned)Schedule.getStage(In) == StageNum) {
297  // Instructions with memoperands in the epilog are updated with
298  // conservative values.
299  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
300  updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
301  NewBB->push_back(NewMI);
302  InstrMap[NewMI] = In;
303  }
304  }
305  }
306  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
307  InstrMap, LastStage, EpilogStage, i == 1);
308  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
309  InstrMap, LastStage, EpilogStage, i == 1);
310  PredBB = NewBB;
311 
312  LLVM_DEBUG({
313  dbgs() << "epilog:\n";
314  NewBB->dump();
315  });
316  }
317 
318  // Fix any Phi nodes in the loop exit block.
319  LoopExitBB->replacePhiUsesWith(BB, PredBB);
320 
321  // Create a branch to the new epilog from the kernel.
322  // Remove the original branch and add a new branch to the epilog.
323  TII->removeBranch(*KernelBB);
324  assert((OrigBB == TBB || OrigBB == FBB) &&
325  "Unable to determine looping branch direction");
326  if (OrigBB != TBB)
327  TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
328  else
329  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
330  // Add a branch to the loop exit.
331  if (EpilogBBs.size() > 0) {
332  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
334  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
335  }
336 }
337 
338 /// Replace all uses of FromReg that appear outside the specified
339 /// basic block with ToReg.
340 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
343  LiveIntervals &LIS) {
344  for (MachineOperand &O :
346  if (O.getParent()->getParent() != MBB)
347  O.setReg(ToReg);
348  if (!LIS.hasInterval(ToReg))
349  LIS.createEmptyInterval(ToReg);
350 }
351 
352 /// Return true if the register has a use that occurs outside the
353 /// specified loop.
354 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
356  for (const MachineOperand &MO : MRI.use_operands(Reg))
357  if (MO.getParent()->getParent() != BB)
358  return true;
359  return false;
360 }
361 
362 /// Generate Phis for the specific block in the generated pipelined code.
363 /// This function looks at the Phis from the original code to guide the
364 /// creation of new Phis.
365 void ModuloScheduleExpander::generateExistingPhis(
367  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
368  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
369  // Compute the stage number for the initial value of the Phi, which
370  // comes from the prolog. The prolog to use depends on to which kernel/
371  // epilog that we're adding the Phi.
372  unsigned PrologStage = 0;
373  unsigned PrevStage = 0;
374  bool InKernel = (LastStageNum == CurStageNum);
375  if (InKernel) {
376  PrologStage = LastStageNum - 1;
377  PrevStage = CurStageNum;
378  } else {
379  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
380  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
381  }
382 
383  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
384  BBE = BB->getFirstNonPHI();
385  BBI != BBE; ++BBI) {
386  Register Def = BBI->getOperand(0).getReg();
387 
388  unsigned InitVal = 0;
389  unsigned LoopVal = 0;
390  getPhiRegs(*BBI, BB, InitVal, LoopVal);
391 
392  unsigned PhiOp1 = 0;
393  // The Phi value from the loop body typically is defined in the loop, but
394  // not always. So, we need to check if the value is defined in the loop.
395  unsigned PhiOp2 = LoopVal;
396  if (VRMap[LastStageNum].count(LoopVal))
397  PhiOp2 = VRMap[LastStageNum][LoopVal];
398 
399  int StageScheduled = Schedule.getStage(&*BBI);
400  int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
401  unsigned NumStages = getStagesForReg(Def, CurStageNum);
402  if (NumStages == 0) {
403  // We don't need to generate a Phi anymore, but we need to rename any uses
404  // of the Phi value.
405  unsigned NewReg = VRMap[PrevStage][LoopVal];
406  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
407  InitVal, NewReg);
408  if (VRMap[CurStageNum].count(LoopVal))
409  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
410  }
411  // Adjust the number of Phis needed depending on the number of prologs left,
412  // and the distance from where the Phi is first scheduled. The number of
413  // Phis cannot exceed the number of prolog stages. Each stage can
414  // potentially define two values.
415  unsigned MaxPhis = PrologStage + 2;
416  if (!InKernel && (int)PrologStage <= LoopValStage)
417  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
418  unsigned NumPhis = std::min(NumStages, MaxPhis);
419 
420  unsigned NewReg = 0;
421  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
422  // In the epilog, we may need to look back one stage to get the correct
423  // Phi name, because the epilog and prolog blocks execute the same stage.
424  // The correct name is from the previous block only when the Phi has
425  // been completely scheduled prior to the epilog, and Phi value is not
426  // needed in multiple stages.
427  int StageDiff = 0;
428  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
429  NumPhis == 1)
430  StageDiff = 1;
431  // Adjust the computations below when the phi and the loop definition
432  // are scheduled in different stages.
433  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
434  StageDiff = StageScheduled - LoopValStage;
435  for (unsigned np = 0; np < NumPhis; ++np) {
436  // If the Phi hasn't been scheduled, then use the initial Phi operand
437  // value. Otherwise, use the scheduled version of the instruction. This
438  // is a little complicated when a Phi references another Phi.
439  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
440  PhiOp1 = InitVal;
441  // Check if the Phi has already been scheduled in a prolog stage.
442  else if (PrologStage >= AccessStage + StageDiff + np &&
443  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
444  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
445  // Check if the Phi has already been scheduled, but the loop instruction
446  // is either another Phi, or doesn't occur in the loop.
447  else if (PrologStage >= AccessStage + StageDiff + np) {
448  // If the Phi references another Phi, we need to examine the other
449  // Phi to get the correct value.
450  PhiOp1 = LoopVal;
451  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
452  int Indirects = 1;
453  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
454  int PhiStage = Schedule.getStage(InstOp1);
455  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
456  PhiOp1 = getInitPhiReg(*InstOp1, BB);
457  else
458  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
459  InstOp1 = MRI.getVRegDef(PhiOp1);
460  int PhiOpStage = Schedule.getStage(InstOp1);
461  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
462  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
463  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
464  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
465  break;
466  }
467  ++Indirects;
468  }
469  } else
470  PhiOp1 = InitVal;
471  // If this references a generated Phi in the kernel, get the Phi operand
472  // from the incoming block.
473  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
474  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
475  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
476 
477  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
478  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
479  // In the epilog, a map lookup is needed to get the value from the kernel,
480  // or previous epilog block. How is does this depends on if the
481  // instruction is scheduled in the previous block.
482  if (!InKernel) {
483  int StageDiffAdj = 0;
484  if (LoopValStage != -1 && StageScheduled > LoopValStage)
485  StageDiffAdj = StageScheduled - LoopValStage;
486  // Use the loop value defined in the kernel, unless the kernel
487  // contains the last definition of the Phi.
488  if (np == 0 && PrevStage == LastStageNum &&
489  (StageScheduled != 0 || LoopValStage != 0) &&
490  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
491  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
492  // Use the value defined by the Phi. We add one because we switch
493  // from looking at the loop value to the Phi definition.
494  else if (np > 0 && PrevStage == LastStageNum &&
495  VRMap[PrevStage - np + 1].count(Def))
496  PhiOp2 = VRMap[PrevStage - np + 1][Def];
497  // Use the loop value defined in the kernel.
498  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
499  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
500  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
501  // Use the value defined by the Phi, unless we're generating the first
502  // epilog and the Phi refers to a Phi in a different stage.
503  else if (VRMap[PrevStage - np].count(Def) &&
504  (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
505  (LoopValStage == StageScheduled)))
506  PhiOp2 = VRMap[PrevStage - np][Def];
507  }
508 
509  // Check if we can reuse an existing Phi. This occurs when a Phi
510  // references another Phi, and the other Phi is scheduled in an
511  // earlier stage. We can try to reuse an existing Phi up until the last
512  // stage of the current Phi.
513  if (LoopDefIsPhi) {
514  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
515  int LVNumStages = getStagesForPhi(LoopVal);
516  int StageDiff = (StageScheduled - LoopValStage);
517  LVNumStages -= StageDiff;
518  // Make sure the loop value Phi has been processed already.
519  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
520  NewReg = PhiOp2;
521  unsigned ReuseStage = CurStageNum;
522  if (isLoopCarried(*PhiInst))
523  ReuseStage -= LVNumStages;
524  // Check if the Phi to reuse has been generated yet. If not, then
525  // there is nothing to reuse.
526  if (VRMap[ReuseStage - np].count(LoopVal)) {
527  NewReg = VRMap[ReuseStage - np][LoopVal];
528 
529  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
530  Def, NewReg);
531  // Update the map with the new Phi name.
532  VRMap[CurStageNum - np][Def] = NewReg;
533  PhiOp2 = NewReg;
534  if (VRMap[LastStageNum - np - 1].count(LoopVal))
535  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
536 
537  if (IsLast && np == NumPhis - 1)
538  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
539  continue;
540  }
541  }
542  }
543  if (InKernel && StageDiff > 0 &&
544  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
545  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
546  }
547 
548  const TargetRegisterClass *RC = MRI.getRegClass(Def);
549  NewReg = MRI.createVirtualRegister(RC);
550 
551  MachineInstrBuilder NewPhi =
552  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
553  TII->get(TargetOpcode::PHI), NewReg);
554  NewPhi.addReg(PhiOp1).addMBB(BB1);
555  NewPhi.addReg(PhiOp2).addMBB(BB2);
556  if (np == 0)
557  InstrMap[NewPhi] = &*BBI;
558 
559  // We define the Phis after creating the new pipelined code, so
560  // we need to rename the Phi values in scheduled instructions.
561 
562  unsigned PrevReg = 0;
563  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
564  PrevReg = VRMap[PrevStage - np][LoopVal];
565  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
566  NewReg, PrevReg);
567  // If the Phi has been scheduled, use the new name for rewriting.
568  if (VRMap[CurStageNum - np].count(Def)) {
569  unsigned R = VRMap[CurStageNum - np][Def];
570  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
571  NewReg);
572  }
573 
574  // Check if we need to rename any uses that occurs after the loop. The
575  // register to replace depends on whether the Phi is scheduled in the
576  // epilog.
577  if (IsLast && np == NumPhis - 1)
578  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
579 
580  // In the kernel, a dependent Phi uses the value from this Phi.
581  if (InKernel)
582  PhiOp2 = NewReg;
583 
584  // Update the map with the new Phi name.
585  VRMap[CurStageNum - np][Def] = NewReg;
586  }
587 
588  while (NumPhis++ < NumStages) {
589  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
590  NewReg, 0);
591  }
592 
593  // Check if we need to rename a Phi that has been eliminated due to
594  // scheduling.
595  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
596  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
597  }
598 }
599 
600 /// Generate Phis for the specified block in the generated pipelined code.
601 /// These are new Phis needed because the definition is scheduled after the
602 /// use in the pipelined sequence.
603 void ModuloScheduleExpander::generatePhis(
605  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
606  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
607  bool IsLast) {
608  // Compute the stage number that contains the initial Phi value, and
609  // the Phi from the previous stage.
610  unsigned PrologStage = 0;
611  unsigned PrevStage = 0;
612  unsigned StageDiff = CurStageNum - LastStageNum;
613  bool InKernel = (StageDiff == 0);
614  if (InKernel) {
615  PrologStage = LastStageNum - 1;
616  PrevStage = CurStageNum;
617  } else {
618  PrologStage = LastStageNum - StageDiff;
619  PrevStage = LastStageNum + StageDiff - 1;
620  }
621 
622  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
623  BBE = BB->instr_end();
624  BBI != BBE; ++BBI) {
625  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
626  MachineOperand &MO = BBI->getOperand(i);
627  if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
628  continue;
629 
630  int StageScheduled = Schedule.getStage(&*BBI);
631  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
632  Register Def = MO.getReg();
633  unsigned NumPhis = getStagesForReg(Def, CurStageNum);
634  // An instruction scheduled in stage 0 and is used after the loop
635  // requires a phi in the epilog for the last definition from either
636  // the kernel or prolog.
637  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
638  hasUseAfterLoop(Def, BB, MRI))
639  NumPhis = 1;
640  if (!InKernel && (unsigned)StageScheduled > PrologStage)
641  continue;
642 
643  unsigned PhiOp2;
644  if (InKernel) {
645  PhiOp2 = VRMap[PrevStage][Def];
646  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
647  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
648  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
649  }
650  // The number of Phis can't exceed the number of prolog stages. The
651  // prolog stage number is zero based.
652  if (NumPhis > PrologStage + 1 - StageScheduled)
653  NumPhis = PrologStage + 1 - StageScheduled;
654  for (unsigned np = 0; np < NumPhis; ++np) {
655  // Example for
656  // Org:
657  // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
658  //
659  // Prolog0 (Stage0):
660  // %Clone0 = ...
661  // Prolog1 (Stage1):
662  // %Clone1 = ...
663  // Kernel (Stage2):
664  // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
665  // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
666  // %Clone2 = ...
667  // Epilog0 (Stage3):
668  // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
669  // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
670  // Epilog1 (Stage4):
671  // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
672  //
673  // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
674  // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
675  // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
676 
677  unsigned PhiOp1 = VRMap[PrologStage][Def];
678  if (np <= PrologStage)
679  PhiOp1 = VRMap[PrologStage - np][Def];
680  if (!InKernel) {
681  if (PrevStage == LastStageNum && np == 0)
682  PhiOp2 = VRMap[LastStageNum][Def];
683  else
684  PhiOp2 = VRMapPhi[PrevStage - np][Def];
685  }
686 
687  const TargetRegisterClass *RC = MRI.getRegClass(Def);
688  Register NewReg = MRI.createVirtualRegister(RC);
689 
690  MachineInstrBuilder NewPhi =
691  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
692  TII->get(TargetOpcode::PHI), NewReg);
693  NewPhi.addReg(PhiOp1).addMBB(BB1);
694  NewPhi.addReg(PhiOp2).addMBB(BB2);
695  if (np == 0)
696  InstrMap[NewPhi] = &*BBI;
697 
698  // Rewrite uses and update the map. The actions depend upon whether
699  // we generating code for the kernel or epilog blocks.
700  if (InKernel) {
701  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
702  NewReg);
703  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
704  NewReg);
705 
706  PhiOp2 = NewReg;
707  VRMapPhi[PrevStage - np - 1][Def] = NewReg;
708  } else {
709  VRMapPhi[CurStageNum - np][Def] = NewReg;
710  if (np == NumPhis - 1)
711  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
712  NewReg);
713  }
714  if (IsLast && np == NumPhis - 1)
715  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
716  }
717  }
718  }
719 }
720 
721 /// Remove instructions that generate values with no uses.
722 /// Typically, these are induction variable operations that generate values
723 /// used in the loop itself. A dead instruction has a definition with
724 /// no uses, or uses that occur in the original loop only.
725 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
726  MBBVectorTy &EpilogBBs) {
727  // For each epilog block, check that the value defined by each instruction
728  // is used. If not, delete it.
729  for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
731  ME = MBB->instr_rend();
732  MI != ME;) {
733  // From DeadMachineInstructionElem. Don't delete inline assembly.
734  if (MI->isInlineAsm()) {
735  ++MI;
736  continue;
737  }
738  bool SawStore = false;
739  // Check if it's safe to remove the instruction due to side effects.
740  // We can, and want to, remove Phis here.
741  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
742  ++MI;
743  continue;
744  }
745  bool used = true;
746  for (const MachineOperand &MO : MI->operands()) {
747  if (!MO.isReg() || !MO.isDef())
748  continue;
749  Register reg = MO.getReg();
750  // Assume physical registers are used, unless they are marked dead.
751  if (reg.isPhysical()) {
752  used = !MO.isDead();
753  if (used)
754  break;
755  continue;
756  }
757  unsigned realUses = 0;
758  for (const MachineOperand &U : MRI.use_operands(reg)) {
759  // Check if there are any uses that occur only in the original
760  // loop. If so, that's not a real use.
761  if (U.getParent()->getParent() != BB) {
762  realUses++;
763  used = true;
764  break;
765  }
766  }
767  if (realUses > 0)
768  break;
769  used = false;
770  }
771  if (!used) {
773  MI++->eraseFromParent();
774  continue;
775  }
776  ++MI;
777  }
778  // In the kernel block, check if we can remove a Phi that generates a value
779  // used in an instruction removed in the epilog block.
780  for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
781  Register reg = MI.getOperand(0).getReg();
782  if (MRI.use_begin(reg) == MRI.use_end()) {
784  MI.eraseFromParent();
785  }
786  }
787 }
788 
789 /// For loop carried definitions, we split the lifetime of a virtual register
790 /// that has uses past the definition in the next iteration. A copy with a new
791 /// virtual register is inserted before the definition, which helps with
792 /// generating a better register assignment.
793 ///
794 /// v1 = phi(a, v2) v1 = phi(a, v2)
795 /// v2 = phi(b, v3) v2 = phi(b, v3)
796 /// v3 = .. v4 = copy v1
797 /// .. = V1 v3 = ..
798 /// .. = v4
799 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
800  MBBVectorTy &EpilogBBs) {
802  for (auto &PHI : KernelBB->phis()) {
803  Register Def = PHI.getOperand(0).getReg();
804  // Check for any Phi definition that used as an operand of another Phi
805  // in the same block.
807  E = MRI.use_instr_end();
808  I != E; ++I) {
809  if (I->isPHI() && I->getParent() == KernelBB) {
810  // Get the loop carried definition.
811  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
812  if (!LCDef)
813  continue;
814  MachineInstr *MI = MRI.getVRegDef(LCDef);
815  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
816  continue;
817  // Search through the rest of the block looking for uses of the Phi
818  // definition. If one occurs, then split the lifetime.
819  unsigned SplitReg = 0;
821  KernelBB->instr_end()))
822  if (BBJ.readsRegister(Def)) {
823  // We split the lifetime when we find the first use.
824  if (SplitReg == 0) {
825  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
826  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
827  TII->get(TargetOpcode::COPY), SplitReg)
828  .addReg(Def);
829  }
830  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
831  }
832  if (!SplitReg)
833  continue;
834  // Search through each of the epilog blocks for any uses to be renamed.
835  for (auto &Epilog : EpilogBBs)
836  for (auto &I : *Epilog)
837  if (I.readsRegister(Def))
838  I.substituteRegister(Def, SplitReg, 0, *TRI);
839  break;
840  }
841  }
842  }
843 }
844 
845 /// Remove the incoming block from the Phis in a basic block.
847  for (MachineInstr &MI : *BB) {
848  if (!MI.isPHI())
849  break;
850  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
851  if (MI.getOperand(i + 1).getMBB() == Incoming) {
852  MI.removeOperand(i + 1);
853  MI.removeOperand(i);
854  break;
855  }
856  }
857 }
858 
859 /// Create branches from each prolog basic block to the appropriate epilog
860 /// block. These edges are needed if the loop ends before reaching the
861 /// kernel.
862 void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
863  MBBVectorTy &PrologBBs,
864  MachineBasicBlock *KernelBB,
865  MBBVectorTy &EpilogBBs,
866  ValueMapTy *VRMap) {
867  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
868  MachineBasicBlock *LastPro = KernelBB;
869  MachineBasicBlock *LastEpi = KernelBB;
870 
871  // Start from the blocks connected to the kernel and work "out"
872  // to the first prolog and the last epilog blocks.
874  unsigned MaxIter = PrologBBs.size() - 1;
875  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
876  // Add branches to the prolog that go to the corresponding
877  // epilog, and the fall-thru prolog/kernel block.
878  MachineBasicBlock *Prolog = PrologBBs[j];
879  MachineBasicBlock *Epilog = EpilogBBs[i];
880 
882  std::optional<bool> StaticallyGreater =
883  LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
884  unsigned numAdded = 0;
885  if (!StaticallyGreater) {
886  Prolog->addSuccessor(Epilog);
887  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
888  } else if (*StaticallyGreater == false) {
889  Prolog->addSuccessor(Epilog);
890  Prolog->removeSuccessor(LastPro);
891  LastEpi->removeSuccessor(Epilog);
892  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
893  removePhis(Epilog, LastEpi);
894  // Remove the blocks that are no longer referenced.
895  if (LastPro != LastEpi) {
896  LastEpi->clear();
897  LastEpi->eraseFromParent();
898  }
899  if (LastPro == KernelBB) {
900  LoopInfo->disposed();
901  NewKernel = nullptr;
902  }
903  LastPro->clear();
904  LastPro->eraseFromParent();
905  } else {
906  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
908  }
909  LastPro = Prolog;
910  LastEpi = Epilog;
911  for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
912  E = Prolog->instr_rend();
913  I != E && numAdded > 0; ++I, --numAdded)
914  updateInstruction(&*I, false, j, 0, VRMap);
915  }
916 
917  if (NewKernel) {
918  LoopInfo->setPreheader(PrologBBs[MaxIter]);
919  LoopInfo->adjustTripCount(-(MaxIter + 1));
920  }
921 }
922 
923 /// Return true if we can compute the amount the instruction changes
924 /// during each iteration. Set Delta to the amount of the change.
925 bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
927  const MachineOperand *BaseOp;
928  int64_t Offset;
929  bool OffsetIsScalable;
930  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
931  return false;
932 
933  // FIXME: This algorithm assumes instructions have fixed-size offsets.
934  if (OffsetIsScalable)
935  return false;
936 
937  if (!BaseOp->isReg())
938  return false;
939 
940  Register BaseReg = BaseOp->getReg();
941 
942  MachineRegisterInfo &MRI = MF.getRegInfo();
943  // Check if there is a Phi. If so, get the definition in the loop.
944  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
945  if (BaseDef && BaseDef->isPHI()) {
946  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
947  BaseDef = MRI.getVRegDef(BaseReg);
948  }
949  if (!BaseDef)
950  return false;
951 
952  int D = 0;
953  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
954  return false;
955 
956  Delta = D;
957  return true;
958 }
959 
960 /// Update the memory operand with a new offset when the pipeliner
961 /// generates a new copy of the instruction that refers to a
962 /// different memory location.
963 void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
964  MachineInstr &OldMI,
965  unsigned Num) {
966  if (Num == 0)
967  return;
968  // If the instruction has memory operands, then adjust the offset
969  // when the instruction appears in different stages.
970  if (NewMI.memoperands_empty())
971  return;
973  for (MachineMemOperand *MMO : NewMI.memoperands()) {
974  // TODO: Figure out whether isAtomic is really necessary (see D57601).
975  if (MMO->isVolatile() || MMO->isAtomic() ||
976  (MMO->isInvariant() && MMO->isDereferenceable()) ||
977  (!MMO->getValue())) {
978  NewMMOs.push_back(MMO);
979  continue;
980  }
981  unsigned Delta;
982  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
983  int64_t AdjOffset = Delta * Num;
984  NewMMOs.push_back(
985  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
986  } else {
987  NewMMOs.push_back(
989  }
990  }
991  NewMI.setMemRefs(MF, NewMMOs);
992 }
993 
994 /// Clone the instruction for the new pipelined loop and update the
995 /// memory operands, if needed.
996 MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
997  unsigned CurStageNum,
998  unsigned InstStageNum) {
999  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1000  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1001  return NewMI;
1002 }
1003 
1004 /// Clone the instruction for the new pipelined loop. If needed, this
1005 /// function updates the instruction using the values saved in the
1006 /// InstrChanges structure.
1007 MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1008  MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1009  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1010  auto It = InstrChanges.find(OldMI);
1011  if (It != InstrChanges.end()) {
1012  std::pair<unsigned, int64_t> RegAndOffset = It->second;
1013  unsigned BasePos, OffsetPos;
1014  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1015  return nullptr;
1016  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1017  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1018  if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1019  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1020  NewMI->getOperand(OffsetPos).setImm(NewOffset);
1021  }
1022  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1023  return NewMI;
1024 }
1025 
1026 /// Update the machine instruction with new virtual registers. This
1027 /// function may change the definitions and/or uses.
1028 void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1029  bool LastDef,
1030  unsigned CurStageNum,
1031  unsigned InstrStageNum,
1032  ValueMapTy *VRMap) {
1033  for (MachineOperand &MO : NewMI->operands()) {
1034  if (!MO.isReg() || !MO.getReg().isVirtual())
1035  continue;
1036  Register reg = MO.getReg();
1037  if (MO.isDef()) {
1038  // Create a new virtual register for the definition.
1039  const TargetRegisterClass *RC = MRI.getRegClass(reg);
1040  Register NewReg = MRI.createVirtualRegister(RC);
1041  MO.setReg(NewReg);
1042  VRMap[CurStageNum][reg] = NewReg;
1043  if (LastDef)
1044  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1045  } else if (MO.isUse()) {
1046  MachineInstr *Def = MRI.getVRegDef(reg);
1047  // Compute the stage that contains the last definition for instruction.
1048  int DefStageNum = Schedule.getStage(Def);
1049  unsigned StageNum = CurStageNum;
1050  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1051  // Compute the difference in stages between the defintion and the use.
1052  unsigned StageDiff = (InstrStageNum - DefStageNum);
1053  // Make an adjustment to get the last definition.
1054  StageNum -= StageDiff;
1055  }
1056  if (VRMap[StageNum].count(reg))
1057  MO.setReg(VRMap[StageNum][reg]);
1058  }
1059  }
1060 }
1061 
1062 /// Return the instruction in the loop that defines the register.
1063 /// If the definition is a Phi, then follow the Phi operand to
1064 /// the instruction in the loop.
1065 MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1068  while (Def->isPHI()) {
1069  if (!Visited.insert(Def).second)
1070  break;
1071  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1072  if (Def->getOperand(i + 1).getMBB() == BB) {
1073  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1074  break;
1075  }
1076  }
1077  return Def;
1078 }
1079 
1080 /// Return the new name for the value from the previous stage.
1081 unsigned ModuloScheduleExpander::getPrevMapVal(
1082  unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1083  ValueMapTy *VRMap, MachineBasicBlock *BB) {
1084  unsigned PrevVal = 0;
1085  if (StageNum > PhiStage) {
1086  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1087  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1088  // The name is defined in the previous stage.
1089  PrevVal = VRMap[StageNum - 1][LoopVal];
1090  else if (VRMap[StageNum].count(LoopVal))
1091  // The previous name is defined in the current stage when the instruction
1092  // order is swapped.
1093  PrevVal = VRMap[StageNum][LoopVal];
1094  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1095  // The loop value hasn't yet been scheduled.
1096  PrevVal = LoopVal;
1097  else if (StageNum == PhiStage + 1)
1098  // The loop value is another phi, which has not been scheduled.
1099  PrevVal = getInitPhiReg(*LoopInst, BB);
1100  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1101  // The loop value is another phi, which has been scheduled.
1102  PrevVal =
1103  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1104  LoopStage, VRMap, BB);
1105  }
1106  return PrevVal;
1107 }
1108 
1109 /// Rewrite the Phi values in the specified block to use the mappings
1110 /// from the initial operand. Once the Phi is scheduled, we switch
1111 /// to using the loop value instead of the Phi value, so those names
1112 /// do not need to be rewritten.
1113 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1114  unsigned StageNum,
1115  ValueMapTy *VRMap,
1116  InstrMapTy &InstrMap) {
1117  for (auto &PHI : BB->phis()) {
1118  unsigned InitVal = 0;
1119  unsigned LoopVal = 0;
1120  getPhiRegs(PHI, BB, InitVal, LoopVal);
1121  Register PhiDef = PHI.getOperand(0).getReg();
1122 
1123  unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1124  unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1125  unsigned NumPhis = getStagesForPhi(PhiDef);
1126  if (NumPhis > StageNum)
1127  NumPhis = StageNum;
1128  for (unsigned np = 0; np <= NumPhis; ++np) {
1129  unsigned NewVal =
1130  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1131  if (!NewVal)
1132  NewVal = InitVal;
1133  rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1134  NewVal);
1135  }
1136  }
1137 }
1138 
1139 /// Rewrite a previously scheduled instruction to use the register value
1140 /// from the new instruction. Make sure the instruction occurs in the
1141 /// basic block, and we don't change the uses in the new instruction.
1142 void ModuloScheduleExpander::rewriteScheduledInstr(
1143  MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1144  unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1145  unsigned PrevReg) {
1146  bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1147  int StagePhi = Schedule.getStage(Phi) + PhiNum;
1148  // Rewrite uses that have been scheduled already to use the new
1149  // Phi register.
1150  for (MachineOperand &UseOp :
1152  MachineInstr *UseMI = UseOp.getParent();
1153  if (UseMI->getParent() != BB)
1154  continue;
1155  if (UseMI->isPHI()) {
1156  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1157  continue;
1158  if (getLoopPhiReg(*UseMI, BB) != OldReg)
1159  continue;
1160  }
1161  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1162  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1163  MachineInstr *OrigMI = OrigInstr->second;
1164  int StageSched = Schedule.getStage(OrigMI);
1165  int CycleSched = Schedule.getCycle(OrigMI);
1166  unsigned ReplaceReg = 0;
1167  // This is the stage for the scheduled instruction.
1168  if (StagePhi == StageSched && Phi->isPHI()) {
1169  int CyclePhi = Schedule.getCycle(Phi);
1170  if (PrevReg && InProlog)
1171  ReplaceReg = PrevReg;
1172  else if (PrevReg && !isLoopCarried(*Phi) &&
1173  (CyclePhi <= CycleSched || OrigMI->isPHI()))
1174  ReplaceReg = PrevReg;
1175  else
1176  ReplaceReg = NewReg;
1177  }
1178  // The scheduled instruction occurs before the scheduled Phi, and the
1179  // Phi is not loop carried.
1180  if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1181  ReplaceReg = NewReg;
1182  if (StagePhi > StageSched && Phi->isPHI())
1183  ReplaceReg = NewReg;
1184  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1185  ReplaceReg = NewReg;
1186  if (ReplaceReg) {
1187  const TargetRegisterClass *NRC =
1188  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1189  if (NRC)
1190  UseOp.setReg(ReplaceReg);
1191  else {
1192  Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1193  BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1194  SplitReg)
1195  .addReg(ReplaceReg);
1196  UseOp.setReg(SplitReg);
1197  }
1198  }
1199  }
1200 }
1201 
1202 bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1203  if (!Phi.isPHI())
1204  return false;
1205  int DefCycle = Schedule.getCycle(&Phi);
1206  int DefStage = Schedule.getStage(&Phi);
1207 
1208  unsigned InitVal = 0;
1209  unsigned LoopVal = 0;
1210  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1211  MachineInstr *Use = MRI.getVRegDef(LoopVal);
1212  if (!Use || Use->isPHI())
1213  return true;
1214  int LoopCycle = Schedule.getCycle(Use);
1215  int LoopStage = Schedule.getStage(Use);
1216  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1217 }
1218 
1219 //===----------------------------------------------------------------------===//
1220 // PeelingModuloScheduleExpander implementation
1221 //===----------------------------------------------------------------------===//
1222 // This is a reimplementation of ModuloScheduleExpander that works by creating
1223 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1224 //===----------------------------------------------------------------------===//
1225 
1226 namespace {
1227 // Remove any dead phis in MBB. Dead phis either have only one block as input
1228 // (in which case they are the identity) or have no uses.
1229 void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1230  LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1231  bool Changed = true;
1232  while (Changed) {
1233  Changed = false;
1235  assert(MI.isPHI());
1236  if (MRI.use_empty(MI.getOperand(0).getReg())) {
1237  if (LIS)
1239  MI.eraseFromParent();
1240  Changed = true;
1241  } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1242  const TargetRegisterClass *ConstrainRegClass =
1243  MRI.constrainRegClass(MI.getOperand(1).getReg(),
1244  MRI.getRegClass(MI.getOperand(0).getReg()));
1245  assert(ConstrainRegClass &&
1246  "Expected a valid constrained register class!");
1247  (void)ConstrainRegClass;
1248  MRI.replaceRegWith(MI.getOperand(0).getReg(),
1249  MI.getOperand(1).getReg());
1250  if (LIS)
1252  MI.eraseFromParent();
1253  Changed = true;
1254  }
1255  }
1256  }
1257 }
1258 
1259 /// Rewrites the kernel block in-place to adhere to the given schedule.
1260 /// KernelRewriter holds all of the state required to perform the rewriting.
1261 class KernelRewriter {
1262  ModuloSchedule &S;
1264  MachineBasicBlock *PreheaderBB, *ExitBB;
1266  const TargetInstrInfo *TII;
1267  LiveIntervals *LIS;
1268 
1269  // Map from register class to canonical undef register for that class.
1271  // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1272  // this map is only used when InitReg is non-undef.
1274  // Map from LoopReg to phi register where the InitReg is undef.
1275  DenseMap<Register, Register> UndefPhis;
1276 
1277  // Reg is used by MI. Return the new register MI should use to adhere to the
1278  // schedule. Insert phis as necessary.
1279  Register remapUse(Register Reg, MachineInstr &MI);
1280  // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1281  // If InitReg is not given it is chosen arbitrarily. It will either be undef
1282  // or will be chosen so as to share another phi.
1283  Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1284  const TargetRegisterClass *RC = nullptr);
1285  // Create an undef register of the given register class.
1286  Register undef(const TargetRegisterClass *RC);
1287 
1288 public:
1289  KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1290  LiveIntervals *LIS = nullptr);
1291  void rewrite();
1292 };
1293 } // namespace
1294 
1295 KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1296  MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1297  : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1298  ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1299  TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1300  PreheaderBB = *BB->pred_begin();
1301  if (PreheaderBB == BB)
1302  PreheaderBB = *std::next(BB->pred_begin());
1303 }
1304 
1305 void KernelRewriter::rewrite() {
1306  // Rearrange the loop to be in schedule order. Note that the schedule may
1307  // contain instructions that are not owned by the loop block (InstrChanges and
1308  // friends), so we gracefully handle unowned instructions and delete any
1309  // instructions that weren't in the schedule.
1310  auto InsertPt = BB->getFirstTerminator();
1311  MachineInstr *FirstMI = nullptr;
1312  for (MachineInstr *MI : S.getInstructions()) {
1313  if (MI->isPHI())
1314  continue;
1315  if (MI->getParent())
1316  MI->removeFromParent();
1317  BB->insert(InsertPt, MI);
1318  if (!FirstMI)
1319  FirstMI = MI;
1320  }
1321  assert(FirstMI && "Failed to find first MI in schedule");
1322 
1323  // At this point all of the scheduled instructions are between FirstMI
1324  // and the end of the block. Kill from the first non-phi to FirstMI.
1325  for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1326  if (LIS)
1328  (I++)->eraseFromParent();
1329  }
1330 
1331  // Now remap every instruction in the loop.
1332  for (MachineInstr &MI : *BB) {
1333  if (MI.isPHI() || MI.isTerminator())
1334  continue;
1335  for (MachineOperand &MO : MI.uses()) {
1336  if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1337  continue;
1338  Register Reg = remapUse(MO.getReg(), MI);
1339  MO.setReg(Reg);
1340  }
1341  }
1342  EliminateDeadPhis(BB, MRI, LIS);
1343 
1344  // Ensure a phi exists for all instructions that are either referenced by
1345  // an illegal phi or by an instruction outside the loop. This allows us to
1346  // treat remaps of these values the same as "normal" values that come from
1347  // loop-carried phis.
1348  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1349  if (MI->isPHI()) {
1350  Register R = MI->getOperand(0).getReg();
1351  phi(R);
1352  continue;
1353  }
1354 
1355  for (MachineOperand &Def : MI->defs()) {
1356  for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1357  if (MI.getParent() != BB) {
1358  phi(Def.getReg());
1359  break;
1360  }
1361  }
1362  }
1363  }
1364 }
1365 
1366 Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1368  if (!Producer)
1369  return Reg;
1370 
1371  int ConsumerStage = S.getStage(&MI);
1372  if (!Producer->isPHI()) {
1373  // Non-phi producers are simple to remap. Insert as many phis as the
1374  // difference between the consumer and producer stages.
1375  if (Producer->getParent() != BB)
1376  // Producer was not inside the loop. Use the register as-is.
1377  return Reg;
1378  int ProducerStage = S.getStage(Producer);
1379  assert(ConsumerStage != -1 &&
1380  "In-loop consumer should always be scheduled!");
1381  assert(ConsumerStage >= ProducerStage);
1382  unsigned StageDiff = ConsumerStage - ProducerStage;
1383 
1384  for (unsigned I = 0; I < StageDiff; ++I)
1385  Reg = phi(Reg);
1386  return Reg;
1387  }
1388 
1389  // First, dive through the phi chain to find the defaults for the generated
1390  // phis.
1392  Register LoopReg = Reg;
1393  auto LoopProducer = Producer;
1394  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1395  LoopReg = getLoopPhiReg(*LoopProducer, BB);
1396  Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1397  LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1398  assert(LoopProducer);
1399  }
1400  int LoopProducerStage = S.getStage(LoopProducer);
1401 
1402  std::optional<Register> IllegalPhiDefault;
1403 
1404  if (LoopProducerStage == -1) {
1405  // Do nothing.
1406  } else if (LoopProducerStage > ConsumerStage) {
1407  // This schedule is only representable if ProducerStage == ConsumerStage+1.
1408  // In addition, Consumer's cycle must be scheduled after Producer in the
1409  // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1410  // functions.
1411 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1412  int LoopProducerCycle = S.getCycle(LoopProducer);
1413  int ConsumerCycle = S.getCycle(&MI);
1414 #endif
1415  assert(LoopProducerCycle <= ConsumerCycle);
1416  assert(LoopProducerStage == ConsumerStage + 1);
1417  // Peel off the first phi from Defaults and insert a phi between producer
1418  // and consumer. This phi will not be at the front of the block so we
1419  // consider it illegal. It will only exist during the rewrite process; it
1420  // needs to exist while we peel off prologs because these could take the
1421  // default value. After that we can replace all uses with the loop producer
1422  // value.
1423  IllegalPhiDefault = Defaults.front();
1424  Defaults.erase(Defaults.begin());
1425  } else {
1426  assert(ConsumerStage >= LoopProducerStage);
1427  int StageDiff = ConsumerStage - LoopProducerStage;
1428  if (StageDiff > 0) {
1429  LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1430  << " to " << (Defaults.size() + StageDiff) << "\n");
1431  // If we need more phis than we have defaults for, pad out with undefs for
1432  // the earliest phis, which are at the end of the defaults chain (the
1433  // chain is in reverse order).
1434  Defaults.resize(Defaults.size() + StageDiff,
1435  Defaults.empty() ? std::optional<Register>()
1436  : Defaults.back());
1437  }
1438  }
1439 
1440  // Now we know the number of stages to jump back, insert the phi chain.
1441  auto DefaultI = Defaults.rbegin();
1442  while (DefaultI != Defaults.rend())
1443  LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1444 
1445  if (IllegalPhiDefault) {
1446  // The consumer optionally consumes LoopProducer in the same iteration
1447  // (because the producer is scheduled at an earlier cycle than the consumer)
1448  // or the initial value. To facilitate this we create an illegal block here
1449  // by embedding a phi in the middle of the block. We will fix this up
1450  // immediately prior to pruning.
1451  auto RC = MRI.getRegClass(Reg);
1453  MachineInstr *IllegalPhi =
1454  BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1455  .addReg(*IllegalPhiDefault)
1456  .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1457  .addReg(LoopReg)
1458  .addMBB(BB); // Block choice is arbitrary and has no effect.
1459  // Illegal phi should belong to the producer stage so that it can be
1460  // filtered correctly during peeling.
1461  S.setStage(IllegalPhi, LoopProducerStage);
1462  return R;
1463  }
1464 
1465  return LoopReg;
1466 }
1467 
1468 Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1469  const TargetRegisterClass *RC) {
1470  // If the init register is not undef, try and find an existing phi.
1471  if (InitReg) {
1472  auto I = Phis.find({LoopReg, *InitReg});
1473  if (I != Phis.end())
1474  return I->second;
1475  } else {
1476  for (auto &KV : Phis) {
1477  if (KV.first.first == LoopReg)
1478  return KV.second;
1479  }
1480  }
1481 
1482  // InitReg is either undef or no existing phi takes InitReg as input. Try and
1483  // find a phi that takes undef as input.
1484  auto I = UndefPhis.find(LoopReg);
1485  if (I != UndefPhis.end()) {
1486  Register R = I->second;
1487  if (!InitReg)
1488  // Found a phi taking undef as input, and this input is undef so return
1489  // without any more changes.
1490  return R;
1491  // Found a phi taking undef as input, so rewrite it to take InitReg.
1493  MI->getOperand(1).setReg(*InitReg);
1494  Phis.insert({{LoopReg, *InitReg}, R});
1495  const TargetRegisterClass *ConstrainRegClass =
1496  MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1497  assert(ConstrainRegClass && "Expected a valid constrained register class!");
1498  (void)ConstrainRegClass;
1499  UndefPhis.erase(I);
1500  return R;
1501  }
1502 
1503  // Failed to find any existing phi to reuse, so create a new one.
1504  if (!RC)
1505  RC = MRI.getRegClass(LoopReg);
1507  if (InitReg) {
1508  const TargetRegisterClass *ConstrainRegClass =
1509  MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1510  assert(ConstrainRegClass && "Expected a valid constrained register class!");
1511  (void)ConstrainRegClass;
1512  }
1513  BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1514  .addReg(InitReg ? *InitReg : undef(RC))
1515  .addMBB(PreheaderBB)
1516  .addReg(LoopReg)
1517  .addMBB(BB);
1518  if (!InitReg)
1519  UndefPhis[LoopReg] = R;
1520  else
1521  Phis[{LoopReg, *InitReg}] = R;
1522  return R;
1523 }
1524 
1525 Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1526  Register &R = Undefs[RC];
1527  if (R == 0) {
1528  // Create an IMPLICIT_DEF that defines this register if we need it.
1529  // All uses of this should be removed by the time we have finished unrolling
1530  // prologs and epilogs.
1531  R = MRI.createVirtualRegister(RC);
1532  auto *InsertBB = &PreheaderBB->getParent()->front();
1533  BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1534  TII->get(TargetOpcode::IMPLICIT_DEF), R);
1535  }
1536  return R;
1537 }
1538 
1539 namespace {
1540 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1541 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1542 /// through and defaults for these phis.
1543 class KernelOperandInfo {
1546  SmallVector<Register, 4> PhiDefaults;
1549 
1550 public:
1551  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1552  const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1553  : MRI(MRI) {
1554  Source = MO;
1555  BB = MO->getParent()->getParent();
1556  while (isRegInLoop(MO)) {
1557  MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1558  if (MI->isFullCopy()) {
1559  MO = &MI->getOperand(1);
1560  continue;
1561  }
1562  if (!MI->isPHI())
1563  break;
1564  // If this is an illegal phi, don't count it in distance.
1565  if (IllegalPhis.count(MI)) {
1566  MO = &MI->getOperand(3);
1567  continue;
1568  }
1569 
1571  MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1572  : &MI->getOperand(3);
1573  PhiDefaults.push_back(Default);
1574  }
1575  Target = MO;
1576  }
1577 
1578  bool operator==(const KernelOperandInfo &Other) const {
1579  return PhiDefaults.size() == Other.PhiDefaults.size();
1580  }
1581 
1582  void print(raw_ostream &OS) const {
1583  OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1584  << *Source->getParent();
1585  }
1586 
1587 private:
1588  bool isRegInLoop(MachineOperand *MO) {
1589  return MO->isReg() && MO->getReg().isVirtual() &&
1590  MRI.getVRegDef(MO->getReg())->getParent() == BB;
1591  }
1592 };
1593 } // namespace
1594 
1596 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1597  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1598  if (LPD == LPD_Front)
1599  PeeledFront.push_back(NewBB);
1600  else
1601  PeeledBack.push_front(NewBB);
1602  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1603  ++I, ++NI) {
1604  CanonicalMIs[&*I] = &*I;
1605  CanonicalMIs[&*NI] = &*I;
1606  BlockMIs[{NewBB, &*I}] = &*NI;
1607  BlockMIs[{BB, &*I}] = &*I;
1608  }
1609  return NewBB;
1610 }
1611 
1613  int MinStage) {
1614  for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1615  I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1616  MachineInstr *MI = &*I++;
1617  int Stage = getStage(MI);
1618  if (Stage == -1 || Stage >= MinStage)
1619  continue;
1620 
1621  for (MachineOperand &DefMO : MI->defs()) {
1623  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1624  // Only PHIs can use values from this block by construction.
1625  // Match with the equivalent PHI in B.
1626  assert(UseMI.isPHI());
1627  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1628  MI->getParent());
1629  Subs.emplace_back(&UseMI, Reg);
1630  }
1631  for (auto &Sub : Subs)
1632  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1634  }
1635  if (LIS)
1637  MI->eraseFromParent();
1638  }
1639 }
1640 
1642  MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1643  auto InsertPt = DestBB->getFirstNonPHI();
1646  llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1647  if (MI.isPHI()) {
1648  // This is an illegal PHI. If we move any instructions using an illegal
1649  // PHI, we need to create a legal Phi.
1650  if (getStage(&MI) != Stage) {
1651  // The legal Phi is not necessary if the illegal phi's stage
1652  // is being moved.
1653  Register PhiR = MI.getOperand(0).getReg();
1654  auto RC = MRI.getRegClass(PhiR);
1656  MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1657  DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1658  .addReg(PhiR)
1659  .addMBB(SourceBB);
1660  BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1661  CanonicalMIs[NI] = CanonicalMIs[&MI];
1662  Remaps[PhiR] = NR;
1663  }
1664  }
1665  if (getStage(&MI) != Stage)
1666  continue;
1667  MI.removeFromParent();
1668  DestBB->insert(InsertPt, &MI);
1669  auto *KernelMI = CanonicalMIs[&MI];
1670  BlockMIs[{DestBB, KernelMI}] = &MI;
1671  BlockMIs.erase({SourceBB, KernelMI});
1672  }
1673  SmallVector<MachineInstr *, 4> PhiToDelete;
1674  for (MachineInstr &MI : DestBB->phis()) {
1675  assert(MI.getNumOperands() == 3);
1676  MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1677  // If the instruction referenced by the phi is moved inside the block
1678  // we don't need the phi anymore.
1679  if (getStage(Def) == Stage) {
1680  Register PhiReg = MI.getOperand(0).getReg();
1681  assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
1682  MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1683  MI.getOperand(0).setReg(PhiReg);
1684  PhiToDelete.push_back(&MI);
1685  }
1686  }
1687  for (auto *P : PhiToDelete)
1688  P->eraseFromParent();
1689  InsertPt = DestBB->getFirstNonPHI();
1690  // Helper to clone Phi instructions into the destination block. We clone Phi
1691  // greedily to avoid combinatorial explosion of Phi instructions.
1692  auto clonePhi = [&](MachineInstr *Phi) {
1693  MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1694  DestBB->insert(InsertPt, NewMI);
1695  Register OrigR = Phi->getOperand(0).getReg();
1697  NewMI->getOperand(0).setReg(R);
1698  NewMI->getOperand(1).setReg(OrigR);
1699  NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1700  Remaps[OrigR] = R;
1701  CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1702  BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1704  return R;
1705  };
1706  for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1707  for (MachineOperand &MO : I->uses()) {
1708  if (!MO.isReg())
1709  continue;
1710  if (Remaps.count(MO.getReg()))
1711  MO.setReg(Remaps[MO.getReg()]);
1712  else {
1713  // If we are using a phi from the source block we need to add a new phi
1714  // pointing to the old one.
1716  if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1717  Register R = clonePhi(Use);
1718  MO.setReg(R);
1719  }
1720  }
1721  }
1722  }
1723 }
1724 
1725 Register
1727  MachineInstr *Phi) {
1728  unsigned distance = PhiNodeLoopIteration[Phi];
1729  MachineInstr *CanonicalUse = CanonicalPhi;
1730  Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1731  for (unsigned I = 0; I < distance; ++I) {
1732  assert(CanonicalUse->isPHI());
1733  assert(CanonicalUse->getNumOperands() == 5);
1734  unsigned LoopRegIdx = 3, InitRegIdx = 1;
1735  if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1736  std::swap(LoopRegIdx, InitRegIdx);
1737  CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1738  CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1739  }
1740  return CanonicalUseReg;
1741 }
1742 
1744  BitVector LS(Schedule.getNumStages(), true);
1745  BitVector AS(Schedule.getNumStages(), true);
1746  LiveStages[BB] = LS;
1747  AvailableStages[BB] = AS;
1748 
1749  // Peel out the prologs.
1750  LS.reset();
1751  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1752  LS[I] = true;
1753  Prologs.push_back(peelKernel(LPD_Front));
1754  LiveStages[Prologs.back()] = LS;
1755  AvailableStages[Prologs.back()] = LS;
1756  }
1757 
1758  // Create a block that will end up as the new loop exiting block (dominated by
1759  // all prologs and epilogs). It will only contain PHIs, in the same order as
1760  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1761  // that the exiting block is a (sub) clone of BB. This in turn gives us the
1762  // property that any value deffed in BB but used outside of BB is used by a
1763  // PHI in the exiting block.
1765  EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1766  // Push out the epilogs, again in reverse order.
1767  // We can't assume anything about the minumum loop trip count at this point,
1768  // so emit a fairly complex epilog.
1769 
1770  // We first peel number of stages minus one epilogue. Then we remove dead
1771  // stages and reorder instructions based on their stage. If we have 3 stages
1772  // we generate first:
1773  // E0[3, 2, 1]
1774  // E1[3', 2']
1775  // E2[3'']
1776  // And then we move instructions based on their stages to have:
1777  // E0[3]
1778  // E1[2, 3']
1779  // E2[1, 2', 3'']
1780  // The transformation is legal because we only move instructions past
1781  // instructions of a previous loop iteration.
1782  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1783  Epilogs.push_back(peelKernel(LPD_Back));
1784  MachineBasicBlock *B = Epilogs.back();
1786  // Keep track at which iteration each phi belongs to. We need it to know
1787  // what version of the variable to use during prologue/epilogue stitching.
1788  EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1789  for (MachineInstr &Phi : B->phis())
1791  }
1792  for (size_t I = 0; I < Epilogs.size(); I++) {
1793  LS.reset();
1794  for (size_t J = I; J < Epilogs.size(); J++) {
1795  int Iteration = J;
1796  unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1797  // Move stage one block at a time so that Phi nodes are updated correctly.
1798  for (size_t K = Iteration; K > I; K--)
1799  moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1800  LS[Stage] = true;
1801  }
1802  LiveStages[Epilogs[I]] = LS;
1803  AvailableStages[Epilogs[I]] = AS;
1804  }
1805 
1806  // Now we've defined all the prolog and epilog blocks as a fallthrough
1807  // sequence, add the edges that will be followed if the loop trip count is
1808  // lower than the number of stages (connecting prologs directly with epilogs).
1809  auto PI = Prologs.begin();
1810  auto EI = Epilogs.begin();
1811  assert(Prologs.size() == Epilogs.size());
1812  for (; PI != Prologs.end(); ++PI, ++EI) {
1813  MachineBasicBlock *Pred = *(*EI)->pred_begin();
1814  (*PI)->addSuccessor(*EI);
1815  for (MachineInstr &MI : (*EI)->phis()) {
1816  Register Reg = MI.getOperand(1).getReg();
1818  if (Use && Use->getParent() == Pred) {
1819  MachineInstr *CanonicalUse = CanonicalMIs[Use];
1820  if (CanonicalUse->isPHI()) {
1821  // If the use comes from a phi we need to skip as many phi as the
1822  // distance between the epilogue and the kernel. Trace through the phi
1823  // chain to find the right value.
1824  Reg = getPhiCanonicalReg(CanonicalUse, Use);
1825  }
1827  }
1828  MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1829  MI.addOperand(MachineOperand::CreateMBB(*PI));
1830  }
1831  }
1832 
1833  // Create a list of all blocks in order.
1835  llvm::copy(PeeledFront, std::back_inserter(Blocks));
1836  Blocks.push_back(BB);
1837  llvm::copy(PeeledBack, std::back_inserter(Blocks));
1838 
1839  // Iterate in reverse order over all instructions, remapping as we go.
1840  for (MachineBasicBlock *B : reverse(Blocks)) {
1841  for (auto I = B->instr_rbegin();
1842  I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1844  rewriteUsesOf(&*MI);
1845  }
1846  }
1847  for (auto *MI : IllegalPhisToDelete) {
1848  if (LIS)
1850  MI->eraseFromParent();
1851  }
1853 
1854  // Now all remapping has been done, we're free to optimize the generated code.
1855  for (MachineBasicBlock *B : reverse(Blocks))
1856  EliminateDeadPhis(B, MRI, LIS);
1857  EliminateDeadPhis(ExitingBB, MRI, LIS);
1858 }
1859 
1861  MachineFunction &MF = *BB->getParent();
1862  MachineBasicBlock *Exit = *BB->succ_begin();
1863  if (Exit == BB)
1864  Exit = *std::next(BB->succ_begin());
1865 
1867  MF.insert(std::next(BB->getIterator()), NewBB);
1868 
1869  // Clone all phis in BB into NewBB and rewrite.
1870  for (MachineInstr &MI : BB->phis()) {
1871  auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1872  Register OldR = MI.getOperand(3).getReg();
1875  for (MachineInstr &Use : MRI.use_instructions(OldR))
1876  if (Use.getParent() != BB)
1877  Uses.push_back(&Use);
1878  for (MachineInstr *Use : Uses)
1879  Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1881  MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1882  .addReg(OldR)
1883  .addMBB(BB);
1884  BlockMIs[{NewBB, &MI}] = NI;
1885  CanonicalMIs[NI] = &MI;
1886  }
1887  BB->replaceSuccessor(Exit, NewBB);
1888  Exit->replacePhiUsesWith(BB, NewBB);
1889  NewBB->addSuccessor(Exit);
1890 
1891  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1893  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1894  (void)CanAnalyzeBr;
1895  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1896  TII->removeBranch(*BB);
1897  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1898  Cond, DebugLoc());
1899  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1900  return NewBB;
1901 }
1902 
1903 Register
1905  MachineBasicBlock *BB) {
1907  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1908  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1909 }
1910 
1912  if (MI->isPHI()) {
1913  // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1914  // and it is produced by this block.
1915  Register PhiR = MI->getOperand(0).getReg();
1916  Register R = MI->getOperand(3).getReg();
1917  int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1918  if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1919  R = MI->getOperand(1).getReg();
1920  MRI.setRegClass(R, MRI.getRegClass(PhiR));
1921  MRI.replaceRegWith(PhiR, R);
1922  // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1923  // later to figure out how to remap registers.
1924  MI->getOperand(0).setReg(PhiR);
1925  IllegalPhisToDelete.push_back(MI);
1926  return;
1927  }
1928 
1929  int Stage = getStage(MI);
1930  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1931  LiveStages[MI->getParent()].test(Stage))
1932  // Instruction is live, no rewriting to do.
1933  return;
1934 
1935  for (MachineOperand &DefMO : MI->defs()) {
1937  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1938  // Only PHIs can use values from this block by construction.
1939  // Match with the equivalent PHI in B.
1940  assert(UseMI.isPHI());
1941  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1942  MI->getParent());
1943  Subs.emplace_back(&UseMI, Reg);
1944  }
1945  for (auto &Sub : Subs)
1946  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1948  }
1949  if (LIS)
1951  MI->eraseFromParent();
1952 }
1953 
1955  // Work outwards from the kernel.
1956  bool KernelDisposed = false;
1957  int TC = Schedule.getNumStages() - 1;
1958  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1959  ++PI, ++EI, --TC) {
1960  MachineBasicBlock *Prolog = *PI;
1961  MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1962  MachineBasicBlock *Epilog = *EI;
1964  TII->removeBranch(*Prolog);
1965  std::optional<bool> StaticallyGreater =
1966  LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1967  if (!StaticallyGreater) {
1968  LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1969  // Dynamically branch based on Cond.
1970  TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1971  } else if (*StaticallyGreater == false) {
1972  LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1973  // Prolog never falls through; branch to epilog and orphan interior
1974  // blocks. Leave it to unreachable-block-elim to clean up.
1975  Prolog->removeSuccessor(Fallthrough);
1976  for (MachineInstr &P : Fallthrough->phis()) {
1977  P.removeOperand(2);
1978  P.removeOperand(1);
1979  }
1981  KernelDisposed = true;
1982  } else {
1983  LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1984  // Prolog always falls through; remove incoming values in epilog.
1985  Prolog->removeSuccessor(Epilog);
1986  for (MachineInstr &P : Epilog->phis()) {
1987  P.removeOperand(4);
1988  P.removeOperand(3);
1989  }
1990  }
1991  }
1992 
1993  if (!KernelDisposed) {
1994  LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1995  LoopInfo->setPreheader(Prologs.back());
1996  } else {
1997  LoopInfo->disposed();
1998  }
1999 }
2000 
2002  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2003  KR.rewrite();
2004 }
2005 
2007  BB = Schedule.getLoop()->getTopBlock();
2011  assert(LoopInfo);
2012 
2013  rewriteKernel();
2015  fixupBranches();
2016 }
2017 
2019  BB = Schedule.getLoop()->getTopBlock();
2021 
2022  // Dump the schedule before we invalidate and remap all its instructions.
2023  // Stash it in a string so we can print it if we found an error.
2024  std::string ScheduleDump;
2025  raw_string_ostream OS(ScheduleDump);
2026  Schedule.print(OS);
2027  OS.flush();
2028 
2029  // First, run the normal ModuleScheduleExpander. We don't support any
2030  // InstrChanges.
2031  assert(LIS && "Requires LiveIntervals!");
2034  MSE.expand();
2035  MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2036  if (!ExpandedKernel) {
2037  // The expander optimized away the kernel. We can't do any useful checking.
2038  MSE.cleanup();
2039  return;
2040  }
2041  // Before running the KernelRewriter, re-add BB into the CFG.
2043 
2044  // Now run the new expansion algorithm.
2045  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2046  KR.rewrite();
2048 
2049  // Collect all illegal phis that the new algorithm created. We'll give these
2050  // to KernelOperandInfo.
2051  SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2052  for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2053  if (NI->isPHI())
2054  IllegalPhis.insert(&*NI);
2055  }
2056 
2057  // Co-iterate across both kernels. We expect them to be identical apart from
2058  // phis and full COPYs (we look through both).
2060  auto OI = ExpandedKernel->begin();
2061  auto NI = BB->begin();
2062  for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2063  while (OI->isPHI() || OI->isFullCopy())
2064  ++OI;
2065  while (NI->isPHI() || NI->isFullCopy())
2066  ++NI;
2067  assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2068  // Analyze every operand separately.
2069  for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2070  OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2071  KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2072  KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2073  }
2074 
2075  bool Failed = false;
2076  for (auto &OldAndNew : KOIs) {
2077  if (OldAndNew.first == OldAndNew.second)
2078  continue;
2079  Failed = true;
2080  errs() << "Modulo kernel validation error: [\n";
2081  errs() << " [golden] ";
2082  OldAndNew.first.print(errs());
2083  errs() << " ";
2084  OldAndNew.second.print(errs());
2085  errs() << "]\n";
2086  }
2087 
2088  if (Failed) {
2089  errs() << "Golden reference kernel:\n";
2090  ExpandedKernel->print(errs());
2091  errs() << "New kernel:\n";
2092  BB->print(errs());
2093  errs() << ScheduleDump;
2095  "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2096  }
2097 
2098  // Cleanup by removing BB from the CFG again as the original
2099  // ModuloScheduleExpander intended.
2101  MSE.cleanup();
2102 }
2103 
2104 //===----------------------------------------------------------------------===//
2105 // ModuloScheduleTestPass implementation
2106 //===----------------------------------------------------------------------===//
2107 // This pass constructs a ModuloSchedule from its module and runs
2108 // ModuloScheduleExpander.
2109 //
2110 // The module is expected to contain a single-block analyzable loop.
2111 // The total order of instructions is taken from the loop as-is.
2112 // Instructions are expected to be annotated with a PostInstrSymbol.
2113 // This PostInstrSymbol must have the following format:
2114 // "Stage=%d Cycle=%d".
2115 //===----------------------------------------------------------------------===//
2116 
2117 namespace {
2118 class ModuloScheduleTest : public MachineFunctionPass {
2119 public:
2120  static char ID;
2121 
2122  ModuloScheduleTest() : MachineFunctionPass(ID) {
2124  }
2125 
2126  bool runOnMachineFunction(MachineFunction &MF) override;
2127  void runOnLoop(MachineFunction &MF, MachineLoop &L);
2128 
2129  void getAnalysisUsage(AnalysisUsage &AU) const override {
2131  AU.addRequired<LiveIntervals>();
2133  }
2134 };
2135 } // namespace
2136 
2137 char ModuloScheduleTest::ID = 0;
2138 
2139 INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2140  "Modulo Schedule test pass", false, false)
2143 INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2144  "Modulo Schedule test pass", false, false)
2145 
2146 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2147  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2148  for (auto *L : MLI) {
2149  if (L->getTopBlock() != L->getBottomBlock())
2150  continue;
2151  runOnLoop(MF, *L);
2152  return false;
2153  }
2154  return false;
2155 }
2156 
2157 static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2158  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2159  std::pair<StringRef, StringRef> StageTokenAndValue =
2160  getToken(StageAndCycle.first, "-");
2161  std::pair<StringRef, StringRef> CycleTokenAndValue =
2162  getToken(StageAndCycle.second, "-");
2163  if (StageTokenAndValue.first != "Stage" ||
2164  CycleTokenAndValue.first != "_Cycle") {
2166  "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2167  return;
2168  }
2169 
2170  StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2171  CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2172 
2173  dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2174 }
2175 
2176 void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2177  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2179  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2180 
2182  std::vector<MachineInstr *> Instrs;
2183  for (MachineInstr &MI : *BB) {
2184  if (MI.isTerminator())
2185  continue;
2186  Instrs.push_back(&MI);
2187  if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2188  dbgs() << "Parsing post-instr symbol for " << MI;
2189  parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2190  }
2191  }
2192 
2193  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2194  std::move(Stage));
2196  MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2197  MSE.expand();
2198  MSE.cleanup();
2199 }
2200 
2201 //===----------------------------------------------------------------------===//
2202 // ModuloScheduleTestAnnotater implementation
2203 //===----------------------------------------------------------------------===//
2204 
2206  for (MachineInstr *MI : S.getInstructions()) {
2208  raw_svector_ostream OS(SV);
2209  OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2210  MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2211  MI->setPostInstrSymbol(MF, Sym);
2212  }
2213 }
i
i
Definition: README.txt:29
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1204
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:357
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:107
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:830
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:824
llvm::PeelingModuloScheduleExpander::rewriteKernel
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
Definition: ModuloSchedule.cpp:2001
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:213
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:157
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:184
llvm::logicalview::LVAttributeKind::Producer
@ Producer
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:79
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::MachineFunction::getContext
MCContext & getContext() const
Definition: MachineFunction.h:624
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
llvm::MachineBasicBlock::clear
void clear()
Definition: MachineBasicBlock.h:1029
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:149
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:469
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:156
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:409
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:291
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:127
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:681
llvm::LPD_Front
@ LPD_Front
Peel the first iteration of the loop.
Definition: MachineLoopUtils.h:18
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:236
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1433
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:507
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::MachineRegisterInfo::use_operands
iterator_range< use_iterator > use_operands(Register Reg) const
Definition: MachineRegisterInfo.h:491
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:889
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1837
llvm::PeelingModuloScheduleExpander::Epilogs
SmallVector< MachineBasicBlock *, 4 > Epilogs
Definition: ModuloSchedule.h:308
llvm::PeelingModuloScheduleExpander::getStage
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
Definition: ModuloSchedule.h:360
llvm::AMDGPU::Exp::Target
Target
Definition: SIDefines.h:858
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:289
llvm::PeelingModuloScheduleExpander::TII
const TargetInstrInfo * TII
Definition: ModuloSchedule.h:300
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:329
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:67
llvm::MachineLoop::getBottomBlock
MachineBasicBlock * getBottomBlock()
Return the "bottom" block in the loop, which is the last block in the linear layout,...
Definition: MachineLoopInfo.cpp:76
llvm::PeelingModuloScheduleExpander::PeeledFront
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
Definition: ModuloSchedule.h:326
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::PeelingModuloScheduleExpander::moveStageBetweenBlocks
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Definition: ModuloSchedule.cpp:1641
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:275
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:590
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:297
llvm::TargetInstrInfo::insertBranch
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
Definition: TargetInstrInfo.h:712
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:772
llvm::ModuloSchedule::dump
void dump()
Definition: ModuloSchedule.h:154
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:930
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::PeelingModuloScheduleExpander::AvailableStages
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
Definition: ModuloSchedule.h:314
llvm::MachineBasicBlock::getFirstInstrTerminator
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Definition: MachineBasicBlock.cpp:249
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:882
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:682
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:386
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
MachineLoopInfo.h
llvm::TargetInstrInfo::removeBranch
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
Definition: TargetInstrInfo.h:694
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
llvm::PeelingModuloScheduleExpander::Schedule
ModuloSchedule & Schedule
Definition: ModuloSchedule.h:296
llvm::PeelingModuloScheduleExpander::PeeledBack
std::deque< MachineBasicBlock * > PeeledBack
Definition: ModuloSchedule.h:326
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:553
llvm::MachineInstr::setMemRefs
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
Definition: MachineInstr.cpp:352
llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition: MachineBasicBlock.h:375
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:376
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::PeelingModuloScheduleExpander::PhiNodeLoopIteration
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
Definition: ModuloSchedule.h:317
MCContext.h
llvm::MachineBasicBlock::eraseFromParent
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
Definition: MachineBasicBlock.cpp:1356
llvm::PeelingModuloScheduleExpander::BlockMIs
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Definition: ModuloSchedule.h:323
llvm::PeelingModuloScheduleExpander::BB
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
Definition: ModuloSchedule.h:304
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
TBB
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
Definition: RISCVRedundantCopyElimination.cpp:76
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::PeelingModuloScheduleExpander::Prologs
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
Definition: ModuloSchedule.h:308
pass
modulo schedule Modulo Schedule test pass
Definition: ModuloSchedule.cpp:2144
llvm::GenericCycle
A possibly irreducible generalization of a Loop.
Definition: GenericCycleInfo.h:48
llvm::PeelingModuloScheduleExpander::filterInstructions
void filterInstructions(MachineBasicBlock *MB, int MinStage)
Definition: ModuloSchedule.cpp:1612
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:642
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:185
llvm::ModuloSchedule::getInstructions
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
Definition: ModuloSchedule.h:152
llvm::PeelSingleBlockLoop
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
Definition: MachineLoopUtils.cpp:26
llvm::BitVector
Definition: BitVector.h:75
CombinerObjective::Default
@ Default
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:398
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:243
llvm::PeelingModuloScheduleExpander::peelKernel
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
Definition: ModuloSchedule.cpp:1596
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:528
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
parseSymbolString
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
Definition: ModuloSchedule.cpp:2157
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:779
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:661
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1405
llvm::PeelingModuloScheduleExpander::Preheader
MachineBasicBlock * Preheader
The original loop preheader.
Definition: ModuloSchedule.h:306
llvm::PeelingModuloScheduleExpander::fixupBranches
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
Definition: ModuloSchedule.cpp:1954
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineBasicBlock::succ_iterator
std::vector< MachineBasicBlock * >::iterator succ_iterator
Definition: MachineBasicBlock.h:347
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:672
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:274
llvm::ModuloScheduleExpander::getRewrittenKernel
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
Definition: ModuloSchedule.h:277
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::PeelingModuloScheduleExpander::rewriteUsesOf
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
Definition: ModuloSchedule.cpp:1911
llvm::MachineBasicBlock::instr_rend
reverse_instr_iterator instr_rend()
Definition: MachineBasicBlock.h:299
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1896
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:326
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2205
LiveIntervals.h
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:855
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:390
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:499
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:31
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::MachineBasicBlock::instr_rbegin
reverse_instr_iterator instr_rbegin()
Definition: MachineBasicBlock.h:297
llvm::MachineOperand::isDead
bool isDead() const
Definition: MachineOperand.h:391
I
#define I(x, y, z)
Definition: MD5.cpp:58
StringExtras.h
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:198
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:446
replaceRegUsesAfterLoop
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
Definition: ModuloSchedule.cpp:340
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetInstrInfo::insertUnconditionalBranch
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
Definition: TargetInstrInfo.h:720
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:373
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:265
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2038
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1308
llvm::PeelingModuloScheduleExpander::CreateLCSSAExitingBlock
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
Definition: ModuloSchedule.cpp:1860
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::LoopPeelDirection
LoopPeelDirection
Definition: MachineLoopUtils.h:17
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:366
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:293
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:295
llvm::MachineFunction
Definition: MachineFunction.h:258
llvm::PeelingModuloScheduleExpander::getEquivalentRegisterIn
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
Definition: ModuloSchedule.cpp:1904
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:240
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::MachineBasicBlock::phis
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
Definition: MachineBasicBlock.h:337
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:568
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:697
llvm::Offset
@ Offset
Definition: DWP.cpp:406
hasUseAfterLoop
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
Definition: ModuloSchedule.cpp:354
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:2006
llvm::MachineRegisterInfo::use_begin
use_iterator use_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:486
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: ModuloSchedule.cpp:36
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:806
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:381
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LiveIntervals::createEmptyInterval
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
Definition: LiveIntervals.h:129
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: ModuloSchedule.cpp:60
llvm::MachineBasicBlock::transferSuccessors
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
Definition: MachineBasicBlock.cpp:891
Prolog
@ Prolog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:126
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
rewrite
static bool rewrite(Function &F)
Definition: PoisonChecking.cpp:255
llvm::LPD_Back
@ LPD_Back
Peel the last iteration of the loop.
Definition: MachineLoopUtils.h:19
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
j
return j(j<< 16)
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:833
llvm::PeelingModuloScheduleExpander::LIS
LiveIntervals * LIS
Definition: ModuloSchedule.h:301
llvm::MachineBasicBlock::replacePhiUsesWith
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: MachineBasicBlock.cpp:1384
llvm::PeelingModuloScheduleExpander::MRI
MachineRegisterInfo & MRI
Definition: ModuloSchedule.h:299
llvm::MachineBasicBlock::removeSuccessor
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:810
Epilog
@ Epilog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:126
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:354
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1336
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:641
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::ModuloSchedule::getCycle
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
Definition: ModuloSchedule.h:140
llvm::ModuloSchedule::getNumStages
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
Definition: ModuloSchedule.h:123
llvm::PeelingModuloScheduleExpander::IllegalPhisToDelete
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
Definition: ModuloSchedule.h:328
llvm::ModuloSchedule::getLoop
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
Definition: ModuloSchedule.h:119
llvm::LiveIntervals
Definition: LiveIntervals.h:53
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::MachineInstr::memoperands_empty
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:743
llvm::PeelingModuloScheduleExpander::peelPrologAndEpilogs
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
Definition: ModuloSchedule.cpp:1743
llvm::PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Definition: ModuloSchedule.cpp:2018
getInitPhiReg
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
Definition: ModuloSchedule.cpp:52
llvm::Cycle
CycleInfo::CycleT Cycle
Definition: CycleAnalysis.h:28
llvm::MachineRegisterInfo::use_end
static use_iterator use_end()
Definition: MachineRegisterInfo.h:489
llvm::numbers::phi
constexpr double phi
Definition: MathExtras.h:45
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::iterator
DenseMapIterator< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > > iterator
Definition: DenseMap.h:71
Other
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1260
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::ModuloSchedule::print
void print(raw_ostream &OS)
Definition: ModuloSchedule.cpp:25
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:309
MachineInstrBuilder.h
llvm::PeelingModuloScheduleExpander::LiveStages
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
Definition: ModuloSchedule.h:310
used
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the producing something like xmm1 movaps xmm0 ret saving two instructions The basic idea is that a reload from a spill if only one byte chunk is used
Definition: README-SSE.txt:270
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:56
llvm::LiveIntervals::RemoveMachineInstrFromMaps
void RemoveMachineInstrFromMaps(MachineInstr &MI)
Definition: LiveIntervals.h:270
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:724
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:672
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:83
llvm::MachineInstr::memoperands
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:713
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:484
llvm::initializeModuloScheduleTestPass
void initializeModuloScheduleTestPass(PassRegistry &)
llvm::PeelingModuloScheduleExpander::MF
MachineFunction & MF
Definition: ModuloSchedule.h:297
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:160
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", "Modulo Schedule test pass", false, false) INITIALIZE_PASS_END(ModuloScheduleTest
llvm::LiveIntervals::hasInterval
bool hasInterval(Register Reg) const
Definition: LiveIntervals.h:123
llvm::MachineBasicBlock::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Definition: MachineBasicBlock.cpp:335
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
raw_ostream.h
llvm::ModuloSchedule::getStage
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
Definition: ModuloSchedule.h:134
test
modulo schedule test
Definition: ModuloSchedule.cpp:2143
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:216
InitializePasses.h
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:641
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:502
llvm::PeelingModuloScheduleExpander::getPhiCanonicalReg
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
Definition: ModuloSchedule.cpp:1726
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:311
llvm::MachineRegisterInfo::setRegClass
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
Definition: MachineRegisterInfo.cpp:57
removePhis
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Definition: ModuloSchedule.cpp:846
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::PeelingModuloScheduleExpander::CanonicalMIs
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
Definition: ModuloSchedule.h:321