LLVM  9.0.0svn
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and the check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/BitVector.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/MapVector.h"
36 #include "llvm/ADT/PriorityQueue.h"
37 #include "llvm/ADT/SetVector.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
65 #include "llvm/Config/llvm-config.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/DebugLoc.h"
68 #include "llvm/IR/Function.h"
69 #include "llvm/MC/LaneBitmask.h"
70 #include "llvm/MC/MCInstrDesc.h"
72 #include "llvm/MC/MCRegisterInfo.h"
73 #include "llvm/Pass.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
79 #include <algorithm>
80 #include <cassert>
81 #include <climits>
82 #include <cstdint>
83 #include <deque>
84 #include <functional>
85 #include <iterator>
86 #include <map>
87 #include <memory>
88 #include <tuple>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 
94 #define DEBUG_TYPE "pipeliner"
95 
96 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
97 STATISTIC(NumPipelined, "Number of loops software pipelined");
98 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
99 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
100 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
101 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
102 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
103 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
104 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
105 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
106 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
107 
108 /// A command line option to turn software pipelining on or off.
109 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
111  cl::desc("Enable Software Pipelining"));
112 
113 /// A command line option to enable SWP at -Os.
114 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
115  cl::desc("Enable SWP at Os."), cl::Hidden,
116  cl::init(false));
117 
118 /// A command line argument to limit minimum initial interval for pipelining.
119 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
120  cl::desc("Size limit for the MII."),
121  cl::Hidden, cl::init(27));
122 
123 /// A command line argument to limit the number of stages in the pipeline.
124 static cl::opt<int>
125  SwpMaxStages("pipeliner-max-stages",
126  cl::desc("Maximum stages allowed in the generated scheduled."),
127  cl::Hidden, cl::init(3));
128 
129 /// A command line option to disable the pruning of chain dependences due to
130 /// an unrelated Phi.
131 static cl::opt<bool>
132  SwpPruneDeps("pipeliner-prune-deps",
133  cl::desc("Prune dependences between unrelated Phi nodes."),
134  cl::Hidden, cl::init(true));
135 
136 /// A command line option to disable the pruning of loop carried order
137 /// dependences.
138 static cl::opt<bool>
139  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
140  cl::desc("Prune loop carried order dependences."),
141  cl::Hidden, cl::init(true));
142 
143 #ifndef NDEBUG
144 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
145 #endif
146 
147 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
148  cl::ReallyHidden, cl::init(false),
149  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
150 
151 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
152  cl::init(false));
153 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
154  cl::init(false));
155 
156 namespace llvm {
157 
158 // A command line option to enable the CopyToPhi DAG mutation.
160  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
161  cl::init(true), cl::ZeroOrMore,
162  cl::desc("Enable CopyToPhi DAG Mutation"));
163 
164 } // end namespace llvm
165 
166 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
167 char MachinePipeliner::ID = 0;
168 #ifndef NDEBUG
170 #endif
172 
174  "Modulo Software Pipelining", false, false)
180  "Modulo Software Pipelining", false, false)
181 
182 /// The "main" function for implementing Swing Modulo Scheduling.
183 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
184  if (skipFunction(mf.getFunction()))
185  return false;
186 
187  if (!EnableSWP)
188  return false;
189 
190  if (mf.getFunction().getAttributes().hasAttribute(
191  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
192  !EnableSWPOptSize.getPosition())
193  return false;
194 
195  if (!mf.getSubtarget().enableMachinePipeliner())
196  return false;
197 
198  // Cannot pipeline loops without instruction itineraries if we are using
199  // DFA for the pipeliner.
200  if (mf.getSubtarget().useDFAforSMS() &&
201  (!mf.getSubtarget().getInstrItineraryData() ||
202  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
203  return false;
204 
205  MF = &mf;
206  MLI = &getAnalysis<MachineLoopInfo>();
207  MDT = &getAnalysis<MachineDominatorTree>();
208  TII = MF->getSubtarget().getInstrInfo();
209  RegClassInfo.runOnMachineFunction(*MF);
210 
211  for (auto &L : *MLI)
212  scheduleLoop(*L);
213 
214  return false;
215 }
216 
217 /// Attempt to perform the SMS algorithm on the specified loop. This function is
218 /// the main entry point for the algorithm. The function identifies candidate
219 /// loops, calculates the minimum initiation interval, and attempts to schedule
220 /// the loop.
221 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
222  bool Changed = false;
223  for (auto &InnerLoop : L)
224  Changed |= scheduleLoop(*InnerLoop);
225 
226 #ifndef NDEBUG
227  // Stop trying after reaching the limit (if any).
228  int Limit = SwpLoopLimit;
229  if (Limit >= 0) {
230  if (NumTries >= SwpLoopLimit)
231  return Changed;
232  NumTries++;
233  }
234 #endif
235 
236  setPragmaPipelineOptions(L);
237  if (!canPipelineLoop(L)) {
238  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
239  return Changed;
240  }
241 
242  ++NumTrytoPipeline;
243 
244  Changed = swingModuloScheduler(L);
245 
246  return Changed;
247 }
248 
249 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
250  MachineBasicBlock *LBLK = L.getTopBlock();
251 
252  if (LBLK == nullptr)
253  return;
254 
255  const BasicBlock *BBLK = LBLK->getBasicBlock();
256  if (BBLK == nullptr)
257  return;
258 
259  const Instruction *TI = BBLK->getTerminator();
260  if (TI == nullptr)
261  return;
262 
263  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
264  if (LoopID == nullptr)
265  return;
266 
267  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
268  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
269 
270  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
271  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
272 
273  if (MD == nullptr)
274  continue;
275 
276  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
277 
278  if (S == nullptr)
279  continue;
280 
281  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
282  assert(MD->getNumOperands() == 2 &&
283  "Pipeline initiation interval hint metadata should have two operands.");
285  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
286  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
287  } else if (S->getString() == "llvm.loop.pipeline.disable") {
288  disabledByPragma = true;
289  }
290  }
291 }
292 
293 /// Return true if the loop can be software pipelined. The algorithm is
294 /// restricted to loops with a single basic block. Make sure that the
295 /// branch in the loop can be analyzed.
296 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
297  if (L.getNumBlocks() != 1)
298  return false;
299 
300  if (disabledByPragma)
301  return false;
302 
303  // Check if the branch can't be understood because we can't do pipelining
304  // if that's the case.
305  LI.TBB = nullptr;
306  LI.FBB = nullptr;
307  LI.BrCond.clear();
308  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
309  LLVM_DEBUG(
310  dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
311  NumFailBranch++;
312  return false;
313  }
314 
315  LI.LoopInductionVar = nullptr;
316  LI.LoopCompare = nullptr;
318  LLVM_DEBUG(
319  dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
320  NumFailLoop++;
321  return false;
322  }
323 
324  if (!L.getLoopPreheader()) {
325  LLVM_DEBUG(
326  dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
327  NumFailPreheader++;
328  return false;
329  }
330 
331  // Remove any subregisters from inputs to phi nodes.
332  preprocessPhiNodes(*L.getHeader());
333  return true;
334 }
335 
336 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
338  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
339 
340  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
341  MachineOperand &DefOp = PI.getOperand(0);
342  assert(DefOp.getSubReg() == 0);
343  auto *RC = MRI.getRegClass(DefOp.getReg());
344 
345  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
346  MachineOperand &RegOp = PI.getOperand(i);
347  if (RegOp.getSubReg() == 0)
348  continue;
349 
350  // If the operand uses a subregister, replace it with a new register
351  // without subregisters, and generate a copy to the new register.
352  unsigned NewReg = MRI.createVirtualRegister(RC);
353  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
355  const DebugLoc &DL = PredB.findDebugLoc(At);
356  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
357  .addReg(RegOp.getReg(), getRegState(RegOp),
358  RegOp.getSubReg());
359  Slots.insertMachineInstrInMaps(*Copy);
360  RegOp.setReg(NewReg);
361  RegOp.setSubReg(0);
362  }
363  }
364 }
365 
366 /// The SMS algorithm consists of the following main steps:
367 /// 1. Computation and analysis of the dependence graph.
368 /// 2. Ordering of the nodes (instructions).
369 /// 3. Attempt to Schedule the loop.
370 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
371  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
372 
373  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
375 
376  MachineBasicBlock *MBB = L.getHeader();
377  // The kernel should not include any terminator instructions. These
378  // will be added back later.
379  SMS.startBlock(MBB);
380 
381  // Compute the number of 'real' instructions in the basic block by
382  // ignoring terminators.
383  unsigned size = MBB->size();
385  E = MBB->instr_end();
386  I != E; ++I, --size)
387  ;
388 
389  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
390  SMS.schedule();
391  SMS.exitRegion();
392 
393  SMS.finishBlock();
394  return SMS.hasNewSchedule();
395 }
396 
397 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
398  if (II_setByPragma > 0)
399  MII = II_setByPragma;
400  else
401  MII = std::max(ResMII, RecMII);
402 }
403 
404 void SwingSchedulerDAG::setMAX_II() {
405  if (II_setByPragma > 0)
406  MAX_II = II_setByPragma;
407  else
408  MAX_II = MII + 10;
409 }
410 
411 /// We override the schedule function in ScheduleDAGInstrs to implement the
412 /// scheduling part of the Swing Modulo Scheduling algorithm.
414  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
415  buildSchedGraph(AA);
416  addLoopCarriedDependences(AA);
417  updatePhiDependences();
418  Topo.InitDAGTopologicalSorting();
419  changeDependences();
420  postprocessDAG();
421  LLVM_DEBUG(dump());
422 
423  NodeSetType NodeSets;
424  findCircuits(NodeSets);
425  NodeSetType Circuits = NodeSets;
426 
427  // Calculate the MII.
428  unsigned ResMII = calculateResMII();
429  unsigned RecMII = calculateRecMII(NodeSets);
430 
431  fuseRecs(NodeSets);
432 
433  // This flag is used for testing and can cause correctness problems.
434  if (SwpIgnoreRecMII)
435  RecMII = 0;
436 
437  setMII(ResMII, RecMII);
438  setMAX_II();
439 
440  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
441  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
442 
443  // Can't schedule a loop without a valid MII.
444  if (MII == 0) {
445  LLVM_DEBUG(
446  dbgs()
447  << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
448  NumFailZeroMII++;
449  return;
450  }
451 
452  // Don't pipeline large loops.
453  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
454  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
455  << ", we don't pipleline large loops\n");
456  NumFailLargeMaxMII++;
457  return;
458  }
459 
460  computeNodeFunctions(NodeSets);
461 
462  registerPressureFilter(NodeSets);
463 
464  colocateNodeSets(NodeSets);
465 
466  checkNodeSets(NodeSets);
467 
468  LLVM_DEBUG({
469  for (auto &I : NodeSets) {
470  dbgs() << " Rec NodeSet ";
471  I.dump();
472  }
473  });
474 
475  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
476 
477  groupRemainingNodes(NodeSets);
478 
479  removeDuplicateNodes(NodeSets);
480 
481  LLVM_DEBUG({
482  for (auto &I : NodeSets) {
483  dbgs() << " NodeSet ";
484  I.dump();
485  }
486  });
487 
488  computeNodeOrder(NodeSets);
489 
490  // check for node order issues
491  checkValidNodeOrder(Circuits);
492 
493  SMSchedule Schedule(Pass.MF);
494  Scheduled = schedulePipeline(Schedule);
495 
496  if (!Scheduled){
497  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
498  NumFailNoSchedule++;
499  return;
500  }
501 
502  unsigned numStages = Schedule.getMaxStageCount();
503  // No need to generate pipeline if there are no overlapped iterations.
504  if (numStages == 0) {
505  LLVM_DEBUG(
506  dbgs() << "No overlapped iterations, no need to generate pipeline\n");
507  NumFailZeroStage++;
508  return;
509  }
510  // Check that the maximum stage count is less than user-defined limit.
511  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
512  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
513  << " : too many stages, abort\n");
514  NumFailLargeMaxStage++;
515  return;
516  }
517 
518  generatePipelinedLoop(Schedule);
519  ++NumPipelined;
520 }
521 
522 /// Clean up after the software pipeliner runs.
524  for (MachineInstr *I : NewMIs)
526  NewMIs.clear();
527 
528  // Call the superclass.
530 }
531 
532 /// Return the register values for the operands of a Phi instruction.
533 /// This function assume the instruction is a Phi.
535  unsigned &InitVal, unsigned &LoopVal) {
536  assert(Phi.isPHI() && "Expecting a Phi.");
537 
538  InitVal = 0;
539  LoopVal = 0;
540  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
541  if (Phi.getOperand(i + 1).getMBB() != Loop)
542  InitVal = Phi.getOperand(i).getReg();
543  else
544  LoopVal = Phi.getOperand(i).getReg();
545 
546  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
547 }
548 
549 /// Return the Phi register value that comes from the incoming block.
550 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
551  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
552  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
553  return Phi.getOperand(i).getReg();
554  return 0;
555 }
556 
557 /// Return the Phi register value that comes the loop block.
558 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
559  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
560  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
561  return Phi.getOperand(i).getReg();
562  return 0;
563 }
564 
565 /// Return true if SUb can be reached from SUa following the chain edges.
566 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
567  SmallPtrSet<SUnit *, 8> Visited;
568  SmallVector<SUnit *, 8> Worklist;
569  Worklist.push_back(SUa);
570  while (!Worklist.empty()) {
571  const SUnit *SU = Worklist.pop_back_val();
572  for (auto &SI : SU->Succs) {
573  SUnit *SuccSU = SI.getSUnit();
574  if (SI.getKind() == SDep::Order) {
575  if (Visited.count(SuccSU))
576  continue;
577  if (SuccSU == SUb)
578  return true;
579  Worklist.push_back(SuccSU);
580  Visited.insert(SuccSU);
581  }
582  }
583  }
584  return false;
585 }
586 
587 /// Return true if the instruction causes a chain between memory
588 /// references before and after it.
590  return MI.isCall() || MI.mayRaiseFPException() ||
592  (MI.hasOrderedMemoryRef() &&
593  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
594 }
595 
596 /// Return the underlying objects for the memory references of an instruction.
597 /// This function calls the code in ValueTracking, but first checks that the
598 /// instruction has a memory operand.
601  const DataLayout &DL) {
602  if (!MI->hasOneMemOperand())
603  return;
605  if (!MM->getValue())
606  return;
607  GetUnderlyingObjects(MM->getValue(), Objs, DL);
608  for (const Value *V : Objs) {
609  if (!isIdentifiedObject(V)) {
610  Objs.clear();
611  return;
612  }
613  Objs.push_back(V);
614  }
615 }
616 
617 /// Add a chain edge between a load and store if the store can be an
618 /// alias of the load on a subsequent iteration, i.e., a loop carried
619 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
620 /// but that code doesn't create loop carried dependences.
621 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
623  Value *UnknownValue =
625  for (auto &SU : SUnits) {
626  MachineInstr &MI = *SU.getInstr();
627  if (isDependenceBarrier(MI, AA))
628  PendingLoads.clear();
629  else if (MI.mayLoad()) {
631  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
632  if (Objs.empty())
633  Objs.push_back(UnknownValue);
634  for (auto V : Objs) {
635  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
636  SUs.push_back(&SU);
637  }
638  } else if (MI.mayStore()) {
640  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
641  if (Objs.empty())
642  Objs.push_back(UnknownValue);
643  for (auto V : Objs) {
645  PendingLoads.find(V);
646  if (I == PendingLoads.end())
647  continue;
648  for (auto Load : I->second) {
649  if (isSuccOrder(Load, &SU))
650  continue;
651  MachineInstr &LdMI = *Load->getInstr();
652  // First, perform the cheaper check that compares the base register.
653  // If they are the same and the load offset is less than the store
654  // offset, then mark the dependence as loop carried potentially.
655  const MachineOperand *BaseOp1, *BaseOp2;
656  int64_t Offset1, Offset2;
657  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
658  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
659  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
660  (int)Offset1 < (int)Offset2) {
661  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
662  "What happened to the chain edge?");
663  SDep Dep(Load, SDep::Barrier);
664  Dep.setLatency(1);
665  SU.addPred(Dep);
666  continue;
667  }
668  }
669  // Second, the more expensive check that uses alias analysis on the
670  // base registers. If they alias, and the load offset is less than
671  // the store offset, the mark the dependence as loop carried.
672  if (!AA) {
673  SDep Dep(Load, SDep::Barrier);
674  Dep.setLatency(1);
675  SU.addPred(Dep);
676  continue;
677  }
678  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
679  MachineMemOperand *MMO2 = *MI.memoperands_begin();
680  if (!MMO1->getValue() || !MMO2->getValue()) {
681  SDep Dep(Load, SDep::Barrier);
682  Dep.setLatency(1);
683  SU.addPred(Dep);
684  continue;
685  }
686  if (MMO1->getValue() == MMO2->getValue() &&
687  MMO1->getOffset() <= MMO2->getOffset()) {
688  SDep Dep(Load, SDep::Barrier);
689  Dep.setLatency(1);
690  SU.addPred(Dep);
691  continue;
692  }
693  AliasResult AAResult = AA->alias(
695  MMO1->getAAInfo()),
697  MMO2->getAAInfo()));
698 
699  if (AAResult != NoAlias) {
700  SDep Dep(Load, SDep::Barrier);
701  Dep.setLatency(1);
702  SU.addPred(Dep);
703  }
704  }
705  }
706  }
707  }
708 }
709 
710 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
711 /// processes dependences for PHIs. This function adds true dependences
712 /// from a PHI to a use, and a loop carried dependence from the use to the
713 /// PHI. The loop carried dependence is represented as an anti dependence
714 /// edge. This function also removes chain dependences between unrelated
715 /// PHIs.
716 void SwingSchedulerDAG::updatePhiDependences() {
717  SmallVector<SDep, 4> RemoveDeps;
719 
720  // Iterate over each DAG node.
721  for (SUnit &I : SUnits) {
722  RemoveDeps.clear();
723  // Set to true if the instruction has an operand defined by a Phi.
724  unsigned HasPhiUse = 0;
725  unsigned HasPhiDef = 0;
726  MachineInstr *MI = I.getInstr();
727  // Iterate over each operand, and we process the definitions.
729  MOE = MI->operands_end();
730  MOI != MOE; ++MOI) {
731  if (!MOI->isReg())
732  continue;
733  unsigned Reg = MOI->getReg();
734  if (MOI->isDef()) {
735  // If the register is used by a Phi, then create an anti dependence.
737  UI = MRI.use_instr_begin(Reg),
738  UE = MRI.use_instr_end();
739  UI != UE; ++UI) {
740  MachineInstr *UseMI = &*UI;
741  SUnit *SU = getSUnit(UseMI);
742  if (SU != nullptr && UseMI->isPHI()) {
743  if (!MI->isPHI()) {
744  SDep Dep(SU, SDep::Anti, Reg);
745  Dep.setLatency(1);
746  I.addPred(Dep);
747  } else {
748  HasPhiDef = Reg;
749  // Add a chain edge to a dependent Phi that isn't an existing
750  // predecessor.
751  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
752  I.addPred(SDep(SU, SDep::Barrier));
753  }
754  }
755  }
756  } else if (MOI->isUse()) {
757  // If the register is defined by a Phi, then create a true dependence.
758  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
759  if (DefMI == nullptr)
760  continue;
761  SUnit *SU = getSUnit(DefMI);
762  if (SU != nullptr && DefMI->isPHI()) {
763  if (!MI->isPHI()) {
764  SDep Dep(SU, SDep::Data, Reg);
765  Dep.setLatency(0);
766  ST.adjustSchedDependency(SU, &I, Dep);
767  I.addPred(Dep);
768  } else {
769  HasPhiUse = Reg;
770  // Add a chain edge to a dependent Phi that isn't an existing
771  // predecessor.
772  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
773  I.addPred(SDep(SU, SDep::Barrier));
774  }
775  }
776  }
777  }
778  // Remove order dependences from an unrelated Phi.
779  if (!SwpPruneDeps)
780  continue;
781  for (auto &PI : I.Preds) {
782  MachineInstr *PMI = PI.getSUnit()->getInstr();
783  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
784  if (I.getInstr()->isPHI()) {
785  if (PMI->getOperand(0).getReg() == HasPhiUse)
786  continue;
787  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
788  continue;
789  }
790  RemoveDeps.push_back(PI);
791  }
792  }
793  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
794  I.removePred(RemoveDeps[i]);
795  }
796 }
797 
798 /// Iterate over each DAG node and see if we can change any dependences
799 /// in order to reduce the recurrence MII.
800 void SwingSchedulerDAG::changeDependences() {
801  // See if an instruction can use a value from the previous iteration.
802  // If so, we update the base and offset of the instruction and change
803  // the dependences.
804  for (SUnit &I : SUnits) {
805  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
806  int64_t NewOffset = 0;
807  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
808  NewOffset))
809  continue;
810 
811  // Get the MI and SUnit for the instruction that defines the original base.
812  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
813  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
814  if (!DefMI)
815  continue;
816  SUnit *DefSU = getSUnit(DefMI);
817  if (!DefSU)
818  continue;
819  // Get the MI and SUnit for the instruction that defins the new base.
820  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
821  if (!LastMI)
822  continue;
823  SUnit *LastSU = getSUnit(LastMI);
824  if (!LastSU)
825  continue;
826 
827  if (Topo.IsReachable(&I, LastSU))
828  continue;
829 
830  // Remove the dependence. The value now depends on a prior iteration.
832  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
833  ++P)
834  if (P->getSUnit() == DefSU)
835  Deps.push_back(*P);
836  for (int i = 0, e = Deps.size(); i != e; i++) {
837  Topo.RemovePred(&I, Deps[i].getSUnit());
838  I.removePred(Deps[i]);
839  }
840  // Remove the chain dependence between the instructions.
841  Deps.clear();
842  for (auto &P : LastSU->Preds)
843  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
844  Deps.push_back(P);
845  for (int i = 0, e = Deps.size(); i != e; i++) {
846  Topo.RemovePred(LastSU, Deps[i].getSUnit());
847  LastSU->removePred(Deps[i]);
848  }
849 
850  // Add a dependence between the new instruction and the instruction
851  // that defines the new base.
852  SDep Dep(&I, SDep::Anti, NewBase);
853  Topo.AddPred(LastSU, &I);
854  LastSU->addPred(Dep);
855 
856  // Remember the base and offset information so that we can update the
857  // instruction during code generation.
858  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
859  }
860 }
861 
862 namespace {
863 
864 // FuncUnitSorter - Comparison operator used to sort instructions by
865 // the number of functional unit choices.
866 struct FuncUnitSorter {
868  const MCSubtargetInfo *STI;
870 
871  FuncUnitSorter(const TargetSubtargetInfo &TSI)
872  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
873 
874  // Compute the number of functional unit alternatives needed
875  // at each stage, and take the minimum value. We prioritize the
876  // instructions by the least number of choices first.
877  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
878  unsigned SchedClass = Inst->getDesc().getSchedClass();
879  unsigned min = UINT_MAX;
880  if (InstrItins && !InstrItins->isEmpty()) {
881  for (const InstrStage &IS :
882  make_range(InstrItins->beginStage(SchedClass),
883  InstrItins->endStage(SchedClass))) {
884  unsigned funcUnits = IS.getUnits();
885  unsigned numAlternatives = countPopulation(funcUnits);
886  if (numAlternatives < min) {
887  min = numAlternatives;
888  F = funcUnits;
889  }
890  }
891  return min;
892  }
893  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
894  const MCSchedClassDesc *SCDesc =
895  STI->getSchedModel().getSchedClassDesc(SchedClass);
896  if (!SCDesc->isValid())
897  // No valid Schedule Class Desc for schedClass, should be
898  // Pseudo/PostRAPseudo
899  return min;
900 
901  for (const MCWriteProcResEntry &PRE :
902  make_range(STI->getWriteProcResBegin(SCDesc),
903  STI->getWriteProcResEnd(SCDesc))) {
904  if (!PRE.Cycles)
905  continue;
906  const MCProcResourceDesc *ProcResource =
907  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
908  unsigned NumUnits = ProcResource->NumUnits;
909  if (NumUnits < min) {
910  min = NumUnits;
911  F = PRE.ProcResourceIdx;
912  }
913  }
914  return min;
915  }
916  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
917  }
918 
919  // Compute the critical resources needed by the instruction. This
920  // function records the functional units needed by instructions that
921  // must use only one functional unit. We use this as a tie breaker
922  // for computing the resource MII. The instrutions that require
923  // the same, highly used, functional unit have high priority.
924  void calcCriticalResources(MachineInstr &MI) {
925  unsigned SchedClass = MI.getDesc().getSchedClass();
926  if (InstrItins && !InstrItins->isEmpty()) {
927  for (const InstrStage &IS :
928  make_range(InstrItins->beginStage(SchedClass),
929  InstrItins->endStage(SchedClass))) {
930  unsigned FuncUnits = IS.getUnits();
931  if (countPopulation(FuncUnits) == 1)
932  Resources[FuncUnits]++;
933  }
934  return;
935  }
936  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
937  const MCSchedClassDesc *SCDesc =
938  STI->getSchedModel().getSchedClassDesc(SchedClass);
939  if (!SCDesc->isValid())
940  // No valid Schedule Class Desc for schedClass, should be
941  // Pseudo/PostRAPseudo
942  return;
943 
944  for (const MCWriteProcResEntry &PRE :
945  make_range(STI->getWriteProcResBegin(SCDesc),
946  STI->getWriteProcResEnd(SCDesc))) {
947  if (!PRE.Cycles)
948  continue;
949  Resources[PRE.ProcResourceIdx]++;
950  }
951  return;
952  }
953  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
954  }
955 
956  /// Return true if IS1 has less priority than IS2.
957  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
958  unsigned F1 = 0, F2 = 0;
959  unsigned MFUs1 = minFuncUnits(IS1, F1);
960  unsigned MFUs2 = minFuncUnits(IS2, F2);
961  if (MFUs1 == 1 && MFUs2 == 1)
962  return Resources.lookup(F1) < Resources.lookup(F2);
963  return MFUs1 > MFUs2;
964  }
965 };
966 
967 } // end anonymous namespace
968 
969 /// Calculate the resource constrained minimum initiation interval for the
970 /// specified loop. We use the DFA to model the resources needed for
971 /// each instruction, and we ignore dependences. A different DFA is created
972 /// for each cycle that is required. When adding a new instruction, we attempt
973 /// to add it to each existing DFA, until a legal space is found. If the
974 /// instruction cannot be reserved in an existing DFA, we create a new one.
975 unsigned SwingSchedulerDAG::calculateResMII() {
976 
977  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
980  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
981 
982  // Sort the instructions by the number of available choices for scheduling,
983  // least to most. Use the number of critical resources as the tie breaker.
984  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
986  E = MBB->getFirstTerminator();
987  I != E; ++I)
988  FUS.calcCriticalResources(*I);
990  FuncUnitOrder(FUS);
991 
993  E = MBB->getFirstTerminator();
994  I != E; ++I)
995  FuncUnitOrder.push(&*I);
996 
997  while (!FuncUnitOrder.empty()) {
998  MachineInstr *MI = FuncUnitOrder.top();
999  FuncUnitOrder.pop();
1000  if (TII->isZeroCost(MI->getOpcode()))
1001  continue;
1002  // Attempt to reserve the instruction in an existing DFA. At least one
1003  // DFA is needed for each cycle.
1004  unsigned NumCycles = getSUnit(MI)->Latency;
1005  unsigned ReservedCycles = 0;
1008  LLVM_DEBUG({
1009  dbgs() << "Trying to reserve resource for " << NumCycles
1010  << " cycles for \n";
1011  MI->dump();
1012  });
1013  for (unsigned C = 0; C < NumCycles; ++C)
1014  while (RI != RE) {
1015  if ((*RI++)->canReserveResources(*MI)) {
1016  ++ReservedCycles;
1017  break;
1018  }
1019  }
1020  // Start reserving resources using existing DFAs.
1021  for (unsigned C = 0; C < ReservedCycles; ++C) {
1022  --RI;
1023  (*RI)->reserveResources(*MI);
1024  }
1025 
1026  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1027  << ", NumCycles:" << NumCycles << "\n");
1028  // Add new DFAs, if needed, to reserve resources.
1029  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1031  << "NewResource created to reserve resources"
1032  << "\n");
1033  ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1034  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1035  NewResource->reserveResources(*MI);
1036  Resources.push_back(NewResource);
1037  }
1038  }
1039  int Resmii = Resources.size();
1040  LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
1041  // Delete the memory for each of the DFAs that were created earlier.
1042  for (ResourceManager *RI : Resources) {
1043  ResourceManager *D = RI;
1044  delete D;
1045  }
1046  Resources.clear();
1047  return Resmii;
1048 }
1049 
1050 /// Calculate the recurrence-constrainted minimum initiation interval.
1051 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1052 /// for each circuit. The II needs to satisfy the inequality
1053 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1054 /// II that satisfies the inequality, and the RecMII is the maximum
1055 /// of those values.
1056 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1057  unsigned RecMII = 0;
1058 
1059  for (NodeSet &Nodes : NodeSets) {
1060  if (Nodes.empty())
1061  continue;
1062 
1063  unsigned Delay = Nodes.getLatency();
1064  unsigned Distance = 1;
1065 
1066  // ii = ceil(delay / distance)
1067  unsigned CurMII = (Delay + Distance - 1) / Distance;
1068  Nodes.setRecMII(CurMII);
1069  if (CurMII > RecMII)
1070  RecMII = CurMII;
1071  }
1072 
1073  return RecMII;
1074 }
1075 
1076 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1077 /// but we do this to find the circuits, and then change them back.
1078 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1080  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1081  SUnit *SU = &SUnits[i];
1082  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1083  IP != EP; ++IP) {
1084  if (IP->getKind() != SDep::Anti)
1085  continue;
1086  DepsAdded.push_back(std::make_pair(SU, *IP));
1087  }
1088  }
1089  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1090  E = DepsAdded.end();
1091  I != E; ++I) {
1092  // Remove this anti dependency and add one in the reverse direction.
1093  SUnit *SU = I->first;
1094  SDep &D = I->second;
1095  SUnit *TargetSU = D.getSUnit();
1096  unsigned Reg = D.getReg();
1097  unsigned Lat = D.getLatency();
1098  SU->removePred(D);
1099  SDep Dep(SU, SDep::Anti, Reg);
1100  Dep.setLatency(Lat);
1101  TargetSU->addPred(Dep);
1102  }
1103 }
1104 
1105 /// Create the adjacency structure of the nodes in the graph.
1106 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1107  SwingSchedulerDAG *DAG) {
1108  BitVector Added(SUnits.size());
1109  DenseMap<int, int> OutputDeps;
1110  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1111  Added.reset();
1112  // Add any successor to the adjacency matrix and exclude duplicates.
1113  for (auto &SI : SUnits[i].Succs) {
1114  // Only create a back-edge on the first and last nodes of a dependence
1115  // chain. This records any chains and adds them later.
1116  if (SI.getKind() == SDep::Output) {
1117  int N = SI.getSUnit()->NodeNum;
1118  int BackEdge = i;
1119  auto Dep = OutputDeps.find(BackEdge);
1120  if (Dep != OutputDeps.end()) {
1121  BackEdge = Dep->second;
1122  OutputDeps.erase(Dep);
1123  }
1124  OutputDeps[N] = BackEdge;
1125  }
1126  // Do not process a boundary node, an artificial node.
1127  // A back-edge is processed only if it goes to a Phi.
1128  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1129  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1130  continue;
1131  int N = SI.getSUnit()->NodeNum;
1132  if (!Added.test(N)) {
1133  AdjK[i].push_back(N);
1134  Added.set(N);
1135  }
1136  }
1137  // A chain edge between a store and a load is treated as a back-edge in the
1138  // adjacency matrix.
1139  for (auto &PI : SUnits[i].Preds) {
1140  if (!SUnits[i].getInstr()->mayStore() ||
1141  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1142  continue;
1143  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1144  int N = PI.getSUnit()->NodeNum;
1145  if (!Added.test(N)) {
1146  AdjK[i].push_back(N);
1147  Added.set(N);
1148  }
1149  }
1150  }
1151  }
1152  // Add back-edges in the adjacency matrix for the output dependences.
1153  for (auto &OD : OutputDeps)
1154  if (!Added.test(OD.second)) {
1155  AdjK[OD.first].push_back(OD.second);
1156  Added.set(OD.second);
1157  }
1158 }
1159 
1160 /// Identify an elementary circuit in the dependence graph starting at the
1161 /// specified node.
1162 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1163  bool HasBackedge) {
1164  SUnit *SV = &SUnits[V];
1165  bool F = false;
1166  Stack.insert(SV);
1167  Blocked.set(V);
1168 
1169  for (auto W : AdjK[V]) {
1170  if (NumPaths > MaxPaths)
1171  break;
1172  if (W < S)
1173  continue;
1174  if (W == S) {
1175  if (!HasBackedge)
1176  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1177  F = true;
1178  ++NumPaths;
1179  break;
1180  } else if (!Blocked.test(W)) {
1181  if (circuit(W, S, NodeSets,
1182  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1183  F = true;
1184  }
1185  }
1186 
1187  if (F)
1188  unblock(V);
1189  else {
1190  for (auto W : AdjK[V]) {
1191  if (W < S)
1192  continue;
1193  if (B[W].count(SV) == 0)
1194  B[W].insert(SV);
1195  }
1196  }
1197  Stack.pop_back();
1198  return F;
1199 }
1200 
1201 /// Unblock a node in the circuit finding algorithm.
1202 void SwingSchedulerDAG::Circuits::unblock(int U) {
1203  Blocked.reset(U);
1204  SmallPtrSet<SUnit *, 4> &BU = B[U];
1205  while (!BU.empty()) {
1207  assert(SI != BU.end() && "Invalid B set.");
1208  SUnit *W = *SI;
1209  BU.erase(W);
1210  if (Blocked.test(W->NodeNum))
1211  unblock(W->NodeNum);
1212  }
1213 }
1214 
1215 /// Identify all the elementary circuits in the dependence graph using
1216 /// Johnson's circuit algorithm.
1217 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1218  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1219  // but we do this to find the circuits, and then change them back.
1220  swapAntiDependences(SUnits);
1221 
1222  Circuits Cir(SUnits, Topo);
1223  // Create the adjacency structure.
1224  Cir.createAdjacencyStructure(this);
1225  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1226  Cir.reset();
1227  Cir.circuit(i, i, NodeSets);
1228  }
1229 
1230  // Change the dependences back so that we've created a DAG again.
1231  swapAntiDependences(SUnits);
1232 }
1233 
1234 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1235 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1236 // additional copies that are needed across iterations. An artificial dependence
1237 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1238 
1239 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1240 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1241 // PHI-------True-Dep------> USEOfPhi
1242 
1243 // The mutation creates
1244 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1245 
1246 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1247 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1248 // late to avoid additional copies across iterations. The possible scheduling
1249 // order would be
1250 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1251 
1253  for (SUnit &SU : DAG->SUnits) {
1254  // Find the COPY/REG_SEQUENCE instruction.
1255  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1256  continue;
1257 
1258  // Record the loop carried PHIs.
1259  SmallVector<SUnit *, 4> PHISUs;
1260  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1261  SmallVector<SUnit *, 4> SrcSUs;
1262 
1263  for (auto &Dep : SU.Preds) {
1264  SUnit *TmpSU = Dep.getSUnit();
1265  MachineInstr *TmpMI = TmpSU->getInstr();
1266  SDep::Kind DepKind = Dep.getKind();
1267  // Save the loop carried PHI.
1268  if (DepKind == SDep::Anti && TmpMI->isPHI())
1269  PHISUs.push_back(TmpSU);
1270  // Save the source of COPY/REG_SEQUENCE.
1271  // If the source has no pre-decessors, we will end up creating cycles.
1272  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1273  SrcSUs.push_back(TmpSU);
1274  }
1275 
1276  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1277  continue;
1278 
1279  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1280  // SUnit to the container.
1281  SmallVector<SUnit *, 8> UseSUs;
1282  for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
1283  for (auto &Dep : (*I)->Succs) {
1284  if (Dep.getKind() != SDep::Data)
1285  continue;
1286 
1287  SUnit *TmpSU = Dep.getSUnit();
1288  MachineInstr *TmpMI = TmpSU->getInstr();
1289  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1290  PHISUs.push_back(TmpSU);
1291  continue;
1292  }
1293  UseSUs.push_back(TmpSU);
1294  }
1295  }
1296 
1297  if (UseSUs.size() == 0)
1298  continue;
1299 
1300  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1301  // Add the artificial dependencies if it does not form a cycle.
1302  for (auto I : UseSUs) {
1303  for (auto Src : SrcSUs) {
1304  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1305  Src->addPred(SDep(I, SDep::Artificial));
1306  SDAG->Topo.AddPred(Src, I);
1307  }
1308  }
1309  }
1310  }
1311 }
1312 
1313 /// Return true for DAG nodes that we ignore when computing the cost functions.
1314 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1315 /// in the calculation of the ASAP, ALAP, etc functions.
1316 static bool ignoreDependence(const SDep &D, bool isPred) {
1317  if (D.isArtificial())
1318  return true;
1319  return D.getKind() == SDep::Anti && isPred;
1320 }
1321 
1322 /// Compute several functions need to order the nodes for scheduling.
1323 /// ASAP - Earliest time to schedule a node.
1324 /// ALAP - Latest time to schedule a node.
1325 /// MOV - Mobility function, difference between ALAP and ASAP.
1326 /// D - Depth of each node.
1327 /// H - Height of each node.
1328 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1329  ScheduleInfo.resize(SUnits.size());
1330 
1331  LLVM_DEBUG({
1332  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1333  E = Topo.end();
1334  I != E; ++I) {
1335  const SUnit &SU = SUnits[*I];
1336  dumpNode(SU);
1337  }
1338  });
1339 
1340  int maxASAP = 0;
1341  // Compute ASAP and ZeroLatencyDepth.
1342  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1343  E = Topo.end();
1344  I != E; ++I) {
1345  int asap = 0;
1346  int zeroLatencyDepth = 0;
1347  SUnit *SU = &SUnits[*I];
1348  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1349  EP = SU->Preds.end();
1350  IP != EP; ++IP) {
1351  SUnit *pred = IP->getSUnit();
1352  if (IP->getLatency() == 0)
1353  zeroLatencyDepth =
1354  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1355  if (ignoreDependence(*IP, true))
1356  continue;
1357  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1358  getDistance(pred, SU, *IP) * MII));
1359  }
1360  maxASAP = std::max(maxASAP, asap);
1361  ScheduleInfo[*I].ASAP = asap;
1362  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1363  }
1364 
1365  // Compute ALAP, ZeroLatencyHeight, and MOV.
1367  E = Topo.rend();
1368  I != E; ++I) {
1369  int alap = maxASAP;
1370  int zeroLatencyHeight = 0;
1371  SUnit *SU = &SUnits[*I];
1372  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1373  ES = SU->Succs.end();
1374  IS != ES; ++IS) {
1375  SUnit *succ = IS->getSUnit();
1376  if (IS->getLatency() == 0)
1377  zeroLatencyHeight =
1378  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1379  if (ignoreDependence(*IS, true))
1380  continue;
1381  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1382  getDistance(SU, succ, *IS) * MII));
1383  }
1384 
1385  ScheduleInfo[*I].ALAP = alap;
1386  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1387  }
1388 
1389  // After computing the node functions, compute the summary for each node set.
1390  for (NodeSet &I : NodeSets)
1391  I.computeNodeSetInfo(this);
1392 
1393  LLVM_DEBUG({
1394  for (unsigned i = 0; i < SUnits.size(); i++) {
1395  dbgs() << "\tNode " << i << ":\n";
1396  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1397  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1398  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1399  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1400  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1401  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1402  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1403  }
1404  });
1405 }
1406 
1407 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1408 /// as the predecessors of the elements of NodeOrder that are not also in
1409 /// NodeOrder.
1412  const NodeSet *S = nullptr) {
1413  Preds.clear();
1414  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1415  I != E; ++I) {
1416  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1417  PI != PE; ++PI) {
1418  if (S && S->count(PI->getSUnit()) == 0)
1419  continue;
1420  if (ignoreDependence(*PI, true))
1421  continue;
1422  if (NodeOrder.count(PI->getSUnit()) == 0)
1423  Preds.insert(PI->getSUnit());
1424  }
1425  // Back-edges are predecessors with an anti-dependence.
1426  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1427  ES = (*I)->Succs.end();
1428  IS != ES; ++IS) {
1429  if (IS->getKind() != SDep::Anti)
1430  continue;
1431  if (S && S->count(IS->getSUnit()) == 0)
1432  continue;
1433  if (NodeOrder.count(IS->getSUnit()) == 0)
1434  Preds.insert(IS->getSUnit());
1435  }
1436  }
1437  return !Preds.empty();
1438 }
1439 
1440 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1441 /// as the successors of the elements of NodeOrder that are not also in
1442 /// NodeOrder.
1445  const NodeSet *S = nullptr) {
1446  Succs.clear();
1447  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1448  I != E; ++I) {
1449  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1450  SI != SE; ++SI) {
1451  if (S && S->count(SI->getSUnit()) == 0)
1452  continue;
1453  if (ignoreDependence(*SI, false))
1454  continue;
1455  if (NodeOrder.count(SI->getSUnit()) == 0)
1456  Succs.insert(SI->getSUnit());
1457  }
1458  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1459  PE = (*I)->Preds.end();
1460  PI != PE; ++PI) {
1461  if (PI->getKind() != SDep::Anti)
1462  continue;
1463  if (S && S->count(PI->getSUnit()) == 0)
1464  continue;
1465  if (NodeOrder.count(PI->getSUnit()) == 0)
1466  Succs.insert(PI->getSUnit());
1467  }
1468  }
1469  return !Succs.empty();
1470 }
1471 
1472 /// Return true if there is a path from the specified node to any of the nodes
1473 /// in DestNodes. Keep track and return the nodes in any path.
1474 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1475  SetVector<SUnit *> &DestNodes,
1476  SetVector<SUnit *> &Exclude,
1477  SmallPtrSet<SUnit *, 8> &Visited) {
1478  if (Cur->isBoundaryNode())
1479  return false;
1480  if (Exclude.count(Cur) != 0)
1481  return false;
1482  if (DestNodes.count(Cur) != 0)
1483  return true;
1484  if (!Visited.insert(Cur).second)
1485  return Path.count(Cur) != 0;
1486  bool FoundPath = false;
1487  for (auto &SI : Cur->Succs)
1488  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1489  for (auto &PI : Cur->Preds)
1490  if (PI.getKind() == SDep::Anti)
1491  FoundPath |=
1492  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1493  if (FoundPath)
1494  Path.insert(Cur);
1495  return FoundPath;
1496 }
1497 
1498 /// Return true if Set1 is a subset of Set2.
1499 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1500  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1501  if (Set2.count(*I) == 0)
1502  return false;
1503  return true;
1504 }
1505 
1506 /// Compute the live-out registers for the instructions in a node-set.
1507 /// The live-out registers are those that are defined in the node-set,
1508 /// but not used. Except for use operands of Phis.
1510  NodeSet &NS) {
1514  SmallSet<unsigned, 4> Uses;
1515  for (SUnit *SU : NS) {
1516  const MachineInstr *MI = SU->getInstr();
1517  if (MI->isPHI())
1518  continue;
1519  for (const MachineOperand &MO : MI->operands())
1520  if (MO.isReg() && MO.isUse()) {
1521  unsigned Reg = MO.getReg();
1523  Uses.insert(Reg);
1524  else if (MRI.isAllocatable(Reg))
1525  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1526  Uses.insert(*Units);
1527  }
1528  }
1529  for (SUnit *SU : NS)
1530  for (const MachineOperand &MO : SU->getInstr()->operands())
1531  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1532  unsigned Reg = MO.getReg();
1534  if (!Uses.count(Reg))
1535  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1537  } else if (MRI.isAllocatable(Reg)) {
1538  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1539  if (!Uses.count(*Units))
1540  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1542  }
1543  }
1544  RPTracker.addLiveRegs(LiveOutRegs);
1545 }
1546 
1547 /// A heuristic to filter nodes in recurrent node-sets if the register
1548 /// pressure of a set is too high.
1549 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1550  for (auto &NS : NodeSets) {
1551  // Skip small node-sets since they won't cause register pressure problems.
1552  if (NS.size() <= 2)
1553  continue;
1554  IntervalPressure RecRegPressure;
1555  RegPressureTracker RecRPTracker(RecRegPressure);
1556  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1557  computeLiveOuts(MF, RecRPTracker, NS);
1558  RecRPTracker.closeBottom();
1559 
1560  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1561  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1562  return A->NodeNum > B->NodeNum;
1563  });
1564 
1565  for (auto &SU : SUnits) {
1566  // Since we're computing the register pressure for a subset of the
1567  // instructions in a block, we need to set the tracker for each
1568  // instruction in the node-set. The tracker is set to the instruction
1569  // just after the one we're interested in.
1570  MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
1571  RecRPTracker.setPos(std::next(CurInstI));
1572 
1573  RegPressureDelta RPDelta;
1574  ArrayRef<PressureChange> CriticalPSets;
1575  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1576  CriticalPSets,
1577  RecRegPressure.MaxSetPressure);
1578  if (RPDelta.Excess.isValid()) {
1579  LLVM_DEBUG(
1580  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1581  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1582  << ":" << RPDelta.Excess.getUnitInc());
1583  NS.setExceedPressure(SU);
1584  break;
1585  }
1586  RecRPTracker.recede();
1587  }
1588  }
1589 }
1590 
1591 /// A heuristic to colocate node sets that have the same set of
1592 /// successors.
1593 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1594  unsigned Colocate = 0;
1595  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1596  NodeSet &N1 = NodeSets[i];
1598  if (N1.empty() || !succ_L(N1, S1))
1599  continue;
1600  for (int j = i + 1; j < e; ++j) {
1601  NodeSet &N2 = NodeSets[j];
1602  if (N1.compareRecMII(N2) != 0)
1603  continue;
1605  if (N2.empty() || !succ_L(N2, S2))
1606  continue;
1607  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1608  N1.setColocate(++Colocate);
1609  N2.setColocate(Colocate);
1610  break;
1611  }
1612  }
1613  }
1614 }
1615 
1616 /// Check if the existing node-sets are profitable. If not, then ignore the
1617 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1618 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1619 /// then it's best to try to schedule all instructions together instead of
1620 /// starting with the recurrent node-sets.
1621 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1622  // Look for loops with a large MII.
1623  if (MII < 17)
1624  return;
1625  // Check if the node-set contains only a simple add recurrence.
1626  for (auto &NS : NodeSets) {
1627  if (NS.getRecMII() > 2)
1628  return;
1629  if (NS.getMaxDepth() > MII)
1630  return;
1631  }
1632  NodeSets.clear();
1633  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1634  return;
1635 }
1636 
1637 /// Add the nodes that do not belong to a recurrence set into groups
1638 /// based upon connected componenets.
1639 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1640  SetVector<SUnit *> NodesAdded;
1641  SmallPtrSet<SUnit *, 8> Visited;
1642  // Add the nodes that are on a path between the previous node sets and
1643  // the current node set.
1644  for (NodeSet &I : NodeSets) {
1646  // Add the nodes from the current node set to the previous node set.
1647  if (succ_L(I, N)) {
1648  SetVector<SUnit *> Path;
1649  for (SUnit *NI : N) {
1650  Visited.clear();
1651  computePath(NI, Path, NodesAdded, I, Visited);
1652  }
1653  if (!Path.empty())
1654  I.insert(Path.begin(), Path.end());
1655  }
1656  // Add the nodes from the previous node set to the current node set.
1657  N.clear();
1658  if (succ_L(NodesAdded, N)) {
1659  SetVector<SUnit *> Path;
1660  for (SUnit *NI : N) {
1661  Visited.clear();
1662  computePath(NI, Path, I, NodesAdded, Visited);
1663  }
1664  if (!Path.empty())
1665  I.insert(Path.begin(), Path.end());
1666  }
1667  NodesAdded.insert(I.begin(), I.end());
1668  }
1669 
1670  // Create a new node set with the connected nodes of any successor of a node
1671  // in a recurrent set.
1672  NodeSet NewSet;
1674  if (succ_L(NodesAdded, N))
1675  for (SUnit *I : N)
1676  addConnectedNodes(I, NewSet, NodesAdded);
1677  if (!NewSet.empty())
1678  NodeSets.push_back(NewSet);
1679 
1680  // Create a new node set with the connected nodes of any predecessor of a node
1681  // in a recurrent set.
1682  NewSet.clear();
1683  if (pred_L(NodesAdded, N))
1684  for (SUnit *I : N)
1685  addConnectedNodes(I, NewSet, NodesAdded);
1686  if (!NewSet.empty())
1687  NodeSets.push_back(NewSet);
1688 
1689  // Create new nodes sets with the connected nodes any remaining node that
1690  // has no predecessor.
1691  for (unsigned i = 0; i < SUnits.size(); ++i) {
1692  SUnit *SU = &SUnits[i];
1693  if (NodesAdded.count(SU) == 0) {
1694  NewSet.clear();
1695  addConnectedNodes(SU, NewSet, NodesAdded);
1696  if (!NewSet.empty())
1697  NodeSets.push_back(NewSet);
1698  }
1699  }
1700 }
1701 
1702 /// Add the node to the set, and add all of its connected nodes to the set.
1703 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1704  SetVector<SUnit *> &NodesAdded) {
1705  NewSet.insert(SU);
1706  NodesAdded.insert(SU);
1707  for (auto &SI : SU->Succs) {
1708  SUnit *Successor = SI.getSUnit();
1709  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1710  addConnectedNodes(Successor, NewSet, NodesAdded);
1711  }
1712  for (auto &PI : SU->Preds) {
1713  SUnit *Predecessor = PI.getSUnit();
1714  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1715  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1716  }
1717 }
1718 
1719 /// Return true if Set1 contains elements in Set2. The elements in common
1720 /// are returned in a different container.
1721 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1722  SmallSetVector<SUnit *, 8> &Result) {
1723  Result.clear();
1724  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1725  SUnit *SU = Set1[i];
1726  if (Set2.count(SU) != 0)
1727  Result.insert(SU);
1728  }
1729  return !Result.empty();
1730 }
1731 
1732 /// Merge the recurrence node sets that have the same initial node.
1733 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1734  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1735  ++I) {
1736  NodeSet &NI = *I;
1737  for (NodeSetType::iterator J = I + 1; J != E;) {
1738  NodeSet &NJ = *J;
1739  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1740  if (NJ.compareRecMII(NI) > 0)
1741  NI.setRecMII(NJ.getRecMII());
1742  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1743  ++NII)
1744  I->insert(*NII);
1745  NodeSets.erase(J);
1746  E = NodeSets.end();
1747  } else {
1748  ++J;
1749  }
1750  }
1751  }
1752 }
1753 
1754 /// Remove nodes that have been scheduled in previous NodeSets.
1755 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1756  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1757  ++I)
1758  for (NodeSetType::iterator J = I + 1; J != E;) {
1759  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1760 
1761  if (J->empty()) {
1762  NodeSets.erase(J);
1763  E = NodeSets.end();
1764  } else {
1765  ++J;
1766  }
1767  }
1768 }
1769 
1770 /// Compute an ordered list of the dependence graph nodes, which
1771 /// indicates the order that the nodes will be scheduled. This is a
1772 /// two-level algorithm. First, a partial order is created, which
1773 /// consists of a list of sets ordered from highest to lowest priority.
1774 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1776  NodeOrder.clear();
1777 
1778  for (auto &Nodes : NodeSets) {
1779  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1780  OrderKind Order;
1782  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
1783  R.insert(N.begin(), N.end());
1784  Order = BottomUp;
1785  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1786  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
1787  R.insert(N.begin(), N.end());
1788  Order = TopDown;
1789  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1790  } else if (isIntersect(N, Nodes, R)) {
1791  // If some of the successors are in the existing node-set, then use the
1792  // top-down ordering.
1793  Order = TopDown;
1794  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1795  } else if (NodeSets.size() == 1) {
1796  for (auto &N : Nodes)
1797  if (N->Succs.size() == 0)
1798  R.insert(N);
1799  Order = BottomUp;
1800  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1801  } else {
1802  // Find the node with the highest ASAP.
1803  SUnit *maxASAP = nullptr;
1804  for (SUnit *SU : Nodes) {
1805  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1806  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1807  maxASAP = SU;
1808  }
1809  R.insert(maxASAP);
1810  Order = BottomUp;
1811  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1812  }
1813 
1814  while (!R.empty()) {
1815  if (Order == TopDown) {
1816  // Choose the node with the maximum height. If more than one, choose
1817  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1818  // choose the node with the lowest MOV.
1819  while (!R.empty()) {
1820  SUnit *maxHeight = nullptr;
1821  for (SUnit *I : R) {
1822  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1823  maxHeight = I;
1824  else if (getHeight(I) == getHeight(maxHeight) &&
1825  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
1826  maxHeight = I;
1827  else if (getHeight(I) == getHeight(maxHeight) &&
1828  getZeroLatencyHeight(I) ==
1829  getZeroLatencyHeight(maxHeight) &&
1830  getMOV(I) < getMOV(maxHeight))
1831  maxHeight = I;
1832  }
1833  NodeOrder.insert(maxHeight);
1834  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1835  R.remove(maxHeight);
1836  for (const auto &I : maxHeight->Succs) {
1837  if (Nodes.count(I.getSUnit()) == 0)
1838  continue;
1839  if (NodeOrder.count(I.getSUnit()) != 0)
1840  continue;
1841  if (ignoreDependence(I, false))
1842  continue;
1843  R.insert(I.getSUnit());
1844  }
1845  // Back-edges are predecessors with an anti-dependence.
1846  for (const auto &I : maxHeight->Preds) {
1847  if (I.getKind() != SDep::Anti)
1848  continue;
1849  if (Nodes.count(I.getSUnit()) == 0)
1850  continue;
1851  if (NodeOrder.count(I.getSUnit()) != 0)
1852  continue;
1853  R.insert(I.getSUnit());
1854  }
1855  }
1856  Order = BottomUp;
1857  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1859  if (pred_L(NodeOrder, N, &Nodes))
1860  R.insert(N.begin(), N.end());
1861  } else {
1862  // Choose the node with the maximum depth. If more than one, choose
1863  // the node with the maximum ZeroLatencyDepth. If still more than one,
1864  // choose the node with the lowest MOV.
1865  while (!R.empty()) {
1866  SUnit *maxDepth = nullptr;
1867  for (SUnit *I : R) {
1868  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1869  maxDepth = I;
1870  else if (getDepth(I) == getDepth(maxDepth) &&
1871  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
1872  maxDepth = I;
1873  else if (getDepth(I) == getDepth(maxDepth) &&
1874  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1875  getMOV(I) < getMOV(maxDepth))
1876  maxDepth = I;
1877  }
1878  NodeOrder.insert(maxDepth);
1879  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1880  R.remove(maxDepth);
1881  if (Nodes.isExceedSU(maxDepth)) {
1882  Order = TopDown;
1883  R.clear();
1884  R.insert(Nodes.getNode(0));
1885  break;
1886  }
1887  for (const auto &I : maxDepth->Preds) {
1888  if (Nodes.count(I.getSUnit()) == 0)
1889  continue;
1890  if (NodeOrder.count(I.getSUnit()) != 0)
1891  continue;
1892  R.insert(I.getSUnit());
1893  }
1894  // Back-edges are predecessors with an anti-dependence.
1895  for (const auto &I : maxDepth->Succs) {
1896  if (I.getKind() != SDep::Anti)
1897  continue;
1898  if (Nodes.count(I.getSUnit()) == 0)
1899  continue;
1900  if (NodeOrder.count(I.getSUnit()) != 0)
1901  continue;
1902  R.insert(I.getSUnit());
1903  }
1904  }
1905  Order = TopDown;
1906  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1908  if (succ_L(NodeOrder, N, &Nodes))
1909  R.insert(N.begin(), N.end());
1910  }
1911  }
1912  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1913  }
1914 
1915  LLVM_DEBUG({
1916  dbgs() << "Node order: ";
1917  for (SUnit *I : NodeOrder)
1918  dbgs() << " " << I->NodeNum << " ";
1919  dbgs() << "\n";
1920  });
1921 }
1922 
1923 /// Process the nodes in the computed order and create the pipelined schedule
1924 /// of the instructions, if possible. Return true if a schedule is found.
1925 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
1926 
1927  if (NodeOrder.empty()){
1928  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
1929  return false;
1930  }
1931 
1932  bool scheduleFound = false;
1933  unsigned II = 0;
1934  // Keep increasing II until a valid schedule is found.
1935  for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
1936  Schedule.reset();
1937  Schedule.setInitiationInterval(II);
1938  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
1939 
1942  do {
1943  SUnit *SU = *NI;
1944 
1945  // Compute the schedule time for the instruction, which is based
1946  // upon the scheduled time for any predecessors/successors.
1947  int EarlyStart = INT_MIN;
1948  int LateStart = INT_MAX;
1949  // These values are set when the size of the schedule window is limited
1950  // due to chain dependences.
1951  int SchedEnd = INT_MAX;
1952  int SchedStart = INT_MIN;
1953  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1954  II, this);
1955  LLVM_DEBUG({
1956  dbgs() << "\n";
1957  dbgs() << "Inst (" << SU->NodeNum << ") ";
1958  SU->getInstr()->dump();
1959  dbgs() << "\n";
1960  });
1961  LLVM_DEBUG({
1962  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
1963  LateStart, SchedEnd, SchedStart);
1964  });
1965 
1966  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
1967  SchedStart > LateStart)
1968  scheduleFound = false;
1969  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
1970  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
1971  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1972  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
1973  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
1974  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
1975  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
1976  SchedEnd =
1977  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
1978  // When scheduling a Phi it is better to start at the late cycle and go
1979  // backwards. The default order may insert the Phi too far away from
1980  // its first dependence.
1981  if (SU->getInstr()->isPHI())
1982  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
1983  else
1984  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1985  } else {
1986  int FirstCycle = Schedule.getFirstCycle();
1987  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
1988  FirstCycle + getASAP(SU) + II - 1, II);
1989  }
1990  // Even if we find a schedule, make sure the schedule doesn't exceed the
1991  // allowable number of stages. We keep trying if this happens.
1992  if (scheduleFound)
1993  if (SwpMaxStages > -1 &&
1994  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
1995  scheduleFound = false;
1996 
1997  LLVM_DEBUG({
1998  if (!scheduleFound)
1999  dbgs() << "\tCan't schedule\n";
2000  });
2001  } while (++NI != NE && scheduleFound);
2002 
2003  // If a schedule is found, check if it is a valid schedule too.
2004  if (scheduleFound)
2005  scheduleFound = Schedule.isValidSchedule(this);
2006  }
2007 
2008  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
2009  << ")\n");
2010 
2011  if (scheduleFound)
2012  Schedule.finalizeSchedule(this);
2013  else
2014  Schedule.reset();
2015 
2016  return scheduleFound && Schedule.getMaxStageCount() > 0;
2017 }
2018 
2019 /// Given a schedule for the loop, generate a new version of the loop,
2020 /// and replace the old version. This function generates a prolog
2021 /// that contains the initial iterations in the pipeline, and kernel
2022 /// loop, and the epilogue that contains the code for the final
2023 /// iterations.
2024 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2025  // Create a new basic block for the kernel and add it to the CFG.
2026  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2027 
2028  unsigned MaxStageCount = Schedule.getMaxStageCount();
2029 
2030  // Remember the registers that are used in different stages. The index is
2031  // the iteration, or stage, that the instruction is scheduled in. This is
2032  // a map between register names in the original block and the names created
2033  // in each stage of the pipelined loop.
2034  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2035  InstrMapTy InstrMap;
2036 
2038 
2039  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2040  assert(PreheaderBB != nullptr &&
2041  "Need to add code to handle loops w/o preheader");
2042  // Generate the prolog instructions that set up the pipeline.
2043  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2044  MF.insert(BB->getIterator(), KernelBB);
2045 
2046  // Rearrange the instructions to generate the new, pipelined loop,
2047  // and update register names as needed.
2048  for (int Cycle = Schedule.getFirstCycle(),
2049  LastCycle = Schedule.getFinalCycle();
2050  Cycle <= LastCycle; ++Cycle) {
2051  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2052  // This inner loop schedules each instruction in the cycle.
2053  for (SUnit *CI : CycleInstrs) {
2054  if (CI->getInstr()->isPHI())
2055  continue;
2056  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2057  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2058  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2059  KernelBB->push_back(NewMI);
2060  InstrMap[NewMI] = CI->getInstr();
2061  }
2062  }
2063 
2064  // Copy any terminator instructions to the new kernel, and update
2065  // names as needed.
2066  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2067  E = BB->instr_end();
2068  I != E; ++I) {
2069  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2070  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2071  KernelBB->push_back(NewMI);
2072  InstrMap[NewMI] = &*I;
2073  }
2074 
2075  KernelBB->transferSuccessors(BB);
2076  KernelBB->replaceSuccessor(BB, KernelBB);
2077 
2078  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2079  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2080  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2081  InstrMap, MaxStageCount, MaxStageCount, false);
2082 
2083  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2084 
2086  // Generate the epilog instructions to complete the pipeline.
2087  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2088  PrologBBs);
2089 
2090  // We need this step because the register allocation doesn't handle some
2091  // situations well, so we insert copies to help out.
2092  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2093 
2094  // Remove dead instructions due to loop induction variables.
2095  removeDeadInstructions(KernelBB, EpilogBBs);
2096 
2097  // Add branches between prolog and epilog blocks.
2098  addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2099 
2100  // Remove the original loop since it's no longer referenced.
2101  for (auto &I : *BB)
2102  LIS.RemoveMachineInstrFromMaps(I);
2103  BB->clear();
2104  BB->eraseFromParent();
2105 
2106  delete[] VRMap;
2107 }
2108 
2109 /// Generate the pipeline prolog code.
2110 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2111  MachineBasicBlock *KernelBB,
2112  ValueMapTy *VRMap,
2113  MBBVectorTy &PrologBBs) {
2114  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2115  assert(PreheaderBB != nullptr &&
2116  "Need to add code to handle loops w/o preheader");
2117  MachineBasicBlock *PredBB = PreheaderBB;
2118  InstrMapTy InstrMap;
2119 
2120  // Generate a basic block for each stage, not including the last stage,
2121  // which will be generated in the kernel. Each basic block may contain
2122  // instructions from multiple stages/iterations.
2123  for (unsigned i = 0; i < LastStage; ++i) {
2124  // Create and insert the prolog basic block prior to the original loop
2125  // basic block. The original loop is removed later.
2126  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2127  PrologBBs.push_back(NewBB);
2128  MF.insert(BB->getIterator(), NewBB);
2129  NewBB->transferSuccessors(PredBB);
2130  PredBB->addSuccessor(NewBB);
2131  PredBB = NewBB;
2132 
2133  // Generate instructions for each appropriate stage. Process instructions
2134  // in original program order.
2135  for (int StageNum = i; StageNum >= 0; --StageNum) {
2136  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2137  BBE = BB->getFirstTerminator();
2138  BBI != BBE; ++BBI) {
2139  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2140  if (BBI->isPHI())
2141  continue;
2142  MachineInstr *NewMI =
2143  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2144  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2145  VRMap);
2146  NewBB->push_back(NewMI);
2147  InstrMap[NewMI] = &*BBI;
2148  }
2149  }
2150  }
2151  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2152  LLVM_DEBUG({
2153  dbgs() << "prolog:\n";
2154  NewBB->dump();
2155  });
2156  }
2157 
2158  PredBB->replaceSuccessor(BB, KernelBB);
2159 
2160  // Check if we need to remove the branch from the preheader to the original
2161  // loop, and replace it with a branch to the new loop.
2162  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2163  if (numBranches) {
2165  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2166  }
2167 }
2168 
2169 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2170 /// that were started in either the prolog or the kernel. We create a basic
2171 /// block for each stage that needs to complete.
2172 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2173  MachineBasicBlock *KernelBB,
2174  ValueMapTy *VRMap,
2175  MBBVectorTy &EpilogBBs,
2176  MBBVectorTy &PrologBBs) {
2177  // We need to change the branch from the kernel to the first epilog block, so
2178  // this call to analyze branch uses the kernel rather than the original BB.
2179  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2181  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2182  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2183  if (checkBranch)
2184  return;
2185 
2186  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2187  if (*LoopExitI == KernelBB)
2188  ++LoopExitI;
2189  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2190  MachineBasicBlock *LoopExitBB = *LoopExitI;
2191 
2192  MachineBasicBlock *PredBB = KernelBB;
2193  MachineBasicBlock *EpilogStart = LoopExitBB;
2194  InstrMapTy InstrMap;
2195 
2196  // Generate a basic block for each stage, not including the last stage,
2197  // which was generated for the kernel. Each basic block may contain
2198  // instructions from multiple stages/iterations.
2199  int EpilogStage = LastStage + 1;
2200  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2202  EpilogBBs.push_back(NewBB);
2203  MF.insert(BB->getIterator(), NewBB);
2204 
2205  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2206  NewBB->addSuccessor(LoopExitBB);
2207 
2208  if (EpilogStart == LoopExitBB)
2209  EpilogStart = NewBB;
2210 
2211  // Add instructions to the epilog depending on the current block.
2212  // Process instructions in original program order.
2213  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2214  for (auto &BBI : *BB) {
2215  if (BBI.isPHI())
2216  continue;
2217  MachineInstr *In = &BBI;
2218  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2219  // Instructions with memoperands in the epilog are updated with
2220  // conservative values.
2221  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
2222  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2223  NewBB->push_back(NewMI);
2224  InstrMap[NewMI] = In;
2225  }
2226  }
2227  }
2228  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2229  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2230  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2231  InstrMap, LastStage, EpilogStage, i == 1);
2232  PredBB = NewBB;
2233 
2234  LLVM_DEBUG({
2235  dbgs() << "epilog:\n";
2236  NewBB->dump();
2237  });
2238  }
2239 
2240  // Fix any Phi nodes in the loop exit block.
2241  for (MachineInstr &MI : *LoopExitBB) {
2242  if (!MI.isPHI())
2243  break;
2244  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2245  MachineOperand &MO = MI.getOperand(i);
2246  if (MO.getMBB() == BB)
2247  MO.setMBB(PredBB);
2248  }
2249  }
2250 
2251  // Create a branch to the new epilog from the kernel.
2252  // Remove the original branch and add a new branch to the epilog.
2253  TII->removeBranch(*KernelBB);
2254  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2255  // Add a branch to the loop exit.
2256  if (EpilogBBs.size() > 0) {
2257  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2259  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2260  }
2261 }
2262 
2263 /// Replace all uses of FromReg that appear outside the specified
2264 /// basic block with ToReg.
2265 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2266  MachineBasicBlock *MBB,
2268  LiveIntervals &LIS) {
2269  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2270  E = MRI.use_end();
2271  I != E;) {
2272  MachineOperand &O = *I;
2273  ++I;
2274  if (O.getParent()->getParent() != MBB)
2275  O.setReg(ToReg);
2276  }
2277  if (!LIS.hasInterval(ToReg))
2278  LIS.createEmptyInterval(ToReg);
2279 }
2280 
2281 /// Return true if the register has a use that occurs outside the
2282 /// specified loop.
2283 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2286  E = MRI.use_end();
2287  I != E; ++I)
2288  if (I->getParent()->getParent() != BB)
2289  return true;
2290  return false;
2291 }
2292 
2293 /// Generate Phis for the specific block in the generated pipelined code.
2294 /// This function looks at the Phis from the original code to guide the
2295 /// creation of new Phis.
2296 void SwingSchedulerDAG::generateExistingPhis(
2298  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2299  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2300  bool IsLast) {
2301  // Compute the stage number for the initial value of the Phi, which
2302  // comes from the prolog. The prolog to use depends on to which kernel/
2303  // epilog that we're adding the Phi.
2304  unsigned PrologStage = 0;
2305  unsigned PrevStage = 0;
2306  bool InKernel = (LastStageNum == CurStageNum);
2307  if (InKernel) {
2308  PrologStage = LastStageNum - 1;
2309  PrevStage = CurStageNum;
2310  } else {
2311  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2312  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2313  }
2314 
2315  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2316  BBE = BB->getFirstNonPHI();
2317  BBI != BBE; ++BBI) {
2318  unsigned Def = BBI->getOperand(0).getReg();
2319 
2320  unsigned InitVal = 0;
2321  unsigned LoopVal = 0;
2322  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2323 
2324  unsigned PhiOp1 = 0;
2325  // The Phi value from the loop body typically is defined in the loop, but
2326  // not always. So, we need to check if the value is defined in the loop.
2327  unsigned PhiOp2 = LoopVal;
2328  if (VRMap[LastStageNum].count(LoopVal))
2329  PhiOp2 = VRMap[LastStageNum][LoopVal];
2330 
2331  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2332  int LoopValStage =
2333  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2334  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2335  if (NumStages == 0) {
2336  // We don't need to generate a Phi anymore, but we need to rename any uses
2337  // of the Phi value.
2338  unsigned NewReg = VRMap[PrevStage][LoopVal];
2339  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2340  Def, InitVal, NewReg);
2341  if (VRMap[CurStageNum].count(LoopVal))
2342  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2343  }
2344  // Adjust the number of Phis needed depending on the number of prologs left,
2345  // and the distance from where the Phi is first scheduled. The number of
2346  // Phis cannot exceed the number of prolog stages. Each stage can
2347  // potentially define two values.
2348  unsigned MaxPhis = PrologStage + 2;
2349  if (!InKernel && (int)PrologStage <= LoopValStage)
2350  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
2351  unsigned NumPhis = std::min(NumStages, MaxPhis);
2352 
2353  unsigned NewReg = 0;
2354  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2355  // In the epilog, we may need to look back one stage to get the correct
2356  // Phi name because the epilog and prolog blocks execute the same stage.
2357  // The correct name is from the previous block only when the Phi has
2358  // been completely scheduled prior to the epilog, and Phi value is not
2359  // needed in multiple stages.
2360  int StageDiff = 0;
2361  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2362  NumPhis == 1)
2363  StageDiff = 1;
2364  // Adjust the computations below when the phi and the loop definition
2365  // are scheduled in different stages.
2366  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2367  StageDiff = StageScheduled - LoopValStage;
2368  for (unsigned np = 0; np < NumPhis; ++np) {
2369  // If the Phi hasn't been scheduled, then use the initial Phi operand
2370  // value. Otherwise, use the scheduled version of the instruction. This
2371  // is a little complicated when a Phi references another Phi.
2372  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2373  PhiOp1 = InitVal;
2374  // Check if the Phi has already been scheduled in a prolog stage.
2375  else if (PrologStage >= AccessStage + StageDiff + np &&
2376  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2377  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2378  // Check if the Phi has already been scheduled, but the loop instruction
2379  // is either another Phi, or doesn't occur in the loop.
2380  else if (PrologStage >= AccessStage + StageDiff + np) {
2381  // If the Phi references another Phi, we need to examine the other
2382  // Phi to get the correct value.
2383  PhiOp1 = LoopVal;
2384  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2385  int Indirects = 1;
2386  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2387  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2388  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2389  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2390  else
2391  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2392  InstOp1 = MRI.getVRegDef(PhiOp1);
2393  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2394  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2395  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2396  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2397  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2398  break;
2399  }
2400  ++Indirects;
2401  }
2402  } else
2403  PhiOp1 = InitVal;
2404  // If this references a generated Phi in the kernel, get the Phi operand
2405  // from the incoming block.
2406  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2407  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2408  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2409 
2410  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2411  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2412  // In the epilog, a map lookup is needed to get the value from the kernel,
2413  // or previous epilog block. How is does this depends on if the
2414  // instruction is scheduled in the previous block.
2415  if (!InKernel) {
2416  int StageDiffAdj = 0;
2417  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2418  StageDiffAdj = StageScheduled - LoopValStage;
2419  // Use the loop value defined in the kernel, unless the kernel
2420  // contains the last definition of the Phi.
2421  if (np == 0 && PrevStage == LastStageNum &&
2422  (StageScheduled != 0 || LoopValStage != 0) &&
2423  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2424  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2425  // Use the value defined by the Phi. We add one because we switch
2426  // from looking at the loop value to the Phi definition.
2427  else if (np > 0 && PrevStage == LastStageNum &&
2428  VRMap[PrevStage - np + 1].count(Def))
2429  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2430  // Use the loop value defined in the kernel.
2431  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
2432  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2433  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2434  // Use the value defined by the Phi, unless we're generating the first
2435  // epilog and the Phi refers to a Phi in a different stage.
2436  else if (VRMap[PrevStage - np].count(Def) &&
2437  (!LoopDefIsPhi || PrevStage != LastStageNum))
2438  PhiOp2 = VRMap[PrevStage - np][Def];
2439  }
2440 
2441  // Check if we can reuse an existing Phi. This occurs when a Phi
2442  // references another Phi, and the other Phi is scheduled in an
2443  // earlier stage. We can try to reuse an existing Phi up until the last
2444  // stage of the current Phi.
2445  if (LoopDefIsPhi) {
2446  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
2447  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2448  int StageDiff = (StageScheduled - LoopValStage);
2449  LVNumStages -= StageDiff;
2450  // Make sure the loop value Phi has been processed already.
2451  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
2452  NewReg = PhiOp2;
2453  unsigned ReuseStage = CurStageNum;
2454  if (Schedule.isLoopCarried(this, *PhiInst))
2455  ReuseStage -= LVNumStages;
2456  // Check if the Phi to reuse has been generated yet. If not, then
2457  // there is nothing to reuse.
2458  if (VRMap[ReuseStage - np].count(LoopVal)) {
2459  NewReg = VRMap[ReuseStage - np][LoopVal];
2460 
2461  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2462  &*BBI, Def, NewReg);
2463  // Update the map with the new Phi name.
2464  VRMap[CurStageNum - np][Def] = NewReg;
2465  PhiOp2 = NewReg;
2466  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2467  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2468 
2469  if (IsLast && np == NumPhis - 1)
2470  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2471  continue;
2472  }
2473  }
2474  }
2475  if (InKernel && StageDiff > 0 &&
2476  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2477  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2478  }
2479 
2480  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2481  NewReg = MRI.createVirtualRegister(RC);
2482 
2483  MachineInstrBuilder NewPhi =
2484  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2485  TII->get(TargetOpcode::PHI), NewReg);
2486  NewPhi.addReg(PhiOp1).addMBB(BB1);
2487  NewPhi.addReg(PhiOp2).addMBB(BB2);
2488  if (np == 0)
2489  InstrMap[NewPhi] = &*BBI;
2490 
2491  // We define the Phis after creating the new pipelined code, so
2492  // we need to rename the Phi values in scheduled instructions.
2493 
2494  unsigned PrevReg = 0;
2495  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2496  PrevReg = VRMap[PrevStage - np][LoopVal];
2497  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2498  Def, NewReg, PrevReg);
2499  // If the Phi has been scheduled, use the new name for rewriting.
2500  if (VRMap[CurStageNum - np].count(Def)) {
2501  unsigned R = VRMap[CurStageNum - np][Def];
2502  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2503  R, NewReg);
2504  }
2505 
2506  // Check if we need to rename any uses that occurs after the loop. The
2507  // register to replace depends on whether the Phi is scheduled in the
2508  // epilog.
2509  if (IsLast && np == NumPhis - 1)
2510  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2511 
2512  // In the kernel, a dependent Phi uses the value from this Phi.
2513  if (InKernel)
2514  PhiOp2 = NewReg;
2515 
2516  // Update the map with the new Phi name.
2517  VRMap[CurStageNum - np][Def] = NewReg;
2518  }
2519 
2520  while (NumPhis++ < NumStages) {
2521  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2522  &*BBI, Def, NewReg, 0);
2523  }
2524 
2525  // Check if we need to rename a Phi that has been eliminated due to
2526  // scheduling.
2527  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2528  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2529  }
2530 }
2531 
2532 /// Generate Phis for the specified block in the generated pipelined code.
2533 /// These are new Phis needed because the definition is scheduled after the
2534 /// use in the pipelined sequence.
2535 void SwingSchedulerDAG::generatePhis(
2537  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2538  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2539  bool IsLast) {
2540  // Compute the stage number that contains the initial Phi value, and
2541  // the Phi from the previous stage.
2542  unsigned PrologStage = 0;
2543  unsigned PrevStage = 0;
2544  unsigned StageDiff = CurStageNum - LastStageNum;
2545  bool InKernel = (StageDiff == 0);
2546  if (InKernel) {
2547  PrologStage = LastStageNum - 1;
2548  PrevStage = CurStageNum;
2549  } else {
2550  PrologStage = LastStageNum - StageDiff;
2551  PrevStage = LastStageNum + StageDiff - 1;
2552  }
2553 
2554  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2555  BBE = BB->instr_end();
2556  BBI != BBE; ++BBI) {
2557  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2558  MachineOperand &MO = BBI->getOperand(i);
2559  if (!MO.isReg() || !MO.isDef() ||
2561  continue;
2562 
2563  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2564  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2565  unsigned Def = MO.getReg();
2566  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2567  // An instruction scheduled in stage 0 and is used after the loop
2568  // requires a phi in the epilog for the last definition from either
2569  // the kernel or prolog.
2570  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2571  hasUseAfterLoop(Def, BB, MRI))
2572  NumPhis = 1;
2573  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2574  continue;
2575 
2576  unsigned PhiOp2 = VRMap[PrevStage][Def];
2577  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2578  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2579  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2580  // The number of Phis can't exceed the number of prolog stages. The
2581  // prolog stage number is zero based.
2582  if (NumPhis > PrologStage + 1 - StageScheduled)
2583  NumPhis = PrologStage + 1 - StageScheduled;
2584  for (unsigned np = 0; np < NumPhis; ++np) {
2585  unsigned PhiOp1 = VRMap[PrologStage][Def];
2586  if (np <= PrologStage)
2587  PhiOp1 = VRMap[PrologStage - np][Def];
2588  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2589  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2590  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2591  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2592  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2593  }
2594  if (!InKernel)
2595  PhiOp2 = VRMap[PrevStage - np][Def];
2596 
2597  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2598  unsigned NewReg = MRI.createVirtualRegister(RC);
2599 
2600  MachineInstrBuilder NewPhi =
2601  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2602  TII->get(TargetOpcode::PHI), NewReg);
2603  NewPhi.addReg(PhiOp1).addMBB(BB1);
2604  NewPhi.addReg(PhiOp2).addMBB(BB2);
2605  if (np == 0)
2606  InstrMap[NewPhi] = &*BBI;
2607 
2608  // Rewrite uses and update the map. The actions depend upon whether
2609  // we generating code for the kernel or epilog blocks.
2610  if (InKernel) {
2611  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2612  &*BBI, PhiOp1, NewReg);
2613  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2614  &*BBI, PhiOp2, NewReg);
2615 
2616  PhiOp2 = NewReg;
2617  VRMap[PrevStage - np - 1][Def] = NewReg;
2618  } else {
2619  VRMap[CurStageNum - np][Def] = NewReg;
2620  if (np == NumPhis - 1)
2621  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2622  &*BBI, Def, NewReg);
2623  }
2624  if (IsLast && np == NumPhis - 1)
2625  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2626  }
2627  }
2628  }
2629 }
2630 
2631 /// Remove instructions that generate values with no uses.
2632 /// Typically, these are induction variable operations that generate values
2633 /// used in the loop itself. A dead instruction has a definition with
2634 /// no uses, or uses that occur in the original loop only.
2635 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2636  MBBVectorTy &EpilogBBs) {
2637  // For each epilog block, check that the value defined by each instruction
2638  // is used. If not, delete it.
2639  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2640  MBE = EpilogBBs.rend();
2641  MBB != MBE; ++MBB)
2642  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2643  ME = (*MBB)->instr_rend();
2644  MI != ME;) {
2645  // From DeadMachineInstructionElem. Don't delete inline assembly.
2646  if (MI->isInlineAsm()) {
2647  ++MI;
2648  continue;
2649  }
2650  bool SawStore = false;
2651  // Check if it's safe to remove the instruction due to side effects.
2652  // We can, and want to, remove Phis here.
2653  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
2654  ++MI;
2655  continue;
2656  }
2657  bool used = true;
2658  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2659  MOE = MI->operands_end();
2660  MOI != MOE; ++MOI) {
2661  if (!MOI->isReg() || !MOI->isDef())
2662  continue;
2663  unsigned reg = MOI->getReg();
2664  // Assume physical registers are used, unless they are marked dead.
2666  used = !MOI->isDead();
2667  if (used)
2668  break;
2669  continue;
2670  }
2671  unsigned realUses = 0;
2672  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2673  EI = MRI.use_end();
2674  UI != EI; ++UI) {
2675  // Check if there are any uses that occur only in the original
2676  // loop. If so, that's not a real use.
2677  if (UI->getParent()->getParent() != BB) {
2678  realUses++;
2679  used = true;
2680  break;
2681  }
2682  }
2683  if (realUses > 0)
2684  break;
2685  used = false;
2686  }
2687  if (!used) {
2688  LIS.RemoveMachineInstrFromMaps(*MI);
2689  MI++->eraseFromParent();
2690  continue;
2691  }
2692  ++MI;
2693  }
2694  // In the kernel block, check if we can remove a Phi that generates a value
2695  // used in an instruction removed in the epilog block.
2696  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2697  BBE = KernelBB->getFirstNonPHI();
2698  BBI != BBE;) {
2699  MachineInstr *MI = &*BBI;
2700  ++BBI;
2701  unsigned reg = MI->getOperand(0).getReg();
2702  if (MRI.use_begin(reg) == MRI.use_end()) {
2703  LIS.RemoveMachineInstrFromMaps(*MI);
2704  MI->eraseFromParent();
2705  }
2706  }
2707 }
2708 
2709 /// For loop carried definitions, we split the lifetime of a virtual register
2710 /// that has uses past the definition in the next iteration. A copy with a new
2711 /// virtual register is inserted before the definition, which helps with
2712 /// generating a better register assignment.
2713 ///
2714 /// v1 = phi(a, v2) v1 = phi(a, v2)
2715 /// v2 = phi(b, v3) v2 = phi(b, v3)
2716 /// v3 = .. v4 = copy v1
2717 /// .. = V1 v3 = ..
2718 /// .. = v4
2719 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
2720  MBBVectorTy &EpilogBBs,
2721  SMSchedule &Schedule) {
2723  for (auto &PHI : KernelBB->phis()) {
2724  unsigned Def = PHI.getOperand(0).getReg();
2725  // Check for any Phi definition that used as an operand of another Phi
2726  // in the same block.
2727  for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
2728  E = MRI.use_instr_end();
2729  I != E; ++I) {
2730  if (I->isPHI() && I->getParent() == KernelBB) {
2731  // Get the loop carried definition.
2732  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
2733  if (!LCDef)
2734  continue;
2735  MachineInstr *MI = MRI.getVRegDef(LCDef);
2736  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
2737  continue;
2738  // Search through the rest of the block looking for uses of the Phi
2739  // definition. If one occurs, then split the lifetime.
2740  unsigned SplitReg = 0;
2741  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
2742  KernelBB->instr_end()))
2743  if (BBJ.readsRegister(Def)) {
2744  // We split the lifetime when we find the first use.
2745  if (SplitReg == 0) {
2746  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
2747  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
2748  TII->get(TargetOpcode::COPY), SplitReg)
2749  .addReg(Def);
2750  }
2751  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2752  }
2753  if (!SplitReg)
2754  continue;
2755  // Search through each of the epilog blocks for any uses to be renamed.
2756  for (auto &Epilog : EpilogBBs)
2757  for (auto &I : *Epilog)
2758  if (I.readsRegister(Def))
2759  I.substituteRegister(Def, SplitReg, 0, *TRI);
2760  break;
2761  }
2762  }
2763  }
2764 }
2765 
2766 /// Remove the incoming block from the Phis in a basic block.
2767 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
2768  for (MachineInstr &MI : *BB) {
2769  if (!MI.isPHI())
2770  break;
2771  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
2772  if (MI.getOperand(i + 1).getMBB() == Incoming) {
2773  MI.RemoveOperand(i + 1);
2774  MI.RemoveOperand(i);
2775  break;
2776  }
2777  }
2778 }
2779 
2780 /// Create branches from each prolog basic block to the appropriate epilog
2781 /// block. These edges are needed if the loop ends before reaching the
2782 /// kernel.
2783 void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
2784  MBBVectorTy &PrologBBs,
2785  MachineBasicBlock *KernelBB,
2786  MBBVectorTy &EpilogBBs,
2787  SMSchedule &Schedule, ValueMapTy *VRMap) {
2788  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
2789  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
2790  MachineInstr *Cmp = Pass.LI.LoopCompare;
2791  MachineBasicBlock *LastPro = KernelBB;
2792  MachineBasicBlock *LastEpi = KernelBB;
2793 
2794  // Start from the blocks connected to the kernel and work "out"
2795  // to the first prolog and the last epilog blocks.
2797  unsigned MaxIter = PrologBBs.size() - 1;
2798  unsigned LC = UINT_MAX;
2799  unsigned LCMin = UINT_MAX;
2800  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
2801  // Add branches to the prolog that go to the corresponding
2802  // epilog, and the fall-thru prolog/kernel block.
2803  MachineBasicBlock *Prolog = PrologBBs[j];
2804  MachineBasicBlock *Epilog = EpilogBBs[i];
2805  // We've executed one iteration, so decrement the loop count and check for
2806  // the loop end.
2808  // Check if the LOOP0 has already been removed. If so, then there is no need
2809  // to reduce the trip count.
2810  if (LC != 0)
2811  LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
2812  PrevInsts, j, MaxIter);
2813 
2814  // Record the value of the first trip count, which is used to determine if
2815  // branches and blocks can be removed for constant trip counts.
2816  if (LCMin == UINT_MAX)
2817  LCMin = LC;
2818 
2819  unsigned numAdded = 0;
2821  Prolog->addSuccessor(Epilog);
2822  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
2823  } else if (j >= LCMin) {
2824  Prolog->addSuccessor(Epilog);
2825  Prolog->removeSuccessor(LastPro);
2826  LastEpi->removeSuccessor(Epilog);
2827  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
2828  removePhis(Epilog, LastEpi);
2829  // Remove the blocks that are no longer referenced.
2830  if (LastPro != LastEpi) {
2831  LastEpi->clear();
2832  LastEpi->eraseFromParent();
2833  }
2834  LastPro->clear();
2835  LastPro->eraseFromParent();
2836  } else {
2837  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
2838  removePhis(Epilog, Prolog);
2839  }
2840  LastPro = Prolog;
2841  LastEpi = Epilog;
2843  E = Prolog->instr_rend();
2844  I != E && numAdded > 0; ++I, --numAdded)
2845  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
2846  }
2847 }
2848 
2849 /// Return true if we can compute the amount the instruction changes
2850 /// during each iteration. Set Delta to the amount of the change.
2851 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2853  const MachineOperand *BaseOp;
2854  int64_t Offset;
2855  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
2856  return false;
2857 
2858  if (!BaseOp->isReg())
2859  return false;
2860 
2861  unsigned BaseReg = BaseOp->getReg();
2862 
2864  // Check if there is a Phi. If so, get the definition in the loop.
2865  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2866  if (BaseDef && BaseDef->isPHI()) {
2867  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2868  BaseDef = MRI.getVRegDef(BaseReg);
2869  }
2870  if (!BaseDef)
2871  return false;
2872 
2873  int D = 0;
2874  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2875  return false;
2876 
2877  Delta = D;
2878  return true;
2879 }
2880 
2881 /// Update the memory operand with a new offset when the pipeliner
2882 /// generates a new copy of the instruction that refers to a
2883 /// different memory location.
2884 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
2885  MachineInstr &OldMI, unsigned Num) {
2886  if (Num == 0)
2887  return;
2888  // If the instruction has memory operands, then adjust the offset
2889  // when the instruction appears in different stages.
2890  if (NewMI.memoperands_empty())
2891  return;
2893  for (MachineMemOperand *MMO : NewMI.memoperands()) {
2894  // TODO: Figure out whether isAtomic is really necessary (see D57601).
2895  if (MMO->isVolatile() || MMO->isAtomic() ||
2896  (MMO->isInvariant() && MMO->isDereferenceable()) ||
2897  (!MMO->getValue())) {
2898  NewMMOs.push_back(MMO);
2899  continue;
2900  }
2901  unsigned Delta;
2902  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
2903  int64_t AdjOffset = Delta * Num;
2904  NewMMOs.push_back(
2905  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
2906  } else {
2907  NewMMOs.push_back(
2909  }
2910  }
2911  NewMI.setMemRefs(MF, NewMMOs);
2912 }
2913 
2914 /// Clone the instruction for the new pipelined loop and update the
2915 /// memory operands, if needed.
2916 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
2917  unsigned CurStageNum,
2918  unsigned InstStageNum) {
2919  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2920  // Check for tied operands in inline asm instructions. This should be handled
2921  // elsewhere, but I'm not sure of the best solution.
2922  if (OldMI->isInlineAsm())
2923  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
2924  const auto &MO = OldMI->getOperand(i);
2925  if (MO.isReg() && MO.isUse())
2926  break;
2927  unsigned UseIdx;
2928  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
2929  NewMI->tieOperands(i, UseIdx);
2930  }
2931  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2932  return NewMI;
2933 }
2934 
2935 /// Clone the instruction for the new pipelined loop. If needed, this
2936 /// function updates the instruction using the values saved in the
2937 /// InstrChanges structure.
2938 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
2939  unsigned CurStageNum,
2940  unsigned InstStageNum,
2941  SMSchedule &Schedule) {
2942  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2944  InstrChanges.find(getSUnit(OldMI));
2945  if (It != InstrChanges.end()) {
2946  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2947  unsigned BasePos, OffsetPos;
2948  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
2949  return nullptr;
2950  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
2951  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
2952  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
2953  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
2954  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2955  }
2956  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2957  return NewMI;
2958 }
2959 
2960 /// Update the machine instruction with new virtual registers. This
2961 /// function may change the defintions and/or uses.
2962 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
2963  unsigned CurStageNum,
2964  unsigned InstrStageNum,
2965  SMSchedule &Schedule,
2966  ValueMapTy *VRMap) {
2967  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
2968  MachineOperand &MO = NewMI->getOperand(i);
2970  continue;
2971  unsigned reg = MO.getReg();
2972  if (MO.isDef()) {
2973  // Create a new virtual register for the definition.
2974  const TargetRegisterClass *RC = MRI.getRegClass(reg);
2975  unsigned NewReg = MRI.createVirtualRegister(RC);
2976  MO.setReg(NewReg);
2977  VRMap[CurStageNum][reg] = NewReg;
2978  if (LastDef)
2979  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
2980  } else if (MO.isUse()) {
2981  MachineInstr *Def = MRI.getVRegDef(reg);
2982  // Compute the stage that contains the last definition for instruction.
2983  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
2984  unsigned StageNum = CurStageNum;
2985  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
2986  // Compute the difference in stages between the defintion and the use.
2987  unsigned StageDiff = (InstrStageNum - DefStageNum);
2988  // Make an adjustment to get the last definition.
2989  StageNum -= StageDiff;
2990  }
2991  if (VRMap[StageNum].count(reg))
2992  MO.setReg(VRMap[StageNum][reg]);
2993  }
2994  }
2995 }
2996 
2997 /// Return the instruction in the loop that defines the register.
2998 /// If the definition is a Phi, then follow the Phi operand to
2999 /// the instruction in the loop.
3000 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
3002  MachineInstr *Def = MRI.getVRegDef(Reg);
3003  while (Def->isPHI()) {
3004  if (!Visited.insert(Def).second)
3005  break;
3006  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3007  if (Def->getOperand(i + 1).getMBB() == BB) {
3008  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3009  break;
3010  }
3011  }
3012  return Def;
3013 }
3014 
3015 /// Return the new name for the value from the previous stage.
3016 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3017  unsigned LoopVal, unsigned LoopStage,
3018  ValueMapTy *VRMap,
3019  MachineBasicBlock *BB) {
3020  unsigned PrevVal = 0;
3021  if (StageNum > PhiStage) {
3022  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3023  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
3024  // The name is defined in the previous stage.
3025  PrevVal = VRMap[StageNum - 1][LoopVal];
3026  else if (VRMap[StageNum].count(LoopVal))
3027  // The previous name is defined in the current stage when the instruction
3028  // order is swapped.
3029  PrevVal = VRMap[StageNum][LoopVal];
3030  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3031  // The loop value hasn't yet been scheduled.
3032  PrevVal = LoopVal;
3033  else if (StageNum == PhiStage + 1)
3034  // The loop value is another phi, which has not been scheduled.
3035  PrevVal = getInitPhiReg(*LoopInst, BB);
3036  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3037  // The loop value is another phi, which has been scheduled.
3038  PrevVal =
3039  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3040  LoopStage, VRMap, BB);
3041  }
3042  return PrevVal;
3043 }
3044 
3045 /// Rewrite the Phi values in the specified block to use the mappings
3046 /// from the initial operand. Once the Phi is scheduled, we switch
3047 /// to using the loop value instead of the Phi value, so those names
3048 /// do not need to be rewritten.
3049 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3050  unsigned StageNum,
3051  SMSchedule &Schedule,
3052  ValueMapTy *VRMap,
3053  InstrMapTy &InstrMap) {
3054  for (auto &PHI : BB->phis()) {
3055  unsigned InitVal = 0;
3056  unsigned LoopVal = 0;
3057  getPhiRegs(PHI, BB, InitVal, LoopVal);
3058  unsigned PhiDef = PHI.getOperand(0).getReg();
3059 
3060  unsigned PhiStage =
3061  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3062  unsigned LoopStage =
3063  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3064  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3065  if (NumPhis > StageNum)
3066  NumPhis = StageNum;
3067  for (unsigned np = 0; np <= NumPhis; ++np) {
3068  unsigned NewVal =
3069  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3070  if (!NewVal)
3071  NewVal = InitVal;
3072  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
3073  PhiDef, NewVal);
3074  }
3075  }
3076 }
3077 
3078 /// Rewrite a previously scheduled instruction to use the register value
3079 /// from the new instruction. Make sure the instruction occurs in the
3080 /// basic block, and we don't change the uses in the new instruction.
3081 void SwingSchedulerDAG::rewriteScheduledInstr(
3082  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3083  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3084  unsigned NewReg, unsigned PrevReg) {
3085  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3086  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3087  // Rewrite uses that have been scheduled already to use the new
3088  // Phi register.
3089  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3090  EI = MRI.use_end();
3091  UI != EI;) {
3092  MachineOperand &UseOp = *UI;
3093  MachineInstr *UseMI = UseOp.getParent();
3094  ++UI;
3095  if (UseMI->getParent() != BB)
3096  continue;
3097  if (UseMI->isPHI()) {
3098  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
3099  continue;
3100  if (getLoopPhiReg(*UseMI, BB) != OldReg)
3101  continue;
3102  }
3103  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3104  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3105  SUnit *OrigMISU = getSUnit(OrigInstr->second);
3106  int StageSched = Schedule.stageScheduled(OrigMISU);
3107  int CycleSched = Schedule.cycleScheduled(OrigMISU);
3108  unsigned ReplaceReg = 0;
3109  // This is the stage for the scheduled instruction.
3110  if (StagePhi == StageSched && Phi->isPHI()) {
3111  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3112  if (PrevReg && InProlog)
3113  ReplaceReg = PrevReg;
3114  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
3115  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
3116  ReplaceReg = PrevReg;
3117  else
3118  ReplaceReg = NewReg;
3119  }
3120  // The scheduled instruction occurs before the scheduled Phi, and the
3121  // Phi is not loop carried.
3122  if (!InProlog && StagePhi + 1 == StageSched &&
3123  !Schedule.isLoopCarried(this, *Phi))
3124  ReplaceReg = NewReg;
3125  if (StagePhi > StageSched && Phi->isPHI())
3126  ReplaceReg = NewReg;
3127  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3128  ReplaceReg = NewReg;
3129  if (ReplaceReg) {
3130  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3131  UseOp.setReg(ReplaceReg);
3132  }
3133  }
3134 }
3135 
3136 /// Check if we can change the instruction to use an offset value from the
3137 /// previous iteration. If so, return true and set the base and offset values
3138 /// so that we can rewrite the load, if necessary.
3139 /// v1 = Phi(v0, v3)
3140 /// v2 = load v1, 0
3141 /// v3 = post_store v1, 4, x
3142 /// This function enables the load to be rewritten as v2 = load v3, 4.
3143 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3144  unsigned &BasePos,
3145  unsigned &OffsetPos,
3146  unsigned &NewBase,
3147  int64_t &Offset) {
3148  // Get the load instruction.
3149  if (TII->isPostIncrement(*MI))
3150  return false;
3151  unsigned BasePosLd, OffsetPosLd;
3152  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3153  return false;
3154  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3155 
3156  // Look for the Phi instruction.
3158  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3159  if (!Phi || !Phi->isPHI())
3160  return false;
3161  // Get the register defined in the loop block.
3162  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3163  if (!PrevReg)
3164  return false;
3165 
3166  // Check for the post-increment load/store instruction.
3167  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3168  if (!PrevDef || PrevDef == MI)
3169  return false;
3170 
3171  if (!TII->isPostIncrement(*PrevDef))
3172  return false;
3173 
3174  unsigned BasePos1 = 0, OffsetPos1 = 0;
3175  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3176  return false;
3177 
3178  // Make sure that the instructions do not access the same memory location in
3179  // the next iteration.
3180  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3181  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3182  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3183  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
3184  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
3185  MF.DeleteMachineInstr(NewMI);
3186  if (!Disjoint)
3187  return false;
3188 
3189  // Set the return value once we determine that we return true.
3190  BasePos = BasePosLd;
3191  OffsetPos = OffsetPosLd;
3192  NewBase = PrevReg;
3193  Offset = StoreOffset;
3194  return true;
3195 }
3196 
3197 /// Apply changes to the instruction if needed. The changes are need
3198 /// to improve the scheduling and depend up on the final schedule.
3200  SMSchedule &Schedule) {
3201  SUnit *SU = getSUnit(MI);
3203  InstrChanges.find(SU);
3204  if (It != InstrChanges.end()) {
3205  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3206  unsigned BasePos, OffsetPos;
3207  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3208  return;
3209  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3210  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3211  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3212  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3213  int BaseStageNum = Schedule.stageScheduled(SU);
3214  int BaseCycleNum = Schedule.cycleScheduled(SU);
3215  if (BaseStageNum < DefStageNum) {
3216  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3217  int OffsetDiff = DefStageNum - BaseStageNum;
3218  if (DefCycleNum < BaseCycleNum) {
3219  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3220  if (OffsetDiff > 0)
3221  --OffsetDiff;
3222  }
3223  int64_t NewOffset =
3224  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3225  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3226  SU->setInstr(NewMI);
3227  MISUnitMap[NewMI] = SU;
3228  NewMIs.insert(NewMI);
3229  }
3230  }
3231 }
3232 
3233 /// Return true for an order or output dependence that is loop carried
3234 /// potentially. A dependence is loop carried if the destination defines a valu
3235 /// that may be used or defined by the source in a subsequent iteration.
3237  bool isSucc) {
3238  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
3239  Dep.isArtificial())
3240  return false;
3241 
3242  if (!SwpPruneLoopCarried)
3243  return true;
3244 
3245  if (Dep.getKind() == SDep::Output)
3246  return true;
3247 
3248  MachineInstr *SI = Source->getInstr();
3249  MachineInstr *DI = Dep.getSUnit()->getInstr();
3250  if (!isSucc)
3251  std::swap(SI, DI);
3252  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3253 
3254  // Assume ordered loads and stores may have a loop carried dependence.
3255  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3256  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
3258  return true;
3259 
3260  // Only chain dependences between a load and store can be loop carried.
3261  if (!DI->mayStore() || !SI->mayLoad())
3262  return false;
3263 
3264  unsigned DeltaS, DeltaD;
3265  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3266  return true;
3267 
3268  const MachineOperand *BaseOpS, *BaseOpD;
3269  int64_t OffsetS, OffsetD;
3271  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
3272  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
3273  return true;
3274 
3275  if (!BaseOpS->isIdenticalTo(*BaseOpD))
3276  return true;
3277 
3278  // Check that the base register is incremented by a constant value for each
3279  // iteration.
3280  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
3281  if (!Def || !Def->isPHI())
3282  return true;
3283  unsigned InitVal = 0;
3284  unsigned LoopVal = 0;
3285  getPhiRegs(*Def, BB, InitVal, LoopVal);
3286  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
3287  int D = 0;
3288  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
3289  return true;
3290 
3291  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3292  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3293 
3294  // This is the main test, which checks the offset values and the loop
3295  // increment value to determine if the accesses may be loop carried.
3296  if (AccessSizeS == MemoryLocation::UnknownSize ||
3297  AccessSizeD == MemoryLocation::UnknownSize)
3298  return true;
3299 
3300  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
3301  return true;
3302 
3303  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
3304 }
3305 
3306 void SwingSchedulerDAG::postprocessDAG() {
3307  for (auto &M : Mutations)
3308  M->apply(this);
3309 }
3310 
3311 /// Try to schedule the node at the specified StartCycle and continue
3312 /// until the node is schedule or the EndCycle is reached. This function
3313 /// returns true if the node is scheduled. This routine may search either
3314 /// forward or backward for a place to insert the instruction based upon
3315 /// the relative values of StartCycle and EndCycle.
3316 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3317  bool forward = true;
3318  LLVM_DEBUG({
3319  dbgs() << "Trying to insert node between " << StartCycle << " and "
3320  << EndCycle << " II: " << II << "\n";
3321  });
3322  if (StartCycle > EndCycle)
3323  forward = false;
3324 
3325  // The terminating condition depends on the direction.
3326  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3327  for (int curCycle = StartCycle; curCycle != termCycle;
3328  forward ? ++curCycle : --curCycle) {
3329 
3330  // Add the already scheduled instructions at the specified cycle to the
3331  // DFA.
3332  ProcItinResources.clearResources();
3333  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3334  checkCycle <= LastCycle; checkCycle += II) {
3335  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3336 
3337  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3338  E = cycleInstrs.end();
3339  I != E; ++I) {
3340  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3341  continue;
3342  assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
3343  "These instructions have already been scheduled.");
3344  ProcItinResources.reserveResources(*(*I)->getInstr());
3345  }
3346  }
3347  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3348  ProcItinResources.canReserveResources(*SU->getInstr())) {
3349  LLVM_DEBUG({
3350  dbgs() << "\tinsert at cycle " << curCycle << " ";
3351  SU->getInstr()->dump();
3352  });
3353 
3354  ScheduledInstrs[curCycle].push_back(SU);
3355  InstrToCycle.insert(std::make_pair(SU, curCycle));
3356  if (curCycle > LastCycle)
3357  LastCycle = curCycle;
3358  if (curCycle < FirstCycle)
3359  FirstCycle = curCycle;
3360  return true;
3361  }
3362  LLVM_DEBUG({
3363  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3364  SU->getInstr()->dump();
3365  });
3366  }
3367  return false;
3368 }
3369 
3370 // Return the cycle of the earliest scheduled instruction in the chain.
3372  SmallPtrSet<SUnit *, 8> Visited;
3373  SmallVector<SDep, 8> Worklist;
3374  Worklist.push_back(Dep);
3375  int EarlyCycle = INT_MAX;
3376  while (!Worklist.empty()) {
3377  const SDep &Cur = Worklist.pop_back_val();
3378  SUnit *PrevSU = Cur.getSUnit();
3379  if (Visited.count(PrevSU))
3380  continue;
3381  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3382  if (it == InstrToCycle.end())
3383  continue;
3384  EarlyCycle = std::min(EarlyCycle, it->second);
3385  for (const auto &PI : PrevSU->Preds)
3386  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3387  Worklist.push_back(PI);
3388  Visited.insert(PrevSU);
3389  }
3390  return EarlyCycle;
3391 }
3392 
3393 // Return the cycle of the latest scheduled instruction in the chain.
3395  SmallPtrSet<SUnit *, 8> Visited;
3396  SmallVector<SDep, 8> Worklist;
3397  Worklist.push_back(Dep);
3398  int LateCycle = INT_MIN;
3399  while (!Worklist.empty()) {
3400  const SDep &Cur = Worklist.pop_back_val();
3401  SUnit *SuccSU = Cur.getSUnit();
3402  if (Visited.count(SuccSU))
3403  continue;
3404  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3405  if (it == InstrToCycle.end())
3406  continue;
3407  LateCycle = std::max(LateCycle, it->second);
3408  for (const auto &SI : SuccSU->Succs)
3409  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3410  Worklist.push_back(SI);
3411  Visited.insert(SuccSU);
3412  }
3413  return LateCycle;
3414 }
3415 
3416 /// If an instruction has a use that spans multiple iterations, then
3417 /// return true. These instructions are characterized by having a back-ege
3418 /// to a Phi, which contains a reference to another Phi.
3420  for (auto &P : SU->Preds)
3421  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3422  for (auto &S : P.getSUnit()->Succs)
3423  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
3424  return P.getSUnit();
3425  return nullptr;
3426 }
3427 
3428 /// Compute the scheduling start slot for the instruction. The start slot
3429 /// depends on any predecessor or successor nodes scheduled already.
3430 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3431  int *MinEnd, int *MaxStart, int II,
3432  SwingSchedulerDAG *DAG) {
3433  // Iterate over each instruction that has been scheduled already. The start
3434  // slot computation depends on whether the previously scheduled instruction
3435  // is a predecessor or successor of the specified instruction.
3436  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3437 
3438  // Iterate over each instruction in the current cycle.
3439  for (SUnit *I : getInstructions(cycle)) {
3440  // Because we're processing a DAG for the dependences, we recognize
3441  // the back-edge in recurrences by anti dependences.
3442  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3443  const SDep &Dep = SU->Preds[i];
3444  if (Dep.getSUnit() == I) {
3445  if (!DAG->isBackedge(SU, Dep)) {
3446  int EarlyStart = cycle + Dep.getLatency() -
3447  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3448  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3449  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
3450  int End = earliestCycleInChain(Dep) + (II - 1);
3451  *MinEnd = std::min(*MinEnd, End);
3452  }
3453  } else {
3454  int LateStart = cycle - Dep.getLatency() +
3455  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3456  *MinLateStart = std::min(*MinLateStart, LateStart);
3457  }
3458  }
3459  // For instruction that requires multiple iterations, make sure that
3460  // the dependent instruction is not scheduled past the definition.
3461  SUnit *BE = multipleIterations(I, DAG);
3462  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3463  !SU->isPred(I))
3464  *MinLateStart = std::min(*MinLateStart, cycle);
3465  }
3466  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
3467  if (SU->Succs[i].getSUnit() == I) {
3468  const SDep &Dep = SU->Succs[i];
3469  if (!DAG->isBackedge(SU, Dep)) {
3470  int LateStart = cycle - Dep.getLatency() +
3471  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3472  *MinLateStart = std::min(*MinLateStart, LateStart);
3473  if (DAG->isLoopCarriedDep(SU, Dep)) {
3474  int Start = latestCycleInChain(Dep) + 1 - II;
3475  *MaxStart = std::max(*MaxStart, Start);
3476  }
3477  } else {
3478  int EarlyStart = cycle + Dep.getLatency() -
3479  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3480  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3481  }
3482  }
3483  }
3484  }
3485  }
3486 }
3487 
3488 /// Order the instructions within a cycle so that the definitions occur
3489 /// before the uses. Returns true if the instruction is added to the start
3490 /// of the list, or false if added to the end.
3492  std::deque<SUnit *> &Insts) {
3493  MachineInstr *MI = SU->getInstr();
3494  bool OrderBeforeUse = false;
3495  bool OrderAfterDef = false;
3496  bool OrderBeforeDef = false;
3497  unsigned MoveDef = 0;
3498  unsigned MoveUse = 0;
3499  int StageInst1 = stageScheduled(SU);
3500 
3501  unsigned Pos = 0;
3502  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3503  ++I, ++Pos) {
3504  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3505  MachineOperand &MO = MI->getOperand(i);
3507  continue;
3508 
3509  unsigned Reg = MO.getReg();
3510  unsigned BasePos, OffsetPos;
3511  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3512  if (MI->getOperand(BasePos).getReg() == Reg)
3513  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3514  Reg = NewReg;
3515  bool Reads, Writes;
3516  std::tie(Reads, Writes) =
3517  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3518  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3519  OrderBeforeUse = true;
3520  if (MoveUse == 0)
3521  MoveUse = Pos;
3522  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3523  // Add the instruction after the scheduled instruction.
3524  OrderAfterDef = true;
3525  MoveDef = Pos;
3526  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3527  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3528  OrderBeforeUse = true;
3529  if (MoveUse == 0)
3530  MoveUse = Pos;
3531  } else {
3532  OrderAfterDef = true;
3533  MoveDef = Pos;
3534  }
3535  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3536  OrderBeforeUse = true;
3537  if (MoveUse == 0)
3538  MoveUse = Pos;
3539  if (MoveUse != 0) {
3540  OrderAfterDef = true;
3541  MoveDef = Pos - 1;
3542  }
3543  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3544  // Add the instruction before the scheduled instruction.
3545  OrderBeforeUse = true;
3546  if (MoveUse == 0)
3547  MoveUse = Pos;
3548  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3549  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3550  if (MoveUse == 0) {
3551  OrderBeforeDef = true;
3552  MoveUse = Pos;
3553  }
3554  }
3555  }
3556  // Check for order dependences between instructions. Make sure the source
3557  // is ordered before the destination.
3558  for (auto &S : SU->Succs) {
3559  if (S.getSUnit() != *I)
3560  continue;
3561  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3562  OrderBeforeUse = true;
3563  if (Pos < MoveUse)
3564  MoveUse = Pos;
3565  }
3566  }
3567  for (auto &P : SU->Preds) {
3568  if (P.getSUnit() != *I)
3569  continue;
3570  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3571  OrderAfterDef = true;
3572  MoveDef = Pos;
3573  }
3574  }
3575  }
3576 
3577  // A circular dependence.
3578  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3579  OrderBeforeUse = false;
3580 
3581  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3582  // to a loop-carried dependence.
3583  if (OrderBeforeDef)
3584  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3585 
3586  // The uncommon case when the instruction order needs to be updated because
3587  // there is both a use and def.
3588  if (OrderBeforeUse && OrderAfterDef) {
3589  SUnit *UseSU = Insts.at(MoveUse);
3590  SUnit *DefSU = Insts.at(MoveDef);
3591  if (MoveUse > MoveDef) {
3592  Insts.erase(Insts.begin() + MoveUse);
3593  Insts.erase(Insts.begin() + MoveDef);
3594  } else {
3595  Insts.erase(Insts.begin() + MoveDef);
3596  Insts.erase(Insts.begin() + MoveUse);
3597  }
3598  orderDependence(SSD, UseSU, Insts);
3599  orderDependence(SSD, SU, Insts);
3600  orderDependence(SSD, DefSU, Insts);
3601  return;
3602  }
3603  // Put the new instruction first if there is a use in the list. Otherwise,
3604  // put it at the end of the list.
3605  if (OrderBeforeUse)
3606  Insts.push_front(SU);
3607  else
3608  Insts.push_back(SU);
3609 }
3610 
3611 /// Return true if the scheduled Phi has a loop carried operand.
3613  if (!Phi.isPHI())
3614  return false;
3615  assert(Phi.isPHI() && "Expecting a Phi.");
3616  SUnit *DefSU = SSD->getSUnit(&Phi);
3617  unsigned DefCycle = cycleScheduled(DefSU);
3618  int DefStage = stageScheduled(DefSU);
3619 
3620  unsigned InitVal = 0;
3621  unsigned LoopVal = 0;
3622  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3623  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3624  if (!UseSU)
3625  return true;
3626  if (UseSU->getInstr()->isPHI())
3627  return true;
3628  unsigned LoopCycle = cycleScheduled(UseSU);
3629  int LoopStage = stageScheduled(UseSU);
3630  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3631 }
3632 
3633 /// Return true if the instruction is a definition that is loop carried
3634 /// and defines the use on the next iteration.
3635 /// v1 = phi(v2, v3)
3636 /// (Def) v3 = op v1
3637 /// (MO) = v1
3638 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
3639 /// register.
3642  if (!MO.isReg())
3643  return false;
3644  if (Def->isPHI())
3645  return false;
3646  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3647  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3648  return false;
3649  if (!isLoopCarried(SSD, *Phi))
3650  return false;
3651  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3652  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
3653  MachineOperand &DMO = Def->getOperand(i);
3654  if (!DMO.isReg() || !DMO.isDef())
3655  continue;
3656  if (DMO.getReg() == LoopReg)
3657  return true;
3658  }
3659  return false;
3660 }
3661 
3662 // Check if the generated schedule is valid. This function checks if
3663 // an instruction that uses a physical register is scheduled in a
3664 // different stage than the definition. The pipeliner does not handle
3665 // physical register values that may cross a basic block boundary.
3667  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
3668  SUnit &SU = SSD->SUnits[i];
3669  if (!SU.hasPhysRegDefs)
3670  continue;
3671  int StageDef = stageScheduled(&SU);
3672  assert(StageDef != -1 && "Instruction should have been scheduled.");
3673  for (auto &SI : SU.Succs)
3674  if (SI.isAssignedRegDep())
3675  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
3676  if (stageScheduled(SI.getSUnit()) != StageDef)
3677  return false;
3678  }
3679  return true;
3680 }
3681 
3682 /// A property of the node order in swing-modulo-scheduling is
3683 /// that for nodes outside circuits the following holds:
3684 /// none of them is scheduled after both a successor and a
3685 /// predecessor.
3686 /// The method below checks whether the property is met.
3687 /// If not, debug information is printed and statistics information updated.
3688 /// Note that we do not use an assert statement.
3689 /// The reason is that although an invalid node oder may prevent
3690 /// the pipeliner from finding a pipelined schedule for arbitrary II,
3691 /// it does not lead to the generation of incorrect code.
3692 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3693 
3694  // a sorted vector that maps each SUnit to its index in the NodeOrder
3695  typedef std::pair<SUnit *, unsigned> UnitIndex;
3696  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
3697 
3698  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3699  Indices.push_back(std::make_pair(NodeOrder[i], i));
3700 
3701  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3702  return std::get<0>(i1) < std::get<0>(i2);
3703  };
3704 
3705  // sort, so that we can perform a binary search
3706  llvm::sort(Indices, CompareKey);
3707 
3708  bool Valid = true;
3709  (void)Valid;
3710  // for each SUnit in the NodeOrder, check whether
3711  // it appears after both a successor and a predecessor
3712  // of the SUnit. If this is the case, and the SUnit
3713  // is not part of circuit, then the NodeOrder is not
3714  // valid.
3715  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3716  SUnit *SU = NodeOrder[i];
3717  unsigned Index = i;
3718 
3719  bool PredBefore = false;
3720  bool SuccBefore = false;
3721 
3722  SUnit *Succ;
3723  SUnit *Pred;
3724  (void)Succ;
3725  (void)Pred;
3726 
3727  for (SDep &PredEdge : SU->Preds) {
3728  SUnit *PredSU = PredEdge.getSUnit();
3729  unsigned PredIndex =
3730  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
3731  std::make_pair(PredSU, 0), CompareKey));
3732  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
3733  PredBefore = true;
3734  Pred = PredSU;
3735  break;
3736  }
3737  }
3738 
3739  for (SDep &SuccEdge : SU->Succs) {
3740  SUnit *SuccSU = SuccEdge.getSUnit();
3741  // Do not process a boundary node, it was not included in NodeOrder,
3742  // hence not in Indices either, call to std::lower_bound() below will
3743  // return Indices.end().
3744  if (SuccSU->isBoundaryNode())
3745  continue;
3746  unsigned SuccIndex =
3747  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
3748  std::make_pair(SuccSU, 0), CompareKey));
3749  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
3750  SuccBefore = true;
3751  Succ = SuccSU;
3752  break;
3753  }
3754  }
3755 
3756  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
3757  // instructions in circuits are allowed to be scheduled
3758  // after both a successor and predecessor.
3759  bool InCircuit = std::any_of(
3760  Circuits.begin(), Circuits.end(),
3761  [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
3762  if (InCircuit)
3763  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
3764  else {
3765  Valid = false;
3766  NumNodeOrderIssues++;
3767  LLVM_DEBUG(dbgs() << "Predecessor ";);
3768  }
3769  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
3770  << " are scheduled before node " << SU->NodeNum
3771  << "\n";);
3772  }
3773  }
3774 
3775  LLVM_DEBUG({
3776  if (!Valid)
3777  dbgs() << "Invalid node order found!\n";
3778  });
3779 }
3780 
3781 /// Attempt to fix the degenerate cases when the instruction serialization
3782 /// causes the register lifetimes to overlap. For example,
3783 /// p' = store_pi(p, b)
3784 /// = load p, offset
3785 /// In this case p and p' overlap, which means that two registers are needed.
3786 /// Instead, this function changes the load to use p' and updates the offset.
3787 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
3788  unsigned OverlapReg = 0;
3789  unsigned NewBaseReg = 0;
3790  for (SUnit *SU : Instrs) {
3791  MachineInstr *MI = SU->getInstr();
3792  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3793  const MachineOperand &MO = MI->getOperand(i);
3794  // Look for an instruction that uses p. The instruction occurs in the
3795  // same cycle but occurs later in the serialized order.
3796  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3797  // Check that the instruction appears in the InstrChanges structure,
3798  // which contains instructions that can have the offset updated.
3800  InstrChanges.find(SU);
3801  if (It != InstrChanges.end()) {
3802  unsigned BasePos, OffsetPos;
3803  // Update the base register and adjust the offset.
3804  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3805  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3806  NewMI->getOperand(BasePos).setReg(NewBaseReg);
3807  int64_t NewOffset =
3808  MI->getOperand(OffsetPos).getImm() - It->second.second;
3809  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3810  SU->setInstr(NewMI);
3811  MISUnitMap[NewMI] = SU;
3812  NewMIs.insert(NewMI);
3813  }
3814  }
3815  OverlapReg = 0;
3816  NewBaseReg = 0;
3817  break;
3818  }
3819  // Look for an instruction of the form p' = op(p), which uses and defines
3820  // two virtual registers that get allocated to the same physical register.
3821  unsigned TiedUseIdx = 0;
3822  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3823  // OverlapReg is p in the example above.
3824  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3825  // NewBaseReg is p' in the example above.
3826  NewBaseReg = MI->getOperand(i).getReg();
3827  break;
3828  }
3829  }
3830  }
3831 }
3832 
3833 /// After the schedule has been formed, call this function to combine
3834 /// the instructions from the different stages/cycles. That is, this
3835 /// function creates a schedule that represents a single iteration.
3837  // Move all instructions to the first stage from later stages.
3838  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3839  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3840  ++stage) {
3841  std::deque<SUnit *> &cycleInstrs =
3842  ScheduledInstrs[cycle + (stage * InitiationInterval)];
3843  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
3844  E = cycleInstrs.rend();
3845  I != E; ++I)
3846  ScheduledInstrs[cycle].push_front(*I);
3847  }
3848  }
3849  // Iterate over the definitions in each instruction, and compute the
3850  // stage difference for each use. Keep the maximum value.
3851  for (auto &I : InstrToCycle) {
3852  int DefStage = stageScheduled(I.first);
3853  MachineInstr *MI = I.first->getInstr();
3854  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3855  MachineOperand &Op = MI->getOperand(i);
3856  if (!Op.isReg() || !Op.isDef())
3857  continue;
3858 
3859  unsigned Reg = Op.getReg();
3860  unsigned MaxDiff = 0;
3861  bool PhiIsSwapped = false;
3862  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
3863  EI = MRI.use_end();
3864  UI != EI; ++UI) {
3865  MachineOperand &UseOp = *UI;
3866  MachineInstr *UseMI = UseOp.getParent();
3867  SUnit *SUnitUse = SSD->getSUnit(UseMI);
3868  int UseStage = stageScheduled(SUnitUse);
3869  unsigned Diff = 0;
3870  if (UseStage != -1 && UseStage >= DefStage)
3871  Diff = UseStage - DefStage;
3872  if (MI->isPHI()) {
3873  if (isLoopCarried(SSD, *MI))
3874  ++Diff;
3875  else
3876  PhiIsSwapped = true;
3877  }
3878  MaxDiff = std::max(Diff, MaxDiff);
3879  }
3880  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3881  }
3882  }
3883 
3884  // Erase all the elements in the later stages. Only one iteration should
3885  // remain in the scheduled list, and it contains all the instructions.
3886  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3887  ScheduledInstrs.erase(cycle);
3888 
3889  // Change the registers in instruction as specified in the InstrChanges
3890  // map. We need to use the new registers to create the correct order.
3891  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
3892  SUnit *SU = &SSD->SUnits[i];
3893  SSD->applyInstrChange(SU->getInstr(), *this);
3894  }
3895 
3896  // Reorder the instructions in each cycle to fix and improve the
3897  // generated code.
3898  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
3899  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3900  std::deque<SUnit *> newOrderPhi;
3901  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3902  SUnit *SU = cycleInstrs[i];
3903  if (SU->getInstr()->isPHI())
3904  newOrderPhi.push_back(SU);
3905  }
3906  std::deque<SUnit *> newOrderI;
3907  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3908  SUnit *SU = cycleInstrs[i];
3909  if (!SU->getInstr()->isPHI())
3910  orderDependence(SSD, SU, newOrderI);
3911  }
3912  // Replace the old order with the new order.
3913  cycleInstrs.swap(newOrderPhi);
3914  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
3915  SSD->fixupRegisterOverlaps(cycleInstrs);
3916  }
3917 
3918  LLVM_DEBUG(dump(););
3919 }
3920 
3921 void NodeSet::print(raw_ostream &os) const {
3922  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
3923  << " depth " << MaxDepth << " col " << Colocate << "\n";
3924  for (const auto &I : Nodes)
3925  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
3926  os << "\n";
3927 }
3928 
3929 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3930 /// Print the schedule information to the given output.
3932  // Iterate over each cycle.
3933  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3934  // Iterate over each instruction in the cycle.
3935  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
3936  for (SUnit *CI : cycleInstrs->second) {
3937  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3938  os << "(" << CI->NodeNum << ") ";
3939  CI->getInstr()->print(os);
3940  os << "\n";
3941  }
3942  }
3943 }
3944 
3945 /// Utility function used for debugging to print the schedule.
3948 
3949 #endif
3950 
3952  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
3953  unsigned ProcResourceID = 0;
3954 
3955  // We currently limit the resource kinds to 64 and below so that we can use
3956  // uint64_t for Masks
3957  assert(SM.getNumProcResourceKinds() < 64 &&
3958  "Too many kinds of resources, unsupported");
3959  // Create a unique bitmask for every processor resource unit.
3960  // Skip resource at index 0, since it always references 'InvalidUnit'.
3961  Masks.resize(SM.getNumProcResourceKinds());
3962  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3963  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3964  if (Desc.SubUnitsIdxBegin)
3965  continue;
3966  Masks[I] = 1ULL << ProcResourceID;
3967  ProcResourceID++;
3968  }
3969  // Create a unique bitmask for every processor resource group.
3970  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3971  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3972  if (!Desc.SubUnitsIdxBegin)
3973  continue;
3974  Masks[I] = 1ULL << ProcResourceID;
3975  for (unsigned U = 0; U < Desc.NumUnits; ++U)
3976  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
3977  ProcResourceID++;
3978  }
3979  LLVM_DEBUG({
3980  if (SwpShowResMask) {
3981  dbgs() << "ProcResourceDesc:\n";
3982  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3983  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
3984  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3985  ProcResource->Name, I, Masks[I],
3986  ProcResource->NumUnits);
3987  }
3988  dbgs() << " -----------------\n";
3989  }
3990  });
3991 }
3992 
3994 
3995  LLVM_DEBUG({
3996  if (SwpDebugResource)
3997  dbgs() << "canReserveResources:\n";
3998  });
3999  if (UseDFA)
4000  return DFAResources->canReserveResources(MID);
4001 
4002  unsigned InsnClass = MID->getSchedClass();
4003  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
4004  if (!SCDesc->isValid()) {
4005  LLVM_DEBUG({
4006  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4007  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
4008  });
4009  return true;
4010  }
4011 
4012  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
4013  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
4014  for (; I != E; ++I) {
4015  if (!I->Cycles)
4016  continue;
4017  const MCProcResourceDesc *ProcResource =
4018  SM.getProcResource(I->ProcResourceIdx);
4019  unsigned NumUnits = ProcResource->NumUnits;
4020  LLVM_DEBUG({
4021  if (SwpDebugResource)
4022  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
4023  ProcResource->Name, I->ProcResourceIdx,
4024  ProcResourceCount[I->ProcResourceIdx], NumUnits,
4025  I->Cycles);
4026  });
4027  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
4028  return false;
4029  }
4030  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
4031  return true;
4032 }
4033 
4035  LLVM_DEBUG({
4036  if (SwpDebugResource)
4037  dbgs() << "reserveResources:\n";
4038  });
4039  if (UseDFA)
4040  return DFAResources->reserveResources(MID);
4041 
4042  unsigned InsnClass = MID->getSchedClass();
4043  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
4044  if (!SCDesc->isValid()) {
4045  LLVM_DEBUG({
4046  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4047  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
4048  });
4049  return;
4050  }
4051  for (const MCWriteProcResEntry &PRE :
4052  make_range(STI->getWriteProcResBegin(SCDesc),
4053  STI->getWriteProcResEnd(SCDesc))) {
4054  if (!PRE.Cycles)
4055  continue;
4056  ++ProcResourceCount[PRE.ProcResourceIdx];
4057  LLVM_DEBUG({
4058  if (SwpDebugResource) {
4059  const MCProcResourceDesc *ProcResource =
4060  SM.getProcResource(PRE.ProcResourceIdx);
4061  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
4062  ProcResource->Name, PRE.ProcResourceIdx,
4063  ProcResourceCount[PRE.ProcResourceIdx],
4064  ProcResource->NumUnits, PRE.Cycles);
4065  }
4066  });
4067  }
4068  LLVM_DEBUG({
4069  if (SwpDebugResource)
4070  dbgs() << "reserveResources: done!\n\n";
4071  });
4072 }
4073 
4075  return canReserveResources(&MI.getDesc());
4076 }
4077 
4079  return reserveResources(&MI.getDesc());
4080 }
4081 
4083  if (UseDFA)
4084  return DFAResources->clearResources();
4085  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
4086 }
4087 
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
auto lower_bound(R &&Range, T &&Value) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1288
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:769
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:455
RegisterClassInfo RegClassInfo
A common definition of LaneBitmask for use in TableGen and CodeGen.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:88
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:634
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned NumPreds
of SDep::Data preds.
Definition: ScheduleDAG.h:266
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:473
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
bool empty() const
cl::opt< bool > SwpEnableCopyToPhi
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:384
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
unsigned getReg() const
getReg - Returns the register number.
bool isValidSchedule(SwingSchedulerDAG *SSD)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
const MachineLoopInfo * MLI
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:123
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
bool isInlineAsm() const
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:37
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
Metadata node.
Definition: Metadata.h:863
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
block Block Frequency true
bool isPseudo() const
Return true if this is a pseudo instruction that doesn&#39;t correspond to a real machine instruction...
Definition: MCInstrDesc.h:243
static void getUnderlyingObjects(const MachineInstr *MI, SmallVectorImpl< const Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:460
bool isPHI() const
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:260
SUnit * getNode(unsigned i) const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
int compareRecMII(NodeSet &RHS)
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:259
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:413
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
Modulo Software Pipelining
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change...
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1217
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:102
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:407
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
SlotIndexes pass.
Definition: SlotIndexes.h:328
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:841
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
void dump() const
Definition: Pass.cpp:134
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:234
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Itinerary data supplied by a subtarget to be used by a target.
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
unsigned count(SUnit *SU) const
bool isValid() const
Definition: MCSchedule.h:127
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:147
bool insert(SUnit *SU)
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1258
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
const Value * getValue() const
Return the base address of the memory access.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
StringRef getString() const
Definition: Metadata.cpp:463
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Scheduling dependency.
Definition: ScheduleDAG.h:49
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:580
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:582
void finishBlock() override
Clean up after the software pipeliner runs.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:821
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
int getFirstCycle() const
Return the first cycle in the completed schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
void GetUnderlyingObjects(const Value *V, SmallVectorImpl< const Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
The main class in the implementation of the target independent software pipeliner pass...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:200
MachineFunction * MF
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
void clearResources()
Reset the state.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:517
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:54
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
MachineInstrBuilder & UseMI
unsigned getStagesForPhi(int Reg)
The number of stages for a Phi is a little different than other instructions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:550
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1199
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
void dump() const
Utility function used for debugging to print the schedule.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
const TargetInstrInfo * TII
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
void print(raw_ostream &os) const
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
void fixupRegisterOverlaps(std::deque< SUnit *> &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
LLVM_DUMP_METHOD void dump() const
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
bool isCopy() const
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1424
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:56
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
const InstrItineraryData * InstrItins
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1122
hexagon gen pred
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SetVector< SUnit * >::const_iterator iterator
virtual bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Iterator for intrusive lists based on ilist_node.
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:519
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1173
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:535
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static unsigned getDepth(const SmallVectorImpl< const MachineBasicBlock *> &Stack, const MachineBasicBlock *MBB)
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
MachineInstrBuilder MachineInstrBuilder & DefMI
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
void setRecMII(unsigned mii)
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:762
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
This class represents the scheduled code.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:262
unsigned getStagesForReg(int Reg, unsigned CurStage)
Return the max.
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
bool isEmpty() const
Returns true if there are no itineraries.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
void setColocate(unsigned c)
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:63
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:165
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
iterator begin() const
Definition: SmallPtrSet.h:396
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:467
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit *> &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:151
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
Generic base class for all target subtargets.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:128
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class...
iterator end() const
Definition: SmallPtrSet.h:401
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:808
bool memoperands_empty() const
Return true if we don&#39;t have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:547
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:71
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand *> MemRefs)
Assign this MachineInstr&#39;s memory reference descriptor list.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
void stable_sort(R &&Range)
Definition: STLExtras.h:1316
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:72
mop_iterator operands_begin()
Definition: MachineInstr.h:454
A vector that has set insertion semantics.
Definition: SetVector.h:40
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
SmallVector< MachineOperand, 4 > BrCond
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
A single uniqued string.
Definition: Metadata.h:603
void setPos(MachineBasicBlock::const_iterator Pos)
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
void resize(size_type N)
Definition: SmallVector.h:344
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431