1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and to check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
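//
// A rough illustration (not taken from this file): with II = 1 and a
// three-stage schedule for a loop body {A; B; C}, the generated code is
// conceptually
//   prolog:  A(0);  A(1) B(0);
//   kernel:  A(i+2) B(i+1) C(i)   // steady state, one new iteration per II
//   epilog:  B(n-1) C(n-2);  C(n-1);
// where the index in parentheses is the originating loop iteration.
//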
31 
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/BitVector.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/MapVector.h"
36 #include "llvm/ADT/PriorityQueue.h"
37 #include "llvm/ADT/SetVector.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
65 #include "llvm/Config/llvm-config.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/DebugLoc.h"
68 #include "llvm/IR/Function.h"
69 #include "llvm/MC/LaneBitmask.h"
70 #include "llvm/MC/MCInstrDesc.h"
72 #include "llvm/MC/MCRegisterInfo.h"
73 #include "llvm/Pass.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
79 #include <algorithm>
80 #include <cassert>
81 #include <climits>
82 #include <cstdint>
83 #include <deque>
84 #include <functional>
85 #include <iterator>
86 #include <map>
87 #include <memory>
88 #include <tuple>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 
94 #define DEBUG_TYPE "pipeliner"
95 
96 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
97 STATISTIC(NumPipelined, "Number of loops software pipelined");
98 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
99 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
100 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
101 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
102 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
103 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
104 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
105 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
106 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
107 
108 /// A command line option to turn software pipelining on or off.
109 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
111  cl::desc("Enable Software Pipelining"));
112 
113 /// A command line option to enable SWP at -Os.
114 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
115  cl::desc("Enable SWP at Os."), cl::Hidden,
116  cl::init(false));
117 
118 /// A command line argument to limit the minimum initiation interval (MII) for pipelining.
119 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
120  cl::desc("Size limit for the MII."),
121  cl::Hidden, cl::init(27));
122 
123 /// A command line argument to limit the number of stages in the pipeline.
124 static cl::opt<int>
125  SwpMaxStages("pipeliner-max-stages",
126  cl::desc("Maximum stages allowed in the generated scheduled."),
127  cl::Hidden, cl::init(3));
128 
129 /// A command line option to disable the pruning of chain dependences due to
130 /// an unrelated Phi.
131 static cl::opt<bool>
132  SwpPruneDeps("pipeliner-prune-deps",
133  cl::desc("Prune dependences between unrelated Phi nodes."),
134  cl::Hidden, cl::init(true));
135 
136 /// A command line option to disable the pruning of loop carried order
137 /// dependences.
138 static cl::opt<bool>
139  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
140  cl::desc("Prune loop carried order dependences."),
141  cl::Hidden, cl::init(true));
142 
143 #ifndef NDEBUG
144 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
145 #endif
146 
147 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
148  cl::ReallyHidden, cl::init(false),
149  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
150 
151 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
152  cl::init(false));
153 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
154  cl::init(false));
155 
156 namespace llvm {
157 
158 // A command line option to enable the CopyToPhi DAG mutation.
159 cl::opt<bool>
160  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
161  cl::init(true), cl::ZeroOrMore,
162  cl::desc("Enable CopyToPhi DAG Mutation"));
163 
164 } // end namespace llvm
165 
166 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
167 char MachinePipeliner::ID = 0;
168 #ifndef NDEBUG
169 int MachinePipeliner::NumTries = 0;
170 #endif
171 char &llvm::MachinePipelinerID = MachinePipeliner::ID;
172 
173 INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
174  "Modulo Software Pipelining", false, false)
175 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
176 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
177 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
178 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
179 INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
180  "Modulo Software Pipelining", false, false)
181 
182 /// The "main" function for implementing Swing Modulo Scheduling.
183 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
184  if (skipFunction(mf.getFunction()))
185  return false;
186 
187  if (!EnableSWP)
188  return false;
189 
190  if (mf.getFunction().getAttributes().hasAttribute(
191  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
192  !EnableSWPOptSize.getPosition())
193  return false;
194 
195  if (!mf.getSubtarget().enableMachinePipeliner())
196  return false;
197 
198  // Cannot pipeline loops without instruction itineraries if we are using
199  // DFA for the pipeliner.
200  if (mf.getSubtarget().useDFAforSMS() &&
201  (!mf.getSubtarget().getInstrItineraryData() ||
202  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
203  return false;
204 
205  MF = &mf;
206  MLI = &getAnalysis<MachineLoopInfo>();
207  MDT = &getAnalysis<MachineDominatorTree>();
208  TII = MF->getSubtarget().getInstrInfo();
209  RegClassInfo.runOnMachineFunction(*MF);
210 
211  for (auto &L : *MLI)
212  scheduleLoop(*L);
213 
214  return false;
215 }
216 
217 /// Attempt to perform the SMS algorithm on the specified loop. This function is
218 /// the main entry point for the algorithm. The function identifies candidate
219 /// loops, calculates the minimum initiation interval, and attempts to schedule
220 /// the loop.
221 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
222  bool Changed = false;
223  for (auto &InnerLoop : L)
224  Changed |= scheduleLoop(*InnerLoop);
225 
226 #ifndef NDEBUG
227  // Stop trying after reaching the limit (if any).
228  int Limit = SwpLoopLimit;
229  if (Limit >= 0) {
230  if (NumTries >= SwpLoopLimit)
231  return Changed;
232  NumTries++;
233  }
234 #endif
235 
236  setPragmaPipelineOptions(L);
237  if (!canPipelineLoop(L)) {
238  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
239  return Changed;
240  }
241 
242  ++NumTrytoPipeline;
243 
244  Changed = swingModuloScheduler(L);
245 
246  return Changed;
247 }
248 
249 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
250  MachineBasicBlock *LBLK = L.getTopBlock();
251 
252  if (LBLK == nullptr)
253  return;
254 
255  const BasicBlock *BBLK = LBLK->getBasicBlock();
256  if (BBLK == nullptr)
257  return;
258 
259  const Instruction *TI = BBLK->getTerminator();
260  if (TI == nullptr)
261  return;
262 
263  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
264  if (LoopID == nullptr)
265  return;
266 
267  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
268  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
269 
270  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
271  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
272 
273  if (MD == nullptr)
274  continue;
275 
276  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
277 
278  if (S == nullptr)
279  continue;
280 
281  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
282  assert(MD->getNumOperands() == 2 &&
283  "Pipeline initiation interval hint metadata should have two operands.");
284  II_setByPragma =
285  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
286  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
287  } else if (S->getString() == "llvm.loop.pipeline.disable") {
288  disabledByPragma = true;
289  }
290  }
291 }
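// For reference, the metadata read above originates from IR loop metadata on
// the loop's branch; a hypothetical example (names illustrative only):
//   br i1 %cond, label %loop, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.pipeline.initiationinterval", i32 4}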
292 
293 /// Return true if the loop can be software pipelined. The algorithm is
294 /// restricted to loops with a single basic block. Make sure that the
295 /// branch in the loop can be analyzed.
296 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
297  if (L.getNumBlocks() != 1)
298  return false;
299 
300  if (disabledByPragma)
301  return false;
302 
303  // If the loop branch can't be analyzed, then we can't pipeline the loop,
304  // so bail out here.
305  LI.TBB = nullptr;
306  LI.FBB = nullptr;
307  LI.BrCond.clear();
308  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
309  LLVM_DEBUG(
310  dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
311  NumFailBranch++;
312  return false;
313  }
314 
315  LI.LoopInductionVar = nullptr;
316  LI.LoopCompare = nullptr;
317  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
318  LLVM_DEBUG(
319  dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
320  NumFailLoop++;
321  return false;
322  }
323 
324  if (!L.getLoopPreheader()) {
325  LLVM_DEBUG(
326  dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
327  NumFailPreheader++;
328  return false;
329  }
330 
331  // Remove any subregisters from inputs to phi nodes.
332  preprocessPhiNodes(*L.getHeader());
333  return true;
334 }
335 
336 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
337  MachineRegisterInfo &MRI = MF->getRegInfo();
338  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
339 
340  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
341  MachineOperand &DefOp = PI.getOperand(0);
342  assert(DefOp.getSubReg() == 0);
343  auto *RC = MRI.getRegClass(DefOp.getReg());
344 
345  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
346  MachineOperand &RegOp = PI.getOperand(i);
347  if (RegOp.getSubReg() == 0)
348  continue;
349 
350  // If the operand uses a subregister, replace it with a new register
351  // without subregisters, and generate a copy to the new register.
352  Register NewReg = MRI.createVirtualRegister(RC);
353  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
354  MachineBasicBlock::iterator At = PredB.getFirstTerminator();
355  const DebugLoc &DL = PredB.findDebugLoc(At);
356  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
357  .addReg(RegOp.getReg(), getRegState(RegOp),
358  RegOp.getSubReg());
359  Slots.insertMachineInstrInMaps(*Copy);
360  RegOp.setReg(NewReg);
361  RegOp.setSubReg(0);
362  }
363  }
364 }
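// Sketch of the rewrite performed above (MIR-like, register names are
// illustrative only): a phi input that carries a subregister, e.g.
//   %p:intregs = PHI %a.sub_lo, %bb.1, %b, %bb.2
// is rewritten so that a COPY placed at the end of %bb.1 produces a plain
// register, and the phi then reads that copy instead:
//   %n:intregs = COPY %a.sub_lo      ; inserted in %bb.1
//   %p:intregs = PHI %n, %bb.1, %b, %bb.2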
365 
366 /// The SMS algorithm consists of the following main steps:
367 /// 1. Computation and analysis of the dependence graph.
368 /// 2. Ordering of the nodes (instructions).
369 /// 3. Attempt to Schedule the loop.
370 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
371  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
372 
373  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
374  II_setByPragma);
375 
376  MachineBasicBlock *MBB = L.getHeader();
377  // The kernel should not include any terminator instructions. These
378  // will be added back later.
379  SMS.startBlock(MBB);
380 
381  // Compute the number of 'real' instructions in the basic block by
382  // ignoring terminators.
383  unsigned size = MBB->size();
384  for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
385  E = MBB->instr_end();
386  I != E; ++I, --size)
387  ;
388 
389  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
390  SMS.schedule();
391  SMS.exitRegion();
392 
393  SMS.finishBlock();
394  return SMS.hasNewSchedule();
395 }
396 
397 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
398  if (II_setByPragma > 0)
399  MII = II_setByPragma;
400  else
401  MII = std::max(ResMII, RecMII);
402 }
403 
404 void SwingSchedulerDAG::setMAX_II() {
405  if (II_setByPragma > 0)
406  MAX_II = II_setByPragma;
407  else
408  MAX_II = MII + 10;
409 }
410 
411 /// We override the schedule function in ScheduleDAGInstrs to implement the
412 /// scheduling part of the Swing Modulo Scheduling algorithm.
413 void SwingSchedulerDAG::schedule() {
414  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
415  buildSchedGraph(AA);
416  addLoopCarriedDependences(AA);
417  updatePhiDependences();
418  Topo.InitDAGTopologicalSorting();
419  changeDependences();
420  postprocessDAG();
421  LLVM_DEBUG(dump());
422 
423  NodeSetType NodeSets;
424  findCircuits(NodeSets);
425  NodeSetType Circuits = NodeSets;
426 
427  // Calculate the MII.
428  unsigned ResMII = calculateResMII();
429  unsigned RecMII = calculateRecMII(NodeSets);
430 
431  fuseRecs(NodeSets);
432 
433  // This flag is used for testing and can cause correctness problems.
434  if (SwpIgnoreRecMII)
435  RecMII = 0;
436 
437  setMII(ResMII, RecMII);
438  setMAX_II();
439 
440  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
441  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
442 
443  // Can't schedule a loop without a valid MII.
444  if (MII == 0) {
445  LLVM_DEBUG(
446  dbgs()
447  << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
448  NumFailZeroMII++;
449  return;
450  }
451 
452  // Don't pipeline large loops.
453  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
454  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
455  << ", we don't pipleline large loops\n");
456  NumFailLargeMaxMII++;
457  return;
458  }
459 
460  computeNodeFunctions(NodeSets);
461 
462  registerPressureFilter(NodeSets);
463 
464  colocateNodeSets(NodeSets);
465 
466  checkNodeSets(NodeSets);
467 
468  LLVM_DEBUG({
469  for (auto &I : NodeSets) {
470  dbgs() << " Rec NodeSet ";
471  I.dump();
472  }
473  });
474 
475  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
476 
477  groupRemainingNodes(NodeSets);
478 
479  removeDuplicateNodes(NodeSets);
480 
481  LLVM_DEBUG({
482  for (auto &I : NodeSets) {
483  dbgs() << " NodeSet ";
484  I.dump();
485  }
486  });
487 
488  computeNodeOrder(NodeSets);
489 
490  // check for node order issues
491  checkValidNodeOrder(Circuits);
492 
493  SMSchedule Schedule(Pass.MF);
494  Scheduled = schedulePipeline(Schedule);
495 
496  if (!Scheduled){
497  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
498  NumFailNoSchedule++;
499  return;
500  }
501 
502  unsigned numStages = Schedule.getMaxStageCount();
503  // No need to generate pipeline if there are no overlapped iterations.
504  if (numStages == 0) {
505  LLVM_DEBUG(
506  dbgs() << "No overlapped iterations, no need to generate pipeline\n");
507  NumFailZeroStage++;
508  return;
509  }
510  // Check that the maximum stage count is less than user-defined limit.
511  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
512  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
513  << " : too many stages, abort\n");
514  NumFailLargeMaxStage++;
515  return;
516  }
517 
518  generatePipelinedLoop(Schedule);
519  ++NumPipelined;
520 }
521 
522 /// Clean up after the software pipeliner runs.
523 void SwingSchedulerDAG::finishBlock() {
524  for (MachineInstr *I : NewMIs)
525  MF.DeleteMachineInstr(I);
526  NewMIs.clear();
527 
528  // Call the superclass.
529  ScheduleDAGInstrs::finishBlock();
530 }
531 
532 /// Return the register values for the operands of a Phi instruction.
533 /// This function assumes the instruction is a Phi.
534 static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
535  unsigned &InitVal, unsigned &LoopVal) {
536  assert(Phi.isPHI() && "Expecting a Phi.");
537 
538  InitVal = 0;
539  LoopVal = 0;
540  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
541  if (Phi.getOperand(i + 1).getMBB() != Loop)
542  InitVal = Phi.getOperand(i).getReg();
543  else
544  LoopVal = Phi.getOperand(i).getReg();
545 
546  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
547 }
548 
549 /// Return the Phi register value that comes from the incoming block.
550 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
551  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
552  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
553  return Phi.getOperand(i).getReg();
554  return 0;
555 }
556 
557 /// Return the Phi register value that comes from the loop block.
558 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
559  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
560  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
561  return Phi.getOperand(i).getReg();
562  return 0;
563 }
564 
565 /// Return true if SUb can be reached from SUa following the chain edges.
566 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
567  SmallPtrSet<SUnit *, 8> Visited;
568  SmallVector<SUnit *, 8> Worklist;
569  Worklist.push_back(SUa);
570  while (!Worklist.empty()) {
571  const SUnit *SU = Worklist.pop_back_val();
572  for (auto &SI : SU->Succs) {
573  SUnit *SuccSU = SI.getSUnit();
574  if (SI.getKind() == SDep::Order) {
575  if (Visited.count(SuccSU))
576  continue;
577  if (SuccSU == SUb)
578  return true;
579  Worklist.push_back(SuccSU);
580  Visited.insert(SuccSU);
581  }
582  }
583  }
584  return false;
585 }
586 
587 /// Return true if the instruction causes a chain between memory
588 /// references before and after it.
589 static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
590  return MI.isCall() || MI.mayRaiseFPException() ||
591  MI.hasUnmodeledSideEffects() ||
592  (MI.hasOrderedMemoryRef() &&
593  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
594 }
595 
596 /// Return the underlying objects for the memory references of an instruction.
597 /// This function calls the code in ValueTracking, but first checks that the
598 /// instruction has a memory operand.
599 static void getUnderlyingObjects(const MachineInstr *MI,
600  SmallVectorImpl<const Value *> &Objs,
601  const DataLayout &DL) {
602  if (!MI->hasOneMemOperand())
603  return;
604  MachineMemOperand *MM = *MI->memoperands_begin();
605  if (!MM->getValue())
606  return;
607  GetUnderlyingObjects(MM->getValue(), Objs, DL);
608  for (const Value *V : Objs) {
609  if (!isIdentifiedObject(V)) {
610  Objs.clear();
611  return;
612  }
613  Objs.push_back(V);
614  }
615 }
616 
617 /// Add a chain edge between a load and store if the store can be an
618 /// alias of the load on a subsequent iteration, i.e., a loop carried
619 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
620 /// but that code doesn't create loop carried dependences.
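/// Illustrative case (not from the source): given a loop body containing
///   %v = load %p ; ... ; store %q
/// where the store through %q in iteration i may alias the load through %p
/// in iteration i+1, a barrier edge with latency 1 is added between the load
/// and the store so the scheduler keeps the pair ordered when iterations
/// overlap.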
621 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
622  MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
623  Value *UnknownValue =
624  UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
625  for (auto &SU : SUnits) {
626  MachineInstr &MI = *SU.getInstr();
627  if (isDependenceBarrier(MI, AA))
628  PendingLoads.clear();
629  else if (MI.mayLoad()) {
630  SmallVector<const Value *, 4> Objs;
631  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
632  if (Objs.empty())
633  Objs.push_back(UnknownValue);
634  for (auto V : Objs) {
635  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
636  SUs.push_back(&SU);
637  }
638  } else if (MI.mayStore()) {
639  SmallVector<const Value *, 4> Objs;
640  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
641  if (Objs.empty())
642  Objs.push_back(UnknownValue);
643  for (auto V : Objs) {
644  MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
645  PendingLoads.find(V);
646  if (I == PendingLoads.end())
647  continue;
648  for (auto Load : I->second) {
649  if (isSuccOrder(Load, &SU))
650  continue;
651  MachineInstr &LdMI = *Load->getInstr();
652  // First, perform the cheaper check that compares the base register.
653  // If they are the same and the load offset is less than the store
654  // offset, then mark the dependence as potentially loop carried.
655  const MachineOperand *BaseOp1, *BaseOp2;
656  int64_t Offset1, Offset2;
657  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
658  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
659  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
660  (int)Offset1 < (int)Offset2) {
661  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
662  "What happened to the chain edge?");
663  SDep Dep(Load, SDep::Barrier);
664  Dep.setLatency(1);
665  SU.addPred(Dep);
666  continue;
667  }
668  }
669  // Second, the more expensive check that uses alias analysis on the
670  // base registers. If they alias, and the load offset is less than
671  // the store offset, then mark the dependence as loop carried.
672  if (!AA) {
673  SDep Dep(Load, SDep::Barrier);
674  Dep.setLatency(1);
675  SU.addPred(Dep);
676  continue;
677  }
678  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
679  MachineMemOperand *MMO2 = *MI.memoperands_begin();
680  if (!MMO1->getValue() || !MMO2->getValue()) {
681  SDep Dep(Load, SDep::Barrier);
682  Dep.setLatency(1);
683  SU.addPred(Dep);
684  continue;
685  }
686  if (MMO1->getValue() == MMO2->getValue() &&
687  MMO1->getOffset() <= MMO2->getOffset()) {
688  SDep Dep(Load, SDep::Barrier);
689  Dep.setLatency(1);
690  SU.addPred(Dep);
691  continue;
692  }
693  AliasResult AAResult = AA->alias(
694  MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
695  MMO1->getAAInfo()),
696  MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
697  MMO2->getAAInfo()));
698 
699  if (AAResult != NoAlias) {
700  SDep Dep(Load, SDep::Barrier);
701  Dep.setLatency(1);
702  SU.addPred(Dep);
703  }
704  }
705  }
706  }
707  }
708 }
709 
710 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
711 /// processes dependences for PHIs. This function adds true dependences
712 /// from a PHI to a use, and a loop carried dependence from the use to the
713 /// PHI. The loop carried dependence is represented as an anti dependence
714 /// edge. This function also removes chain dependences between unrelated
715 /// PHIs.
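/// Illustrative edges (register names are made up): for
///   %p = PHI [%init, %preheader], [%v, %loop]
///   %u = add %p, 1
///   %v = mul %u, 2
/// a true (data) dependence is added from the PHI defining %p to the add that
/// uses it, and the loop-carried dependence from the mul defining %v back to
/// the PHI is recorded as an anti edge.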
716 void SwingSchedulerDAG::updatePhiDependences() {
717  SmallVector<SDep, 4> RemoveDeps;
718  const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
719 
720  // Iterate over each DAG node.
721  for (SUnit &I : SUnits) {
722  RemoveDeps.clear();
723  // Set to true if the instruction has an operand defined by a Phi.
724  unsigned HasPhiUse = 0;
725  unsigned HasPhiDef = 0;
726  MachineInstr *MI = I.getInstr();
727  // Iterate over each operand, and we process the definitions.
728  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
729  MOE = MI->operands_end();
730  MOI != MOE; ++MOI) {
731  if (!MOI->isReg())
732  continue;
733  Register Reg = MOI->getReg();
734  if (MOI->isDef()) {
735  // If the register is used by a Phi, then create an anti dependence.
736  for (MachineRegisterInfo::use_instr_iterator
737  UI = MRI.use_instr_begin(Reg),
738  UE = MRI.use_instr_end();
739  UI != UE; ++UI) {
740  MachineInstr *UseMI = &*UI;
741  SUnit *SU = getSUnit(UseMI);
742  if (SU != nullptr && UseMI->isPHI()) {
743  if (!MI->isPHI()) {
744  SDep Dep(SU, SDep::Anti, Reg);
745  Dep.setLatency(1);
746  I.addPred(Dep);
747  } else {
748  HasPhiDef = Reg;
749  // Add a chain edge to a dependent Phi that isn't an existing
750  // predecessor.
751  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
752  I.addPred(SDep(SU, SDep::Barrier));
753  }
754  }
755  }
756  } else if (MOI->isUse()) {
757  // If the register is defined by a Phi, then create a true dependence.
758  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
759  if (DefMI == nullptr)
760  continue;
761  SUnit *SU = getSUnit(DefMI);
762  if (SU != nullptr && DefMI->isPHI()) {
763  if (!MI->isPHI()) {
764  SDep Dep(SU, SDep::Data, Reg);
765  Dep.setLatency(0);
766  ST.adjustSchedDependency(SU, &I, Dep);
767  I.addPred(Dep);
768  } else {
769  HasPhiUse = Reg;
770  // Add a chain edge to a dependent Phi that isn't an existing
771  // predecessor.
772  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
773  I.addPred(SDep(SU, SDep::Barrier));
774  }
775  }
776  }
777  }
778  // Remove order dependences from an unrelated Phi.
779  if (!SwpPruneDeps)
780  continue;
781  for (auto &PI : I.Preds) {
782  MachineInstr *PMI = PI.getSUnit()->getInstr();
783  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
784  if (I.getInstr()->isPHI()) {
785  if (PMI->getOperand(0).getReg() == HasPhiUse)
786  continue;
787  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
788  continue;
789  }
790  RemoveDeps.push_back(PI);
791  }
792  }
793  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
794  I.removePred(RemoveDeps[i]);
795  }
796 }
797 
798 /// Iterate over each DAG node and see if we can change any dependences
799 /// in order to reduce the recurrence MII.
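/// Illustrative pattern (not from the source): for a post-incremented access
///   %a1 = PHI [%a0, preheader], [%a2, loop]
///   %v  = load %a1
///   %a2 = add %a1, 8
/// the load may instead take its base from the previous iteration's %a2 with
/// an adjusted offset, removing its dependence on the PHI inside the current
/// iteration and thereby shortening the recurrence.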
800 void SwingSchedulerDAG::changeDependences() {
801  // See if an instruction can use a value from the previous iteration.
802  // If so, we update the base and offset of the instruction and change
803  // the dependences.
804  for (SUnit &I : SUnits) {
805  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
806  int64_t NewOffset = 0;
807  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
808  NewOffset))
809  continue;
810 
811  // Get the MI and SUnit for the instruction that defines the original base.
812  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
813  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
814  if (!DefMI)
815  continue;
816  SUnit *DefSU = getSUnit(DefMI);
817  if (!DefSU)
818  continue;
819  // Get the MI and SUnit for the instruction that defines the new base.
820  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
821  if (!LastMI)
822  continue;
823  SUnit *LastSU = getSUnit(LastMI);
824  if (!LastSU)
825  continue;
826 
827  if (Topo.IsReachable(&I, LastSU))
828  continue;
829 
830  // Remove the dependence. The value now depends on a prior iteration.
831  SmallVector<SDep, 4> Deps;
832  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
833  ++P)
834  if (P->getSUnit() == DefSU)
835  Deps.push_back(*P);
836  for (int i = 0, e = Deps.size(); i != e; i++) {
837  Topo.RemovePred(&I, Deps[i].getSUnit());
838  I.removePred(Deps[i]);
839  }
840  // Remove the chain dependence between the instructions.
841  Deps.clear();
842  for (auto &P : LastSU->Preds)
843  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
844  Deps.push_back(P);
845  for (int i = 0, e = Deps.size(); i != e; i++) {
846  Topo.RemovePred(LastSU, Deps[i].getSUnit());
847  LastSU->removePred(Deps[i]);
848  }
849 
850  // Add a dependence between the new instruction and the instruction
851  // that defines the new base.
852  SDep Dep(&I, SDep::Anti, NewBase);
853  Topo.AddPred(LastSU, &I);
854  LastSU->addPred(Dep);
855 
856  // Remember the base and offset information so that we can update the
857  // instruction during code generation.
858  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
859  }
860 }
861 
862 namespace {
863 
864 // FuncUnitSorter - Comparison operator used to sort instructions by
865 // the number of functional unit choices.
866 struct FuncUnitSorter {
867  const InstrItineraryData *InstrItins;
868  const MCSubtargetInfo *STI;
869  DenseMap<unsigned, unsigned> Resources;
870 
871  FuncUnitSorter(const TargetSubtargetInfo &TSI)
872  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
873 
874  // Compute the number of functional unit alternatives needed
875  // at each stage, and take the minimum value. We prioritize the
876  // instructions by the least number of choices first.
877  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
878  unsigned SchedClass = Inst->getDesc().getSchedClass();
879  unsigned min = UINT_MAX;
880  if (InstrItins && !InstrItins->isEmpty()) {
881  for (const InstrStage &IS :
882  make_range(InstrItins->beginStage(SchedClass),
883  InstrItins->endStage(SchedClass))) {
884  unsigned funcUnits = IS.getUnits();
885  unsigned numAlternatives = countPopulation(funcUnits);
886  if (numAlternatives < min) {
887  min = numAlternatives;
888  F = funcUnits;
889  }
890  }
891  return min;
892  }
893  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
894  const MCSchedClassDesc *SCDesc =
895  STI->getSchedModel().getSchedClassDesc(SchedClass);
896  if (!SCDesc->isValid())
897  // No valid Schedule Class Desc for schedClass, should be
898  // Pseudo/PostRAPseudo
899  return min;
900 
901  for (const MCWriteProcResEntry &PRE :
902  make_range(STI->getWriteProcResBegin(SCDesc),
903  STI->getWriteProcResEnd(SCDesc))) {
904  if (!PRE.Cycles)
905  continue;
906  const MCProcResourceDesc *ProcResource =
907  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
908  unsigned NumUnits = ProcResource->NumUnits;
909  if (NumUnits < min) {
910  min = NumUnits;
911  F = PRE.ProcResourceIdx;
912  }
913  }
914  return min;
915  }
916  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
917  }
918 
919  // Compute the critical resources needed by the instruction. This
920  // function records the functional units needed by instructions that
921  // must use only one functional unit. We use this as a tie breaker
922  // for computing the resource MII. The instructions that require
923  // the same, highly used, functional unit have high priority.
924  void calcCriticalResources(MachineInstr &MI) {
925  unsigned SchedClass = MI.getDesc().getSchedClass();
926  if (InstrItins && !InstrItins->isEmpty()) {
927  for (const InstrStage &IS :
928  make_range(InstrItins->beginStage(SchedClass),
929  InstrItins->endStage(SchedClass))) {
930  unsigned FuncUnits = IS.getUnits();
931  if (countPopulation(FuncUnits) == 1)
932  Resources[FuncUnits]++;
933  }
934  return;
935  }
936  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
937  const MCSchedClassDesc *SCDesc =
938  STI->getSchedModel().getSchedClassDesc(SchedClass);
939  if (!SCDesc->isValid())
940  // No valid Schedule Class Desc for schedClass, should be
941  // Pseudo/PostRAPseudo
942  return;
943 
944  for (const MCWriteProcResEntry &PRE :
945  make_range(STI->getWriteProcResBegin(SCDesc),
946  STI->getWriteProcResEnd(SCDesc))) {
947  if (!PRE.Cycles)
948  continue;
949  Resources[PRE.ProcResourceIdx]++;
950  }
951  return;
952  }
953  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
954  }
955 
956  /// Return true if IS1 has less priority than IS2.
957  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
958  unsigned F1 = 0, F2 = 0;
959  unsigned MFUs1 = minFuncUnits(IS1, F1);
960  unsigned MFUs2 = minFuncUnits(IS2, F2);
961  if (MFUs1 == MFUs2)
962  return Resources.lookup(F1) < Resources.lookup(F2);
963  return MFUs1 > MFUs2;
964  }
965 };
966 
967 } // end anonymous namespace
968 
969 /// Calculate the resource constrained minimum initiation interval for the
970 /// specified loop. We use the DFA to model the resources needed for
971 /// each instruction, and we ignore dependences. A different DFA is created
972 /// for each cycle that is required. When adding a new instruction, we attempt
973 /// to add it to each existing DFA, until a legal space is found. If the
974 /// instruction cannot be reserved in an existing DFA, we create a new one.
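/// Rough intuition (illustrative only): if six instructions in the loop body
/// all require the same single functional unit, six reservation tables (one
/// per cycle) are needed to place them, so ResMII is at least 6 regardless of
/// the dependence structure.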
975 unsigned SwingSchedulerDAG::calculateResMII() {
976 
977  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
978  SmallVector<ResourceManager*, 8> Resources;
979  MachineBasicBlock *MBB = Loop.getHeader();
980  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
981 
982  // Sort the instructions by the number of available choices for scheduling,
983  // least to most. Use the number of critical resources as the tie breaker.
984  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
985  for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
986  E = MBB->getFirstTerminator();
987  I != E; ++I)
988  FUS.calcCriticalResources(*I);
989  PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
990  FuncUnitOrder(FUS);
991 
992  for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
993  E = MBB->getFirstTerminator();
994  I != E; ++I)
995  FuncUnitOrder.push(&*I);
996 
997  while (!FuncUnitOrder.empty()) {
998  MachineInstr *MI = FuncUnitOrder.top();
999  FuncUnitOrder.pop();
1000  if (TII->isZeroCost(MI->getOpcode()))
1001  continue;
1002  // Attempt to reserve the instruction in an existing DFA. At least one
1003  // DFA is needed for each cycle.
1004  unsigned NumCycles = getSUnit(MI)->Latency;
1005  unsigned ReservedCycles = 0;
1006  SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
1007  SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
1008  LLVM_DEBUG({
1009  dbgs() << "Trying to reserve resource for " << NumCycles
1010  << " cycles for \n";
1011  MI->dump();
1012  });
1013  for (unsigned C = 0; C < NumCycles; ++C)
1014  while (RI != RE) {
1015  if ((*RI)->canReserveResources(*MI)) {
1016  (*RI)->reserveResources(*MI);
1017  ++ReservedCycles;
1018  break;
1019  }
1020  RI++;
1021  }
1022  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1023  << ", NumCycles:" << NumCycles << "\n");
1024  // Add new DFAs, if needed, to reserve resources.
1025  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1027  << "NewResource created to reserve resources"
1028  << "\n");
1029  ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1030  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1031  NewResource->reserveResources(*MI);
1032  Resources.push_back(NewResource);
1033  }
1034  }
1035  int Resmii = Resources.size();
1036  LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
1037  // Delete the memory for each of the DFAs that were created earlier.
1038  for (ResourceManager *RI : Resources) {
1039  ResourceManager *D = RI;
1040  delete D;
1041  }
1042  Resources.clear();
1043  return Resmii;
1044 }
1045 
1046 /// Calculate the recurrence-constrained minimum initiation interval.
1047 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1048 /// for each circuit. The II needs to satisfy the inequality
1049 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1050 /// II that satisfies the inequality, and the RecMII is the maximum
1051 /// of those values.
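/// Worked example (illustrative): a recurrence whose edges sum to
/// delay(c) = 3 and that spans distance(c) = 1 iteration requires
/// II >= ceil(3 / 1) = 3, so that circuit alone forces RecMII >= 3.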
1052 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1053  unsigned RecMII = 0;
1054 
1055  for (NodeSet &Nodes : NodeSets) {
1056  if (Nodes.empty())
1057  continue;
1058 
1059  unsigned Delay = Nodes.getLatency();
1060  unsigned Distance = 1;
1061 
1062  // ii = ceil(delay / distance)
1063  unsigned CurMII = (Delay + Distance - 1) / Distance;
1064  Nodes.setRecMII(CurMII);
1065  if (CurMII > RecMII)
1066  RecMII = CurMII;
1067  }
1068 
1069  return RecMII;
1070 }
1071 
1072 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1073 /// but we do this to find the circuits, and then change them back.
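/// For example (illustrative), a loop-carried recurrence between a Phi and
/// the add that feeds it only becomes a closed cycle that the circuit search
/// can enumerate once the anti edge is temporarily reversed; the edges are
/// swapped back afterwards to restore the DAG.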
1074 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1075  SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
1076  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1077  SUnit *SU = &SUnits[i];
1078  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1079  IP != EP; ++IP) {
1080  if (IP->getKind() != SDep::Anti)
1081  continue;
1082  DepsAdded.push_back(std::make_pair(SU, *IP));
1083  }
1084  }
1085  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1086  E = DepsAdded.end();
1087  I != E; ++I) {
1088  // Remove this anti dependency and add one in the reverse direction.
1089  SUnit *SU = I->first;
1090  SDep &D = I->second;
1091  SUnit *TargetSU = D.getSUnit();
1092  unsigned Reg = D.getReg();
1093  unsigned Lat = D.getLatency();
1094  SU->removePred(D);
1095  SDep Dep(SU, SDep::Anti, Reg);
1096  Dep.setLatency(Lat);
1097  TargetSU->addPred(Dep);
1098  }
1099 }
1100 
1101 /// Create the adjacency structure of the nodes in the graph.
1102 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1103  SwingSchedulerDAG *DAG) {
1104  BitVector Added(SUnits.size());
1105  DenseMap<int, int> OutputDeps;
1106  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1107  Added.reset();
1108  // Add any successor to the adjacency matrix and exclude duplicates.
1109  for (auto &SI : SUnits[i].Succs) {
1110  // Only create a back-edge on the first and last nodes of a dependence
1111  // chain. This records any chains and adds them later.
1112  if (SI.getKind() == SDep::Output) {
1113  int N = SI.getSUnit()->NodeNum;
1114  int BackEdge = i;
1115  auto Dep = OutputDeps.find(BackEdge);
1116  if (Dep != OutputDeps.end()) {
1117  BackEdge = Dep->second;
1118  OutputDeps.erase(Dep);
1119  }
1120  OutputDeps[N] = BackEdge;
1121  }
1122  // Do not process a boundary node, an artificial node.
1123  // A back-edge is processed only if it goes to a Phi.
1124  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1125  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1126  continue;
1127  int N = SI.getSUnit()->NodeNum;
1128  if (!Added.test(N)) {
1129  AdjK[i].push_back(N);
1130  Added.set(N);
1131  }
1132  }
1133  // A chain edge between a store and a load is treated as a back-edge in the
1134  // adjacency matrix.
1135  for (auto &PI : SUnits[i].Preds) {
1136  if (!SUnits[i].getInstr()->mayStore() ||
1137  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1138  continue;
1139  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1140  int N = PI.getSUnit()->NodeNum;
1141  if (!Added.test(N)) {
1142  AdjK[i].push_back(N);
1143  Added.set(N);
1144  }
1145  }
1146  }
1147  }
1148  // Add back-edges in the adjacency matrix for the output dependences.
1149  for (auto &OD : OutputDeps)
1150  if (!Added.test(OD.second)) {
1151  AdjK[OD.first].push_back(OD.second);
1152  Added.set(OD.second);
1153  }
1154 }
1155 
1156 /// Identify an elementary circuit in the dependence graph starting at the
1157 /// specified node.
1158 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1159  bool HasBackedge) {
1160  SUnit *SV = &SUnits[V];
1161  bool F = false;
1162  Stack.insert(SV);
1163  Blocked.set(V);
1164 
1165  for (auto W : AdjK[V]) {
1166  if (NumPaths > MaxPaths)
1167  break;
1168  if (W < S)
1169  continue;
1170  if (W == S) {
1171  if (!HasBackedge)
1172  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1173  F = true;
1174  ++NumPaths;
1175  break;
1176  } else if (!Blocked.test(W)) {
1177  if (circuit(W, S, NodeSets,
1178  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1179  F = true;
1180  }
1181  }
1182 
1183  if (F)
1184  unblock(V);
1185  else {
1186  for (auto W : AdjK[V]) {
1187  if (W < S)
1188  continue;
1189  if (B[W].count(SV) == 0)
1190  B[W].insert(SV);
1191  }
1192  }
1193  Stack.pop_back();
1194  return F;
1195 }
1196 
1197 /// Unblock a node in the circuit finding algorithm.
1198 void SwingSchedulerDAG::Circuits::unblock(int U) {
1199  Blocked.reset(U);
1200  SmallPtrSet<SUnit *, 4> &BU = B[U];
1201  while (!BU.empty()) {
1202  SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
1203  assert(SI != BU.end() && "Invalid B set.");
1204  SUnit *W = *SI;
1205  BU.erase(W);
1206  if (Blocked.test(W->NodeNum))
1207  unblock(W->NodeNum);
1208  }
1209 }
1210 
1211 /// Identify all the elementary circuits in the dependence graph using
1212 /// Johnson's circuit algorithm.
1213 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1214  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1215  // but we do this to find the circuits, and then change them back.
1216  swapAntiDependences(SUnits);
1217 
1218  Circuits Cir(SUnits, Topo);
1219  // Create the adjacency structure.
1220  Cir.createAdjacencyStructure(this);
1221  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1222  Cir.reset();
1223  Cir.circuit(i, i, NodeSets);
1224  }
1225 
1226  // Change the dependences back so that we've created a DAG again.
1227  swapAntiDependences(SUnits);
1228 }
1229 
1230 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1231 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1232 // additional copies that are needed across iterations. An artificial dependence
1233 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1234 
1235 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1236 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1237 // PHI-------True-Dep------> USEOfPhi
1238 
1239 // The mutation creates
1240 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1241 
1242 // Overall this ensures that USEOfPHI is scheduled before SRCOfCopy (since
1243 // USE is a predecessor), which implies that the COPY/REG_SEQUENCE is
1244 // scheduled late, avoiding additional copies across iterations. The possible scheduling
1245 // order would be
1246 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1247 
1248 void CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
1249  for (SUnit &SU : DAG->SUnits) {
1250  // Find the COPY/REG_SEQUENCE instruction.
1251  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1252  continue;
1253 
1254  // Record the loop carried PHIs.
1255  SmallVector<SUnit *, 4> PHISUs;
1256  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1257  SmallVector<SUnit *, 4> SrcSUs;
1258 
1259  for (auto &Dep : SU.Preds) {
1260  SUnit *TmpSU = Dep.getSUnit();
1261  MachineInstr *TmpMI = TmpSU->getInstr();
1262  SDep::Kind DepKind = Dep.getKind();
1263  // Save the loop carried PHI.
1264  if (DepKind == SDep::Anti && TmpMI->isPHI())
1265  PHISUs.push_back(TmpSU);
1266  // Save the source of COPY/REG_SEQUENCE.
1267  // If the source has no predecessors, we will end up creating cycles.
1268  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1269  SrcSUs.push_back(TmpSU);
1270  }
1271 
1272  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1273  continue;
1274 
1275  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1276  // SUnit to the container.
1277  SmallVector<SUnit *, 8> UseSUs;
1278  for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
1279  for (auto &Dep : (*I)->Succs) {
1280  if (Dep.getKind() != SDep::Data)
1281  continue;
1282 
1283  SUnit *TmpSU = Dep.getSUnit();
1284  MachineInstr *TmpMI = TmpSU->getInstr();
1285  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1286  PHISUs.push_back(TmpSU);
1287  continue;
1288  }
1289  UseSUs.push_back(TmpSU);
1290  }
1291  }
1292 
1293  if (UseSUs.size() == 0)
1294  continue;
1295 
1296  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1297  // Add the artificial dependencies if it does not form a cycle.
1298  for (auto I : UseSUs) {
1299  for (auto Src : SrcSUs) {
1300  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1301  Src->addPred(SDep(I, SDep::Artificial));
1302  SDAG->Topo.AddPred(Src, I);
1303  }
1304  }
1305  }
1306  }
1307 }
1308 
1309 /// Return true for DAG nodes that we ignore when computing the cost functions.
1310 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1311 /// in the calculation of the ASAP, ALAP, etc functions.
1312 static bool ignoreDependence(const SDep &D, bool isPred) {
1313  if (D.isArtificial())
1314  return true;
1315  return D.getKind() == SDep::Anti && isPred;
1316 }
1317 
1318 /// Compute several functions needed to order the nodes for scheduling.
1319 /// ASAP - Earliest time to schedule a node.
1320 /// ALAP - Latest time to schedule a node.
1321 /// MOV - Mobility function, difference between ALAP and ASAP.
1322 /// D - Depth of each node.
1323 /// H - Height of each node.
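/// Small illustration (not from the source): for a chain A -> B -> C with
/// unit latencies and no recurrences, ASAP = 0, 1, 2 and, anchoring ALAP at
/// maxASAP = 2, ALAP = 0, 1, 2 as well, so MOV = ALAP - ASAP = 0 for every
/// node, i.e. none of them has scheduling slack.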
1324 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1325  ScheduleInfo.resize(SUnits.size());
1326 
1327  LLVM_DEBUG({
1328  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1329  E = Topo.end();
1330  I != E; ++I) {
1331  const SUnit &SU = SUnits[*I];
1332  dumpNode(SU);
1333  }
1334  });
1335 
1336  int maxASAP = 0;
1337  // Compute ASAP and ZeroLatencyDepth.
1338  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1339  E = Topo.end();
1340  I != E; ++I) {
1341  int asap = 0;
1342  int zeroLatencyDepth = 0;
1343  SUnit *SU = &SUnits[*I];
1344  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1345  EP = SU->Preds.end();
1346  IP != EP; ++IP) {
1347  SUnit *pred = IP->getSUnit();
1348  if (IP->getLatency() == 0)
1349  zeroLatencyDepth =
1350  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1351  if (ignoreDependence(*IP, true))
1352  continue;
1353  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1354  getDistance(pred, SU, *IP) * MII));
1355  }
1356  maxASAP = std::max(maxASAP, asap);
1357  ScheduleInfo[*I].ASAP = asap;
1358  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1359  }
1360 
1361  // Compute ALAP, ZeroLatencyHeight, and MOV.
1362  for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
1363  E = Topo.rend();
1364  I != E; ++I) {
1365  int alap = maxASAP;
1366  int zeroLatencyHeight = 0;
1367  SUnit *SU = &SUnits[*I];
1368  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1369  ES = SU->Succs.end();
1370  IS != ES; ++IS) {
1371  SUnit *succ = IS->getSUnit();
1372  if (IS->getLatency() == 0)
1373  zeroLatencyHeight =
1374  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1375  if (ignoreDependence(*IS, true))
1376  continue;
1377  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1378  getDistance(SU, succ, *IS) * MII));
1379  }
1380 
1381  ScheduleInfo[*I].ALAP = alap;
1382  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1383  }
1384 
1385  // After computing the node functions, compute the summary for each node set.
1386  for (NodeSet &I : NodeSets)
1387  I.computeNodeSetInfo(this);
1388 
1389  LLVM_DEBUG({
1390  for (unsigned i = 0; i < SUnits.size(); i++) {
1391  dbgs() << "\tNode " << i << ":\n";
1392  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1393  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1394  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1395  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1396  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1397  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1398  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1399  }
1400  });
1401 }
1402 
1403 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1404 /// as the predecessors of the elements of NodeOrder that are not also in
1405 /// NodeOrder.
1406 static bool pred_L(SetVector<SUnit *> &NodeOrder,
1407  SmallSetVector<SUnit *, 8> &Preds,
1408  const NodeSet *S = nullptr) {
1409  Preds.clear();
1410  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1411  I != E; ++I) {
1412  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1413  PI != PE; ++PI) {
1414  if (S && S->count(PI->getSUnit()) == 0)
1415  continue;
1416  if (ignoreDependence(*PI, true))
1417  continue;
1418  if (NodeOrder.count(PI->getSUnit()) == 0)
1419  Preds.insert(PI->getSUnit());
1420  }
1421  // Back-edges are predecessors with an anti-dependence.
1422  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1423  ES = (*I)->Succs.end();
1424  IS != ES; ++IS) {
1425  if (IS->getKind() != SDep::Anti)
1426  continue;
1427  if (S && S->count(IS->getSUnit()) == 0)
1428  continue;
1429  if (NodeOrder.count(IS->getSUnit()) == 0)
1430  Preds.insert(IS->getSUnit());
1431  }
1432  }
1433  return !Preds.empty();
1434 }
1435 
1436 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1437 /// as the successors of the elements of NodeOrder that are not also in
1438 /// NodeOrder.
1439 static bool succ_L(SetVector<SUnit *> &NodeOrder,
1440  SmallSetVector<SUnit *, 8> &Succs,
1441  const NodeSet *S = nullptr) {
1442  Succs.clear();
1443  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1444  I != E; ++I) {
1445  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1446  SI != SE; ++SI) {
1447  if (S && S->count(SI->getSUnit()) == 0)
1448  continue;
1449  if (ignoreDependence(*SI, false))
1450  continue;
1451  if (NodeOrder.count(SI->getSUnit()) == 0)
1452  Succs.insert(SI->getSUnit());
1453  }
1454  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1455  PE = (*I)->Preds.end();
1456  PI != PE; ++PI) {
1457  if (PI->getKind() != SDep::Anti)
1458  continue;
1459  if (S && S->count(PI->getSUnit()) == 0)
1460  continue;
1461  if (NodeOrder.count(PI->getSUnit()) == 0)
1462  Succs.insert(PI->getSUnit());
1463  }
1464  }
1465  return !Succs.empty();
1466 }
1467 
1468 /// Return true if there is a path from the specified node to any of the nodes
1469 /// in DestNodes. Keep track and return the nodes in any path.
1470 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1471  SetVector<SUnit *> &DestNodes,
1472  SetVector<SUnit *> &Exclude,
1473  SmallPtrSet<SUnit *, 8> &Visited) {
1474  if (Cur->isBoundaryNode())
1475  return false;
1476  if (Exclude.count(Cur) != 0)
1477  return false;
1478  if (DestNodes.count(Cur) != 0)
1479  return true;
1480  if (!Visited.insert(Cur).second)
1481  return Path.count(Cur) != 0;
1482  bool FoundPath = false;
1483  for (auto &SI : Cur->Succs)
1484  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1485  for (auto &PI : Cur->Preds)
1486  if (PI.getKind() == SDep::Anti)
1487  FoundPath |=
1488  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1489  if (FoundPath)
1490  Path.insert(Cur);
1491  return FoundPath;
1492 }
1493 
1494 /// Return true if Set1 is a subset of Set2.
1495 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1496  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1497  if (Set2.count(*I) == 0)
1498  return false;
1499  return true;
1500 }
1501 
1502 /// Compute the live-out registers for the instructions in a node-set.
1503 /// The live-out registers are those that are defined in the node-set,
1504 /// but not used. Except for use operands of Phis.
1505 static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
1506  NodeSet &NS) {
1507  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1508  MachineRegisterInfo &MRI = MF.getRegInfo();
1509  SmallVector<RegisterMaskPair, 8> LiveOutRegs;
1510  SmallSet<unsigned, 4> Uses;
1511  for (SUnit *SU : NS) {
1512  const MachineInstr *MI = SU->getInstr();
1513  if (MI->isPHI())
1514  continue;
1515  for (const MachineOperand &MO : MI->operands())
1516  if (MO.isReg() && MO.isUse()) {
1517  Register Reg = MO.getReg();
1518  if (Register::isVirtualRegister(Reg))
1519  Uses.insert(Reg);
1520  else if (MRI.isAllocatable(Reg))
1521  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1522  Uses.insert(*Units);
1523  }
1524  }
1525  for (SUnit *SU : NS)
1526  for (const MachineOperand &MO : SU->getInstr()->operands())
1527  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1528  Register Reg = MO.getReg();
1529  if (Register::isVirtualRegister(Reg)) {
1530  if (!Uses.count(Reg))
1531  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1532  LaneBitmask::getNone()));
1533  } else if (MRI.isAllocatable(Reg)) {
1534  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1535  if (!Uses.count(*Units))
1536  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1537  LaneBitmask::getNone()));
1538  }
1539  }
1540  RPTracker.addLiveRegs(LiveOutRegs);
1541 }
1542 
1543 /// A heuristic to filter nodes in recurrent node-sets if the register
1544 /// pressure of a set is too high.
1545 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1546  for (auto &NS : NodeSets) {
1547  // Skip small node-sets since they won't cause register pressure problems.
1548  if (NS.size() <= 2)
1549  continue;
1550  IntervalPressure RecRegPressure;
1551  RegPressureTracker RecRPTracker(RecRegPressure);
1552  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1553  computeLiveOuts(MF, RecRPTracker, NS);
1554  RecRPTracker.closeBottom();
1555 
1556  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1557  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1558  return A->NodeNum > B->NodeNum;
1559  });
1560 
1561  for (auto &SU : SUnits) {
1562  // Since we're computing the register pressure for a subset of the
1563  // instructions in a block, we need to set the tracker for each
1564  // instruction in the node-set. The tracker is set to the instruction
1565  // just after the one we're interested in.
1566  MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
1567  RecRPTracker.setPos(std::next(CurInstI));
1568 
1569  RegPressureDelta RPDelta;
1570  ArrayRef<PressureChange> CriticalPSets;
1571  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1572  CriticalPSets,
1573  RecRegPressure.MaxSetPressure);
1574  if (RPDelta.Excess.isValid()) {
1575  LLVM_DEBUG(
1576  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1577  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1578  << ":" << RPDelta.Excess.getUnitInc());
1579  NS.setExceedPressure(SU);
1580  break;
1581  }
1582  RecRPTracker.recede();
1583  }
1584  }
1585 }
1586 
1587 /// A heuristic to colocate node sets that have the same set of
1588 /// successors.
1589 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1590  unsigned Colocate = 0;
1591  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1592  NodeSet &N1 = NodeSets[i];
1593  SmallSetVector<SUnit *, 8> S1;
1594  if (N1.empty() || !succ_L(N1, S1))
1595  continue;
1596  for (int j = i + 1; j < e; ++j) {
1597  NodeSet &N2 = NodeSets[j];
1598  if (N1.compareRecMII(N2) != 0)
1599  continue;
1600  SmallSetVector<SUnit *, 8> S2;
1601  if (N2.empty() || !succ_L(N2, S2))
1602  continue;
1603  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1604  N1.setColocate(++Colocate);
1605  N2.setColocate(Colocate);
1606  break;
1607  }
1608  }
1609  }
1610 }
1611 
1612 /// Check if the existing node-sets are profitable. If not, then ignore the
1613 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1614 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1615 /// then it's best to try to schedule all instructions together instead of
1616 /// starting with the recurrent node-sets.
1617 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1618  // Look for loops with a large MII.
1619  if (MII < 17)
1620  return;
1621  // Check if the node-set contains only a simple add recurrence.
1622  for (auto &NS : NodeSets) {
1623  if (NS.getRecMII() > 2)
1624  return;
1625  if (NS.getMaxDepth() > MII)
1626  return;
1627  }
1628  NodeSets.clear();
1629  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1630  return;
1631 }
1632 
1633 /// Add the nodes that do not belong to a recurrence set into groups
1634 /// based upon connected components.
1635 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1636  SetVector<SUnit *> NodesAdded;
1637  SmallPtrSet<SUnit *, 8> Visited;
1638  // Add the nodes that are on a path between the previous node sets and
1639  // the current node set.
1640  for (NodeSet &I : NodeSets) {
1641  SmallSetVector<SUnit *, 8> N;
1642  // Add the nodes from the current node set to the previous node set.
1643  if (succ_L(I, N)) {
1644  SetVector<SUnit *> Path;
1645  for (SUnit *NI : N) {
1646  Visited.clear();
1647  computePath(NI, Path, NodesAdded, I, Visited);
1648  }
1649  if (!Path.empty())
1650  I.insert(Path.begin(), Path.end());
1651  }
1652  // Add the nodes from the previous node set to the current node set.
1653  N.clear();
1654  if (succ_L(NodesAdded, N)) {
1655  SetVector<SUnit *> Path;
1656  for (SUnit *NI : N) {
1657  Visited.clear();
1658  computePath(NI, Path, I, NodesAdded, Visited);
1659  }
1660  if (!Path.empty())
1661  I.insert(Path.begin(), Path.end());
1662  }
1663  NodesAdded.insert(I.begin(), I.end());
1664  }
1665 
1666  // Create a new node set with the connected nodes of any successor of a node
1667  // in a recurrent set.
1668  NodeSet NewSet;
1669  SmallSetVector<SUnit *, 8> N;
1670  if (succ_L(NodesAdded, N))
1671  for (SUnit *I : N)
1672  addConnectedNodes(I, NewSet, NodesAdded);
1673  if (!NewSet.empty())
1674  NodeSets.push_back(NewSet);
1675 
1676  // Create a new node set with the connected nodes of any predecessor of a node
1677  // in a recurrent set.
1678  NewSet.clear();
1679  if (pred_L(NodesAdded, N))
1680  for (SUnit *I : N)
1681  addConnectedNodes(I, NewSet, NodesAdded);
1682  if (!NewSet.empty())
1683  NodeSets.push_back(NewSet);
1684 
1685  // Create new node sets with the connected nodes of any remaining node
1686  // that has no predecessor.
1687  for (unsigned i = 0; i < SUnits.size(); ++i) {
1688  SUnit *SU = &SUnits[i];
1689  if (NodesAdded.count(SU) == 0) {
1690  NewSet.clear();
1691  addConnectedNodes(SU, NewSet, NodesAdded);
1692  if (!NewSet.empty())
1693  NodeSets.push_back(NewSet);
1694  }
1695  }
1696 }
1697 
1698 /// Add the node to the set, and add all of its connected nodes to the set.
1699 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1700  SetVector<SUnit *> &NodesAdded) {
1701  NewSet.insert(SU);
1702  NodesAdded.insert(SU);
1703  for (auto &SI : SU->Succs) {
1704  SUnit *Successor = SI.getSUnit();
1705  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1706  addConnectedNodes(Successor, NewSet, NodesAdded);
1707  }
1708  for (auto &PI : SU->Preds) {
1709  SUnit *Predecessor = PI.getSUnit();
1710  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1711  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1712  }
1713 }
1714 
1715 /// Return true if Set1 contains elements in Set2. The elements in common
1716 /// are returned in a different container.
1717 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1718  SmallSetVector<SUnit *, 8> &Result) {
1719  Result.clear();
1720  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1721  SUnit *SU = Set1[i];
1722  if (Set2.count(SU) != 0)
1723  Result.insert(SU);
1724  }
1725  return !Result.empty();
1726 }
1727 
1728 /// Merge the recurrence node sets that have the same initial node.
1729 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1730  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1731  ++I) {
1732  NodeSet &NI = *I;
1733  for (NodeSetType::iterator J = I + 1; J != E;) {
1734  NodeSet &NJ = *J;
1735  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1736  if (NJ.compareRecMII(NI) > 0)
1737  NI.setRecMII(NJ.getRecMII());
1738  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1739  ++NII)
1740  I->insert(*NII);
1741  NodeSets.erase(J);
1742  E = NodeSets.end();
1743  } else {
1744  ++J;
1745  }
1746  }
1747  }
1748 }
1749 
1750 /// Remove nodes that have been scheduled in previous NodeSets.
1751 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1752  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1753  ++I)
1754  for (NodeSetType::iterator J = I + 1; J != E;) {
1755  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1756 
1757  if (J->empty()) {
1758  NodeSets.erase(J);
1759  E = NodeSets.end();
1760  } else {
1761  ++J;
1762  }
1763  }
1764 }
1765 
1766 /// Compute an ordered list of the dependence graph nodes, which
1767 /// indicates the order that the nodes will be scheduled. This is a
1768 /// two-level algorithm. First, a partial order is created, which
1769 /// consists of a list of sets ordered from highest to lowest priority.
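 /// Second, the nodes within each set are appended to the final order,
 /// traversing either top-down by maximum height or bottom-up by maximum
 /// depth, and switching direction whenever the ready set is exhausted.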
1770 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1771  SmallSetVector<SUnit *, 8> R;
1772  NodeOrder.clear();
1773 
1774  for (auto &Nodes : NodeSets) {
1775  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1776  OrderKind Order;
1777  SmallSetVector<SUnit *, 8> N;
1778  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
1779  R.insert(N.begin(), N.end());
1780  Order = BottomUp;
1781  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1782  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
1783  R.insert(N.begin(), N.end());
1784  Order = TopDown;
1785  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1786  } else if (isIntersect(N, Nodes, R)) {
1787  // If some of the successors are in the existing node-set, then use the
1788  // top-down ordering.
1789  Order = TopDown;
1790  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1791  } else if (NodeSets.size() == 1) {
1792  for (auto &N : Nodes)
1793  if (N->Succs.size() == 0)
1794  R.insert(N);
1795  Order = BottomUp;
1796  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1797  } else {
1798  // Find the node with the highest ASAP.
1799  SUnit *maxASAP = nullptr;
1800  for (SUnit *SU : Nodes) {
1801  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1802  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1803  maxASAP = SU;
1804  }
1805  R.insert(maxASAP);
1806  Order = BottomUp;
1807  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1808  }
1809 
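 // The direction alternates while draining R: nodes are taken from R and
 // appended to NodeOrder either top-down (maximum height first) or bottom-up
 // (maximum depth first); when R runs dry, the direction flips and R is
 // reseeded from the neighbors of the already-ordered nodes that belong to
 // the current node-set, until the entire node-set has been placed.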
1810  while (!R.empty()) {
1811  if (Order == TopDown) {
1812  // Choose the node with the maximum height. If more than one, choose
1813  // the node with the maximum ZeroLatencyHeight. If still more than one,
1814  // choose the node with the lowest MOV.
1815  while (!R.empty()) {
1816  SUnit *maxHeight = nullptr;
1817  for (SUnit *I : R) {
1818  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1819  maxHeight = I;
1820  else if (getHeight(I) == getHeight(maxHeight) &&
1821  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
1822  maxHeight = I;
1823  else if (getHeight(I) == getHeight(maxHeight) &&
1824  getZeroLatencyHeight(I) ==
1825  getZeroLatencyHeight(maxHeight) &&
1826  getMOV(I) < getMOV(maxHeight))
1827  maxHeight = I;
1828  }
1829  NodeOrder.insert(maxHeight);
1830  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1831  R.remove(maxHeight);
1832  for (const auto &I : maxHeight->Succs) {
1833  if (Nodes.count(I.getSUnit()) == 0)
1834  continue;
1835  if (NodeOrder.count(I.getSUnit()) != 0)
1836  continue;
1837  if (ignoreDependence(I, false))
1838  continue;
1839  R.insert(I.getSUnit());
1840  }
1841  // Back-edges are predecessors with an anti-dependence.
1842  for (const auto &I : maxHeight->Preds) {
1843  if (I.getKind() != SDep::Anti)
1844  continue;
1845  if (Nodes.count(I.getSUnit()) == 0)
1846  continue;
1847  if (NodeOrder.count(I.getSUnit()) != 0)
1848  continue;
1849  R.insert(I.getSUnit());
1850  }
1851  }
1852  Order = BottomUp;
1853  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1854  SmallSetVector<SUnit *, 8> N;
1855  if (pred_L(NodeOrder, N, &Nodes))
1856  R.insert(N.begin(), N.end());
1857  } else {
1858  // Choose the node with the maximum depth. If more than one, choose
1859  // the node with the maximum ZeroLatencyDepth. If still more than one,
1860  // choose the node with the lowest MOV.
1861  while (!R.empty()) {
1862  SUnit *maxDepth = nullptr;
1863  for (SUnit *I : R) {
1864  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1865  maxDepth = I;
1866  else if (getDepth(I) == getDepth(maxDepth) &&
1867  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
1868  maxDepth = I;
1869  else if (getDepth(I) == getDepth(maxDepth) &&
1870  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1871  getMOV(I) < getMOV(maxDepth))
1872  maxDepth = I;
1873  }
1874  NodeOrder.insert(maxDepth);
1875  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1876  R.remove(maxDepth);
1877  if (Nodes.isExceedSU(maxDepth)) {
1878  Order = TopDown;
1879  R.clear();
1880  R.insert(Nodes.getNode(0));
1881  break;
1882  }
1883  for (const auto &I : maxDepth->Preds) {
1884  if (Nodes.count(I.getSUnit()) == 0)
1885  continue;
1886  if (NodeOrder.count(I.getSUnit()) != 0)
1887  continue;
1888  R.insert(I.getSUnit());
1889  }
1890  // Back-edges are predecessors with an anti-dependence.
1891  for (const auto &I : maxDepth->Succs) {
1892  if (I.getKind() != SDep::Anti)
1893  continue;
1894  if (Nodes.count(I.getSUnit()) == 0)
1895  continue;
1896  if (NodeOrder.count(I.getSUnit()) != 0)
1897  continue;
1898  R.insert(I.getSUnit());
1899  }
1900  }
1901  Order = TopDown;
1902  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1903  SmallSetVector<SUnit *, 8> N;
1904  if (succ_L(NodeOrder, N, &Nodes))
1905  R.insert(N.begin(), N.end());
1906  }
1907  }
1908  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1909  }
1910 
1911  LLVM_DEBUG({
1912  dbgs() << "Node order: ";
1913  for (SUnit *I : NodeOrder)
1914  dbgs() << " " << I->NodeNum << " ";
1915  dbgs() << "\n";
1916  });
1917 }
1918 
1919 /// Process the nodes in the computed order and create the pipelined schedule
1920 /// of the instructions, if possible. Return true if a schedule is found.
1921 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
1922 
1923  if (NodeOrder.empty()){
1924  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
1925  return false;
1926  }
1927 
1928  bool scheduleFound = false;
1929  unsigned II = 0;
1930  // Keep increasing II until a valid schedule is found.
1931  for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
1932  Schedule.reset();
1933  Schedule.setInitiationInterval(II);
1934  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
1935 
1936  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
1937  SetVector<SUnit *>::iterator NE = NodeOrder.end();
1938  do {
1939  SUnit *SU = *NI;
1940 
1941  // Compute the schedule time for the instruction, which is based
1942  // upon the scheduled time for any predecessors/successors.
1943  int EarlyStart = INT_MIN;
1944  int LateStart = INT_MAX;
1945  // These values are set when the size of the schedule window is limited
1946  // due to chain dependences.
1947  int SchedEnd = INT_MAX;
1948  int SchedStart = INT_MIN;
1949  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1950  II, this);
1951  LLVM_DEBUG({
1952  dbgs() << "\n";
1953  dbgs() << "Inst (" << SU->NodeNum << ") ";
1954  SU->getInstr()->dump();
1955  dbgs() << "\n";
1956  });
1957  LLVM_DEBUG({
1958  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
1959  LateStart, SchedEnd, SchedStart);
1960  });
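 // Four cases, depending on which bounds are known: only an early start
 // (scan forward from EarlyStart), only a late start (scan backward from
 // LateStart), both (a window bounded on both sides, scanned backward for
 // Phis), or neither (fall back to the ASAP cycle relative to the first
 // cycle). In every case the window is at most II cycles wide.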
1961 
1962  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
1963  SchedStart > LateStart)
1964  scheduleFound = false;
1965  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
1966  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
1967  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1968  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
1969  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
1970  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
1971  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
1972  SchedEnd =
1973  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
1974  // When scheduling a Phi it is better to start at the late cycle and go
1975  // backwards. The default order may insert the Phi too far away from
1976  // its first dependence.
1977  if (SU->getInstr()->isPHI())
1978  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
1979  else
1980  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1981  } else {
1982  int FirstCycle = Schedule.getFirstCycle();
1983  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
1984  FirstCycle + getASAP(SU) + II - 1, II);
1985  }
1986  // Even if we find a schedule, make sure the schedule doesn't exceed the
1987  // allowable number of stages. We keep trying if this happens.
1988  if (scheduleFound)
1989  if (SwpMaxStages > -1 &&
1990  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
1991  scheduleFound = false;
1992 
1993  LLVM_DEBUG({
1994  if (!scheduleFound)
1995  dbgs() << "\tCan't schedule\n";
1996  });
1997  } while (++NI != NE && scheduleFound);
1998 
1999  // If a schedule is found, check if it is a valid schedule too.
2000  if (scheduleFound)
2001  scheduleFound = Schedule.isValidSchedule(this);
2002  }
2003 
2004  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
2005  << ")\n");
2006 
2007  if (scheduleFound)
2008  Schedule.finalizeSchedule(this);
2009  else
2010  Schedule.reset();
2011 
2012  return scheduleFound && Schedule.getMaxStageCount() > 0;
2013 }
2014 
2015 /// Given a schedule for the loop, generate a new version of the loop,
2016 /// and replace the old version. This function generates a prolog
2017 /// that contains the initial iterations in the pipeline, the kernel
2018 /// loop, and the epilog that contains the code for the final
2019 /// iterations.
2020 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2021  // Create a new basic block for the kernel and add it to the CFG.
2022  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2023 
2024  unsigned MaxStageCount = Schedule.getMaxStageCount();
2025 
2026  // Remember the registers that are used in different stages. The index is
2027  // the iteration, or stage, that the instruction is scheduled in. This is
2028  // a map between register names in the original block and the names created
2029  // in each stage of the pipelined loop.
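 // VRMap[S][OldReg] is the virtual register created for OldReg in stage copy
 // S. The epilog blocks continue the stage numbering past MaxStageCount,
 // hence the 2 * (MaxStageCount + 1) maps.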
2030  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2031  InstrMapTy InstrMap;
2032 
2033  SmallVector<MachineBasicBlock *, 4> PrologBBs;
2034 
2035  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2036  assert(PreheaderBB != nullptr &&
2037  "Need to add code to handle loops w/o preheader");
2038  // Generate the prolog instructions that set up the pipeline.
2039  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2040  MF.insert(BB->getIterator(), KernelBB);
2041 
2042  // Rearrange the instructions to generate the new, pipelined loop,
2043  // and update register names as needed.
2044  for (int Cycle = Schedule.getFirstCycle(),
2045  LastCycle = Schedule.getFinalCycle();
2046  Cycle <= LastCycle; ++Cycle) {
2047  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2048  // This inner loop schedules each instruction in the cycle.
2049  for (SUnit *CI : CycleInstrs) {
2050  if (CI->getInstr()->isPHI())
2051  continue;
2052  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2053  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2054  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2055  KernelBB->push_back(NewMI);
2056  InstrMap[NewMI] = CI->getInstr();
2057  }
2058  }
2059 
2060  // Copy any terminator instructions to the new kernel, and update
2061  // names as needed.
2062  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2063  E = BB->instr_end();
2064  I != E; ++I) {
2065  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2066  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2067  KernelBB->push_back(NewMI);
2068  InstrMap[NewMI] = &*I;
2069  }
2070 
2071  KernelBB->transferSuccessors(BB);
2072  KernelBB->replaceSuccessor(BB, KernelBB);
2073 
2074  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2075  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2076  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2077  InstrMap, MaxStageCount, MaxStageCount, false);
2078 
2079  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2080 
2081  SmallVector<MachineBasicBlock *, 4> EpilogBBs;
2082  // Generate the epilog instructions to complete the pipeline.
2083  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2084  PrologBBs);
2085 
2086  // We need this step because the register allocation doesn't handle some
2087  // situations well, so we insert copies to help out.
2088  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2089 
2090  // Remove dead instructions due to loop induction variables.
2091  removeDeadInstructions(KernelBB, EpilogBBs);
2092 
2093  // Add branches between prolog and epilog blocks.
2094  addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2095 
2096  // Remove the original loop since it's no longer referenced.
2097  for (auto &I : *BB)
2098  LIS.RemoveMachineInstrFromMaps(I);
2099  BB->clear();
2100  BB->eraseFromParent();
2101 
2102  delete[] VRMap;
2103 }
2104 
2105 /// Generate the pipeline prolog code.
2106 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2107  MachineBasicBlock *KernelBB,
2108  ValueMapTy *VRMap,
2109  MBBVectorTy &PrologBBs) {
2110  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2111  assert(PreheaderBB != nullptr &&
2112  "Need to add code to handle loops w/o preheader");
2113  MachineBasicBlock *PredBB = PreheaderBB;
2114  InstrMapTy InstrMap;
2115 
2116  // Generate a basic block for each stage, not including the last stage,
2117  // which will be generated in the kernel. Each basic block may contain
2118  // instructions from multiple stages/iterations.
2119  for (unsigned i = 0; i < LastStage; ++i) {
2120  // Create and insert the prolog basic block prior to the original loop
2121  // basic block. The original loop is removed later.
2122  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2123  PrologBBs.push_back(NewBB);
2124  MF.insert(BB->getIterator(), NewBB);
2125  NewBB->transferSuccessors(PredBB);
2126  PredBB->addSuccessor(NewBB);
2127  PredBB = NewBB;
2128 
2129  // Generate instructions for each appropriate stage. Process instructions
2130  // in original program order.
2131  for (int StageNum = i; StageNum >= 0; --StageNum) {
2132  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2133  BBE = BB->getFirstTerminator();
2134  BBI != BBE; ++BBI) {
2135  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2136  if (BBI->isPHI())
2137  continue;
2138  MachineInstr *NewMI =
2139  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2140  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2141  VRMap);
2142  NewBB->push_back(NewMI);
2143  InstrMap[NewMI] = &*BBI;
2144  }
2145  }
2146  }
2147  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2148  LLVM_DEBUG({
2149  dbgs() << "prolog:\n";
2150  NewBB->dump();
2151  });
2152  }
2153 
2154  PredBB->replaceSuccessor(BB, KernelBB);
2155 
2156  // Check if we need to remove the branch from the preheader to the original
2157  // loop, and replace it with a branch to the new loop.
2158  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2159  if (numBranches) {
2160  SmallVector<MachineOperand, 0> Cond;
2161  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2162  }
2163 }
2164 
2165 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2166 /// that were started in either the prolog or the kernel. We create a basic
2167 /// block for each stage that needs to complete.
2168 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2169  MachineBasicBlock *KernelBB,
2170  ValueMapTy *VRMap,
2171  MBBVectorTy &EpilogBBs,
2172  MBBVectorTy &PrologBBs) {
2173  // We need to change the branch from the kernel to the first epilog block, so
2174  // this call to analyze branch uses the kernel rather than the original BB.
2175  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2176  SmallVector<MachineOperand, 4> Cond;
2177  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2178  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2179  if (checkBranch)
2180  return;
2181 
2182  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2183  if (*LoopExitI == KernelBB)
2184  ++LoopExitI;
2185  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2186  MachineBasicBlock *LoopExitBB = *LoopExitI;
2187 
2188  MachineBasicBlock *PredBB = KernelBB;
2189  MachineBasicBlock *EpilogStart = LoopExitBB;
2190  InstrMapTy InstrMap;
2191 
2192  // Generate a basic block for each stage, not including the last stage,
2193  // which was generated for the kernel. Each basic block may contain
2194  // instructions from multiple stages/iterations.
2195  int EpilogStage = LastStage + 1;
2196  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2197  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
2198  EpilogBBs.push_back(NewBB);
2199  MF.insert(BB->getIterator(), NewBB);
2200 
2201  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2202  NewBB->addSuccessor(LoopExitBB);
2203 
2204  if (EpilogStart == LoopExitBB)
2205  EpilogStart = NewBB;
2206 
2207  // Add instructions to the epilog depending on the current block.
2208  // Process instructions in original program order.
2209  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2210  for (auto &BBI : *BB) {
2211  if (BBI.isPHI())
2212  continue;
2213  MachineInstr *In = &BBI;
2214  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2215  // Instructions with memoperands in the epilog are updated with
2216  // conservative values.
2217  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
2218  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2219  NewBB->push_back(NewMI);
2220  InstrMap[NewMI] = In;
2221  }
2222  }
2223  }
2224  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2225  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2226  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2227  InstrMap, LastStage, EpilogStage, i == 1);
2228  PredBB = NewBB;
2229 
2230  LLVM_DEBUG({
2231  dbgs() << "epilog:\n";
2232  NewBB->dump();
2233  });
2234  }
2235 
2236  // Fix any Phi nodes in the loop exit block.
2237  for (MachineInstr &MI : *LoopExitBB) {
2238  if (!MI.isPHI())
2239  break;
2240  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2241  MachineOperand &MO = MI.getOperand(i);
2242  if (MO.getMBB() == BB)
2243  MO.setMBB(PredBB);
2244  }
2245  }
2246 
2247  // Create a branch to the new epilog from the kernel.
2248  // Remove the original branch and add a new branch to the epilog.
2249  TII->removeBranch(*KernelBB);
2250  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2251  // Add a branch to the loop exit.
2252  if (EpilogBBs.size() > 0) {
2253  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2254  SmallVector<MachineOperand, 4> Cond1;
2255  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2256  }
2257 }
2258 
2259 /// Replace all uses of FromReg that appear outside the specified
2260 /// basic block with ToReg.
2261 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2262  MachineBasicBlock *MBB,
2263  MachineRegisterInfo &MRI,
2264  LiveIntervals &LIS) {
2265  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2266  E = MRI.use_end();
2267  I != E;) {
2268  MachineOperand &O = *I;
2269  ++I;
2270  if (O.getParent()->getParent() != MBB)
2271  O.setReg(ToReg);
2272  }
2273  if (!LIS.hasInterval(ToReg))
2274  LIS.createEmptyInterval(ToReg);
2275 }
2276 
2277 /// Return true if the register has a use that occurs outside the
2278 /// specified loop.
2279 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2280  MachineRegisterInfo &MRI) {
2281  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
2282  E = MRI.use_end();
2283  I != E; ++I)
2284  if (I->getParent()->getParent() != BB)
2285  return true;
2286  return false;
2287 }
2288 
2289 /// Generate Phis for the specific block in the generated pipelined code.
2290 /// This function looks at the Phis from the original code to guide the
2291 /// creation of new Phis.
2292 void SwingSchedulerDAG::generateExistingPhis(
2293  MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
2294  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2295  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2296  bool IsLast) {
2297  // Compute the stage number for the initial value of the Phi, which
2298  // comes from the prolog. The prolog to use depends on which kernel/
2299  // epilog block we're adding the Phi to.
2300  unsigned PrologStage = 0;
2301  unsigned PrevStage = 0;
2302  bool InKernel = (LastStageNum == CurStageNum);
2303  if (InKernel) {
2304  PrologStage = LastStageNum - 1;
2305  PrevStage = CurStageNum;
2306  } else {
2307  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2308  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2309  }
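 // For example, with LastStageNum == 3: in the kernel (CurStageNum == 3)
 // PrologStage is 2 and PrevStage is 3; for the first epilog block
 // (CurStageNum == 4) PrologStage is 2 and PrevStage is 3; for the second
 // epilog block (CurStageNum == 5) PrologStage is 1 and PrevStage is 4.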
2310 
2311  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2312  BBE = BB->getFirstNonPHI();
2313  BBI != BBE; ++BBI) {
2314  Register Def = BBI->getOperand(0).getReg();
2315 
2316  unsigned InitVal = 0;
2317  unsigned LoopVal = 0;
2318  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2319 
2320  unsigned PhiOp1 = 0;
2321  // The Phi value from the loop body typically is defined in the loop, but
2322  // not always. So, we need to check if the value is defined in the loop.
2323  unsigned PhiOp2 = LoopVal;
2324  if (VRMap[LastStageNum].count(LoopVal))
2325  PhiOp2 = VRMap[LastStageNum][LoopVal];
2326 
2327  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2328  int LoopValStage =
2329  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2330  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2331  if (NumStages == 0) {
2332  // We don't need to generate a Phi anymore, but we need to rename any uses
2333  // of the Phi value.
2334  unsigned NewReg = VRMap[PrevStage][LoopVal];
2335  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2336  Def, InitVal, NewReg);
2337  if (VRMap[CurStageNum].count(LoopVal))
2338  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2339  }
2340  // Adjust the number of Phis needed depending on the number of prologs left,
2341  // and the distance from where the Phi is first scheduled. The number of
2342  // Phis cannot exceed the number of prolog stages. Each stage can
2343  // potentially define two values.
2344  unsigned MaxPhis = PrologStage + 2;
2345  if (!InKernel && (int)PrologStage <= LoopValStage)
2346  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
2347  unsigned NumPhis = std::min(NumStages, MaxPhis);
2348 
2349  unsigned NewReg = 0;
2350  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2351  // In the epilog, we may need to look back one stage to get the correct
2352  // Phi name because the epilog and prolog blocks execute the same stage.
2353  // The correct name is from the previous block only when the Phi has
2354  // been completely scheduled prior to the epilog, and the Phi value is not
2355  // needed in multiple stages.
2356  int StageDiff = 0;
2357  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2358  NumPhis == 1)
2359  StageDiff = 1;
2360  // Adjust the computations below when the phi and the loop definition
2361  // are scheduled in different stages.
2362  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2363  StageDiff = StageScheduled - LoopValStage;
2364  for (unsigned np = 0; np < NumPhis; ++np) {
2365  // If the Phi hasn't been scheduled, then use the initial Phi operand
2366  // value. Otherwise, use the scheduled version of the instruction. This
2367  // is a little complicated when a Phi references another Phi.
2368  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2369  PhiOp1 = InitVal;
2370  // Check if the Phi has already been scheduled in a prolog stage.
2371  else if (PrologStage >= AccessStage + StageDiff + np &&
2372  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2373  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2374  // Check if the Phi has already been scheduled, but the loop instruction
2375  // is either another Phi, or doesn't occur in the loop.
2376  else if (PrologStage >= AccessStage + StageDiff + np) {
2377  // If the Phi references another Phi, we need to examine the other
2378  // Phi to get the correct value.
2379  PhiOp1 = LoopVal;
2380  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2381  int Indirects = 1;
2382  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2383  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2384  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2385  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2386  else
2387  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2388  InstOp1 = MRI.getVRegDef(PhiOp1);
2389  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2390  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2391  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2392  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2393  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2394  break;
2395  }
2396  ++Indirects;
2397  }
2398  } else
2399  PhiOp1 = InitVal;
2400  // If this references a generated Phi in the kernel, get the Phi operand
2401  // from the incoming block.
2402  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2403  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2404  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2405 
2406  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2407  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2408  // In the epilog, a map lookup is needed to get the value from the kernel,
2409  // or previous epilog block. Which one is used depends on whether the
2410  // instruction is scheduled in the previous block.
2411  if (!InKernel) {
2412  int StageDiffAdj = 0;
2413  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2414  StageDiffAdj = StageScheduled - LoopValStage;
2415  // Use the loop value defined in the kernel, unless the kernel
2416  // contains the last definition of the Phi.
2417  if (np == 0 && PrevStage == LastStageNum &&
2418  (StageScheduled != 0 || LoopValStage != 0) &&
2419  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2420  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2421  // Use the value defined by the Phi. We add one because we switch
2422  // from looking at the loop value to the Phi definition.
2423  else if (np > 0 && PrevStage == LastStageNum &&
2424  VRMap[PrevStage - np + 1].count(Def))
2425  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2426  // Use the loop value defined in the kernel.
2427  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
2428  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2429  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2430  // Use the value defined by the Phi, unless we're generating the first
2431  // epilog and the Phi refers to a Phi in a different stage.
2432  else if (VRMap[PrevStage - np].count(Def) &&
2433  (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled)))
2434  PhiOp2 = VRMap[PrevStage - np][Def];
2435  }
2436 
2437  // Check if we can reuse an existing Phi. This occurs when a Phi
2438  // references another Phi, and the other Phi is scheduled in an
2439  // earlier stage. We can try to reuse an existing Phi up until the last
2440  // stage of the current Phi.
2441  if (LoopDefIsPhi) {
2442  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
2443  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2444  int StageDiff = (StageScheduled - LoopValStage);
2445  LVNumStages -= StageDiff;
2446  // Make sure the loop value Phi has been processed already.
2447  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
2448  NewReg = PhiOp2;
2449  unsigned ReuseStage = CurStageNum;
2450  if (Schedule.isLoopCarried(this, *PhiInst))
2451  ReuseStage -= LVNumStages;
2452  // Check if the Phi to reuse has been generated yet. If not, then
2453  // there is nothing to reuse.
2454  if (VRMap[ReuseStage - np].count(LoopVal)) {
2455  NewReg = VRMap[ReuseStage - np][LoopVal];
2456 
2457  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2458  &*BBI, Def, NewReg);
2459  // Update the map with the new Phi name.
2460  VRMap[CurStageNum - np][Def] = NewReg;
2461  PhiOp2 = NewReg;
2462  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2463  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2464 
2465  if (IsLast && np == NumPhis - 1)
2466  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2467  continue;
2468  }
2469  }
2470  }
2471  if (InKernel && StageDiff > 0 &&
2472  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2473  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2474  }
2475 
2476  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2477  NewReg = MRI.createVirtualRegister(RC);
2478 
2479  MachineInstrBuilder NewPhi =
2480  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2481  TII->get(TargetOpcode::PHI), NewReg);
2482  NewPhi.addReg(PhiOp1).addMBB(BB1);
2483  NewPhi.addReg(PhiOp2).addMBB(BB2);
2484  if (np == 0)
2485  InstrMap[NewPhi] = &*BBI;
2486 
2487  // We define the Phis after creating the new pipelined code, so
2488  // we need to rename the Phi values in scheduled instructions.
2489 
2490  unsigned PrevReg = 0;
2491  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2492  PrevReg = VRMap[PrevStage - np][LoopVal];
2493  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2494  Def, NewReg, PrevReg);
2495  // If the Phi has been scheduled, use the new name for rewriting.
2496  if (VRMap[CurStageNum - np].count(Def)) {
2497  unsigned R = VRMap[CurStageNum - np][Def];
2498  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2499  R, NewReg);
2500  }
2501 
2502  // Check if we need to rename any uses that occurs after the loop. The
2503  // register to replace depends on whether the Phi is scheduled in the
2504  // epilog.
2505  if (IsLast && np == NumPhis - 1)
2506  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2507 
2508  // In the kernel, a dependent Phi uses the value from this Phi.
2509  if (InKernel)
2510  PhiOp2 = NewReg;
2511 
2512  // Update the map with the new Phi name.
2513  VRMap[CurStageNum - np][Def] = NewReg;
2514  }
2515 
2516  while (NumPhis++ < NumStages) {
2517  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2518  &*BBI, Def, NewReg, 0);
2519  }
2520 
2521  // Check if we need to rename a Phi that has been eliminated due to
2522  // scheduling.
2523  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2524  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2525  }
2526 }
2527 
2528 /// Generate Phis for the specified block in the generated pipelined code.
2529 /// These are new Phis needed because the definition is scheduled after the
2530 /// use in the pipelined sequence.
2531 void SwingSchedulerDAG::generatePhis(
2532  MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
2533  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2534  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2535  bool IsLast) {
2536  // Compute the stage number that contains the initial Phi value, and
2537  // the Phi from the previous stage.
2538  unsigned PrologStage = 0;
2539  unsigned PrevStage = 0;
2540  unsigned StageDiff = CurStageNum - LastStageNum;
2541  bool InKernel = (StageDiff == 0);
2542  if (InKernel) {
2543  PrologStage = LastStageNum - 1;
2544  PrevStage = CurStageNum;
2545  } else {
2546  PrologStage = LastStageNum - StageDiff;
2547  PrevStage = LastStageNum + StageDiff - 1;
2548  }
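 // This mirrors the staging in generateExistingPhis: PrologStage selects the
 // prolog copy that supplies one incoming value, and PrevStage selects the
 // kernel or previous epilog copy that supplies the other.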
2549 
2550  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2551  BBE = BB->instr_end();
2552  BBI != BBE; ++BBI) {
2553  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2554  MachineOperand &MO = BBI->getOperand(i);
2555  if (!MO.isReg() || !MO.isDef() ||
2556  !Register::isVirtualRegister(MO.getReg()))
2557  continue;
2558 
2559  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2560  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2561  Register Def = MO.getReg();
2562  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2563  // An instruction that is scheduled in stage 0 and used after the loop
2564  // requires a phi in the epilog for the last definition from either
2565  // the kernel or prolog.
2566  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2567  hasUseAfterLoop(Def, BB, MRI))
2568  NumPhis = 1;
2569  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2570  continue;
2571 
2572  unsigned PhiOp2 = VRMap[PrevStage][Def];
2573  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2574  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2575  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2576  // The number of Phis can't exceed the number of prolog stages. The
2577  // prolog stage number is zero based.
2578  if (NumPhis > PrologStage + 1 - StageScheduled)
2579  NumPhis = PrologStage + 1 - StageScheduled;
2580  for (unsigned np = 0; np < NumPhis; ++np) {
2581  unsigned PhiOp1 = VRMap[PrologStage][Def];
2582  if (np <= PrologStage)
2583  PhiOp1 = VRMap[PrologStage - np][Def];
2584  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2585  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2586  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2587  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2588  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2589  }
2590  if (!InKernel)
2591  PhiOp2 = VRMap[PrevStage - np][Def];
2592 
2593  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2594  Register NewReg = MRI.createVirtualRegister(RC);
2595 
2596  MachineInstrBuilder NewPhi =
2597  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2598  TII->get(TargetOpcode::PHI), NewReg);
2599  NewPhi.addReg(PhiOp1).addMBB(BB1);
2600  NewPhi.addReg(PhiOp2).addMBB(BB2);
2601  if (np == 0)
2602  InstrMap[NewPhi] = &*BBI;
2603 
2604  // Rewrite uses and update the map. The actions depend upon whether
2605  // we are generating code for the kernel or epilog blocks.
2606  if (InKernel) {
2607  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2608  &*BBI, PhiOp1, NewReg);
2609  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2610  &*BBI, PhiOp2, NewReg);
2611 
2612  PhiOp2 = NewReg;
2613  VRMap[PrevStage - np - 1][Def] = NewReg;
2614  } else {
2615  VRMap[CurStageNum - np][Def] = NewReg;
2616  if (np == NumPhis - 1)
2617  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2618  &*BBI, Def, NewReg);
2619  }
2620  if (IsLast && np == NumPhis - 1)
2621  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2622  }
2623  }
2624  }
2625 }
2626 
2627 /// Remove instructions that generate values with no uses.
2628 /// Typically, these are induction variable operations that generate values
2629 /// used in the loop itself. A dead instruction has a definition with
2630 /// no uses, or uses that occur in the original loop only.
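 /// Uses that remain only in the original loop block do not count, since that
 /// block is deleted once the pipelined version is in place.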
2631 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2632  MBBVectorTy &EpilogBBs) {
2633  // For each epilog block, check that the value defined by each instruction
2634  // is used. If not, delete it.
2635  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2636  MBE = EpilogBBs.rend();
2637  MBB != MBE; ++MBB)
2638  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2639  ME = (*MBB)->instr_rend();
2640  MI != ME;) {
2641  // From DeadMachineInstructionElim. Don't delete inline assembly.
2642  if (MI->isInlineAsm()) {
2643  ++MI;
2644  continue;
2645  }
2646  bool SawStore = false;
2647  // Check if it's safe to remove the instruction due to side effects.
2648  // We can, and want to, remove Phis here.
2649  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
2650  ++MI;
2651  continue;
2652  }
2653  bool used = true;
2654  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2655  MOE = MI->operands_end();
2656  MOI != MOE; ++MOI) {
2657  if (!MOI->isReg() || !MOI->isDef())
2658  continue;
2659  Register reg = MOI->getReg();
2660  // Assume physical registers are used, unless they are marked dead.
2661  if (Register::isPhysicalRegister(reg)) {
2662  used = !MOI->isDead();
2663  if (used)
2664  break;
2665  continue;
2666  }
2667  unsigned realUses = 0;
2668  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2669  EI = MRI.use_end();
2670  UI != EI; ++UI) {
2671  // Check if there are any uses that occur only in the original
2672  // loop. If so, that's not a real use.
2673  if (UI->getParent()->getParent() != BB) {
2674  realUses++;
2675  used = true;
2676  break;
2677  }
2678  }
2679  if (realUses > 0)
2680  break;
2681  used = false;
2682  }
2683  if (!used) {
2684  LIS.RemoveMachineInstrFromMaps(*MI);
2685  MI++->eraseFromParent();
2686  continue;
2687  }
2688  ++MI;
2689  }
2690  // In the kernel block, check if we can remove a Phi that generates a value
2691  // used in an instruction removed in the epilog block.
2692  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2693  BBE = KernelBB->getFirstNonPHI();
2694  BBI != BBE;) {
2695  MachineInstr *MI = &*BBI;
2696  ++BBI;
2697  Register reg = MI->getOperand(0).getReg();
2698  if (MRI.use_begin(reg) == MRI.use_end()) {
2699  LIS.RemoveMachineInstrFromMaps(*MI);
2700  MI->eraseFromParent();
2701  }
2702  }
2703 }
2704 
2705 /// For loop carried definitions, we split the lifetime of a virtual register
2706 /// that has uses past the definition in the next iteration. A copy with a new
2707 /// virtual register is inserted before the definition, which helps with
2708 /// generating a better register assignment.
2709 ///   Before:             After:
2710 ///   v1 = phi(a, v2)     v1 = phi(a, v2)
2711 ///   v2 = phi(b, v3)     v2 = phi(b, v3)
2712 ///   v3 = ..             v4 = copy v1
2713 ///   .. = v1             v3 = ..
2714 ///                       .. = v4
2715 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
2716  MBBVectorTy &EpilogBBs,
2717  SMSchedule &Schedule) {
2718  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2719  for (auto &PHI : KernelBB->phis()) {
2720  Register Def = PHI.getOperand(0).getReg();
2721  // Check for any Phi definition that is used as an operand of another Phi
2722  // in the same block.
2723  for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
2724  E = MRI.use_instr_end();
2725  I != E; ++I) {
2726  if (I->isPHI() && I->getParent() == KernelBB) {
2727  // Get the loop carried definition.
2728  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
2729  if (!LCDef)
2730  continue;
2731  MachineInstr *MI = MRI.getVRegDef(LCDef);
2732  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
2733  continue;
2734  // Search through the rest of the block looking for uses of the Phi
2735  // definition. If one occurs, then split the lifetime.
2736  unsigned SplitReg = 0;
2737  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
2738  KernelBB->instr_end()))
2739  if (BBJ.readsRegister(Def)) {
2740  // We split the lifetime when we find the first use.
2741  if (SplitReg == 0) {
2742  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
2743  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
2744  TII->get(TargetOpcode::COPY), SplitReg)
2745  .addReg(Def);
2746  }
2747  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2748  }
2749  if (!SplitReg)
2750  continue;
2751  // Search through each of the epilog blocks for any uses to be renamed.
2752  for (auto &Epilog : EpilogBBs)
2753  for (auto &I : *Epilog)
2754  if (I.readsRegister(Def))
2755  I.substituteRegister(Def, SplitReg, 0, *TRI);
2756  break;
2757  }
2758  }
2759  }
2760 }
2761 
2762 /// Remove the incoming block from the Phis in a basic block.
2763 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
2764  for (MachineInstr &MI : *BB) {
2765  if (!MI.isPHI())
2766  break;
2767  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
2768  if (MI.getOperand(i + 1).getMBB() == Incoming) {
2769  MI.RemoveOperand(i + 1);
2770  MI.RemoveOperand(i);
2771  break;
2772  }
2773  }
2774 }
2775 
2776 /// Create branches from each prolog basic block to the appropriate epilog
2777 /// block. These edges are needed if the loop ends before reaching the
2778 /// kernel.
2779 void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
2780  MBBVectorTy &PrologBBs,
2781  MachineBasicBlock *KernelBB,
2782  MBBVectorTy &EpilogBBs,
2783  SMSchedule &Schedule, ValueMapTy *VRMap) {
2784  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
2785  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
2786  MachineInstr *Cmp = Pass.LI.LoopCompare;
2787  MachineBasicBlock *LastPro = KernelBB;
2788  MachineBasicBlock *LastEpi = KernelBB;
2789 
2790  // Start from the blocks connected to the kernel and work "out"
2791  // to the first prolog and the last epilog blocks.
2792  SmallVector<MachineInstr *, 4> PrevInsts;
2793  unsigned MaxIter = PrologBBs.size() - 1;
2794  unsigned LC = UINT_MAX;
2795  unsigned LCMin = UINT_MAX;
2796  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
2797  // Add branches to the prolog that go to the corresponding
2798  // epilog, and the fall-thru prolog/kernel block.
2799  MachineBasicBlock *Prolog = PrologBBs[j];
2800  MachineBasicBlock *Epilog = EpilogBBs[i];
2801  // We've executed one iteration, so decrement the loop count and check for
2802  // the loop end.
2803  SmallVector<MachineOperand, 4> Cond;
2804  // Check if the LOOP0 has already been removed. If so, then there is no need
2805  // to reduce the trip count.
2806  if (LC != 0)
2807  LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
2808  PrevInsts, j, MaxIter);
2809 
2810  // Record the value of the first trip count, which is used to determine if
2811  // branches and blocks can be removed for constant trip counts.
2812  if (LCMin == UINT_MAX)
2813  LCMin = LC;
2814 
2815  unsigned numAdded = 0;
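 // Three cases: a loop count that is only known at run time gets a
 // conditional branch from the prolog to its epilog; a constant count that
 // is already exhausted at this prolog (j >= LCMin) branches straight to the
 // epilog, and the now-unreachable blocks nearer the kernel are deleted;
 // otherwise the prolog branches on toward the kernel.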
2816  if (Register::isVirtualRegister(LC)) {
2817  Prolog->addSuccessor(Epilog);
2818  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
2819  } else if (j >= LCMin) {
2820  Prolog->addSuccessor(Epilog);
2821  Prolog->removeSuccessor(LastPro);
2822  LastEpi->removeSuccessor(Epilog);
2823  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
2824  removePhis(Epilog, LastEpi);
2825  // Remove the blocks that are no longer referenced.
2826  if (LastPro != LastEpi) {
2827  LastEpi->clear();
2828  LastEpi->eraseFromParent();
2829  }
2830  LastPro->clear();
2831  LastPro->eraseFromParent();
2832  } else {
2833  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
2834  removePhis(Epilog, Prolog);
2835  }
2836  LastPro = Prolog;
2837  LastEpi = Epilog;
2838  for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
2839  E = Prolog->instr_rend();
2840  I != E && numAdded > 0; ++I, --numAdded)
2841  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
2842  }
2843 }
2844 
2845 /// Return true if we can compute the amount the instruction changes
2846 /// during each iteration. Set Delta to the amount of the change.
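 /// For example, a post-incremented base register that advances by 4 bytes
 /// each iteration yields Delta == 4.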
2847 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2848  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2849  const MachineOperand *BaseOp;
2850  int64_t Offset;
2851  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
2852  return false;
2853 
2854  if (!BaseOp->isReg())
2855  return false;
2856 
2857  Register BaseReg = BaseOp->getReg();
2858 
2859  MachineRegisterInfo &MRI = MF.getRegInfo();
2860  // Check if there is a Phi. If so, get the definition in the loop.
2861  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2862  if (BaseDef && BaseDef->isPHI()) {
2863  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2864  BaseDef = MRI.getVRegDef(BaseReg);
2865  }
2866  if (!BaseDef)
2867  return false;
2868 
2869  int D = 0;
2870  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2871  return false;
2872 
2873  Delta = D;
2874  return true;
2875 }
2876 
2877 /// Update the memory operand with a new offset when the pipeliner
2878 /// generates a new copy of the instruction that refers to a
2879 /// different memory location.
2880 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
2881  MachineInstr &OldMI, unsigned Num) {
2882  if (Num == 0)
2883  return;
2884  // If the instruction has memory operands, then adjust the offset
2885  // when the instruction appears in different stages.
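 // Each prolog/epilog copy of an instruction corresponds to a later
 // iteration, so an offset that advances by Delta per iteration is shifted
 // by Delta * Num in the copy that is Num stages away from where the
 // instruction was originally scheduled.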
2886  if (NewMI.memoperands_empty())
2887  return;
2888  SmallVector<MachineMemOperand *, 2> NewMMOs;
2889  for (MachineMemOperand *MMO : NewMI.memoperands()) {
2890  // TODO: Figure out whether isAtomic is really necessary (see D57601).
2891  if (MMO->isVolatile() || MMO->isAtomic() ||
2892  (MMO->isInvariant() && MMO->isDereferenceable()) ||
2893  (!MMO->getValue())) {
2894  NewMMOs.push_back(MMO);
2895  continue;
2896  }
2897  unsigned Delta;
2898  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
2899  int64_t AdjOffset = Delta * Num;
2900  NewMMOs.push_back(
2901  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
2902  } else {
2903  NewMMOs.push_back(
2904  MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
2905  }
2906  }
2907  NewMI.setMemRefs(MF, NewMMOs);
2908 }
2909 
2910 /// Clone the instruction for the new pipelined loop and update the
2911 /// memory operands, if needed.
2912 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
2913  unsigned CurStageNum,
2914  unsigned InstStageNum) {
2915  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2916  // Check for tied operands in inline asm instructions. This should be handled
2917  // elsewhere, but I'm not sure of the best solution.
2918  if (OldMI->isInlineAsm())
2919  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
2920  const auto &MO = OldMI->getOperand(i);
2921  if (MO.isReg() && MO.isUse())
2922  break;
2923  unsigned UseIdx;
2924  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
2925  NewMI->tieOperands(i, UseIdx);
2926  }
2927  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2928  return NewMI;
2929 }
2930 
2931 /// Clone the instruction for the new pipelined loop. If needed, this
2932 /// function updates the instruction using the values saved in the
2933 /// InstrChanges structure.
2934 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
2935  unsigned CurStageNum,
2936  unsigned InstStageNum,
2937  SMSchedule &Schedule) {
2938  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2939  DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
2940  InstrChanges.find(getSUnit(OldMI));
2941  if (It != InstrChanges.end()) {
2942  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2943  unsigned BasePos, OffsetPos;
2944  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
2945  return nullptr;
2946  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
2947  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
2948  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
2949  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
2950  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2951  }
2952  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2953  return NewMI;
2954 }
2955 
2956 /// Update the machine instruction with new virtual registers. This
2957 /// function may change the definitions and/or uses.
2958 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
2959  unsigned CurStageNum,
2960  unsigned InstrStageNum,
2961  SMSchedule &Schedule,
2962  ValueMapTy *VRMap) {
2963  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
2964  MachineOperand &MO = NewMI->getOperand(i);
2965  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
2966  continue;
2967  Register reg = MO.getReg();
2968  if (MO.isDef()) {
2969  // Create a new virtual register for the definition.
2970  const TargetRegisterClass *RC = MRI.getRegClass(reg);
2971  Register NewReg = MRI.createVirtualRegister(RC);
2972  MO.setReg(NewReg);
2973  VRMap[CurStageNum][reg] = NewReg;
2974  if (LastDef)
2975  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
2976  } else if (MO.isUse()) {
2977  MachineInstr *Def = MRI.getVRegDef(reg);
2978  // Compute the stage that contains the last definition for the instruction.
2979  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
2980  unsigned StageNum = CurStageNum;
2981  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
2982  // Compute the difference in stages between the definition and the use.
2983  unsigned StageDiff = (InstrStageNum - DefStageNum);
2984  // Make an adjustment to get the last definition.
2985  StageNum -= StageDiff;
2986  }
2987  if (VRMap[StageNum].count(reg))
2988  MO.setReg(VRMap[StageNum][reg]);
2989  }
2990  }
2991 }
2992 
2993 /// Return the instruction in the loop that defines the register.
2994 /// If the definition is a Phi, then follow the Phi operand to
2995 /// the instruction in the loop.
2996 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
2997  SmallPtrSet<MachineInstr *, 8> Visited;
2998  MachineInstr *Def = MRI.getVRegDef(Reg);
2999  while (Def->isPHI()) {
3000  if (!Visited.insert(Def).second)
3001  break;
3002  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3003  if (Def->getOperand(i + 1).getMBB() == BB) {
3004  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3005  break;
3006  }
3007  }
3008  return Def;
3009 }
3010 
3011 /// Return the new name for the value from the previous stage.
3012 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3013  unsigned LoopVal, unsigned LoopStage,
3014  ValueMapTy *VRMap,
3015  MachineBasicBlock *BB) {
3016  unsigned PrevVal = 0;
3017  if (StageNum > PhiStage) {
3018  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3019  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
3020  // The name is defined in the previous stage.
3021  PrevVal = VRMap[StageNum - 1][LoopVal];
3022  else if (VRMap[StageNum].count(LoopVal))
3023  // The previous name is defined in the current stage when the instruction
3024  // order is swapped.
3025  PrevVal = VRMap[StageNum][LoopVal];
3026  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3027  // The loop value hasn't yet been scheduled.
3028  PrevVal = LoopVal;
3029  else if (StageNum == PhiStage + 1)
3030  // The loop value is another phi, which has not been scheduled.
3031  PrevVal = getInitPhiReg(*LoopInst, BB);
3032  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3033  // The loop value is another phi, which has been scheduled.
3034  PrevVal =
3035  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3036  LoopStage, VRMap, BB);
3037  }
3038  return PrevVal;
3039 }
3040 
3041 /// Rewrite the Phi values in the specified block to use the mappings
3042 /// from the initial operand. Once the Phi is scheduled, we switch
3043 /// to using the loop value instead of the Phi value, so those names
3044 /// do not need to be rewritten.
3045 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3046  unsigned StageNum,
3047  SMSchedule &Schedule,
3048  ValueMapTy *VRMap,
3049  InstrMapTy &InstrMap) {
3050  for (auto &PHI : BB->phis()) {
3051  unsigned InitVal = 0;
3052  unsigned LoopVal = 0;
3053  getPhiRegs(PHI, BB, InitVal, LoopVal);
3054  Register PhiDef = PHI.getOperand(0).getReg();
3055 
3056  unsigned PhiStage =
3057  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3058  unsigned LoopStage =
3059  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3060  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3061  if (NumPhis > StageNum)
3062  NumPhis = StageNum;
3063  for (unsigned np = 0; np <= NumPhis; ++np) {
3064  unsigned NewVal =
3065  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3066  if (!NewVal)
3067  NewVal = InitVal;
3068  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
3069  PhiDef, NewVal);
3070  }
3071  }
3072 }
3073 
3074 /// Rewrite a previously scheduled instruction to use the register value
3075 /// from the new instruction. Make sure the instruction occurs in the
3076 /// basic block, and we don't change the uses in the new instruction.
3077 void SwingSchedulerDAG::rewriteScheduledInstr(
3078  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3079  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3080  unsigned NewReg, unsigned PrevReg) {
3081  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3082  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3083  // Rewrite uses that have been scheduled already to use the new
3084  // Phi register.
3085  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3086  EI = MRI.use_end();
3087  UI != EI;) {
3088  MachineOperand &UseOp = *UI;
3089  MachineInstr *UseMI = UseOp.getParent();
3090  ++UI;
3091  if (UseMI->getParent() != BB)
3092  continue;
3093  if (UseMI->isPHI()) {
3094  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
3095  continue;
3096  if (getLoopPhiReg(*UseMI, BB) != OldReg)
3097  continue;
3098  }
3099  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3100  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3101  SUnit *OrigMISU = getSUnit(OrigInstr->second);
3102  int StageSched = Schedule.stageScheduled(OrigMISU);
3103  int CycleSched = Schedule.cycleScheduled(OrigMISU);
3104  unsigned ReplaceReg = 0;
3105  // This is the stage for the scheduled instruction.
3106  if (StagePhi == StageSched && Phi->isPHI()) {
3107  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3108  if (PrevReg && InProlog)
3109  ReplaceReg = PrevReg;
3110  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
3111  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
3112  ReplaceReg = PrevReg;
3113  else
3114  ReplaceReg = NewReg;
3115  }
3116  // The scheduled instruction occurs before the scheduled Phi, and the
3117  // Phi is not loop carried.
3118  if (!InProlog && StagePhi + 1 == StageSched &&
3119  !Schedule.isLoopCarried(this, *Phi))
3120  ReplaceReg = NewReg;
3121  if (StagePhi > StageSched && Phi->isPHI())
3122  ReplaceReg = NewReg;
3123  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3124  ReplaceReg = NewReg;
3125  if (ReplaceReg) {
3126  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3127  UseOp.setReg(ReplaceReg);
3128  }
3129  }
3130 }
3131 
3132 /// Check if we can change the instruction to use an offset value from the
3133 /// previous iteration. If so, return true and set the base and offset values
3134 /// so that we can rewrite the load, if necessary.
3135 /// v1 = Phi(v0, v3)
3136 /// v2 = load v1, 0
3137 /// v3 = post_store v1, 4, x
3138 /// This function enables the load to be rewritten as v2 = load v3, 4.
3139 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3140  unsigned &BasePos,
3141  unsigned &OffsetPos,
3142  unsigned &NewBase,
3143  int64_t &Offset) {
3144  // Get the load instruction.
3145  if (TII->isPostIncrement(*MI))
3146  return false;
3147  unsigned BasePosLd, OffsetPosLd;
3148  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3149  return false;
3150  Register BaseReg = MI->getOperand(BasePosLd).getReg();
3151 
3152  // Look for the Phi instruction.
3153  MachineRegisterInfo &MRI = MF.getRegInfo();
3154  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3155  if (!Phi || !Phi->isPHI())
3156  return false;
3157  // Get the register defined in the loop block.
3158  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3159  if (!PrevReg)
3160  return false;
3161 
3162  // Check for the post-increment load/store instruction.
3163  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3164  if (!PrevDef || PrevDef == MI)
3165  return false;
3166 
3167  if (!TII->isPostIncrement(*PrevDef))
3168  return false;
3169 
3170  unsigned BasePos1 = 0, OffsetPos1 = 0;
3171  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3172  return false;
3173 
3174  // Make sure that the instructions do not access the same memory location in
3175  // the next iteration.
3176  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3177  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3178  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3179  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
3180  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
3181  MF.DeleteMachineInstr(NewMI);
3182  if (!Disjoint)
3183  return false;
3184 
3185  // Set the return value once we determine that we return true.
3186  BasePos = BasePosLd;
3187  OffsetPos = OffsetPosLd;
3188  NewBase = PrevReg;
3189  Offset = StoreOffset;
3190  return true;
3191 }
3192 
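// Editorial sketch (not part of the original source): a worked instance of the
// probe above, reusing the v1/v2/v3 example from the function comment and
// assuming a post-increment of 4. With
//   v2 = load v1, 0        (LoadOffset  = 0)
//   v3 = post_store v1, 4  (StoreOffset = 4)
// the load is cloned with immediate 0 + 4 = 4 and checked against the store via
// areMemAccessesTriviallyDisjoint. If the accesses are disjoint, the function
// returns true with NewBase = v3 (the Phi's loop-carried register) and
// Offset = 4, matching the rewrite v2 = load v3, 4.
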
3193 /// Apply changes to the instruction if needed. The changes are needed
3194 /// to improve the scheduling and depend upon the final schedule.
3195 void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
3196  SMSchedule &Schedule) {
3197  SUnit *SU = getSUnit(MI);
3198  DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
3199  InstrChanges.find(SU);
3200  if (It != InstrChanges.end()) {
3201  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3202  unsigned BasePos, OffsetPos;
3203  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3204  return;
3205  Register BaseReg = MI->getOperand(BasePos).getReg();
3206  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3207  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3208  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3209  int BaseStageNum = Schedule.stageScheduled(SU);
3210  int BaseCycleNum = Schedule.cycleScheduled(SU);
3211  if (BaseStageNum < DefStageNum) {
3212  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3213  int OffsetDiff = DefStageNum - BaseStageNum;
3214  if (DefCycleNum < BaseCycleNum) {
3215  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3216  if (OffsetDiff > 0)
3217  --OffsetDiff;
3218  }
3219  int64_t NewOffset =
3220  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3221  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3222  SU->setInstr(NewMI);
3223  MISUnitMap[NewMI] = SU;
3224  NewMIs.insert(NewMI);
3225  }
3226  }
3227 }
3228 
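// Editorial sketch (not part of the original source): the offset arithmetic
// above, assuming a recorded per-iteration increment RegAndOffset.second = 4
// and a load immediate of 0. If the load is scheduled in stage 0 and the loop
// definition of its base register in stage 2, OffsetDiff starts at 2. When the
// definition's cycle precedes the load's cycle, the cloned load also switches
// to the saved register and OffsetDiff drops to 1, so NewOffset = 0 + 4 * 1 = 4;
// otherwise NewOffset = 0 + 4 * 2 = 8.
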
3229 /// Return true for an order or output dependence that is loop carried
3230 /// potentially. A dependence is loop carried if the destination defines a value
3231 /// that may be used or defined by the source in a subsequent iteration.
3232 bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
3233  bool isSucc) {
3234  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
3235  Dep.isArtificial())
3236  return false;
3237 
3238  if (!SwpPruneLoopCarried)
3239  return true;
3240 
3241  if (Dep.getKind() == SDep::Output)
3242  return true;
3243 
3244  MachineInstr *SI = Source->getInstr();
3245  MachineInstr *DI = Dep.getSUnit()->getInstr();
3246  if (!isSucc)
3247  std::swap(SI, DI);
3248  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3249 
3250  // Assume ordered loads and stores may have a loop carried dependence.
3251  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3252  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
3253  SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
3254  return true;
3255 
3256  // Only chain dependences between a load and store can be loop carried.
3257  if (!DI->mayStore() || !SI->mayLoad())
3258  return false;
3259 
3260  unsigned DeltaS, DeltaD;
3261  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3262  return true;
3263 
3264  const MachineOperand *BaseOpS, *BaseOpD;
3265  int64_t OffsetS, OffsetD;
3267  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
3268  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
3269  return true;
3270 
3271  if (!BaseOpS->isIdenticalTo(*BaseOpD))
3272  return true;
3273 
3274  // Check that the base register is incremented by a constant value for each
3275  // iteration.
3276  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
3277  if (!Def || !Def->isPHI())
3278  return true;
3279  unsigned InitVal = 0;
3280  unsigned LoopVal = 0;
3281  getPhiRegs(*Def, BB, InitVal, LoopVal);
3282  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
3283  int D = 0;
3284  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
3285  return true;
3286 
3287  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3288  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3289 
3290  // This is the main test, which checks the offset values and the loop
3291  // increment value to determine if the accesses may be loop carried.
3292  if (AccessSizeS == MemoryLocation::UnknownSize ||
3293  AccessSizeD == MemoryLocation::UnknownSize)
3294  return true;
3295 
3296  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
3297  return true;
3298 
3299  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
3300 }
3301 
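// Editorial sketch (not part of the original source): the final overlap test
// above, assuming both accesses advance by Delta = 8 bytes per iteration and
// are 4 bytes wide. A load at offset 0 against a store at offset 4 gives
// 0 + 4 < 4 + 4, so the chain edge is conservatively kept as loop carried;
// with the offsets swapped (load at 4, store at 0), 4 + 4 < 0 + 4 is false and
// the edge is pruned. Unknown access sizes, unequal deltas, or a delta smaller
// than the access size all fall back to "loop carried".
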
3302 void SwingSchedulerDAG::postprocessDAG() {
3303  for (auto &M : Mutations)
3304  M->apply(this);
3305 }
3306 
3307 /// Try to schedule the node at the specified StartCycle and continue
3308 /// until the node is scheduled or the EndCycle is reached. This function
3309 /// returns true if the node is scheduled. This routine may search either
3310 /// forward or backward for a place to insert the instruction based upon
3311 /// the relative values of StartCycle and EndCycle.
3312 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3313  bool forward = true;
3314  LLVM_DEBUG({
3315  dbgs() << "Trying to insert node between " << StartCycle << " and "
3316  << EndCycle << " II: " << II << "\n";
3317  });
3318  if (StartCycle > EndCycle)
3319  forward = false;
3320 
3321  // The terminating condition depends on the direction.
3322  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3323  for (int curCycle = StartCycle; curCycle != termCycle;
3324  forward ? ++curCycle : --curCycle) {
3325 
3326  // Add the already scheduled instructions at the specified cycle to the
3327  // DFA.
3328  ProcItinResources.clearResources();
3329  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3330  checkCycle <= LastCycle; checkCycle += II) {
3331  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3332 
3333  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3334  E = cycleInstrs.end();
3335  I != E; ++I) {
3336  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3337  continue;
3338  assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
3339  "These instructions have already been scheduled.");
3340  ProcItinResources.reserveResources(*(*I)->getInstr());
3341  }
3342  }
3343  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3344  ProcItinResources.canReserveResources(*SU->getInstr())) {
3345  LLVM_DEBUG({
3346  dbgs() << "\tinsert at cycle " << curCycle << " ";
3347  SU->getInstr()->dump();
3348  });
3349 
3350  ScheduledInstrs[curCycle].push_back(SU);
3351  InstrToCycle.insert(std::make_pair(SU, curCycle));
3352  if (curCycle > LastCycle)
3353  LastCycle = curCycle;
3354  if (curCycle < FirstCycle)
3355  FirstCycle = curCycle;
3356  return true;
3357  }
3358  LLVM_DEBUG({
3359  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3360  SU->getInstr()->dump();
3361  });
3362  }
3363  return false;
3364 }
3365 
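// Editorial sketch (not part of the original source): how the resource scan
// above wraps modulo II. Assuming FirstCycle = 0, II = 3, and a candidate
// curCycle = 7, checkCycle starts at 0 + (7 % 3) = 1 and visits cycles
// 1, 4, 7, ... up to LastCycle, reserving every non-zero-cost instruction that
// occupies the same slot modulo II before asking whether SU itself still fits.
// The search direction follows the arguments: forward when
// StartCycle <= EndCycle, backward otherwise.
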
3366 // Return the cycle of the earliest scheduled instruction in the chain.
3367 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
3368  SmallPtrSet<SUnit *, 8> Visited;
3369  SmallVector<SDep, 8> Worklist;
3370  Worklist.push_back(Dep);
3371  int EarlyCycle = INT_MAX;
3372  while (!Worklist.empty()) {
3373  const SDep &Cur = Worklist.pop_back_val();
3374  SUnit *PrevSU = Cur.getSUnit();
3375  if (Visited.count(PrevSU))
3376  continue;
3377  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3378  if (it == InstrToCycle.end())
3379  continue;
3380  EarlyCycle = std::min(EarlyCycle, it->second);
3381  for (const auto &PI : PrevSU->Preds)
3382  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3383  Worklist.push_back(PI);
3384  Visited.insert(PrevSU);
3385  }
3386  return EarlyCycle;
3387 }
3388 
3389 // Return the cycle of the latest scheduled instruction in the chain.
3390 int SMSchedule::latestCycleInChain(const SDep &Dep) {
3391  SmallPtrSet<SUnit *, 8> Visited;
3392  SmallVector<SDep, 8> Worklist;
3393  Worklist.push_back(Dep);
3394  int LateCycle = INT_MIN;
3395  while (!Worklist.empty()) {
3396  const SDep &Cur = Worklist.pop_back_val();
3397  SUnit *SuccSU = Cur.getSUnit();
3398  if (Visited.count(SuccSU))
3399  continue;
3400  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3401  if (it == InstrToCycle.end())
3402  continue;
3403  LateCycle = std::max(LateCycle, it->second);
3404  for (const auto &SI : SuccSU->Succs)
3405  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3406  Worklist.push_back(SI);
3407  Visited.insert(SuccSU);
3408  }
3409  return LateCycle;
3410 }
3411 
3412 /// If an instruction has a use that spans multiple iterations, return the
3413 /// Phi reached through the back-edge; otherwise return nullptr. Such
3414 /// instructions are characterized by a back-edge to a Phi which itself feeds another Phi.
3415 static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
3416  for (auto &P : SU->Preds)
3417  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3418  for (auto &S : P.getSUnit()->Succs)
3419  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
3420  return P.getSUnit();
3421  return nullptr;
3422 }
3423 
3424 /// Compute the scheduling start slot for the instruction. The start slot
3425 /// depends on any predecessor or successor nodes scheduled already.
3426 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3427  int *MinEnd, int *MaxStart, int II,
3428  SwingSchedulerDAG *DAG) {
3429  // Iterate over each instruction that has been scheduled already. The start
3430  // slot computation depends on whether the previously scheduled instruction
3431  // is a predecessor or successor of the specified instruction.
3432  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3433 
3434  // Iterate over each instruction in the current cycle.
3435  for (SUnit *I : getInstructions(cycle)) {
3436  // Because we're processing a DAG for the dependences, we recognize
3437  // the back-edge in recurrences by anti dependences.
3438  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3439  const SDep &Dep = SU->Preds[i];
3440  if (Dep.getSUnit() == I) {
3441  if (!DAG->isBackedge(SU, Dep)) {
3442  int EarlyStart = cycle + Dep.getLatency() -
3443  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3444  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3445  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
3446  int End = earliestCycleInChain(Dep) + (II - 1);
3447  *MinEnd = std::min(*MinEnd, End);
3448  }
3449  } else {
3450  int LateStart = cycle - Dep.getLatency() +
3451  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3452  *MinLateStart = std::min(*MinLateStart, LateStart);
3453  }
3454  }
3455  // For an instruction that requires multiple iterations, make sure that
3456  // the dependent instruction is not scheduled past the definition.
3457  SUnit *BE = multipleIterations(I, DAG);
3458  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3459  !SU->isPred(I))
3460  *MinLateStart = std::min(*MinLateStart, cycle);
3461  }
3462  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
3463  if (SU->Succs[i].getSUnit() == I) {
3464  const SDep &Dep = SU->Succs[i];
3465  if (!DAG->isBackedge(SU, Dep)) {
3466  int LateStart = cycle - Dep.getLatency() +
3467  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3468  *MinLateStart = std::min(*MinLateStart, LateStart);
3469  if (DAG->isLoopCarriedDep(SU, Dep)) {
3470  int Start = latestCycleInChain(Dep) + 1 - II;
3471  *MaxStart = std::max(*MaxStart, Start);
3472  }
3473  } else {
3474  int EarlyStart = cycle + Dep.getLatency() -
3475  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3476  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3477  }
3478  }
3479  }
3480  }
3481  }
3482 }
3483 
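// Editorial sketch (not part of the original source): the start-slot
// arithmetic above for a predecessor that is not a back-edge. Assuming the
// predecessor sits at cycle 4 with an edge latency of 2 and a dependence
// distance of 0, EarlyStart = 4 + 2 - 0 * II = 6. With a distance of 1 and
// II = 3, the same edge only requires EarlyStart = 4 + 2 - 3 = 3, because the
// value is produced one iteration earlier. Back-edges mirror this with
// LateStart = cycle - latency + distance * II.
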
3484 /// Order the instructions within a cycle so that the definitions occur
3485 /// before the uses. The instruction is added to the front of the list if it
3486 /// must precede a use already in the list; otherwise it is appended.
3487 void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
3488  std::deque<SUnit *> &Insts) {
3489  MachineInstr *MI = SU->getInstr();
3490  bool OrderBeforeUse = false;
3491  bool OrderAfterDef = false;
3492  bool OrderBeforeDef = false;
3493  unsigned MoveDef = 0;
3494  unsigned MoveUse = 0;
3495  int StageInst1 = stageScheduled(SU);
3496 
3497  unsigned Pos = 0;
3498  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3499  ++I, ++Pos) {
3500  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3501  MachineOperand &MO = MI->getOperand(i);
3502  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
3503  continue;
3504 
3505  Register Reg = MO.getReg();
3506  unsigned BasePos, OffsetPos;
3507  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3508  if (MI->getOperand(BasePos).getReg() == Reg)
3509  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3510  Reg = NewReg;
3511  bool Reads, Writes;
3512  std::tie(Reads, Writes) =
3513  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3514  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3515  OrderBeforeUse = true;
3516  if (MoveUse == 0)
3517  MoveUse = Pos;
3518  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3519  // Add the instruction after the scheduled instruction.
3520  OrderAfterDef = true;
3521  MoveDef = Pos;
3522  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3523  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3524  OrderBeforeUse = true;
3525  if (MoveUse == 0)
3526  MoveUse = Pos;
3527  } else {
3528  OrderAfterDef = true;
3529  MoveDef = Pos;
3530  }
3531  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3532  OrderBeforeUse = true;
3533  if (MoveUse == 0)
3534  MoveUse = Pos;
3535  if (MoveUse != 0) {
3536  OrderAfterDef = true;
3537  MoveDef = Pos - 1;
3538  }
3539  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3540  // Add the instruction before the scheduled instruction.
3541  OrderBeforeUse = true;
3542  if (MoveUse == 0)
3543  MoveUse = Pos;
3544  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3545  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3546  if (MoveUse == 0) {
3547  OrderBeforeDef = true;
3548  MoveUse = Pos;
3549  }
3550  }
3551  }
3552  // Check for order dependences between instructions. Make sure the source
3553  // is ordered before the destination.
3554  for (auto &S : SU->Succs) {
3555  if (S.getSUnit() != *I)
3556  continue;
3557  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3558  OrderBeforeUse = true;
3559  if (Pos < MoveUse)
3560  MoveUse = Pos;
3561  }
3562  // We did not handle HW dependences in the previous loop,
3563  // and we normally set Latency = 0 for Anti deps,
3564  // so we may have nodes in the same cycle that are Anti dependent on HW regs.
3565  else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
3566  OrderBeforeUse = true;
3567  if ((MoveUse == 0) || (Pos < MoveUse))
3568  MoveUse = Pos;
3569  }
3570  }
3571  for (auto &P : SU->Preds) {
3572  if (P.getSUnit() != *I)
3573  continue;
3574  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3575  OrderAfterDef = true;
3576  MoveDef = Pos;
3577  }
3578  }
3579  }
3580 
3581  // A circular dependence.
3582  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3583  OrderBeforeUse = false;
3584 
3585  // OrderAfterDef takes precedence over OrderBeforeDef. The latter is due
3586  // to a loop-carried dependence.
3587  if (OrderBeforeDef)
3588  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3589 
3590  // The uncommon case when the instruction order needs to be updated because
3591  // there is both a use and def.
3592  if (OrderBeforeUse && OrderAfterDef) {
3593  SUnit *UseSU = Insts.at(MoveUse);
3594  SUnit *DefSU = Insts.at(MoveDef);
3595  if (MoveUse > MoveDef) {
3596  Insts.erase(Insts.begin() + MoveUse);
3597  Insts.erase(Insts.begin() + MoveDef);
3598  } else {
3599  Insts.erase(Insts.begin() + MoveDef);
3600  Insts.erase(Insts.begin() + MoveUse);
3601  }
3602  orderDependence(SSD, UseSU, Insts);
3603  orderDependence(SSD, SU, Insts);
3604  orderDependence(SSD, DefSU, Insts);
3605  return;
3606  }
3607  // Put the new instruction first if there is a use in the list. Otherwise,
3608  // put it at the end of the list.
3609  if (OrderBeforeUse)
3610  Insts.push_front(SU);
3611  else
3612  Insts.push_back(SU);
3613 }
3614 
3615 /// Return true if the scheduled Phi has a loop carried operand.
3616 bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
3617  if (!Phi.isPHI())
3618  return false;
3619  assert(Phi.isPHI() && "Expecting a Phi.");
3620  SUnit *DefSU = SSD->getSUnit(&Phi);
3621  unsigned DefCycle = cycleScheduled(DefSU);
3622  int DefStage = stageScheduled(DefSU);
3623 
3624  unsigned InitVal = 0;
3625  unsigned LoopVal = 0;
3626  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3627  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3628  if (!UseSU)
3629  return true;
3630  if (UseSU->getInstr()->isPHI())
3631  return true;
3632  unsigned LoopCycle = cycleScheduled(UseSU);
3633  int LoopStage = stageScheduled(UseSU);
3634  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3635 }
3636 
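// Editorial sketch (not part of the original source): with the Phi placed at
// cycle 2 / stage 0 and the definition of its loop operand at cycle 5 /
// stage 0, LoopCycle > DefCycle holds, so the Phi is treated as reading the
// value produced by the previous iteration (loop carried). The operand only
// counts as available within the same iteration when its definition lands in
// the same or an earlier cycle and a strictly later stage than the Phi.
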
3637 /// Return true if the instruction is a definition that is loop carried
3638 /// and defines the use on the next iteration.
3639 /// v1 = phi(v2, v3)
3640 /// (Def) v3 = op v1
3641 /// (MO) = v1
3642 /// If MO appears before Def, then v1 and v3 may get assigned to the same
3643 /// register.
3644 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
3645  MachineInstr *Def, MachineOperand &MO) {
3646  if (!MO.isReg())
3647  return false;
3648  if (Def->isPHI())
3649  return false;
3650  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3651  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3652  return false;
3653  if (!isLoopCarried(SSD, *Phi))
3654  return false;
3655  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3656  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
3657  MachineOperand &DMO = Def->getOperand(i);
3658  if (!DMO.isReg() || !DMO.isDef())
3659  continue;
3660  if (DMO.getReg() == LoopReg)
3661  return true;
3662  }
3663  return false;
3664 }
3665 
3666 // Check if the generated schedule is valid. This function checks if
3667 // an instruction that uses a physical register is scheduled in a
3668 // different stage than the definition. The pipeliner does not handle
3669 // physical register values that may cross a basic block boundary.
3670 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
3671  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
3672  SUnit &SU = SSD->SUnits[i];
3673  if (!SU.hasPhysRegDefs)
3674  continue;
3675  int StageDef = stageScheduled(&SU);
3676  assert(StageDef != -1 && "Instruction should have been scheduled.");
3677  for (auto &SI : SU.Succs)
3678  if (SI.isAssignedRegDep())
3679  if (Register::isPhysicalRegister(SI.getReg()))
3680  if (stageScheduled(SI.getSUnit()) != StageDef)
3681  return false;
3682  }
3683  return true;
3684 }
3685 
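// Editorial note (not part of the original source): a schedule fails this
// check when, for example, an instruction defining a physical register lands
// in stage 0 while a user connected by an assigned register dependence lands
// in stage 1; the physical value would then have to survive across the
// prolog/kernel block boundary, which the pipeliner does not support.
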
3686 /// A property of the node order in swing-modulo-scheduling is
3687 /// that for nodes outside circuits the following holds:
3688 /// none of them is scheduled after both a successor and a
3689 /// predecessor.
3690 /// The method below checks whether the property is met.
3691 /// If not, debug information is printed and statistics information updated.
3692 /// Note that we do not use an assert statement.
3693 /// The reason is that although an invalid node order may prevent
3694 /// the pipeliner from finding a pipelined schedule for arbitrary II,
3695 /// it does not lead to the generation of incorrect code.
3696 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3697 
3698  // a sorted vector that maps each SUnit to its index in the NodeOrder
3699  typedef std::pair<SUnit *, unsigned> UnitIndex;
3700  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
3701 
3702  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3703  Indices.push_back(std::make_pair(NodeOrder[i], i));
3704 
3705  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3706  return std::get<0>(i1) < std::get<0>(i2);
3707  };
3708 
3709  // sort, so that we can perform a binary search
3710  llvm::sort(Indices, CompareKey);
3711 
3712  bool Valid = true;
3713  (void)Valid;
3714  // for each SUnit in the NodeOrder, check whether
3715  // it appears after both a successor and a predecessor
3716  // of the SUnit. If this is the case, and the SUnit
3717  // is not part of a circuit, then the NodeOrder is not
3718  // valid.
3719  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3720  SUnit *SU = NodeOrder[i];
3721  unsigned Index = i;
3722 
3723  bool PredBefore = false;
3724  bool SuccBefore = false;
3725 
3726  SUnit *Succ;
3727  SUnit *Pred;
3728  (void)Succ;
3729  (void)Pred;
3730 
3731  for (SDep &PredEdge : SU->Preds) {
3732  SUnit *PredSU = PredEdge.getSUnit();
3733  unsigned PredIndex = std::get<1>(
3734  *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
3735  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
3736  PredBefore = true;
3737  Pred = PredSU;
3738  break;
3739  }
3740  }
3741 
3742  for (SDep &SuccEdge : SU->Succs) {
3743  SUnit *SuccSU = SuccEdge.getSUnit();
3744  // Do not process a boundary node; it was not included in NodeOrder,
3745  // hence not in Indices either, and the call to llvm::lower_bound() below
3746  // would return Indices.end().
3747  if (SuccSU->isBoundaryNode())
3748  continue;
3749  unsigned SuccIndex = std::get<1>(
3750  *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
3751  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
3752  SuccBefore = true;
3753  Succ = SuccSU;
3754  break;
3755  }
3756  }
3757 
3758  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
3759  // instructions in circuits are allowed to be scheduled
3760  // after both a successor and predecessor.
3761  bool InCircuit = llvm::any_of(
3762  Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
3763  if (InCircuit)
3764  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
3765  else {
3766  Valid = false;
3767  NumNodeOrderIssues++;
3768  LLVM_DEBUG(dbgs() << "Predecessor ";);
3769  }
3770  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
3771  << " are scheduled before node " << SU->NodeNum
3772  << "\n";);
3773  }
3774  }
3775 
3776  LLVM_DEBUG({
3777  if (!Valid)
3778  dbgs() << "Invalid node order found!\n";
3779  });
3780 }
3781 
3782 /// Attempt to fix the degenerate cases when the instruction serialization
3783 /// causes the register lifetimes to overlap. For example,
3784 /// p' = store_pi(p, b)
3785 /// = load p, offset
3786 /// In this case p and p' overlap, which means that two registers are needed.
3787 /// Instead, this function changes the load to use p' and updates the offset.
3788 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
3789  unsigned OverlapReg = 0;
3790  unsigned NewBaseReg = 0;
3791  for (SUnit *SU : Instrs) {
3792  MachineInstr *MI = SU->getInstr();
3793  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3794  const MachineOperand &MO = MI->getOperand(i);
3795  // Look for an instruction that uses p. The instruction occurs in the
3796  // same cycle but occurs later in the serialized order.
3797  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3798  // Check that the instruction appears in the InstrChanges structure,
3799  // which contains instructions that can have the offset updated.
3800  DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
3801  InstrChanges.find(SU);
3802  if (It != InstrChanges.end()) {
3803  unsigned BasePos, OffsetPos;
3804  // Update the base register and adjust the offset.
3805  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3806  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3807  NewMI->getOperand(BasePos).setReg(NewBaseReg);
3808  int64_t NewOffset =
3809  MI->getOperand(OffsetPos).getImm() - It->second.second;
3810  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3811  SU->setInstr(NewMI);
3812  MISUnitMap[NewMI] = SU;
3813  NewMIs.insert(NewMI);
3814  }
3815  }
3816  OverlapReg = 0;
3817  NewBaseReg = 0;
3818  break;
3819  }
3820  // Look for an instruction of the form p' = op(p), which uses and defines
3821  // two virtual registers that get allocated to the same physical register.
3822  unsigned TiedUseIdx = 0;
3823  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3824  // OverlapReg is p in the example above.
3825  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3826  // NewBaseReg is p' in the example above.
3827  NewBaseReg = MI->getOperand(i).getReg();
3828  break;
3829  }
3830  }
3831  }
3832 }
3833 
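// Editorial sketch (not part of the original source): using the p/p' example
// from the function comment and assuming the post-increment was recorded in
// InstrChanges with an adjustment of 8. When a load that still uses p follows
// the tied definition p' in the same cycle, the load is cloned to use p' as
// its base and its immediate is reduced by the recorded 8, so only one of the
// two registers remains live across the pair.
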
3834 /// After the schedule has been formed, call this function to combine
3835 /// the instructions from the different stages/cycles. That is, this
3836 /// function creates a schedule that represents a single iteration.
3837 void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
3838  // Move all instructions to the first stage from later stages.
3839  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3840  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3841  ++stage) {
3842  std::deque<SUnit *> &cycleInstrs =
3843  ScheduledInstrs[cycle + (stage * InitiationInterval)];
3844  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
3845  E = cycleInstrs.rend();
3846  I != E; ++I)
3847  ScheduledInstrs[cycle].push_front(*I);
3848  }
3849  }
3850  // Iterate over the definitions in each instruction, and compute the
3851  // stage difference for each use. Keep the maximum value.
3852  for (auto &I : InstrToCycle) {
3853  int DefStage = stageScheduled(I.first);
3854  MachineInstr *MI = I.first->getInstr();
3855  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3856  MachineOperand &Op = MI->getOperand(i);
3857  if (!Op.isReg() || !Op.isDef())
3858  continue;
3859 
3860  Register Reg = Op.getReg();
3861  unsigned MaxDiff = 0;
3862  bool PhiIsSwapped = false;
3863  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
3864  EI = MRI.use_end();
3865  UI != EI; ++UI) {
3866  MachineOperand &UseOp = *UI;
3867  MachineInstr *UseMI = UseOp.getParent();
3868  SUnit *SUnitUse = SSD->getSUnit(UseMI);
3869  int UseStage = stageScheduled(SUnitUse);
3870  unsigned Diff = 0;
3871  if (UseStage != -1 && UseStage >= DefStage)
3872  Diff = UseStage - DefStage;
3873  if (MI->isPHI()) {
3874  if (isLoopCarried(SSD, *MI))
3875  ++Diff;
3876  else
3877  PhiIsSwapped = true;
3878  }
3879  MaxDiff = std::max(Diff, MaxDiff);
3880  }
3881  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3882  }
3883  }
3884 
3885  // Erase all the elements in the later stages. Only one iteration should
3886  // remain in the scheduled list, and it contains all the instructions.
3887  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3888  ScheduledInstrs.erase(cycle);
3889 
3890  // Change the registers in the instructions as specified in the InstrChanges
3891  // map. We need to use the new registers to create the correct order.
3892  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
3893  SUnit *SU = &SSD->SUnits[i];
3894  SSD->applyInstrChange(SU->getInstr(), *this);
3895  }
3896 
3897  // Reorder the instructions in each cycle to fix and improve the
3898  // generated code.
3899  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
3900  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3901  std::deque<SUnit *> newOrderPhi;
3902  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3903  SUnit *SU = cycleInstrs[i];
3904  if (SU->getInstr()->isPHI())
3905  newOrderPhi.push_back(SU);
3906  }
3907  std::deque<SUnit *> newOrderI;
3908  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3909  SUnit *SU = cycleInstrs[i];
3910  if (!SU->getInstr()->isPHI())
3911  orderDependence(SSD, SU, newOrderI);
3912  }
3913  // Replace the old order with the new order.
3914  cycleInstrs.swap(newOrderPhi);
3915  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
3916  SSD->fixupRegisterOverlaps(cycleInstrs);
3917  }
3918 
3919  LLVM_DEBUG(dump(););
3920 }
3921 
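// Editorial sketch (not part of the original source): the stage folding above.
// Assuming II = 2 and getMaxStageCount() = 2, an instruction placed at cycle
// c + 2 (stage 1) and another at cycle c + 4 (stage 2) are both pushed onto
// the front of cycle c's list, so cycles getFirstCycle()..getFinalCycle()
// describe a single iteration. The later cycles are then erased and every
// remaining cycle is reordered so Phis come first, followed by the
// dependence-ordered non-Phi instructions.
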
3922 void NodeSet::print(raw_ostream &os) const {
3923  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
3924  << " depth " << MaxDepth << " col " << Colocate << "\n";
3925  for (const auto &I : Nodes)
3926  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
3927  os << "\n";
3928 }
3929 
3930 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3931 /// Print the schedule information to the given output.
3932 void SMSchedule::print(raw_ostream &os) const {
3933  // Iterate over each cycle.
3934  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3935  // Iterate over each instruction in the cycle.
3936  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
3937  for (SUnit *CI : cycleInstrs->second) {
3938  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3939  os << "(" << CI->NodeNum << ") ";
3940  CI->getInstr()->print(os);
3941  os << "\n";
3942  }
3943  }
3944 }
3945 
3946 /// Utility function used for debugging to print the schedule.
3947 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
3948 LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
3949 
3950 #endif
3951 
3952 void ResourceManager::initProcResourceVectors(
3953  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
3954  unsigned ProcResourceID = 0;
3955 
3956  // We currently limit the resource kinds to 64 and below so that we can use
3957  // uint64_t for Masks
3958  assert(SM.getNumProcResourceKinds() < 64 &&
3959  "Too many kinds of resources, unsupported");
3960  // Create a unique bitmask for every processor resource unit.
3961  // Skip resource at index 0, since it always references 'InvalidUnit'.
3962  Masks.resize(SM.getNumProcResourceKinds());
3963  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3964  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3965  if (Desc.SubUnitsIdxBegin)
3966  continue;
3967  Masks[I] = 1ULL << ProcResourceID;
3968  ProcResourceID++;
3969  }
3970  // Create a unique bitmask for every processor resource group.
3971  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3972  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3973  if (!Desc.SubUnitsIdxBegin)
3974  continue;
3975  Masks[I] = 1ULL << ProcResourceID;
3976  for (unsigned U = 0; U < Desc.NumUnits; ++U)
3977  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
3978  ProcResourceID++;
3979  }
3980  LLVM_DEBUG({
3981  if (SwpShowResMask) {
3982  dbgs() << "ProcResourceDesc:\n";
3983  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3984  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
3985  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3986  ProcResource->Name, I, Masks[I],
3987  ProcResource->NumUnits);
3988  }
3989  dbgs() << " -----------------\n";
3990  }
3991  });
3992 }
3993 
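// Editorial sketch (not part of the original source): the mask layout above
// for a hypothetical model with three unit resources (indices 1-3) and one
// group (index 4) made of the first two units:
//   Masks[1] = 1ULL << 0, Masks[2] = 1ULL << 1, Masks[3] = 1ULL << 2,
//   Masks[4] = (1ULL << 3) | Masks[1] | Masks[2] = 0xB.
// Index 0 is skipped because it denotes the invalid resource, and the model
// must stay below 64 kinds so that a uint64_t can hold every mask.
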
3994 bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
3995 
3996  LLVM_DEBUG({
3997  if (SwpDebugResource)
3998  dbgs() << "canReserveResources:\n";
3999  });
4000  if (UseDFA)
4001  return DFAResources->canReserveResources(MID);
4002 
4003  unsigned InsnClass = MID->getSchedClass();
4004  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
4005  if (!SCDesc->isValid()) {
4006  LLVM_DEBUG({
4007  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4008  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
4009  });
4010  return true;
4011  }
4012 
4013  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
4014  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
4015  for (; I != E; ++I) {
4016  if (!I->Cycles)
4017  continue;
4018  const MCProcResourceDesc *ProcResource =
4019  SM.getProcResource(I->ProcResourceIdx);
4020  unsigned NumUnits = ProcResource->NumUnits;
4021  LLVM_DEBUG({
4022  if (SwpDebugResource)
4023  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
4024  ProcResource->Name, I->ProcResourceIdx,
4025  ProcResourceCount[I->ProcResourceIdx], NumUnits,
4026  I->Cycles);
4027  });
4028  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
4029  return false;
4030  }
4031  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
4032  return true;
4033 }
4034 
4035 void ResourceManager::reserveResources(const MCInstrDesc *MID) {
4036  LLVM_DEBUG({
4037  if (SwpDebugResource)
4038  dbgs() << "reserveResources:\n";
4039  });
4040  if (UseDFA)
4041  return DFAResources->reserveResources(MID);
4042 
4043  unsigned InsnClass = MID->getSchedClass();
4044  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
4045  if (!SCDesc->isValid()) {
4046  LLVM_DEBUG({
4047  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
4048  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
4049  });
4050  return;
4051  }
4052  for (const MCWriteProcResEntry &PRE :
4053  make_range(STI->getWriteProcResBegin(SCDesc),
4054  STI->getWriteProcResEnd(SCDesc))) {
4055  if (!PRE.Cycles)
4056  continue;
4057  ++ProcResourceCount[PRE.ProcResourceIdx];
4058  LLVM_DEBUG({
4059  if (SwpDebugResource) {
4060  const MCProcResourceDesc *ProcResource =
4061  SM.getProcResource(PRE.ProcResourceIdx);
4062  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
4063  ProcResource->Name, PRE.ProcResourceIdx,
4064  ProcResourceCount[PRE.ProcResourceIdx],
4065  ProcResource->NumUnits, PRE.Cycles);
4066  }
4067  });
4068  }
4069  LLVM_DEBUG({
4070  if (SwpDebugResource)
4071  dbgs() << "reserveResources: done!\n\n";
4072  });
4073 }
4074 
4075 bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
4076  return canReserveResources(&MI.getDesc());
4077 }
4078 
4079 void ResourceManager::reserveResources(const MachineInstr &MI) {
4080  return reserveResources(&MI.getDesc());
4081 }
4082 
4083 void ResourceManager::clearResources() {
4084  if (UseDFA)
4085  return DFAResources->clearResources();
4086  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
4087 }
4088 
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
auto lower_bound(R &&Range, T &&Value) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1261
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:769
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:472
RegisterClassInfo RegClassInfo
A common definition of LaneBitmask for use in TableGen and CodeGen.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:88
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:651
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
unsigned NumPreds
of SDep::Data preds.
Definition: ScheduleDAG.h:266
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:476
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
bool empty() const
cl::opt< bool > SwpEnableCopyToPhi
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:385
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:63
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:178
bool isValidSchedule(SwingSchedulerDAG *SSD)
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
const MachineLoopInfo * MLI
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
bool isInlineAsm() const
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:37
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
Metadata node.
Definition: Metadata.h:863
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
block Block Frequency true
bool isPseudo() const
Return true if this is a pseudo instruction that doesn&#39;t correspond to a real machine instruction...
Definition: MCInstrDesc.h:257
static void getUnderlyingObjects(const MachineInstr *MI, SmallVectorImpl< const Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
bool isPHI() const
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:260
SUnit * getNode(unsigned i) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
int compareRecMII(NodeSet &RHS)
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:259
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:414
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
Modulo Software Pipelining
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change...
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1217
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:105
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:408
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
bool hasInterval(Register Reg) const
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
SlotIndexes pass.
Definition: SlotIndexes.h:314
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:858
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
void dump() const
Definition: Pass.cpp:134
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:234
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Itinerary data supplied by a subtarget to be used by a target.
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
void setReg(Register Reg)
Change the register this operand corresponds to.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
unsigned count(SUnit *SU) const
bool isValid() const
Definition: MCSchedule.h:127
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:147
bool insert(SUnit *SU)
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1231
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
const Value * getValue() const
Return the base address of the memory access.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
StringRef getString() const
Definition: Metadata.cpp:463
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Scheduling dependency.
Definition: ScheduleDAG.h:49
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:538
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:596
void finishBlock() override
Clean up after the software pipeliner runs.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:838
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
int getFirstCycle() const
Return the first cycle in the completed schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
void GetUnderlyingObjects(const Value *V, SmallVectorImpl< const Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
The main class in the implementation of the target independent software pipeliner pass...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:200
MachineFunction * MF
unsigned const MachineRegisterInfo * MRI
void clearResources()
Reset the state.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:534
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:54
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
MachineInstrBuilder & UseMI
unsigned getStagesForPhi(int Reg)
The number of stages for a Phi is a little different than other instructions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:567
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
void dump() const
Utility function used for debugging to print the schedule.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
const TargetInstrInfo * TII
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
void print(raw_ostream &os) const
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
void fixupRegisterOverlaps(std::deque< SUnit *> &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
LLVM_DUMP_METHOD void dump() const
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
bool isCopy() const
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:56
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
const InstrItineraryData * InstrItins
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
hexagon gen pred
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
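A tiny example of make_range wrapping an iterator pair so it can drive a range-based for loop (the pointers here stand in for any iterator type):

#include "llvm/ADT/iterator_range.h"

static int sumValues(const int *Begin, const int *End) {
  int Sum = 0;
  for (int V : llvm::make_range(Begin, End))
    Sum += V;
  return Sum;
}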
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SetVector< SUnit * >::const_iterator iterator
virtual bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Iterator for intrusive lists based on ilist_node.
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
Definition: SmallPtrSet.h:417
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:519
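For reference, a one-line usage sketch of countPopulation; the resource-mask interpretation in the comment is illustrative:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

static unsigned usedResourceCount(uint64_t ResourceMask) {
  // Counts the set bits, e.g. one bit per reserved functional unit.
  return llvm::countPopulation(ResourceMask);
}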
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:552
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit the minimum initiation interval (MII) for pipelining.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static unsigned getDepth(const SmallVectorImpl< const MachineBasicBlock *> &Stack, const MachineBasicBlock *MBB)
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
MachineInstrBuilder MachineInstrBuilder & DefMI
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
void setRecMII(unsigned mii)
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:762
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
This class represents the scheduled code.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:262
unsigned getStagesForReg(int Reg, unsigned CurStage)
Return the max. number of stages/iterations that can occur between a register definition and its uses.
bool isEmpty() const
Returns true if there are no itineraries.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
void setColocate(unsigned c)
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:64
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:168
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understood.
iterator begin() const
Definition: SmallPtrSet.h:396
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit *> &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form...
Generic base class for all target subtargets.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:128
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
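A minimal sketch, assuming an already-built scheduling graph, of walking a node's successor edges and classifying them by dependence kind; the helper name is made up:

#include "llvm/CodeGen/ScheduleDAG.h"

static unsigned countOrderSuccs(const llvm::SUnit &SU) {
  unsigned OrderEdges = 0;
  for (const llvm::SDep &Dep : SU.Succs)
    if (Dep.getKind() == llvm::SDep::Order) // Data, Anti, Output, or Order.
      ++OrderEdges;
  return OrderEdges;
}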
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class...
iterator end() const
Definition: SmallPtrSet.h:401
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
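A short illustration of DenseMap::lookup; unlike operator[], it never inserts, and a missing key yields a value-initialized result (the map's meaning below is hypothetical):

#include "llvm/ADT/DenseMap.h"

static int stageOrZero(const llvm::DenseMap<unsigned, int> &StageOf,
                       unsigned Reg) {
  // Returns 0 when Reg has no entry, without modifying the map.
  return StageOf.lookup(Reg);
}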
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:825
bool memoperands_empty() const
Return true if we don't have any memory operands which describe the memory access done by this instr...
Definition: MachineInstr.h:564
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:71
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand *> MemRefs)
Assign this MachineInstr's memory reference descriptor list.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
void stable_sort(R &&Range)
Definition: STLExtras.h:1289
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:73
mop_iterator operands_begin()
Definition: MachineInstr.h:471
A vector that has set insertion semantics.
Definition: SetVector.h:40
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
SmallVector< MachineOperand, 4 > BrCond
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
A single uniqued string.
Definition: Metadata.h:603
void setPos(MachineBasicBlock::const_iterator Pos)
Register getReg() const
getReg - Returns the register number.
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the loop block.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
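A small sketch, under the assumption that either an instruction-level scheduling model or instruction itineraries may be present, of how a client can query the subtarget's machine model; the helper name is illustrative:

#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"

static bool hasDetailedSchedInfo(const llvm::TargetSubtargetInfo &ST) {
  const llvm::MCSchedModel &SM = ST.getSchedModel();
  const llvm::InstrItineraryData *Itins = ST.getInstrItineraryData();
  return SM.hasInstrSchedModel() || (Itins && !Itins->isEmpty());
}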
std::vector< MachineBasicBlock * >::iterator succ_iterator
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
void resize(size_type N)
Definition: SmallVector.h:344
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431