LLVM  10.0.0svn
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and the check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/BitVector.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/MapVector.h"
36 #include "llvm/ADT/PriorityQueue.h"
37 #include "llvm/ADT/SetVector.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
66 #include "llvm/Config/llvm-config.h"
67 #include "llvm/IR/Attributes.h"
68 #include "llvm/IR/DebugLoc.h"
69 #include "llvm/IR/Function.h"
70 #include "llvm/MC/LaneBitmask.h"
71 #include "llvm/MC/MCInstrDesc.h"
73 #include "llvm/MC/MCRegisterInfo.h"
74 #include "llvm/Pass.h"
76 #include "llvm/Support/Compiler.h"
77 #include "llvm/Support/Debug.h"
80 #include <algorithm>
81 #include <cassert>
82 #include <climits>
83 #include <cstdint>
84 #include <deque>
85 #include <functional>
86 #include <iterator>
87 #include <map>
88 #include <memory>
89 #include <tuple>
90 #include <utility>
91 #include <vector>
92 
93 using namespace llvm;
94 
95 #define DEBUG_TYPE "pipeliner"
96 
97 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
98 STATISTIC(NumPipelined, "Number of loops software pipelined");
99 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
100 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
101 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
102 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
103 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
104 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
105 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
106 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
107 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
108 
109 /// A command line option to turn software pipelining on or off.
110 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
112  cl::desc("Enable Software Pipelining"));
113 
114 /// A command line option to enable SWP at -Os.
115 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
116  cl::desc("Enable SWP at Os."), cl::Hidden,
117  cl::init(false));
118 
119 /// A command line argument to limit minimum initial interval for pipelining.
120 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
121  cl::desc("Size limit for the MII."),
122  cl::Hidden, cl::init(27));
123 
124 /// A command line argument to limit the number of stages in the pipeline.
125 static cl::opt<int>
126  SwpMaxStages("pipeliner-max-stages",
127  cl::desc("Maximum stages allowed in the generated scheduled."),
128  cl::Hidden, cl::init(3));
129 
130 /// A command line option to disable the pruning of chain dependences due to
131 /// an unrelated Phi.
132 static cl::opt<bool>
133  SwpPruneDeps("pipeliner-prune-deps",
134  cl::desc("Prune dependences between unrelated Phi nodes."),
135  cl::Hidden, cl::init(true));
136 
137 /// A command line option to disable the pruning of loop carried order
138 /// dependences.
139 static cl::opt<bool>
140  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
141  cl::desc("Prune loop carried order dependences."),
142  cl::Hidden, cl::init(true));
143 
144 #ifndef NDEBUG
145 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
146 #endif
147 
148 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
149  cl::ReallyHidden, cl::init(false),
150  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
151 
152 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
153  cl::init(false));
154 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
155  cl::init(false));
156 
158  "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
159  cl::desc("Instead of emitting the pipelined code, annotate instructions "
160  "with the generated schedule for feeding into the "
161  "-modulo-schedule-test pass"));
162 
164  "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
165  cl::desc(
166  "Use the experimental peeling code generator for software pipelining"));
167 
168 namespace llvm {
169 
170 // A command line option to enable the CopyToPhi DAG mutation.
172  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
173  cl::init(true), cl::ZeroOrMore,
174  cl::desc("Enable CopyToPhi DAG Mutation"));
175 
176 } // end namespace llvm
177 
178 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
179 char MachinePipeliner::ID = 0;
180 #ifndef NDEBUG
182 #endif
184 
186  "Modulo Software Pipelining", false, false)
192  "Modulo Software Pipelining", false, false)
193 
194 /// The "main" function for implementing Swing Modulo Scheduling.
195 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
196  if (skipFunction(mf.getFunction()))
197  return false;
198 
199  if (!EnableSWP)
200  return false;
201 
202  if (mf.getFunction().getAttributes().hasAttribute(
203  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
204  !EnableSWPOptSize.getPosition())
205  return false;
206 
207  if (!mf.getSubtarget().enableMachinePipeliner())
208  return false;
209 
210  // Cannot pipeline loops without instruction itineraries if we are using
211  // DFA for the pipeliner.
212  if (mf.getSubtarget().useDFAforSMS() &&
213  (!mf.getSubtarget().getInstrItineraryData() ||
214  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
215  return false;
216 
217  MF = &mf;
218  MLI = &getAnalysis<MachineLoopInfo>();
219  MDT = &getAnalysis<MachineDominatorTree>();
220  TII = MF->getSubtarget().getInstrInfo();
221  RegClassInfo.runOnMachineFunction(*MF);
222 
223  for (auto &L : *MLI)
224  scheduleLoop(*L);
225 
226  return false;
227 }
228 
229 /// Attempt to perform the SMS algorithm on the specified loop. This function is
230 /// the main entry point for the algorithm. The function identifies candidate
231 /// loops, calculates the minimum initiation interval, and attempts to schedule
232 /// the loop.
233 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
234  bool Changed = false;
235  for (auto &InnerLoop : L)
236  Changed |= scheduleLoop(*InnerLoop);
237 
238 #ifndef NDEBUG
239  // Stop trying after reaching the limit (if any).
240  int Limit = SwpLoopLimit;
241  if (Limit >= 0) {
242  if (NumTries >= SwpLoopLimit)
243  return Changed;
244  NumTries++;
245  }
246 #endif
247 
248  setPragmaPipelineOptions(L);
249  if (!canPipelineLoop(L)) {
250  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
251  return Changed;
252  }
253 
254  ++NumTrytoPipeline;
255 
256  Changed = swingModuloScheduler(L);
257 
258  return Changed;
259 }
260 
261 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
262  MachineBasicBlock *LBLK = L.getTopBlock();
263 
264  if (LBLK == nullptr)
265  return;
266 
267  const BasicBlock *BBLK = LBLK->getBasicBlock();
268  if (BBLK == nullptr)
269  return;
270 
271  const Instruction *TI = BBLK->getTerminator();
272  if (TI == nullptr)
273  return;
274 
275  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
276  if (LoopID == nullptr)
277  return;
278 
279  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
280  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
281 
282  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
283  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
284 
285  if (MD == nullptr)
286  continue;
287 
288  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
289 
290  if (S == nullptr)
291  continue;
292 
293  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
294  assert(MD->getNumOperands() == 2 &&
295  "Pipeline initiation interval hint metadata should have two operands.");
297  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
298  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
299  } else if (S->getString() == "llvm.loop.pipeline.disable") {
300  disabledByPragma = true;
301  }
302  }
303 }
304 
305 /// Return true if the loop can be software pipelined. The algorithm is
306 /// restricted to loops with a single basic block. Make sure that the
307 /// branch in the loop can be analyzed.
308 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
309  if (L.getNumBlocks() != 1)
310  return false;
311 
312  if (disabledByPragma)
313  return false;
314 
315  // Check if the branch can't be understood because we can't do pipelining
316  // if that's the case.
317  LI.TBB = nullptr;
318  LI.FBB = nullptr;
319  LI.BrCond.clear();
320  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
321  LLVM_DEBUG(
322  dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
323  NumFailBranch++;
324  return false;
325  }
326 
327  LI.LoopInductionVar = nullptr;
328  LI.LoopCompare = nullptr;
330  LLVM_DEBUG(
331  dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
332  NumFailLoop++;
333  return false;
334  }
335 
336  if (!L.getLoopPreheader()) {
337  LLVM_DEBUG(
338  dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
339  NumFailPreheader++;
340  return false;
341  }
342 
343  // Remove any subregisters from inputs to phi nodes.
344  preprocessPhiNodes(*L.getHeader());
345  return true;
346 }
347 
348 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
350  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
351 
352  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
353  MachineOperand &DefOp = PI.getOperand(0);
354  assert(DefOp.getSubReg() == 0);
355  auto *RC = MRI.getRegClass(DefOp.getReg());
356 
357  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
358  MachineOperand &RegOp = PI.getOperand(i);
359  if (RegOp.getSubReg() == 0)
360  continue;
361 
362  // If the operand uses a subregister, replace it with a new register
363  // without subregisters, and generate a copy to the new register.
364  Register NewReg = MRI.createVirtualRegister(RC);
365  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
367  const DebugLoc &DL = PredB.findDebugLoc(At);
368  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
369  .addReg(RegOp.getReg(), getRegState(RegOp),
370  RegOp.getSubReg());
371  Slots.insertMachineInstrInMaps(*Copy);
372  RegOp.setReg(NewReg);
373  RegOp.setSubReg(0);
374  }
375  }
376 }
377 
378 /// The SMS algorithm consists of the following main steps:
379 /// 1. Computation and analysis of the dependence graph.
380 /// 2. Ordering of the nodes (instructions).
381 /// 3. Attempt to Schedule the loop.
382 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
383  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
384 
385  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
387 
388  MachineBasicBlock *MBB = L.getHeader();
389  // The kernel should not include any terminator instructions. These
390  // will be added back later.
391  SMS.startBlock(MBB);
392 
393  // Compute the number of 'real' instructions in the basic block by
394  // ignoring terminators.
395  unsigned size = MBB->size();
397  E = MBB->instr_end();
398  I != E; ++I, --size)
399  ;
400 
401  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
402  SMS.schedule();
403  SMS.exitRegion();
404 
405  SMS.finishBlock();
406  return SMS.hasNewSchedule();
407 }
408 
409 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
410  if (II_setByPragma > 0)
411  MII = II_setByPragma;
412  else
413  MII = std::max(ResMII, RecMII);
414 }
415 
416 void SwingSchedulerDAG::setMAX_II() {
417  if (II_setByPragma > 0)
418  MAX_II = II_setByPragma;
419  else
420  MAX_II = MII + 10;
421 }
422 
423 /// We override the schedule function in ScheduleDAGInstrs to implement the
424 /// scheduling part of the Swing Modulo Scheduling algorithm.
426  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
427  buildSchedGraph(AA);
428  addLoopCarriedDependences(AA);
429  updatePhiDependences();
430  Topo.InitDAGTopologicalSorting();
431  changeDependences();
432  postprocessDAG();
433  LLVM_DEBUG(dump());
434 
435  NodeSetType NodeSets;
436  findCircuits(NodeSets);
437  NodeSetType Circuits = NodeSets;
438 
439  // Calculate the MII.
440  unsigned ResMII = calculateResMII();
441  unsigned RecMII = calculateRecMII(NodeSets);
442 
443  fuseRecs(NodeSets);
444 
445  // This flag is used for testing and can cause correctness problems.
446  if (SwpIgnoreRecMII)
447  RecMII = 0;
448 
449  setMII(ResMII, RecMII);
450  setMAX_II();
451 
452  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
453  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
454 
455  // Can't schedule a loop without a valid MII.
456  if (MII == 0) {
457  LLVM_DEBUG(
458  dbgs()
459  << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
460  NumFailZeroMII++;
461  return;
462  }
463 
464  // Don't pipeline large loops.
465  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
466  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
467  << ", we don't pipleline large loops\n");
468  NumFailLargeMaxMII++;
469  return;
470  }
471 
472  computeNodeFunctions(NodeSets);
473 
474  registerPressureFilter(NodeSets);
475 
476  colocateNodeSets(NodeSets);
477 
478  checkNodeSets(NodeSets);
479 
480  LLVM_DEBUG({
481  for (auto &I : NodeSets) {
482  dbgs() << " Rec NodeSet ";
483  I.dump();
484  }
485  });
486 
487  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
488 
489  groupRemainingNodes(NodeSets);
490 
491  removeDuplicateNodes(NodeSets);
492 
493  LLVM_DEBUG({
494  for (auto &I : NodeSets) {
495  dbgs() << " NodeSet ";
496  I.dump();
497  }
498  });
499 
500  computeNodeOrder(NodeSets);
501 
502  // check for node order issues
503  checkValidNodeOrder(Circuits);
504 
505  SMSchedule Schedule(Pass.MF);
506  Scheduled = schedulePipeline(Schedule);
507 
508  if (!Scheduled){
509  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
510  NumFailNoSchedule++;
511  return;
512  }
513 
514  unsigned numStages = Schedule.getMaxStageCount();
515  // No need to generate pipeline if there are no overlapped iterations.
516  if (numStages == 0) {
517  LLVM_DEBUG(
518  dbgs() << "No overlapped iterations, no need to generate pipeline\n");
519  NumFailZeroStage++;
520  return;
521  }
522  // Check that the maximum stage count is less than user-defined limit.
523  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
524  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
525  << " : too many stages, abort\n");
526  NumFailLargeMaxStage++;
527  return;
528  }
529 
530  // Generate the schedule as a ModuloSchedule.
531  DenseMap<MachineInstr *, int> Cycles, Stages;
532  std::vector<MachineInstr *> OrderedInsts;
533  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
534  ++Cycle) {
535  for (SUnit *SU : Schedule.getInstructions(Cycle)) {
536  OrderedInsts.push_back(SU->getInstr());
537  Cycles[SU->getInstr()] = Cycle;
538  Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
539  }
540  }
542  for (auto &KV : NewMIs) {
543  Cycles[KV.first] = Cycles[KV.second];
544  Stages[KV.first] = Stages[KV.second];
545  NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
546  }
547 
548  ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
549  std::move(Stages));
550  if (EmitTestAnnotations) {
551  assert(NewInstrChanges.empty() &&
552  "Cannot serialize a schedule with InstrChanges!");
554  MSTI.annotate();
555  return;
556  }
557  // The experimental code generator can't work if there are InstChanges.
558  if (ExperimentalCodeGen && NewInstrChanges.empty()) {
559  PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
560  MSE.expand();
561  } else {
562  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
563  MSE.expand();
564  MSE.cleanup();
565  }
566  ++NumPipelined;
567 }
568 
569 /// Clean up after the software pipeliner runs.
571  for (auto &KV : NewMIs)
572  MF.DeleteMachineInstr(KV.second);
573  NewMIs.clear();
574 
575  // Call the superclass.
577 }
578 
579 /// Return the register values for the operands of a Phi instruction.
580 /// This function assume the instruction is a Phi.
582  unsigned &InitVal, unsigned &LoopVal) {
583  assert(Phi.isPHI() && "Expecting a Phi.");
584 
585  InitVal = 0;
586  LoopVal = 0;
587  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
588  if (Phi.getOperand(i + 1).getMBB() != Loop)
589  InitVal = Phi.getOperand(i).getReg();
590  else
591  LoopVal = Phi.getOperand(i).getReg();
592 
593  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
594 }
595 
596 /// Return the Phi register value that comes the loop block.
597 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
598  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
599  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
600  return Phi.getOperand(i).getReg();
601  return 0;
602 }
603 
604 /// Return true if SUb can be reached from SUa following the chain edges.
605 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
606  SmallPtrSet<SUnit *, 8> Visited;
607  SmallVector<SUnit *, 8> Worklist;
608  Worklist.push_back(SUa);
609  while (!Worklist.empty()) {
610  const SUnit *SU = Worklist.pop_back_val();
611  for (auto &SI : SU->Succs) {
612  SUnit *SuccSU = SI.getSUnit();
613  if (SI.getKind() == SDep::Order) {
614  if (Visited.count(SuccSU))
615  continue;
616  if (SuccSU == SUb)
617  return true;
618  Worklist.push_back(SuccSU);
619  Visited.insert(SuccSU);
620  }
621  }
622  }
623  return false;
624 }
625 
626 /// Return true if the instruction causes a chain between memory
627 /// references before and after it.
629  return MI.isCall() || MI.mayRaiseFPException() ||
631  (MI.hasOrderedMemoryRef() &&
632  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
633 }
634 
635 /// Return the underlying objects for the memory references of an instruction.
636 /// This function calls the code in ValueTracking, but first checks that the
637 /// instruction has a memory operand.
640  const DataLayout &DL) {
641  if (!MI->hasOneMemOperand())
642  return;
644  if (!MM->getValue())
645  return;
646  GetUnderlyingObjects(MM->getValue(), Objs, DL);
647  for (const Value *V : Objs) {
648  if (!isIdentifiedObject(V)) {
649  Objs.clear();
650  return;
651  }
652  Objs.push_back(V);
653  }
654 }
655 
656 /// Add a chain edge between a load and store if the store can be an
657 /// alias of the load on a subsequent iteration, i.e., a loop carried
658 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
659 /// but that code doesn't create loop carried dependences.
660 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
662  Value *UnknownValue =
664  for (auto &SU : SUnits) {
665  MachineInstr &MI = *SU.getInstr();
666  if (isDependenceBarrier(MI, AA))
667  PendingLoads.clear();
668  else if (MI.mayLoad()) {
670  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
671  if (Objs.empty())
672  Objs.push_back(UnknownValue);
673  for (auto V : Objs) {
674  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
675  SUs.push_back(&SU);
676  }
677  } else if (MI.mayStore()) {
679  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
680  if (Objs.empty())
681  Objs.push_back(UnknownValue);
682  for (auto V : Objs) {
684  PendingLoads.find(V);
685  if (I == PendingLoads.end())
686  continue;
687  for (auto Load : I->second) {
688  if (isSuccOrder(Load, &SU))
689  continue;
690  MachineInstr &LdMI = *Load->getInstr();
691  // First, perform the cheaper check that compares the base register.
692  // If they are the same and the load offset is less than the store
693  // offset, then mark the dependence as loop carried potentially.
694  const MachineOperand *BaseOp1, *BaseOp2;
695  int64_t Offset1, Offset2;
696  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
697  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
698  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
699  (int)Offset1 < (int)Offset2) {
701  "What happened to the chain edge?");
702  SDep Dep(Load, SDep::Barrier);
703  Dep.setLatency(1);
704  SU.addPred(Dep);
705  continue;
706  }
707  }
708  // Second, the more expensive check that uses alias analysis on the
709  // base registers. If they alias, and the load offset is less than
710  // the store offset, the mark the dependence as loop carried.
711  if (!AA) {
712  SDep Dep(Load, SDep::Barrier);
713  Dep.setLatency(1);
714  SU.addPred(Dep);
715  continue;
716  }
717  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
718  MachineMemOperand *MMO2 = *MI.memoperands_begin();
719  if (!MMO1->getValue() || !MMO2->getValue()) {
720  SDep Dep(Load, SDep::Barrier);
721  Dep.setLatency(1);
722  SU.addPred(Dep);
723  continue;
724  }
725  if (MMO1->getValue() == MMO2->getValue() &&
726  MMO1->getOffset() <= MMO2->getOffset()) {
727  SDep Dep(Load, SDep::Barrier);
728  Dep.setLatency(1);
729  SU.addPred(Dep);
730  continue;
731  }
732  AliasResult AAResult = AA->alias(
734  MMO1->getAAInfo()),
736  MMO2->getAAInfo()));
737 
738  if (AAResult != NoAlias) {
739  SDep Dep(Load, SDep::Barrier);
740  Dep.setLatency(1);
741  SU.addPred(Dep);
742  }
743  }
744  }
745  }
746  }
747 }
748 
749 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
750 /// processes dependences for PHIs. This function adds true dependences
751 /// from a PHI to a use, and a loop carried dependence from the use to the
752 /// PHI. The loop carried dependence is represented as an anti dependence
753 /// edge. This function also removes chain dependences between unrelated
754 /// PHIs.
755 void SwingSchedulerDAG::updatePhiDependences() {
756  SmallVector<SDep, 4> RemoveDeps;
758 
759  // Iterate over each DAG node.
760  for (SUnit &I : SUnits) {
761  RemoveDeps.clear();
762  // Set to true if the instruction has an operand defined by a Phi.
763  unsigned HasPhiUse = 0;
764  unsigned HasPhiDef = 0;
765  MachineInstr *MI = I.getInstr();
766  // Iterate over each operand, and we process the definitions.
768  MOE = MI->operands_end();
769  MOI != MOE; ++MOI) {
770  if (!MOI->isReg())
771  continue;
772  Register Reg = MOI->getReg();
773  if (MOI->isDef()) {
774  // If the register is used by a Phi, then create an anti dependence.
776  UI = MRI.use_instr_begin(Reg),
777  UE = MRI.use_instr_end();
778  UI != UE; ++UI) {
779  MachineInstr *UseMI = &*UI;
780  SUnit *SU = getSUnit(UseMI);
781  if (SU != nullptr && UseMI->isPHI()) {
782  if (!MI->isPHI()) {
783  SDep Dep(SU, SDep::Anti, Reg);
784  Dep.setLatency(1);
785  I.addPred(Dep);
786  } else {
787  HasPhiDef = Reg;
788  // Add a chain edge to a dependent Phi that isn't an existing
789  // predecessor.
790  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
791  I.addPred(SDep(SU, SDep::Barrier));
792  }
793  }
794  }
795  } else if (MOI->isUse()) {
796  // If the register is defined by a Phi, then create a true dependence.
797  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
798  if (DefMI == nullptr)
799  continue;
800  SUnit *SU = getSUnit(DefMI);
801  if (SU != nullptr && DefMI->isPHI()) {
802  if (!MI->isPHI()) {
803  SDep Dep(SU, SDep::Data, Reg);
804  Dep.setLatency(0);
805  ST.adjustSchedDependency(SU, &I, Dep);
806  I.addPred(Dep);
807  } else {
808  HasPhiUse = Reg;
809  // Add a chain edge to a dependent Phi that isn't an existing
810  // predecessor.
811  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
812  I.addPred(SDep(SU, SDep::Barrier));
813  }
814  }
815  }
816  }
817  // Remove order dependences from an unrelated Phi.
818  if (!SwpPruneDeps)
819  continue;
820  for (auto &PI : I.Preds) {
821  MachineInstr *PMI = PI.getSUnit()->getInstr();
822  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
823  if (I.getInstr()->isPHI()) {
824  if (PMI->getOperand(0).getReg() == HasPhiUse)
825  continue;
826  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
827  continue;
828  }
829  RemoveDeps.push_back(PI);
830  }
831  }
832  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
833  I.removePred(RemoveDeps[i]);
834  }
835 }
836 
837 /// Iterate over each DAG node and see if we can change any dependences
838 /// in order to reduce the recurrence MII.
839 void SwingSchedulerDAG::changeDependences() {
840  // See if an instruction can use a value from the previous iteration.
841  // If so, we update the base and offset of the instruction and change
842  // the dependences.
843  for (SUnit &I : SUnits) {
844  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
845  int64_t NewOffset = 0;
846  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
847  NewOffset))
848  continue;
849 
850  // Get the MI and SUnit for the instruction that defines the original base.
851  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
852  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
853  if (!DefMI)
854  continue;
855  SUnit *DefSU = getSUnit(DefMI);
856  if (!DefSU)
857  continue;
858  // Get the MI and SUnit for the instruction that defins the new base.
859  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
860  if (!LastMI)
861  continue;
862  SUnit *LastSU = getSUnit(LastMI);
863  if (!LastSU)
864  continue;
865 
866  if (Topo.IsReachable(&I, LastSU))
867  continue;
868 
869  // Remove the dependence. The value now depends on a prior iteration.
871  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
872  ++P)
873  if (P->getSUnit() == DefSU)
874  Deps.push_back(*P);
875  for (int i = 0, e = Deps.size(); i != e; i++) {
876  Topo.RemovePred(&I, Deps[i].getSUnit());
877  I.removePred(Deps[i]);
878  }
879  // Remove the chain dependence between the instructions.
880  Deps.clear();
881  for (auto &P : LastSU->Preds)
882  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
883  Deps.push_back(P);
884  for (int i = 0, e = Deps.size(); i != e; i++) {
885  Topo.RemovePred(LastSU, Deps[i].getSUnit());
886  LastSU->removePred(Deps[i]);
887  }
888 
889  // Add a dependence between the new instruction and the instruction
890  // that defines the new base.
891  SDep Dep(&I, SDep::Anti, NewBase);
892  Topo.AddPred(LastSU, &I);
893  LastSU->addPred(Dep);
894 
895  // Remember the base and offset information so that we can update the
896  // instruction during code generation.
897  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
898  }
899 }
900 
901 namespace {
902 
903 // FuncUnitSorter - Comparison operator used to sort instructions by
904 // the number of functional unit choices.
905 struct FuncUnitSorter {
907  const MCSubtargetInfo *STI;
909 
910  FuncUnitSorter(const TargetSubtargetInfo &TSI)
911  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
912 
913  // Compute the number of functional unit alternatives needed
914  // at each stage, and take the minimum value. We prioritize the
915  // instructions by the least number of choices first.
916  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
917  unsigned SchedClass = Inst->getDesc().getSchedClass();
918  unsigned min = UINT_MAX;
919  if (InstrItins && !InstrItins->isEmpty()) {
920  for (const InstrStage &IS :
921  make_range(InstrItins->beginStage(SchedClass),
922  InstrItins->endStage(SchedClass))) {
923  unsigned funcUnits = IS.getUnits();
924  unsigned numAlternatives = countPopulation(funcUnits);
925  if (numAlternatives < min) {
926  min = numAlternatives;
927  F = funcUnits;
928  }
929  }
930  return min;
931  }
932  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
933  const MCSchedClassDesc *SCDesc =
934  STI->getSchedModel().getSchedClassDesc(SchedClass);
935  if (!SCDesc->isValid())
936  // No valid Schedule Class Desc for schedClass, should be
937  // Pseudo/PostRAPseudo
938  return min;
939 
940  for (const MCWriteProcResEntry &PRE :
941  make_range(STI->getWriteProcResBegin(SCDesc),
942  STI->getWriteProcResEnd(SCDesc))) {
943  if (!PRE.Cycles)
944  continue;
945  const MCProcResourceDesc *ProcResource =
946  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
947  unsigned NumUnits = ProcResource->NumUnits;
948  if (NumUnits < min) {
949  min = NumUnits;
950  F = PRE.ProcResourceIdx;
951  }
952  }
953  return min;
954  }
955  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
956  }
957 
958  // Compute the critical resources needed by the instruction. This
959  // function records the functional units needed by instructions that
960  // must use only one functional unit. We use this as a tie breaker
961  // for computing the resource MII. The instrutions that require
962  // the same, highly used, functional unit have high priority.
963  void calcCriticalResources(MachineInstr &MI) {
964  unsigned SchedClass = MI.getDesc().getSchedClass();
965  if (InstrItins && !InstrItins->isEmpty()) {
966  for (const InstrStage &IS :
967  make_range(InstrItins->beginStage(SchedClass),
968  InstrItins->endStage(SchedClass))) {
969  unsigned FuncUnits = IS.getUnits();
970  if (countPopulation(FuncUnits) == 1)
971  Resources[FuncUnits]++;
972  }
973  return;
974  }
975  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
976  const MCSchedClassDesc *SCDesc =
977  STI->getSchedModel().getSchedClassDesc(SchedClass);
978  if (!SCDesc->isValid())
979  // No valid Schedule Class Desc for schedClass, should be
980  // Pseudo/PostRAPseudo
981  return;
982 
983  for (const MCWriteProcResEntry &PRE :
984  make_range(STI->getWriteProcResBegin(SCDesc),
985  STI->getWriteProcResEnd(SCDesc))) {
986  if (!PRE.Cycles)
987  continue;
988  Resources[PRE.ProcResourceIdx]++;
989  }
990  return;
991  }
992  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
993  }
994 
995  /// Return true if IS1 has less priority than IS2.
996  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
997  unsigned F1 = 0, F2 = 0;
998  unsigned MFUs1 = minFuncUnits(IS1, F1);
999  unsigned MFUs2 = minFuncUnits(IS2, F2);
1000  if (MFUs1 == MFUs2)
1001  return Resources.lookup(F1) < Resources.lookup(F2);
1002  return MFUs1 > MFUs2;
1003  }
1004 };
1005 
1006 } // end anonymous namespace
1007 
1008 /// Calculate the resource constrained minimum initiation interval for the
1009 /// specified loop. We use the DFA to model the resources needed for
1010 /// each instruction, and we ignore dependences. A different DFA is created
1011 /// for each cycle that is required. When adding a new instruction, we attempt
1012 /// to add it to each existing DFA, until a legal space is found. If the
1013 /// instruction cannot be reserved in an existing DFA, we create a new one.
1014 unsigned SwingSchedulerDAG::calculateResMII() {
1015 
1016  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
1018  MachineBasicBlock *MBB = Loop.getHeader();
1019  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
1020 
1021  // Sort the instructions by the number of available choices for scheduling,
1022  // least to most. Use the number of critical resources as the tie breaker.
1023  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
1025  E = MBB->getFirstTerminator();
1026  I != E; ++I)
1027  FUS.calcCriticalResources(*I);
1029  FuncUnitOrder(FUS);
1030 
1032  E = MBB->getFirstTerminator();
1033  I != E; ++I)
1034  FuncUnitOrder.push(&*I);
1035 
1036  while (!FuncUnitOrder.empty()) {
1037  MachineInstr *MI = FuncUnitOrder.top();
1038  FuncUnitOrder.pop();
1039  if (TII->isZeroCost(MI->getOpcode()))
1040  continue;
1041  // Attempt to reserve the instruction in an existing DFA. At least one
1042  // DFA is needed for each cycle.
1043  unsigned NumCycles = getSUnit(MI)->Latency;
1044  unsigned ReservedCycles = 0;
1047  LLVM_DEBUG({
1048  dbgs() << "Trying to reserve resource for " << NumCycles
1049  << " cycles for \n";
1050  MI->dump();
1051  });
1052  for (unsigned C = 0; C < NumCycles; ++C)
1053  while (RI != RE) {
1054  if ((*RI)->canReserveResources(*MI)) {
1055  (*RI)->reserveResources(*MI);
1056  ++ReservedCycles;
1057  break;
1058  }
1059  RI++;
1060  }
1061  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1062  << ", NumCycles:" << NumCycles << "\n");
1063  // Add new DFAs, if needed, to reserve resources.
1064  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1066  << "NewResource created to reserve resources"
1067  << "\n");
1068  ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1069  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1070  NewResource->reserveResources(*MI);
1071  Resources.push_back(NewResource);
1072  }
1073  }
1074  int Resmii = Resources.size();
1075  LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
1076  // Delete the memory for each of the DFAs that were created earlier.
1077  for (ResourceManager *RI : Resources) {
1078  ResourceManager *D = RI;
1079  delete D;
1080  }
1081  Resources.clear();
1082  return Resmii;
1083 }
1084 
1085 /// Calculate the recurrence-constrainted minimum initiation interval.
1086 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1087 /// for each circuit. The II needs to satisfy the inequality
1088 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1089 /// II that satisfies the inequality, and the RecMII is the maximum
1090 /// of those values.
1091 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1092  unsigned RecMII = 0;
1093 
1094  for (NodeSet &Nodes : NodeSets) {
1095  if (Nodes.empty())
1096  continue;
1097 
1098  unsigned Delay = Nodes.getLatency();
1099  unsigned Distance = 1;
1100 
1101  // ii = ceil(delay / distance)
1102  unsigned CurMII = (Delay + Distance - 1) / Distance;
1103  Nodes.setRecMII(CurMII);
1104  if (CurMII > RecMII)
1105  RecMII = CurMII;
1106  }
1107 
1108  return RecMII;
1109 }
1110 
1111 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1112 /// but we do this to find the circuits, and then change them back.
1113 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1115  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1116  SUnit *SU = &SUnits[i];
1117  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1118  IP != EP; ++IP) {
1119  if (IP->getKind() != SDep::Anti)
1120  continue;
1121  DepsAdded.push_back(std::make_pair(SU, *IP));
1122  }
1123  }
1124  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1125  E = DepsAdded.end();
1126  I != E; ++I) {
1127  // Remove this anti dependency and add one in the reverse direction.
1128  SUnit *SU = I->first;
1129  SDep &D = I->second;
1130  SUnit *TargetSU = D.getSUnit();
1131  unsigned Reg = D.getReg();
1132  unsigned Lat = D.getLatency();
1133  SU->removePred(D);
1134  SDep Dep(SU, SDep::Anti, Reg);
1135  Dep.setLatency(Lat);
1136  TargetSU->addPred(Dep);
1137  }
1138 }
1139 
1140 /// Create the adjacency structure of the nodes in the graph.
1141 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1142  SwingSchedulerDAG *DAG) {
1143  BitVector Added(SUnits.size());
1144  DenseMap<int, int> OutputDeps;
1145  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1146  Added.reset();
1147  // Add any successor to the adjacency matrix and exclude duplicates.
1148  for (auto &SI : SUnits[i].Succs) {
1149  // Only create a back-edge on the first and last nodes of a dependence
1150  // chain. This records any chains and adds them later.
1151  if (SI.getKind() == SDep::Output) {
1152  int N = SI.getSUnit()->NodeNum;
1153  int BackEdge = i;
1154  auto Dep = OutputDeps.find(BackEdge);
1155  if (Dep != OutputDeps.end()) {
1156  BackEdge = Dep->second;
1157  OutputDeps.erase(Dep);
1158  }
1159  OutputDeps[N] = BackEdge;
1160  }
1161  // Do not process a boundary node, an artificial node.
1162  // A back-edge is processed only if it goes to a Phi.
1163  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1164  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1165  continue;
1166  int N = SI.getSUnit()->NodeNum;
1167  if (!Added.test(N)) {
1168  AdjK[i].push_back(N);
1169  Added.set(N);
1170  }
1171  }
1172  // A chain edge between a store and a load is treated as a back-edge in the
1173  // adjacency matrix.
1174  for (auto &PI : SUnits[i].Preds) {
1175  if (!SUnits[i].getInstr()->mayStore() ||
1176  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1177  continue;
1178  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1179  int N = PI.getSUnit()->NodeNum;
1180  if (!Added.test(N)) {
1181  AdjK[i].push_back(N);
1182  Added.set(N);
1183  }
1184  }
1185  }
1186  }
1187  // Add back-edges in the adjacency matrix for the output dependences.
1188  for (auto &OD : OutputDeps)
1189  if (!Added.test(OD.second)) {
1190  AdjK[OD.first].push_back(OD.second);
1191  Added.set(OD.second);
1192  }
1193 }
1194 
1195 /// Identify an elementary circuit in the dependence graph starting at the
1196 /// specified node.
1197 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1198  bool HasBackedge) {
1199  SUnit *SV = &SUnits[V];
1200  bool F = false;
1201  Stack.insert(SV);
1202  Blocked.set(V);
1203 
1204  for (auto W : AdjK[V]) {
1205  if (NumPaths > MaxPaths)
1206  break;
1207  if (W < S)
1208  continue;
1209  if (W == S) {
1210  if (!HasBackedge)
1211  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1212  F = true;
1213  ++NumPaths;
1214  break;
1215  } else if (!Blocked.test(W)) {
1216  if (circuit(W, S, NodeSets,
1217  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1218  F = true;
1219  }
1220  }
1221 
1222  if (F)
1223  unblock(V);
1224  else {
1225  for (auto W : AdjK[V]) {
1226  if (W < S)
1227  continue;
1228  if (B[W].count(SV) == 0)
1229  B[W].insert(SV);
1230  }
1231  }
1232  Stack.pop_back();
1233  return F;
1234 }
1235 
1236 /// Unblock a node in the circuit finding algorithm.
1237 void SwingSchedulerDAG::Circuits::unblock(int U) {
1238  Blocked.reset(U);
1239  SmallPtrSet<SUnit *, 4> &BU = B[U];
1240  while (!BU.empty()) {
1242  assert(SI != BU.end() && "Invalid B set.");
1243  SUnit *W = *SI;
1244  BU.erase(W);
1245  if (Blocked.test(W->NodeNum))
1246  unblock(W->NodeNum);
1247  }
1248 }
1249 
1250 /// Identify all the elementary circuits in the dependence graph using
1251 /// Johnson's circuit algorithm.
1252 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1253  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1254  // but we do this to find the circuits, and then change them back.
1255  swapAntiDependences(SUnits);
1256 
1257  Circuits Cir(SUnits, Topo);
1258  // Create the adjacency structure.
1259  Cir.createAdjacencyStructure(this);
1260  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1261  Cir.reset();
1262  Cir.circuit(i, i, NodeSets);
1263  }
1264 
1265  // Change the dependences back so that we've created a DAG again.
1266  swapAntiDependences(SUnits);
1267 }
1268 
1269 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1270 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1271 // additional copies that are needed across iterations. An artificial dependence
1272 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1273 
1274 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1275 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1276 // PHI-------True-Dep------> USEOfPhi
1277 
1278 // The mutation creates
1279 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1280 
1281 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1282 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1283 // late to avoid additional copies across iterations. The possible scheduling
1284 // order would be
1285 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1286 
1288  for (SUnit &SU : DAG->SUnits) {
1289  // Find the COPY/REG_SEQUENCE instruction.
1290  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1291  continue;
1292 
1293  // Record the loop carried PHIs.
1294  SmallVector<SUnit *, 4> PHISUs;
1295  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1296  SmallVector<SUnit *, 4> SrcSUs;
1297 
1298  for (auto &Dep : SU.Preds) {
1299  SUnit *TmpSU = Dep.getSUnit();
1300  MachineInstr *TmpMI = TmpSU->getInstr();
1301  SDep::Kind DepKind = Dep.getKind();
1302  // Save the loop carried PHI.
1303  if (DepKind == SDep::Anti && TmpMI->isPHI())
1304  PHISUs.push_back(TmpSU);
1305  // Save the source of COPY/REG_SEQUENCE.
1306  // If the source has no pre-decessors, we will end up creating cycles.
1307  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1308  SrcSUs.push_back(TmpSU);
1309  }
1310 
1311  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1312  continue;
1313 
1314  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1315  // SUnit to the container.
1316  SmallVector<SUnit *, 8> UseSUs;
1317  for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
1318  for (auto &Dep : (*I)->Succs) {
1319  if (Dep.getKind() != SDep::Data)
1320  continue;
1321 
1322  SUnit *TmpSU = Dep.getSUnit();
1323  MachineInstr *TmpMI = TmpSU->getInstr();
1324  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1325  PHISUs.push_back(TmpSU);
1326  continue;
1327  }
1328  UseSUs.push_back(TmpSU);
1329  }
1330  }
1331 
1332  if (UseSUs.size() == 0)
1333  continue;
1334 
1335  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1336  // Add the artificial dependencies if it does not form a cycle.
1337  for (auto I : UseSUs) {
1338  for (auto Src : SrcSUs) {
1339  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1340  Src->addPred(SDep(I, SDep::Artificial));
1341  SDAG->Topo.AddPred(Src, I);
1342  }
1343  }
1344  }
1345  }
1346 }
1347 
1348 /// Return true for DAG nodes that we ignore when computing the cost functions.
1349 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1350 /// in the calculation of the ASAP, ALAP, etc functions.
1351 static bool ignoreDependence(const SDep &D, bool isPred) {
1352  if (D.isArtificial())
1353  return true;
1354  return D.getKind() == SDep::Anti && isPred;
1355 }
1356 
1357 /// Compute several functions need to order the nodes for scheduling.
1358 /// ASAP - Earliest time to schedule a node.
1359 /// ALAP - Latest time to schedule a node.
1360 /// MOV - Mobility function, difference between ALAP and ASAP.
1361 /// D - Depth of each node.
1362 /// H - Height of each node.
1363 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1364  ScheduleInfo.resize(SUnits.size());
1365 
1366  LLVM_DEBUG({
1367  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1368  E = Topo.end();
1369  I != E; ++I) {
1370  const SUnit &SU = SUnits[*I];
1371  dumpNode(SU);
1372  }
1373  });
1374 
1375  int maxASAP = 0;
1376  // Compute ASAP and ZeroLatencyDepth.
1377  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1378  E = Topo.end();
1379  I != E; ++I) {
1380  int asap = 0;
1381  int zeroLatencyDepth = 0;
1382  SUnit *SU = &SUnits[*I];
1383  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1384  EP = SU->Preds.end();
1385  IP != EP; ++IP) {
1386  SUnit *pred = IP->getSUnit();
1387  if (IP->getLatency() == 0)
1388  zeroLatencyDepth =
1389  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1390  if (ignoreDependence(*IP, true))
1391  continue;
1392  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1393  getDistance(pred, SU, *IP) * MII));
1394  }
1395  maxASAP = std::max(maxASAP, asap);
1396  ScheduleInfo[*I].ASAP = asap;
1397  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1398  }
1399 
1400  // Compute ALAP, ZeroLatencyHeight, and MOV.
1402  E = Topo.rend();
1403  I != E; ++I) {
1404  int alap = maxASAP;
1405  int zeroLatencyHeight = 0;
1406  SUnit *SU = &SUnits[*I];
1407  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1408  ES = SU->Succs.end();
1409  IS != ES; ++IS) {
1410  SUnit *succ = IS->getSUnit();
1411  if (IS->getLatency() == 0)
1412  zeroLatencyHeight =
1413  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1414  if (ignoreDependence(*IS, true))
1415  continue;
1416  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1417  getDistance(SU, succ, *IS) * MII));
1418  }
1419 
1420  ScheduleInfo[*I].ALAP = alap;
1421  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1422  }
1423 
1424  // After computing the node functions, compute the summary for each node set.
1425  for (NodeSet &I : NodeSets)
1426  I.computeNodeSetInfo(this);
1427 
1428  LLVM_DEBUG({
1429  for (unsigned i = 0; i < SUnits.size(); i++) {
1430  dbgs() << "\tNode " << i << ":\n";
1431  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1432  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1433  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1434  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1435  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1436  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1437  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1438  }
1439  });
1440 }
1441 
1442 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1443 /// as the predecessors of the elements of NodeOrder that are not also in
1444 /// NodeOrder.
1447  const NodeSet *S = nullptr) {
1448  Preds.clear();
1449  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1450  I != E; ++I) {
1451  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1452  PI != PE; ++PI) {
1453  if (S && S->count(PI->getSUnit()) == 0)
1454  continue;
1455  if (ignoreDependence(*PI, true))
1456  continue;
1457  if (NodeOrder.count(PI->getSUnit()) == 0)
1458  Preds.insert(PI->getSUnit());
1459  }
1460  // Back-edges are predecessors with an anti-dependence.
1461  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1462  ES = (*I)->Succs.end();
1463  IS != ES; ++IS) {
1464  if (IS->getKind() != SDep::Anti)
1465  continue;
1466  if (S && S->count(IS->getSUnit()) == 0)
1467  continue;
1468  if (NodeOrder.count(IS->getSUnit()) == 0)
1469  Preds.insert(IS->getSUnit());
1470  }
1471  }
1472  return !Preds.empty();
1473 }
1474 
1475 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1476 /// as the successors of the elements of NodeOrder that are not also in
1477 /// NodeOrder.
1480  const NodeSet *S = nullptr) {
1481  Succs.clear();
1482  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1483  I != E; ++I) {
1484  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1485  SI != SE; ++SI) {
1486  if (S && S->count(SI->getSUnit()) == 0)
1487  continue;
1488  if (ignoreDependence(*SI, false))
1489  continue;
1490  if (NodeOrder.count(SI->getSUnit()) == 0)
1491  Succs.insert(SI->getSUnit());
1492  }
1493  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1494  PE = (*I)->Preds.end();
1495  PI != PE; ++PI) {
1496  if (PI->getKind() != SDep::Anti)
1497  continue;
1498  if (S && S->count(PI->getSUnit()) == 0)
1499  continue;
1500  if (NodeOrder.count(PI->getSUnit()) == 0)
1501  Succs.insert(PI->getSUnit());
1502  }
1503  }
1504  return !Succs.empty();
1505 }
1506 
1507 /// Return true if there is a path from the specified node to any of the nodes
1508 /// in DestNodes. Keep track and return the nodes in any path.
1509 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1510  SetVector<SUnit *> &DestNodes,
1511  SetVector<SUnit *> &Exclude,
1512  SmallPtrSet<SUnit *, 8> &Visited) {
1513  if (Cur->isBoundaryNode())
1514  return false;
1515  if (Exclude.count(Cur) != 0)
1516  return false;
1517  if (DestNodes.count(Cur) != 0)
1518  return true;
1519  if (!Visited.insert(Cur).second)
1520  return Path.count(Cur) != 0;
1521  bool FoundPath = false;
1522  for (auto &SI : Cur->Succs)
1523  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1524  for (auto &PI : Cur->Preds)
1525  if (PI.getKind() == SDep::Anti)
1526  FoundPath |=
1527  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1528  if (FoundPath)
1529  Path.insert(Cur);
1530  return FoundPath;
1531 }
1532 
1533 /// Return true if Set1 is a subset of Set2.
1534 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1535  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1536  if (Set2.count(*I) == 0)
1537  return false;
1538  return true;
1539 }
1540 
1541 /// Compute the live-out registers for the instructions in a node-set.
1542 /// The live-out registers are those that are defined in the node-set,
1543 /// but not used. Except for use operands of Phis.
1545  NodeSet &NS) {
1549  SmallSet<unsigned, 4> Uses;
1550  for (SUnit *SU : NS) {
1551  const MachineInstr *MI = SU->getInstr();
1552  if (MI->isPHI())
1553  continue;
1554  for (const MachineOperand &MO : MI->operands())
1555  if (MO.isReg() && MO.isUse()) {
1556  Register Reg = MO.getReg();
1557  if (Register::isVirtualRegister(Reg))
1558  Uses.insert(Reg);
1559  else if (MRI.isAllocatable(Reg))
1560  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1561  Uses.insert(*Units);
1562  }
1563  }
1564  for (SUnit *SU : NS)
1565  for (const MachineOperand &MO : SU->getInstr()->operands())
1566  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1567  Register Reg = MO.getReg();
1568  if (Register::isVirtualRegister(Reg)) {
1569  if (!Uses.count(Reg))
1570  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1572  } else if (MRI.isAllocatable(Reg)) {
1573  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1574  if (!Uses.count(*Units))
1575  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1577  }
1578  }
1579  RPTracker.addLiveRegs(LiveOutRegs);
1580 }
1581 
1582 /// A heuristic to filter nodes in recurrent node-sets if the register
1583 /// pressure of a set is too high.
1584 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1585  for (auto &NS : NodeSets) {
1586  // Skip small node-sets since they won't cause register pressure problems.
1587  if (NS.size() <= 2)
1588  continue;
1589  IntervalPressure RecRegPressure;
1590  RegPressureTracker RecRPTracker(RecRegPressure);
1591  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1592  computeLiveOuts(MF, RecRPTracker, NS);
1593  RecRPTracker.closeBottom();
1594 
1595  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1596  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1597  return A->NodeNum > B->NodeNum;
1598  });
1599 
1600  for (auto &SU : SUnits) {
1601  // Since we're computing the register pressure for a subset of the
1602  // instructions in a block, we need to set the tracker for each
1603  // instruction in the node-set. The tracker is set to the instruction
1604  // just after the one we're interested in.
1605  MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
1606  RecRPTracker.setPos(std::next(CurInstI));
1607 
1608  RegPressureDelta RPDelta;
1609  ArrayRef<PressureChange> CriticalPSets;
1610  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1611  CriticalPSets,
1612  RecRegPressure.MaxSetPressure);
1613  if (RPDelta.Excess.isValid()) {
1614  LLVM_DEBUG(
1615  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1616  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1617  << ":" << RPDelta.Excess.getUnitInc());
1618  NS.setExceedPressure(SU);
1619  break;
1620  }
1621  RecRPTracker.recede();
1622  }
1623  }
1624 }
1625 
1626 /// A heuristic to colocate node sets that have the same set of
1627 /// successors.
1628 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1629  unsigned Colocate = 0;
1630  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1631  NodeSet &N1 = NodeSets[i];
1633  if (N1.empty() || !succ_L(N1, S1))
1634  continue;
1635  for (int j = i + 1; j < e; ++j) {
1636  NodeSet &N2 = NodeSets[j];
1637  if (N1.compareRecMII(N2) != 0)
1638  continue;
1640  if (N2.empty() || !succ_L(N2, S2))
1641  continue;
1642  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1643  N1.setColocate(++Colocate);
1644  N2.setColocate(Colocate);
1645  break;
1646  }
1647  }
1648  }
1649 }
1650 
1651 /// Check if the existing node-sets are profitable. If not, then ignore the
1652 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1653 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1654 /// then it's best to try to schedule all instructions together instead of
1655 /// starting with the recurrent node-sets.
1656 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1657  // Look for loops with a large MII.
1658  if (MII < 17)
1659  return;
1660  // Check if the node-set contains only a simple add recurrence.
1661  for (auto &NS : NodeSets) {
1662  if (NS.getRecMII() > 2)
1663  return;
1664  if (NS.getMaxDepth() > MII)
1665  return;
1666  }
1667  NodeSets.clear();
1668  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1669  return;
1670 }
1671 
1672 /// Add the nodes that do not belong to a recurrence set into groups
1673 /// based upon connected componenets.
1674 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1675  SetVector<SUnit *> NodesAdded;
1676  SmallPtrSet<SUnit *, 8> Visited;
1677  // Add the nodes that are on a path between the previous node sets and
1678  // the current node set.
1679  for (NodeSet &I : NodeSets) {
1681  // Add the nodes from the current node set to the previous node set.
1682  if (succ_L(I, N)) {
1683  SetVector<SUnit *> Path;
1684  for (SUnit *NI : N) {
1685  Visited.clear();
1686  computePath(NI, Path, NodesAdded, I, Visited);
1687  }
1688  if (!Path.empty())
1689  I.insert(Path.begin(), Path.end());
1690  }
1691  // Add the nodes from the previous node set to the current node set.
1692  N.clear();
1693  if (succ_L(NodesAdded, N)) {
1694  SetVector<SUnit *> Path;
1695  for (SUnit *NI : N) {
1696  Visited.clear();
1697  computePath(NI, Path, I, NodesAdded, Visited);
1698  }
1699  if (!Path.empty())
1700  I.insert(Path.begin(), Path.end());
1701  }
1702  NodesAdded.insert(I.begin(), I.end());
1703  }
1704 
1705  // Create a new node set with the connected nodes of any successor of a node
1706  // in a recurrent set.
1707  NodeSet NewSet;
1709  if (succ_L(NodesAdded, N))
1710  for (SUnit *I : N)
1711  addConnectedNodes(I, NewSet, NodesAdded);
1712  if (!NewSet.empty())
1713  NodeSets.push_back(NewSet);
1714 
1715  // Create a new node set with the connected nodes of any predecessor of a node
1716  // in a recurrent set.
1717  NewSet.clear();
1718  if (pred_L(NodesAdded, N))
1719  for (SUnit *I : N)
1720  addConnectedNodes(I, NewSet, NodesAdded);
1721  if (!NewSet.empty())
1722  NodeSets.push_back(NewSet);
1723 
1724  // Create new nodes sets with the connected nodes any remaining node that
1725  // has no predecessor.
1726  for (unsigned i = 0; i < SUnits.size(); ++i) {
1727  SUnit *SU = &SUnits[i];
1728  if (NodesAdded.count(SU) == 0) {
1729  NewSet.clear();
1730  addConnectedNodes(SU, NewSet, NodesAdded);
1731  if (!NewSet.empty())
1732  NodeSets.push_back(NewSet);
1733  }
1734  }
1735 }
1736 
1737 /// Add the node to the set, and add all of its connected nodes to the set.
1738 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1739  SetVector<SUnit *> &NodesAdded) {
1740  NewSet.insert(SU);
1741  NodesAdded.insert(SU);
1742  for (auto &SI : SU->Succs) {
1743  SUnit *Successor = SI.getSUnit();
1744  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1745  addConnectedNodes(Successor, NewSet, NodesAdded);
1746  }
1747  for (auto &PI : SU->Preds) {
1748  SUnit *Predecessor = PI.getSUnit();
1749  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1750  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1751  }
1752 }
1753 
1754 /// Return true if Set1 contains elements in Set2. The elements in common
1755 /// are returned in a different container.
1756 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1757  SmallSetVector<SUnit *, 8> &Result) {
1758  Result.clear();
1759  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1760  SUnit *SU = Set1[i];
1761  if (Set2.count(SU) != 0)
1762  Result.insert(SU);
1763  }
1764  return !Result.empty();
1765 }
1766 
1767 /// Merge the recurrence node sets that have the same initial node.
1768 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1769  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1770  ++I) {
1771  NodeSet &NI = *I;
1772  for (NodeSetType::iterator J = I + 1; J != E;) {
1773  NodeSet &NJ = *J;
1774  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1775  if (NJ.compareRecMII(NI) > 0)
1776  NI.setRecMII(NJ.getRecMII());
1777  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1778  ++NII)
1779  I->insert(*NII);
1780  NodeSets.erase(J);
1781  E = NodeSets.end();
1782  } else {
1783  ++J;
1784  }
1785  }
1786  }
1787 }
1788 
1789 /// Remove nodes that have been scheduled in previous NodeSets.
1790 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1791  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1792  ++I)
1793  for (NodeSetType::iterator J = I + 1; J != E;) {
1794  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1795 
1796  if (J->empty()) {
1797  NodeSets.erase(J);
1798  E = NodeSets.end();
1799  } else {
1800  ++J;
1801  }
1802  }
1803 }
1804 
1805 /// Compute an ordered list of the dependence graph nodes, which
1806 /// indicates the order that the nodes will be scheduled. This is a
1807 /// two-level algorithm. First, a partial order is created, which
1808 /// consists of a list of sets ordered from highest to lowest priority.
1809 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1811  NodeOrder.clear();
1812 
1813  for (auto &Nodes : NodeSets) {
1814  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1815  OrderKind Order;
1817  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
1818  R.insert(N.begin(), N.end());
1819  Order = BottomUp;
1820  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1821  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
1822  R.insert(N.begin(), N.end());
1823  Order = TopDown;
1824  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1825  } else if (isIntersect(N, Nodes, R)) {
1826  // If some of the successors are in the existing node-set, then use the
1827  // top-down ordering.
1828  Order = TopDown;
1829  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1830  } else if (NodeSets.size() == 1) {
1831  for (auto &N : Nodes)
1832  if (N->Succs.size() == 0)
1833  R.insert(N);
1834  Order = BottomUp;
1835  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1836  } else {
1837  // Find the node with the highest ASAP.
1838  SUnit *maxASAP = nullptr;
1839  for (SUnit *SU : Nodes) {
1840  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1841  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1842  maxASAP = SU;
1843  }
1844  R.insert(maxASAP);
1845  Order = BottomUp;
1846  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1847  }
1848 
1849  while (!R.empty()) {
1850  if (Order == TopDown) {
1851  // Choose the node with the maximum height. If more than one, choose
1852  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1853  // choose the node with the lowest MOV.
1854  while (!R.empty()) {
1855  SUnit *maxHeight = nullptr;
1856  for (SUnit *I : R) {
1857  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1858  maxHeight = I;
1859  else if (getHeight(I) == getHeight(maxHeight) &&
1860  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
1861  maxHeight = I;
1862  else if (getHeight(I) == getHeight(maxHeight) &&
1863  getZeroLatencyHeight(I) ==
1864  getZeroLatencyHeight(maxHeight) &&
1865  getMOV(I) < getMOV(maxHeight))
1866  maxHeight = I;
1867  }
1868  NodeOrder.insert(maxHeight);
1869  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1870  R.remove(maxHeight);
1871  for (const auto &I : maxHeight->Succs) {
1872  if (Nodes.count(I.getSUnit()) == 0)
1873  continue;
1874  if (NodeOrder.count(I.getSUnit()) != 0)
1875  continue;
1876  if (ignoreDependence(I, false))
1877  continue;
1878  R.insert(I.getSUnit());
1879  }
1880  // Back-edges are predecessors with an anti-dependence.
1881  for (const auto &I : maxHeight->Preds) {
1882  if (I.getKind() != SDep::Anti)
1883  continue;
1884  if (Nodes.count(I.getSUnit()) == 0)
1885  continue;
1886  if (NodeOrder.count(I.getSUnit()) != 0)
1887  continue;
1888  R.insert(I.getSUnit());
1889  }
1890  }
1891  Order = BottomUp;
1892  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1894  if (pred_L(NodeOrder, N, &Nodes))
1895  R.insert(N.begin(), N.end());
1896  } else {
1897  // Choose the node with the maximum depth. If more than one, choose
1898  // the node with the maximum ZeroLatencyDepth. If still more than one,
1899  // choose the node with the lowest MOV.
1900  while (!R.empty()) {
1901  SUnit *maxDepth = nullptr;
1902  for (SUnit *I : R) {
1903  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1904  maxDepth = I;
1905  else if (getDepth(I) == getDepth(maxDepth) &&
1906  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
1907  maxDepth = I;
1908  else if (getDepth(I) == getDepth(maxDepth) &&
1909  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1910  getMOV(I) < getMOV(maxDepth))
1911  maxDepth = I;
1912  }
1913  NodeOrder.insert(maxDepth);
1914  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1915  R.remove(maxDepth);
1916  if (Nodes.isExceedSU(maxDepth)) {
1917  Order = TopDown;
1918  R.clear();
1919  R.insert(Nodes.getNode(0));
1920  break;
1921  }
1922  for (const auto &I : maxDepth->Preds) {
1923  if (Nodes.count(I.getSUnit()) == 0)
1924  continue;
1925  if (NodeOrder.count(I.getSUnit()) != 0)
1926  continue;
1927  R.insert(I.getSUnit());
1928  }
1929  // Back-edges are predecessors with an anti-dependence.
1930  for (const auto &I : maxDepth->Succs) {
1931  if (I.getKind() != SDep::Anti)
1932  continue;
1933  if (Nodes.count(I.getSUnit()) == 0)
1934  continue;
1935  if (NodeOrder.count(I.getSUnit()) != 0)
1936  continue;
1937  R.insert(I.getSUnit());
1938  }
1939  }
1940  Order = TopDown;
1941  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1943  if (succ_L(NodeOrder, N, &Nodes))
1944  R.insert(N.begin(), N.end());
1945  }
1946  }
1947  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1948  }
1949 
1950  LLVM_DEBUG({
1951  dbgs() << "Node order: ";
1952  for (SUnit *I : NodeOrder)
1953  dbgs() << " " << I->NodeNum << " ";
1954  dbgs() << "\n";
1955  });
1956 }
1957 
1958 /// Process the nodes in the computed order and create the pipelined schedule
1959 /// of the instructions, if possible. Return true if a schedule is found.
1960 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
1961 
1962  if (NodeOrder.empty()){
1963  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
1964  return false;
1965  }
1966 
1967  bool scheduleFound = false;
1968  unsigned II = 0;
1969  // Keep increasing II until a valid schedule is found.
1970  for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
1971  Schedule.reset();
1972  Schedule.setInitiationInterval(II);
1973  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
1974 
1977  do {
1978  SUnit *SU = *NI;
1979 
1980  // Compute the schedule time for the instruction, which is based
1981  // upon the scheduled time for any predecessors/successors.
1982  int EarlyStart = INT_MIN;
1983  int LateStart = INT_MAX;
1984  // These values are set when the size of the schedule window is limited
1985  // due to chain dependences.
1986  int SchedEnd = INT_MAX;
1987  int SchedStart = INT_MIN;
1988  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1989  II, this);
1990  LLVM_DEBUG({
1991  dbgs() << "\n";
1992  dbgs() << "Inst (" << SU->NodeNum << ") ";
1993  SU->getInstr()->dump();
1994  dbgs() << "\n";
1995  });
1996  LLVM_DEBUG({
1997  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
1998  LateStart, SchedEnd, SchedStart);
1999  });
2000 
2001  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2002  SchedStart > LateStart)
2003  scheduleFound = false;
2004  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2005  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2006  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2007  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2008  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2009  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2010  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2011  SchedEnd =
2012  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2013  // When scheduling a Phi it is better to start at the late cycle and go
2014  // backwards. The default order may insert the Phi too far away from
2015  // its first dependence.
2016  if (SU->getInstr()->isPHI())
2017  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2018  else
2019  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2020  } else {
2021  int FirstCycle = Schedule.getFirstCycle();
2022  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2023  FirstCycle + getASAP(SU) + II - 1, II);
2024  }
2025  // Even if we find a schedule, make sure the schedule doesn't exceed the
2026  // allowable number of stages. We keep trying if this happens.
2027  if (scheduleFound)
2028  if (SwpMaxStages > -1 &&
2029  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2030  scheduleFound = false;
2031 
2032  LLVM_DEBUG({
2033  if (!scheduleFound)
2034  dbgs() << "\tCan't schedule\n";
2035  });
2036  } while (++NI != NE && scheduleFound);
2037 
2038  // If a schedule is found, check if it is a valid schedule too.
2039  if (scheduleFound)
2040  scheduleFound = Schedule.isValidSchedule(this);
2041  }
2042 
2043  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
2044  << ")\n");
2045 
2046  if (scheduleFound)
2047  Schedule.finalizeSchedule(this);
2048  else
2049  Schedule.reset();
2050 
2051  return scheduleFound && Schedule.getMaxStageCount() > 0;
2052 }
2053 
2054 /// Return true if we can compute the amount the instruction changes
2055 /// during each iteration. Set Delta to the amount of the change.
2056 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2058  const MachineOperand *BaseOp;
2059  int64_t Offset;
2060  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
2061  return false;
2062 
2063  if (!BaseOp->isReg())
2064  return false;
2065 
2066  Register BaseReg = BaseOp->getReg();
2067 
2069  // Check if there is a Phi. If so, get the definition in the loop.
2070  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2071  if (BaseDef && BaseDef->isPHI()) {
2072  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2073  BaseDef = MRI.getVRegDef(BaseReg);
2074  }
2075  if (!BaseDef)
2076  return false;
2077 
2078  int D = 0;
2079  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2080  return false;
2081 
2082  Delta = D;
2083  return true;
2084 }
2085 
2086 /// Check if we can change the instruction to use an offset value from the
2087 /// previous iteration. If so, return true and set the base and offset values
2088 /// so that we can rewrite the load, if necessary.
2089 /// v1 = Phi(v0, v3)
2090 /// v2 = load v1, 0
2091 /// v3 = post_store v1, 4, x
2092 /// This function enables the load to be rewritten as v2 = load v3, 4.
2093 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2094  unsigned &BasePos,
2095  unsigned &OffsetPos,
2096  unsigned &NewBase,
2097  int64_t &Offset) {
2098  // Get the load instruction.
2099  if (TII->isPostIncrement(*MI))
2100  return false;
2101  unsigned BasePosLd, OffsetPosLd;
2102  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2103  return false;
2104  Register BaseReg = MI->getOperand(BasePosLd).getReg();
2105 
2106  // Look for the Phi instruction.
2108  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
2109  if (!Phi || !Phi->isPHI())
2110  return false;
2111  // Get the register defined in the loop block.
2112  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2113  if (!PrevReg)
2114  return false;
2115 
2116  // Check for the post-increment load/store instruction.
2117  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
2118  if (!PrevDef || PrevDef == MI)
2119  return false;
2120 
2121  if (!TII->isPostIncrement(*PrevDef))
2122  return false;
2123 
2124  unsigned BasePos1 = 0, OffsetPos1 = 0;
2125  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2126  return false;
2127 
2128  // Make sure that the instructions do not access the same memory location in
2129  // the next iteration.
2130  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2131  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2132  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2133  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2134  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2135  MF.DeleteMachineInstr(NewMI);
2136  if (!Disjoint)
2137  return false;
2138 
2139  // Set the return value once we determine that we return true.
2140  BasePos = BasePosLd;
2141  OffsetPos = OffsetPosLd;
2142  NewBase = PrevReg;
2143  Offset = StoreOffset;
2144  return true;
2145 }
2146 
2147 /// Apply changes to the instruction if needed. The changes are need
2148 /// to improve the scheduling and depend up on the final schedule.
2150  SMSchedule &Schedule) {
2151  SUnit *SU = getSUnit(MI);
2153  InstrChanges.find(SU);
2154  if (It != InstrChanges.end()) {
2155  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2156  unsigned BasePos, OffsetPos;
2157  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2158  return;
2159  Register BaseReg = MI->getOperand(BasePos).getReg();
2160  MachineInstr *LoopDef = findDefInLoop(BaseReg);
2161  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
2162  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
2163  int BaseStageNum = Schedule.stageScheduled(SU);
2164  int BaseCycleNum = Schedule.cycleScheduled(SU);
2165  if (BaseStageNum < DefStageNum) {
2166  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2167  int OffsetDiff = DefStageNum - BaseStageNum;
2168  if (DefCycleNum < BaseCycleNum) {
2169  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
2170  if (OffsetDiff > 0)
2171  --OffsetDiff;
2172  }
2173  int64_t NewOffset =
2174  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2175  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2176  SU->setInstr(NewMI);
2177  MISUnitMap[NewMI] = SU;
2178  NewMIs[MI] = NewMI;
2179  }
2180  }
2181 }
2182 
2183 /// Return the instruction in the loop that defines the register.
2184 /// If the definition is a Phi, then follow the Phi operand to
2185 /// the instruction in the loop.
2186 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
2188  MachineInstr *Def = MRI.getVRegDef(Reg);
2189  while (Def->isPHI()) {
2190  if (!Visited.insert(Def).second)
2191  break;
2192  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2193  if (Def->getOperand(i + 1).getMBB() == BB) {
2194  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2195  break;
2196  }
2197  }
2198  return Def;
2199 }
2200 
2201 /// Return true for an order or output dependence that is loop carried
2202 /// potentially. A dependence is loop carried if the destination defines a valu
2203 /// that may be used or defined by the source in a subsequent iteration.
2205  bool isSucc) {
2206  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
2207  Dep.isArtificial())
2208  return false;
2209 
2210  if (!SwpPruneLoopCarried)
2211  return true;
2212 
2213  if (Dep.getKind() == SDep::Output)
2214  return true;
2215 
2216  MachineInstr *SI = Source->getInstr();
2217  MachineInstr *DI = Dep.getSUnit()->getInstr();
2218  if (!isSucc)
2219  std::swap(SI, DI);
2220  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2221 
2222  // Assume ordered loads and stores may have a loop carried dependence.
2223  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
2224  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
2226  return true;
2227 
2228  // Only chain dependences between a load and store can be loop carried.
2229  if (!DI->mayStore() || !SI->mayLoad())
2230  return false;
2231 
2232  unsigned DeltaS, DeltaD;
2233  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2234  return true;
2235 
2236  const MachineOperand *BaseOpS, *BaseOpD;
2237  int64_t OffsetS, OffsetD;
2239  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
2240  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
2241  return true;
2242 
2243  if (!BaseOpS->isIdenticalTo(*BaseOpD))
2244  return true;
2245 
2246  // Check that the base register is incremented by a constant value for each
2247  // iteration.
2248  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
2249  if (!Def || !Def->isPHI())
2250  return true;
2251  unsigned InitVal = 0;
2252  unsigned LoopVal = 0;
2253  getPhiRegs(*Def, BB, InitVal, LoopVal);
2254  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
2255  int D = 0;
2256  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
2257  return true;
2258 
2259  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
2260  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
2261 
2262  // This is the main test, which checks the offset values and the loop
2263  // increment value to determine if the accesses may be loop carried.
2264  if (AccessSizeS == MemoryLocation::UnknownSize ||
2265  AccessSizeD == MemoryLocation::UnknownSize)
2266  return true;
2267 
2268  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
2269  return true;
2270 
2271  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
2272 }
2273 
2274 void SwingSchedulerDAG::postprocessDAG() {
2275  for (auto &M : Mutations)
2276  M->apply(this);
2277 }
2278 
2279 /// Try to schedule the node at the specified StartCycle and continue
2280 /// until the node is schedule or the EndCycle is reached. This function
2281 /// returns true if the node is scheduled. This routine may search either
2282 /// forward or backward for a place to insert the instruction based upon
2283 /// the relative values of StartCycle and EndCycle.
2284 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
2285  bool forward = true;
2286  LLVM_DEBUG({
2287  dbgs() << "Trying to insert node between " << StartCycle << " and "
2288  << EndCycle << " II: " << II << "\n";
2289  });
2290  if (StartCycle > EndCycle)
2291  forward = false;
2292 
2293  // The terminating condition depends on the direction.
2294  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2295  for (int curCycle = StartCycle; curCycle != termCycle;
2296  forward ? ++curCycle : --curCycle) {
2297 
2298  // Add the already scheduled instructions at the specified cycle to the
2299  // DFA.
2300  ProcItinResources.clearResources();
2301  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
2302  checkCycle <= LastCycle; checkCycle += II) {
2303  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
2304 
2305  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
2306  E = cycleInstrs.end();
2307  I != E; ++I) {
2308  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
2309  continue;
2310  assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
2311  "These instructions have already been scheduled.");
2312  ProcItinResources.reserveResources(*(*I)->getInstr());
2313  }
2314  }
2315  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2316  ProcItinResources.canReserveResources(*SU->getInstr())) {
2317  LLVM_DEBUG({
2318  dbgs() << "\tinsert at cycle " << curCycle << " ";
2319  SU->getInstr()->dump();
2320  });
2321 
2322  ScheduledInstrs[curCycle].push_back(SU);
2323  InstrToCycle.insert(std::make_pair(SU, curCycle));
2324  if (curCycle > LastCycle)
2325  LastCycle = curCycle;
2326  if (curCycle < FirstCycle)
2327  FirstCycle = curCycle;
2328  return true;
2329  }
2330  LLVM_DEBUG({
2331  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2332  SU->getInstr()->dump();
2333  });
2334  }
2335  return false;
2336 }
2337 
2338 // Return the cycle of the earliest scheduled instruction in the chain.
2340  SmallPtrSet<SUnit *, 8> Visited;
2341  SmallVector<SDep, 8> Worklist;
2342  Worklist.push_back(Dep);
2343  int EarlyCycle = INT_MAX;
2344  while (!Worklist.empty()) {
2345  const SDep &Cur = Worklist.pop_back_val();
2346  SUnit *PrevSU = Cur.getSUnit();
2347  if (Visited.count(PrevSU))
2348  continue;
2349  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2350  if (it == InstrToCycle.end())
2351  continue;
2352  EarlyCycle = std::min(EarlyCycle, it->second);
2353  for (const auto &PI : PrevSU->Preds)
2354  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
2355  Worklist.push_back(PI);
2356  Visited.insert(PrevSU);
2357  }
2358  return EarlyCycle;
2359 }
2360 
2361 // Return the cycle of the latest scheduled instruction in the chain.
2363  SmallPtrSet<SUnit *, 8> Visited;
2364  SmallVector<SDep, 8> Worklist;
2365  Worklist.push_back(Dep);
2366  int LateCycle = INT_MIN;
2367  while (!Worklist.empty()) {
2368  const SDep &Cur = Worklist.pop_back_val();
2369  SUnit *SuccSU = Cur.getSUnit();
2370  if (Visited.count(SuccSU))
2371  continue;
2372  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2373  if (it == InstrToCycle.end())
2374  continue;
2375  LateCycle = std::max(LateCycle, it->second);
2376  for (const auto &SI : SuccSU->Succs)
2377  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
2378  Worklist.push_back(SI);
2379  Visited.insert(SuccSU);
2380  }
2381  return LateCycle;
2382 }
2383 
2384 /// If an instruction has a use that spans multiple iterations, then
2385 /// return true. These instructions are characterized by having a back-ege
2386 /// to a Phi, which contains a reference to another Phi.
2388  for (auto &P : SU->Preds)
2389  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
2390  for (auto &S : P.getSUnit()->Succs)
2391  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2392  return P.getSUnit();
2393  return nullptr;
2394 }
2395 
2396 /// Compute the scheduling start slot for the instruction. The start slot
2397 /// depends on any predecessor or successor nodes scheduled already.
2398 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
2399  int *MinEnd, int *MaxStart, int II,
2400  SwingSchedulerDAG *DAG) {
2401  // Iterate over each instruction that has been scheduled already. The start
2402  // slot computation depends on whether the previously scheduled instruction
2403  // is a predecessor or successor of the specified instruction.
2404  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2405 
2406  // Iterate over each instruction in the current cycle.
2407  for (SUnit *I : getInstructions(cycle)) {
2408  // Because we're processing a DAG for the dependences, we recognize
2409  // the back-edge in recurrences by anti dependences.
2410  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
2411  const SDep &Dep = SU->Preds[i];
2412  if (Dep.getSUnit() == I) {
2413  if (!DAG->isBackedge(SU, Dep)) {
2414  int EarlyStart = cycle + Dep.getLatency() -
2415  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2416  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2417  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
2418  int End = earliestCycleInChain(Dep) + (II - 1);
2419  *MinEnd = std::min(*MinEnd, End);
2420  }
2421  } else {
2422  int LateStart = cycle - Dep.getLatency() +
2423  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2424  *MinLateStart = std::min(*MinLateStart, LateStart);
2425  }
2426  }
2427  // For instruction that requires multiple iterations, make sure that
2428  // the dependent instruction is not scheduled past the definition.
2429  SUnit *BE = multipleIterations(I, DAG);
2430  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2431  !SU->isPred(I))
2432  *MinLateStart = std::min(*MinLateStart, cycle);
2433  }
2434  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
2435  if (SU->Succs[i].getSUnit() == I) {
2436  const SDep &Dep = SU->Succs[i];
2437  if (!DAG->isBackedge(SU, Dep)) {
2438  int LateStart = cycle - Dep.getLatency() +
2439  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2440  *MinLateStart = std::min(*MinLateStart, LateStart);
2441  if (DAG->isLoopCarriedDep(SU, Dep)) {
2442  int Start = latestCycleInChain(Dep) + 1 - II;
2443  *MaxStart = std::max(*MaxStart, Start);
2444  }
2445  } else {
2446  int EarlyStart = cycle + Dep.getLatency() -
2447  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2448  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2449  }
2450  }
2451  }
2452  }
2453  }
2454 }
2455 
2456 /// Order the instructions within a cycle so that the definitions occur
2457 /// before the uses. Returns true if the instruction is added to the start
2458 /// of the list, or false if added to the end.
2460  std::deque<SUnit *> &Insts) {
2461  MachineInstr *MI = SU->getInstr();
2462  bool OrderBeforeUse = false;
2463  bool OrderAfterDef = false;
2464  bool OrderBeforeDef = false;
2465  unsigned MoveDef = 0;
2466  unsigned MoveUse = 0;
2467  int StageInst1 = stageScheduled(SU);
2468 
2469  unsigned Pos = 0;
2470  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2471  ++I, ++Pos) {
2472  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2473  MachineOperand &MO = MI->getOperand(i);
2474  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
2475  continue;
2476 
2477  Register Reg = MO.getReg();
2478  unsigned BasePos, OffsetPos;
2479  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2480  if (MI->getOperand(BasePos).getReg() == Reg)
2481  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2482  Reg = NewReg;
2483  bool Reads, Writes;
2484  std::tie(Reads, Writes) =
2485  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2486  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
2487  OrderBeforeUse = true;
2488  if (MoveUse == 0)
2489  MoveUse = Pos;
2490  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
2491  // Add the instruction after the scheduled instruction.
2492  OrderAfterDef = true;
2493  MoveDef = Pos;
2494  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
2495  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
2496  OrderBeforeUse = true;
2497  if (MoveUse == 0)
2498  MoveUse = Pos;
2499  } else {
2500  OrderAfterDef = true;
2501  MoveDef = Pos;
2502  }
2503  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
2504  OrderBeforeUse = true;
2505  if (MoveUse == 0)
2506  MoveUse = Pos;
2507  if (MoveUse != 0) {
2508  OrderAfterDef = true;
2509  MoveDef = Pos - 1;
2510  }
2511  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
2512  // Add the instruction before the scheduled instruction.
2513  OrderBeforeUse = true;
2514  if (MoveUse == 0)
2515  MoveUse = Pos;
2516  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
2517  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
2518  if (MoveUse == 0) {
2519  OrderBeforeDef = true;
2520  MoveUse = Pos;
2521  }
2522  }
2523  }
2524  // Check for order dependences between instructions. Make sure the source
2525  // is ordered before the destination.
2526  for (auto &S : SU->Succs) {
2527  if (S.getSUnit() != *I)
2528  continue;
2529  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2530  OrderBeforeUse = true;
2531  if (Pos < MoveUse)
2532  MoveUse = Pos;
2533  }
2534  // We did not handle HW dependences in previous for loop,
2535  // and we normally set Latency = 0 for Anti deps,
2536  // so may have nodes in same cycle with Anti denpendent on HW regs.
2537  else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
2538  OrderBeforeUse = true;
2539  if ((MoveUse == 0) || (Pos < MoveUse))
2540  MoveUse = Pos;
2541  }
2542  }
2543  for (auto &P : SU->Preds) {
2544  if (P.getSUnit() != *I)
2545  continue;
2546  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2547  OrderAfterDef = true;
2548  MoveDef = Pos;
2549  }
2550  }
2551  }
2552 
2553  // A circular dependence.
2554  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
2555  OrderBeforeUse = false;
2556 
2557  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
2558  // to a loop-carried dependence.
2559  if (OrderBeforeDef)
2560  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
2561 
2562  // The uncommon case when the instruction order needs to be updated because
2563  // there is both a use and def.
2564  if (OrderBeforeUse && OrderAfterDef) {
2565  SUnit *UseSU = Insts.at(MoveUse);
2566  SUnit *DefSU = Insts.at(MoveDef);
2567  if (MoveUse > MoveDef) {
2568  Insts.erase(Insts.begin() + MoveUse);
2569  Insts.erase(Insts.begin() + MoveDef);
2570  } else {
2571  Insts.erase(Insts.begin() + MoveDef);
2572  Insts.erase(Insts.begin() + MoveUse);
2573  }
2574  orderDependence(SSD, UseSU, Insts);
2575  orderDependence(SSD, SU, Insts);
2576  orderDependence(SSD, DefSU, Insts);
2577  return;
2578  }
2579  // Put the new instruction first if there is a use in the list. Otherwise,
2580  // put it at the end of the list.
2581  if (OrderBeforeUse)
2582  Insts.push_front(SU);
2583  else
2584  Insts.push_back(SU);
2585 }
2586 
2587 /// Return true if the scheduled Phi has a loop carried operand.
2589  if (!Phi.isPHI())
2590  return false;
2591  assert(Phi.isPHI() && "Expecting a Phi.");
2592  SUnit *DefSU = SSD->getSUnit(&Phi);
2593  unsigned DefCycle = cycleScheduled(DefSU);
2594  int DefStage = stageScheduled(DefSU);
2595 
2596  unsigned InitVal = 0;
2597  unsigned LoopVal = 0;
2598  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
2599  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
2600  if (!UseSU)
2601  return true;
2602  if (UseSU->getInstr()->isPHI())
2603  return true;
2604  unsigned LoopCycle = cycleScheduled(UseSU);
2605  int LoopStage = stageScheduled(UseSU);
2606  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
2607 }
2608 
2609 /// Return true if the instruction is a definition that is loop carried
2610 /// and defines the use on the next iteration.
2611 /// v1 = phi(v2, v3)
2612 /// (Def) v3 = op v1
2613 /// (MO) = v1
2614 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
2615 /// register.
2618  if (!MO.isReg())
2619  return false;
2620  if (Def->isPHI())
2621  return false;
2622  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
2623  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
2624  return false;
2625  if (!isLoopCarried(SSD, *Phi))
2626  return false;
2627  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
2628  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
2629  MachineOperand &DMO = Def->getOperand(i);
2630  if (!DMO.isReg() || !DMO.isDef())
2631  continue;
2632  if (DMO.getReg() == LoopReg)
2633  return true;
2634  }
2635  return false;
2636 }
2637 
2638 // Check if the generated schedule is valid. This function checks if
2639 // an instruction that uses a physical register is scheduled in a
2640 // different stage than the definition. The pipeliner does not handle
2641 // physical register values that may cross a basic block boundary.
2643  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
2644  SUnit &SU = SSD->SUnits[i];
2645  if (!SU.hasPhysRegDefs)
2646  continue;
2647  int StageDef = stageScheduled(&SU);
2648  assert(StageDef != -1 && "Instruction should have been scheduled.");
2649  for (auto &SI : SU.Succs)
2650  if (SI.isAssignedRegDep())
2651  if (Register::isPhysicalRegister(SI.getReg()))
2652  if (stageScheduled(SI.getSUnit()) != StageDef)
2653  return false;
2654  }
2655  return true;
2656 }
2657 
2658 /// A property of the node order in swing-modulo-scheduling is
2659 /// that for nodes outside circuits the following holds:
2660 /// none of them is scheduled after both a successor and a
2661 /// predecessor.
2662 /// The method below checks whether the property is met.
2663 /// If not, debug information is printed and statistics information updated.
2664 /// Note that we do not use an assert statement.
2665 /// The reason is that although an invalid node oder may prevent
2666 /// the pipeliner from finding a pipelined schedule for arbitrary II,
2667 /// it does not lead to the generation of incorrect code.
2668 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
2669 
2670  // a sorted vector that maps each SUnit to its index in the NodeOrder
2671  typedef std::pair<SUnit *, unsigned> UnitIndex;
2672  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
2673 
2674  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
2675  Indices.push_back(std::make_pair(NodeOrder[i], i));
2676 
2677  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
2678  return std::get<0>(i1) < std::get<0>(i2);
2679  };
2680 
2681  // sort, so that we can perform a binary search
2682  llvm::sort(Indices, CompareKey);
2683 
2684  bool Valid = true;
2685  (void)Valid;
2686  // for each SUnit in the NodeOrder, check whether
2687  // it appears after both a successor and a predecessor
2688  // of the SUnit. If this is the case, and the SUnit
2689  // is not part of circuit, then the NodeOrder is not
2690  // valid.
2691  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
2692  SUnit *SU = NodeOrder[i];
2693  unsigned Index = i;
2694 
2695  bool PredBefore = false;
2696  bool SuccBefore = false;
2697 
2698  SUnit *Succ;
2699  SUnit *Pred;
2700  (void)Succ;
2701  (void)Pred;
2702 
2703  for (SDep &PredEdge : SU->Preds) {
2704  SUnit *PredSU = PredEdge.getSUnit();
2705  unsigned PredIndex = std::get<1>(
2706  *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
2707  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
2708  PredBefore = true;
2709  Pred = PredSU;
2710  break;
2711  }
2712  }
2713 
2714  for (SDep &SuccEdge : SU->Succs) {
2715  SUnit *SuccSU = SuccEdge.getSUnit();
2716  // Do not process a boundary node, it was not included in NodeOrder,
2717  // hence not in Indices either, call to std::lower_bound() below will
2718  // return Indices.end().
2719  if (SuccSU->isBoundaryNode())
2720  continue;
2721  unsigned SuccIndex = std::get<1>(
2722  *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
2723  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
2724  SuccBefore = true;
2725  Succ = SuccSU;
2726  break;
2727  }
2728  }
2729 
2730  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
2731  // instructions in circuits are allowed to be scheduled
2732  // after both a successor and predecessor.
2733  bool InCircuit = llvm::any_of(
2734  Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
2735  if (InCircuit)
2736  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
2737  else {
2738  Valid = false;
2739  NumNodeOrderIssues++;
2740  LLVM_DEBUG(dbgs() << "Predecessor ";);
2741  }
2742  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
2743  << " are scheduled before node " << SU->NodeNum
2744  << "\n";);
2745  }
2746  }
2747 
2748  LLVM_DEBUG({
2749  if (!Valid)
2750  dbgs() << "Invalid node order found!\n";
2751  });
2752 }
2753 
2754 /// Attempt to fix the degenerate cases when the instruction serialization
2755 /// causes the register lifetimes to overlap. For example,
2756 /// p' = store_pi(p, b)
2757 /// = load p, offset
2758 /// In this case p and p' overlap, which means that two registers are needed.
2759 /// Instead, this function changes the load to use p' and updates the offset.
2760 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
2761  unsigned OverlapReg = 0;
2762  unsigned NewBaseReg = 0;
2763  for (SUnit *SU : Instrs) {
2764  MachineInstr *MI = SU->getInstr();
2765  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2766  const MachineOperand &MO = MI->getOperand(i);
2767  // Look for an instruction that uses p. The instruction occurs in the
2768  // same cycle but occurs later in the serialized order.
2769  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
2770  // Check that the instruction appears in the InstrChanges structure,
2771  // which contains instructions that can have the offset updated.
2773  InstrChanges.find(SU);
2774  if (It != InstrChanges.end()) {
2775  unsigned BasePos, OffsetPos;
2776  // Update the base register and adjust the offset.
2777  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
2778  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2779  NewMI->getOperand(BasePos).setReg(NewBaseReg);
2780  int64_t NewOffset =
2781  MI->getOperand(OffsetPos).getImm() - It->second.second;
2782  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2783  SU->setInstr(NewMI);
2784  MISUnitMap[NewMI] = SU;
2785  NewMIs[MI] = NewMI;
2786  }
2787  }
2788  OverlapReg = 0;
2789  NewBaseReg = 0;
2790  break;
2791  }
2792  // Look for an instruction of the form p' = op(p), which uses and defines
2793  // two virtual registers that get allocated to the same physical register.
2794  unsigned TiedUseIdx = 0;
2795  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
2796  // OverlapReg is p in the example above.
2797  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
2798  // NewBaseReg is p' in the example above.
2799  NewBaseReg = MI->getOperand(i).getReg();
2800  break;
2801  }
2802  }
2803  }
2804 }
2805 
2806 /// After the schedule has been formed, call this function to combine
2807 /// the instructions from the different stages/cycles. That is, this
2808 /// function creates a schedule that represents a single iteration.
2810  // Move all instructions to the first stage from later stages.
2811  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2812  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
2813  ++stage) {
2814  std::deque<SUnit *> &cycleInstrs =
2815  ScheduledInstrs[cycle + (stage * InitiationInterval)];
2816  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
2817  E = cycleInstrs.rend();
2818  I != E; ++I)
2819  ScheduledInstrs[cycle].push_front(*I);
2820  }
2821  }
2822 
2823  // Erase all the elements in the later stages. Only one iteration should
2824  // remain in the scheduled list, and it contains all the instructions.
2825  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
2826  ScheduledInstrs.erase(cycle);
2827 
2828  // Change the registers in instruction as specified in the InstrChanges
2829  // map. We need to use the new registers to create the correct order.
2830  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
2831  SUnit *SU = &SSD->SUnits[i];
2832  SSD->applyInstrChange(SU->getInstr(), *this);
2833  }
2834 
2835  // Reorder the instructions in each cycle to fix and improve the
2836  // generated code.
2837  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
2838  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
2839  std::deque<SUnit *> newOrderPhi;
2840  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
2841  SUnit *SU = cycleInstrs[i];
2842  if (SU->getInstr()->isPHI())
2843  newOrderPhi.push_back(SU);
2844  }
2845  std::deque<SUnit *> newOrderI;
2846  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
2847  SUnit *SU = cycleInstrs[i];
2848  if (!SU->getInstr()->isPHI())
2849  orderDependence(SSD, SU, newOrderI);
2850  }
2851  // Replace the old order with the new order.
2852  cycleInstrs.swap(newOrderPhi);
2853  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
2854  SSD->fixupRegisterOverlaps(cycleInstrs);
2855  }
2856 
2857  LLVM_DEBUG(dump(););
2858 }
2859 
2860 void NodeSet::print(raw_ostream &os) const {
2861  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
2862  << " depth " << MaxDepth << " col " << Colocate << "\n";
2863  for (const auto &I : Nodes)
2864  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
2865  os << "\n";
2866 }
2867 
2868 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2869 /// Print the schedule information to the given output.
2871  // Iterate over each cycle.
2872  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2873  // Iterate over each instruction in the cycle.
2874  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
2875  for (SUnit *CI : cycleInstrs->second) {
2876  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
2877  os << "(" << CI->NodeNum << ") ";
2878  CI->getInstr()->print(os);
2879  os << "\n";
2880  }
2881  }
2882 }
2883 
2884 /// Utility function used for debugging to print the schedule.
2887 
2888 #endif
2889 
2891  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
2892  unsigned ProcResourceID = 0;
2893 
2894  // We currently limit the resource kinds to 64 and below so that we can use
2895  // uint64_t for Masks
2896  assert(SM.getNumProcResourceKinds() < 64 &&
2897  "Too many kinds of resources, unsupported");
2898  // Create a unique bitmask for every processor resource unit.
2899  // Skip resource at index 0, since it always references 'InvalidUnit'.
2900  Masks.resize(SM.getNumProcResourceKinds());
2901  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2902  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2903  if (Desc.SubUnitsIdxBegin)
2904  continue;
2905  Masks[I] = 1ULL << ProcResourceID;
2906  ProcResourceID++;
2907  }
2908  // Create a unique bitmask for every processor resource group.
2909  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2910  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2911  if (!Desc.SubUnitsIdxBegin)
2912  continue;
2913  Masks[I] = 1ULL << ProcResourceID;
2914  for (unsigned U = 0; U < Desc.NumUnits; ++U)
2915  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
2916  ProcResourceID++;
2917  }
2918  LLVM_DEBUG({
2919  if (SwpShowResMask) {
2920  dbgs() << "ProcResourceDesc:\n";
2921  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2922  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
2923  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
2924  ProcResource->Name, I, Masks[I],
2925  ProcResource->NumUnits);
2926  }
2927  dbgs() << " -----------------\n";
2928  }
2929  });
2930 }
2931 
2933 
2934  LLVM_DEBUG({
2935  if (SwpDebugResource)
2936  dbgs() << "canReserveResources:\n";
2937  });
2938  if (UseDFA)
2939  return DFAResources->canReserveResources(MID);
2940 
2941  unsigned InsnClass = MID->getSchedClass();
2942  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
2943  if (!SCDesc->isValid()) {
2944  LLVM_DEBUG({
2945  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
2946  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
2947  });
2948  return true;
2949  }
2950 
2951  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
2952  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
2953  for (; I != E; ++I) {
2954  if (!I->Cycles)
2955  continue;
2956  const MCProcResourceDesc *ProcResource =
2957  SM.getProcResource(I->ProcResourceIdx);
2958  unsigned NumUnits = ProcResource->NumUnits;
2959  LLVM_DEBUG({
2960  if (SwpDebugResource)
2961  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
2962  ProcResource->Name, I->ProcResourceIdx,
2963  ProcResourceCount[I->ProcResourceIdx], NumUnits,
2964  I->Cycles);
2965  });
2966  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
2967  return false;
2968  }
2969  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
2970  return true;
2971 }
2972 
2974  LLVM_DEBUG({
2975  if (SwpDebugResource)
2976  dbgs() << "reserveResources:\n";
2977  });
2978  if (UseDFA)
2979  return DFAResources->reserveResources(MID);
2980 
2981  unsigned InsnClass = MID->getSchedClass();
2982  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
2983  if (!SCDesc->isValid()) {
2984  LLVM_DEBUG({
2985  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
2986  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
2987  });
2988  return;
2989  }
2990  for (const MCWriteProcResEntry &PRE :
2991  make_range(STI->getWriteProcResBegin(SCDesc),
2992  STI->getWriteProcResEnd(SCDesc))) {
2993  if (!PRE.Cycles)
2994  continue;
2995  ++ProcResourceCount[PRE.ProcResourceIdx];
2996  LLVM_DEBUG({
2997  if (SwpDebugResource) {
2998  const MCProcResourceDesc *ProcResource =
2999  SM.getProcResource(PRE.ProcResourceIdx);
3000  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3001  ProcResource->Name, PRE.ProcResourceIdx,
3002  ProcResourceCount[PRE.ProcResourceIdx],
3003  ProcResource->NumUnits, PRE.Cycles);
3004  }
3005  });
3006  }
3007  LLVM_DEBUG({
3008  if (SwpDebugResource)
3009  dbgs() << "reserveResources: done!\n\n";
3010  });
3011 }
3012 
3014  return canReserveResources(&MI.getDesc());
3015 }
3016 
3018  return reserveResources(&MI.getDesc());
3019 }
3020 
3022  if (UseDFA)
3023  return DFAResources->clearResources();
3024  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
3025 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
auto lower_bound(R &&Range, T &&Value) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1261
uint64_t CallInst * C
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:769
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:471
RegisterClassInfo RegClassInfo
A common definition of LaneBitmask for use in TableGen and CodeGen.
void clear()
Definition: MapVector.h:88
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:656
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
unsigned NumPreds
of SDep::Data preds.
Definition: ScheduleDAG.h:266
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A reimplementation of ModuloScheduleExpander.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:484
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
Represents a schedule for a single-block loop.
bool empty() const
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
cl::opt< bool > SwpEnableCopyToPhi
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:63
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:179
bool isValidSchedule(SwingSchedulerDAG *SSD)
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:37
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
Metadata node.
Definition: Metadata.h:863
F(f)
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
block Block Frequency true
bool isPseudo() const
Return true if this is a pseudo instruction that doesn&#39;t correspond to a real machine instruction...
Definition: MCInstrDesc.h:262
static void getUnderlyingObjects(const MachineInstr *MI, SmallVectorImpl< const Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:144
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:476
bool isPHI() const
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:260
SUnit * getNode(unsigned i) const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
int compareRecMII(NodeSet &RHS)
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
A description of a memory reference used in the backend.
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:259
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:413
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
Modulo Software Pipelining
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change...
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1217
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:105
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:407
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
SlotIndexes pass.
Definition: SlotIndexes.h:314
void expand()
Performs the actual expansion.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:863
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
void dump() const
Definition: Pass.cpp:134
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:244
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Itinerary data supplied by a subtarget to be used by a target.
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
void setReg(Register Reg)
Change the register this operand corresponds to.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
unsigned count(SUnit *SU) const
bool isValid() const
Definition: MCSchedule.h:127
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:147
bool insert(SUnit *SU)
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1231
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
const Value * getValue() const
Return the base address of the memory access.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
StringRef getString() const
Definition: Metadata.cpp:463
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Scheduling dependency.
Definition: ScheduleDAG.h:49
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:538
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:601
void finishBlock() override
Clean up after the software pipeliner runs.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:843
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
int getFirstCycle() const
Return the first cycle in the completed schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
void GetUnderlyingObjects(const Value *V, SmallVectorImpl< const Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
The main class in the implementation of the target independent software pipeliner pass...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:200
MachineFunction * MF
unsigned const MachineRegisterInfo * MRI
void clearResources()
Reset the state.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:54
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:165
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:566
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
constexpr double e
Definition: MathExtras.h:57
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
void dump() const
Utility function used for debugging to print the schedule.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
void cleanup()
Performs final cleanup after expansion.
const TargetInstrInfo * TII
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
void print(raw_ostream &os) const
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
void fixupRegisterOverlaps(std::deque< SUnit *> &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
LLVM_DUMP_METHOD void dump() const
bool isCopy() const
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:56
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
const InstrItineraryData * InstrItins
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
hexagon gen pred
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SetVector< SUnit * >::const_iterator iterator
virtual bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:556
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:551
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
bool isDereferenceableInvariantLoad(AAResults *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static unsigned getDepth(const SmallVectorImpl< const MachineBasicBlock *> &Stack, const MachineBasicBlock *MBB)
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
MachineInstrBuilder MachineInstrBuilder & DefMI
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
void setRecMII(unsigned mii)
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:762
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
This class represents the scheduled code.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:262
bool isEmpty() const
Returns true if there are no itineraries.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
void setColocate(unsigned c)
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:63
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:168
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
iterator begin() const
Definition: SmallPtrSet.h:396
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit *> &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
Generic base class for all target subtargets.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:128
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class...
iterator end() const
Definition: SmallPtrSet.h:401
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:185
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:830
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:71
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
void stable_sort(R &&Range)
Definition: STLExtras.h:1289
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:74
mop_iterator operands_begin()
Definition: MachineInstr.h:470
A vector that has set insertion semantics.
Definition: SetVector.h:40
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
SmallVector< MachineOperand, 4 > BrCond
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
A single uniqued string.
Definition: Metadata.h:603
void setPos(MachineBasicBlock::const_iterator Pos)
Register getReg() const
getReg - Returns the register number.
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
void resize(size_type N)
Definition: SmallVector.h:344
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431