1 //===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/CodeGen/MachineTraceMetrics.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/ADT/Optional.h"
14 #include "llvm/ADT/PostOrderIterator.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/SparseSet.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/Pass.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Format.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Target/TargetSubtargetInfo.h"
34 #include <algorithm>
35 #include <cassert>
36 #include <iterator>
37 #include <tuple>
38 #include <utility>
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "machine-trace-metrics"
43 
44 char MachineTraceMetrics::ID = 0;
45 char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
46 
47 INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
48  "Machine Trace Metrics", false, true)
49 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
50 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
51 INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
52  "Machine Trace Metrics", false, true)
53 
54 MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
55  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
56 }
57 
58 void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
59  AU.setPreservesAll();
60  AU.addRequired<MachineBranchProbabilityInfo>();
61  AU.addRequired<MachineLoopInfo>();
62  MachineFunctionPass::getAnalysisUsage(AU);
63 }
64 
65 bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
66  MF = &Func;
67  const TargetSubtargetInfo &ST = MF->getSubtarget();
68  TII = ST.getInstrInfo();
69  TRI = ST.getRegisterInfo();
70  MRI = &MF->getRegInfo();
71  Loops = &getAnalysis<MachineLoopInfo>();
72  SchedModel.init(ST.getSchedModel(), &ST, TII);
73  BlockInfo.resize(MF->getNumBlockIDs());
74  ProcResourceCycles.resize(MF->getNumBlockIDs() *
75  SchedModel.getNumProcResourceKinds());
76  return false;
77 }
78 
79 void MachineTraceMetrics::releaseMemory() {
80  MF = nullptr;
81  BlockInfo.clear();
82  for (unsigned i = 0; i != TS_NumStrategies; ++i) {
83  delete Ensembles[i];
84  Ensembles[i] = nullptr;
85  }
86 }
87 
88 //===----------------------------------------------------------------------===//
89 // Fixed block information
90 //===----------------------------------------------------------------------===//
91 //
92 // The number of instructions in a basic block and the CPU resources used by
93 // those instructions don't depend on any given trace strategy.
94 
95 /// Compute the resource usage in basic block MBB.
96 const MachineTraceMetrics::FixedBlockInfo*
97 MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
98  assert(MBB && "No basic block");
99  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
100  if (FBI->hasResources())
101  return FBI;
102 
103  // Compute resource usage in the block.
104  FBI->HasCalls = false;
105  unsigned InstrCount = 0;
106 
107  // Add up per-processor resource cycles as well.
108  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
109  SmallVector<unsigned, 32> PRCycles(PRKinds);
110 
111  for (const auto &MI : *MBB) {
112  if (MI.isTransient())
113  continue;
114  ++InstrCount;
115  if (MI.isCall())
116  FBI->HasCalls = true;
117 
118  // Count processor resources used.
119  if (!SchedModel.hasInstrSchedModel())
120  continue;
121  const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
122  if (!SC->isValid())
123  continue;
124 
125  for (TargetSchedModel::ProcResIter
126  PI = SchedModel.getWriteProcResBegin(SC),
127  PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
128  assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
129  PRCycles[PI->ProcResourceIdx] += PI->Cycles;
130  }
131  }
132  FBI->InstrCount = InstrCount;
133 
134  // Scale the resource cycles so they are comparable.
135  unsigned PROffset = MBB->getNumber() * PRKinds;
136  for (unsigned K = 0; K != PRKinds; ++K)
137  ProcResourceCycles[PROffset + K] =
138  PRCycles[K] * SchedModel.getResourceFactor(K);
139 
140  return FBI;
141 }
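
// Illustrative example (editor's addition, not from the LLVM sources): with a
// hypothetical target that has 2 units of resource A and 1 unit of resource B,
// the normalization factors work out to 1 for A and 2 for B. A block that
// keeps A busy for 4 cycles and B busy for 2 cycles then records 4*1 = 4 and
// 2*2 = 4 scaled resource cycles, so the two kinds can be compared directly.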
142 
143 ArrayRef<unsigned>
144 MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
145  assert(BlockInfo[MBBNum].hasResources() &&
146  "getResources() must be called before getProcResourceCycles()");
147  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
148  assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
149  return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
150 }
151 
152 //===----------------------------------------------------------------------===//
153 // Ensemble utility functions
154 //===----------------------------------------------------------------------===//
155 
156 MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
157  : MTM(*ct) {
158  BlockInfo.resize(MTM.BlockInfo.size());
159  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
160  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
161  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
162 }
163 
164 // Virtual destructor serves as an anchor.
165 MachineTraceMetrics::Ensemble::~Ensemble() = default;
166 
167 const MachineLoop*
168 MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
169  return MTM.Loops->getLoopFor(MBB);
170 }
171 
172 // Update resource-related information in the TraceBlockInfo for MBB.
173 // Only update resources related to the trace above MBB.
174 void MachineTraceMetrics::Ensemble::
175 computeDepthResources(const MachineBasicBlock *MBB) {
176  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
177  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
178  unsigned PROffset = MBB->getNumber() * PRKinds;
179 
180  // Compute resources from trace above. The top block is simple.
181  if (!TBI->Pred) {
182  TBI->InstrDepth = 0;
183  TBI->Head = MBB->getNumber();
184  std::fill(ProcResourceDepths.begin() + PROffset,
185  ProcResourceDepths.begin() + PROffset + PRKinds, 0);
186  return;
187  }
188 
189  // Compute from the block above. A post-order traversal ensures the
190  // predecessor is always computed first.
191  unsigned PredNum = TBI->Pred->getNumber();
192  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
193  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
194  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
195  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
196  TBI->Head = PredTBI->Head;
197 
198  // Compute per-resource depths.
199  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
200  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
201  for (unsigned K = 0; K != PRKinds; ++K)
202  ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
203 }
204 
205 // Update resource-related information in the TraceBlockInfo for MBB.
206 // Only update resources related to the trace below MBB.
207 void MachineTraceMetrics::Ensemble::
208 computeHeightResources(const MachineBasicBlock *MBB) {
209  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
210  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
211  unsigned PROffset = MBB->getNumber() * PRKinds;
212 
213  // Compute resources for the current block.
214  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
215  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
216 
217  // The trace tail is done.
218  if (!TBI->Succ) {
219  TBI->Tail = MBB->getNumber();
220  std::copy(PRCycles.begin(), PRCycles.end(),
221  ProcResourceHeights.begin() + PROffset);
222  return;
223  }
224 
225  // Compute from the block below. A post-order traversal ensures the
226  // successor is always computed first.
227  unsigned SuccNum = TBI->Succ->getNumber();
228  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
229  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
230  TBI->InstrHeight += SuccTBI->InstrHeight;
231  TBI->Tail = SuccTBI->Tail;
232 
233  // Compute per-resource heights.
234  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
235  for (unsigned K = 0; K != PRKinds; ++K)
236  ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
237 }
238 
239 // Check if depth resources for MBB are valid and return the TBI.
240 // Return NULL if the resources have been invalidated.
241 const MachineTraceMetrics::TraceBlockInfo*
242 MachineTraceMetrics::Ensemble::
243 getDepthResources(const MachineBasicBlock *MBB) const {
244  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
245  return TBI->hasValidDepth() ? TBI : nullptr;
246 }
247 
248 // Check if height resources for MBB are valid and return the TBI.
249 // Return NULL if the resources have been invalidated.
250 const MachineTraceMetrics::TraceBlockInfo*
251 MachineTraceMetrics::Ensemble::
252 getHeightResources(const MachineBasicBlock *MBB) const {
253  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
254  return TBI->hasValidHeight() ? TBI : nullptr;
255 }
256 
257 /// Get an array of processor resource depths for MBB. Indexed by processor
258 /// resource kind, this array contains the scaled processor resources consumed
259 /// by all blocks preceding MBB in its trace. It does not include instructions
260 /// in MBB.
261 ///
262 /// Compare TraceBlockInfo::InstrDepth.
263 ArrayRef<unsigned>
264 MachineTraceMetrics::Ensemble::
265 getProcResourceDepths(unsigned MBBNum) const {
266  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
267  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
268  return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
269 }
270 
271 /// Get an array of processor resource heights for MBB. Indexed by processor
272 /// resource kind, this array contains the scaled processor resources consumed
273 /// by this block and all blocks following it in its trace.
274 ///
275 /// Compare TraceBlockInfo::InstrHeight.
276 ArrayRef<unsigned>
277 MachineTraceMetrics::Ensemble::
278 getProcResourceHeights(unsigned MBBNum) const {
279  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
280  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
281  return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
282 }
283 
284 //===----------------------------------------------------------------------===//
285 // Trace Selection Strategies
286 //===----------------------------------------------------------------------===//
287 //
288 // A trace selection strategy is implemented as a sub-class of Ensemble. The
289 // trace through a block B is computed by two DFS traversals of the CFG
290 // starting from B. One upwards, and one downwards. During the upwards DFS,
291 // pickTracePred() is called on the post-ordered blocks. During the downwards
292 // DFS, pickTraceSucc() is called in a post-order.
293 //
294 
295 // We never allow traces that leave loops, but we do allow traces to enter
296 // nested loops. We also never allow traces to contain back-edges.
297 //
298 // This means that a loop header can never appear above the center block of a
299 // trace, except as the trace head. Below the center block, loop exiting edges
300 // are banned.
301 //
302 // Return true if an edge from the From loop to the To loop is leaving a loop.
303 // Either of To and From can be null.
304 static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
305  return From && !From->contains(To);
306 }
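
// Illustrative example (editor's addition, not from the LLVM sources): given
// an outer loop L1 with a nested loop L2, an edge from a block in L2 to a
// block that is only in L1 leaves L2, so isExitingLoop(L2, L1) is true and
// such an edge is never followed below the center block. An edge from L1 into
// the nested L2 stays inside L1, so isExitingLoop(L1, L2) is false.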
307 
308 // MinInstrCountEnsemble - Pick the trace that executes the least number of
309 // instructions.
310 namespace {
311 
312 class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
313  const char *getName() const override { return "MinInstr"; }
314  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
315  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
316 
317 public:
318  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
319  : MachineTraceMetrics::Ensemble(mtm) {}
320 };
321 
322 } // end anonymous namespace
323 
324 // Select the preferred predecessor for MBB.
325 const MachineBasicBlock*
326 MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
327  if (MBB->pred_empty())
328  return nullptr;
329  const MachineLoop *CurLoop = getLoopFor(MBB);
330  // Don't leave loops, and never follow back-edges.
331  if (CurLoop && MBB == CurLoop->getHeader())
332  return nullptr;
333  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
334  const MachineBasicBlock *Best = nullptr;
335  unsigned BestDepth = 0;
336  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
337  const MachineTraceMetrics::TraceBlockInfo *PredTBI =
338  getDepthResources(Pred);
339  // Ignore cycles that aren't natural loops.
340  if (!PredTBI)
341  continue;
342  // Pick the predecessor that would give this block the smallest InstrDepth.
343  unsigned Depth = PredTBI->InstrDepth + CurCount;
344  if (!Best || Depth < BestDepth) {
345  Best = Pred;
346  BestDepth = Depth;
347  }
348  }
349  return Best;
350 }
351 
352 // Select the preferred successor for MBB.
353 const MachineBasicBlock*
354 MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
355  if (MBB->pred_empty())
356  return nullptr;
357  const MachineLoop *CurLoop = getLoopFor(MBB);
358  const MachineBasicBlock *Best = nullptr;
359  unsigned BestHeight = 0;
360  for (const MachineBasicBlock *Succ : MBB->successors()) {
361  // Don't consider back-edges.
362  if (CurLoop && Succ == CurLoop->getHeader())
363  continue;
364  // Don't consider successors exiting CurLoop.
365  if (isExitingLoop(CurLoop, getLoopFor(Succ)))
366  continue;
367  const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
368  getHeightResources(Succ);
369  // Ignore cycles that aren't natural loops.
370  if (!SuccTBI)
371  continue;
372  // Pick the successor that would give this block the smallest InstrHeight.
373  unsigned Height = SuccTBI->InstrHeight;
374  if (!Best || Height < BestHeight) {
375  Best = Succ;
376  BestHeight = Height;
377  }
378  }
379  return Best;
380 }
381 
382 // Get an Ensemble sub-class for the requested trace strategy.
383 MachineTraceMetrics::Ensemble *
384 MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
385  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
386  Ensemble *&E = Ensembles[strategy];
387  if (E)
388  return E;
389 
390  // Allocate new Ensemble on demand.
391  switch (strategy) {
392  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
393  default: llvm_unreachable("Invalid trace strategy enum");
394  }
395 }
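
// Editor's sketch (not part of the original file): a minimal example of how a
// client pass that already requires the MachineTraceMetrics analysis might use
// the API above. The function name is hypothetical and purely illustrative.
LLVM_ATTRIBUTE_UNUSED static unsigned
exampleCriticalPath(MachineTraceMetrics &Metrics, const MachineBasicBlock *MBB) {
  // Select the strategy that minimizes the instruction count along the trace.
  MachineTraceMetrics::Ensemble *MinInstr =
      Metrics.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
  // Compute (or reuse) the trace through MBB and report its critical path.
  MachineTraceMetrics::Trace Tr = MinInstr->getTrace(MBB);
  return Tr.getCriticalPath();
}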
396 
397 void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
398  DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
399  BlockInfo[MBB->getNumber()].invalidate();
400  for (unsigned i = 0; i != TS_NumStrategies; ++i)
401  if (Ensembles[i])
402  Ensembles[i]->invalidate(MBB);
403 }
404 
405 void MachineTraceMetrics::verifyAnalysis() const {
406  if (!MF)
407  return;
408 #ifndef NDEBUG
409  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
410  for (unsigned i = 0; i != TS_NumStrategies; ++i)
411  if (Ensembles[i])
412  Ensembles[i]->verify();
413 #endif
414 }
415 
416 //===----------------------------------------------------------------------===//
417 // Trace building
418 //===----------------------------------------------------------------------===//
419 //
420 // Traces are built by two CFG traversals. To avoid recomputing too much, use a
421 // set abstraction that confines the search to the current loop, and doesn't
422 // revisit blocks.
423 
424 namespace {
425 
426 struct LoopBounds {
427  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
428  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
429  const MachineLoopInfo *Loops;
430  bool Downward = false;
431 
432  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
433  const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
434 };
435 
436 } // end anonymous namespace
437 
438 // Specialize po_iterator_storage in order to prune the post-order traversal so
439 // it is limited to the current loop and doesn't traverse the loop back edges.
440 namespace llvm {
441 
442 template<>
443 class po_iterator_storage<LoopBounds, true> {
444  LoopBounds &LB;
445 
446 public:
447  po_iterator_storage(LoopBounds &lb) : LB(lb) {}
448 
449  void finishPostorder(const MachineBasicBlock*) {}
450 
451  bool insertEdge(Optional<const MachineBasicBlock *> From,
452  const MachineBasicBlock *To) {
453  // Skip already visited To blocks.
454  MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
455  if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
456  return false;
457  // From is null once when To is the trace center block.
458  if (From) {
459  if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
460  // Don't follow backedges, don't leave FromLoop when going upwards.
461  if ((LB.Downward ? To : *From) == FromLoop->getHeader())
462  return false;
463  // Don't leave FromLoop.
464  if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
465  return false;
466  }
467  }
468  // To is a new block. Mark the block as visited in case the CFG has cycles
469  // that MachineLoopInfo didn't recognize as a natural loop.
470  return LB.Visited.insert(To).second;
471  }
472 };
473 
474 } // end namespace llvm
475 
476 /// Compute the trace through MBB.
477 void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
478  DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
479  << MBB->getNumber() << '\n');
480  // Set up loop bounds for the backwards post-order traversal.
481  LoopBounds Bounds(BlockInfo, MTM.Loops);
482 
483  // Run an upwards post-order search for the trace start.
484  Bounds.Downward = false;
485  Bounds.Visited.clear();
486  for (auto I : inverse_post_order_ext(MBB, Bounds)) {
487  DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
488  TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
489  // All the predecessors have been visited, pick the preferred one.
490  TBI.Pred = pickTracePred(I);
491  DEBUG({
492  if (TBI.Pred)
493  dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
494  else
495  dbgs() << "null\n";
496  });
497  // The trace leading to I is now known, compute the depth resources.
498  computeDepthResources(I);
499  }
500 
501  // Run a downwards post-order search for the trace end.
502  Bounds.Downward = true;
503  Bounds.Visited.clear();
504  for (auto I : post_order_ext(MBB, Bounds)) {
505  DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
506  TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
507  // All the successors have been visited, pick the preferred one.
508  TBI.Succ = pickTraceSucc(I);
509  DEBUG({
510  if (TBI.Succ)
511  dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
512  else
513  dbgs() << "null\n";
514  });
515  // The trace leaving I is now known, compute the height resources.
516  computeHeightResources(I);
517  }
518 }
519 
520 /// Invalidate traces through BadMBB.
521 void
522 MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
523  SmallVector<const MachineBasicBlock*, 16> WorkList;
524  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
525 
526  // Invalidate height resources of blocks above MBB.
527  if (BadTBI.hasValidHeight()) {
528  BadTBI.invalidateHeight();
529  WorkList.push_back(BadMBB);
530  do {
531  const MachineBasicBlock *MBB = WorkList.pop_back_val();
532  DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
533  << " height.\n");
534  // Find any MBB predecessors that have MBB as their preferred successor.
535  // They are the only ones that need to be invalidated.
536  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
537  TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
538  if (!TBI.hasValidHeight())
539  continue;
540  if (TBI.Succ == MBB) {
541  TBI.invalidateHeight();
542  WorkList.push_back(Pred);
543  continue;
544  }
545  // Verify that TBI.Succ is actually a *I successor.
546  assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
547  }
548  } while (!WorkList.empty());
549  }
550 
551  // Invalidate depth resources of blocks below MBB.
552  if (BadTBI.hasValidDepth()) {
553  BadTBI.invalidateDepth();
554  WorkList.push_back(BadMBB);
555  do {
556  const MachineBasicBlock *MBB = WorkList.pop_back_val();
557  DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
558  << " depth.\n");
559  // Find any MBB successors that have MBB as their preferred predecessor.
560  // They are the only ones that need to be invalidated.
561  for (const MachineBasicBlock *Succ : MBB->successors()) {
562  TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
563  if (!TBI.hasValidDepth())
564  continue;
565  if (TBI.Pred == MBB) {
566  TBI.invalidateDepth();
567  WorkList.push_back(Succ);
568  continue;
569  }
570  // Verify that TBI.Pred is actually a *I predecessor.
571  assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
572  }
573  } while (!WorkList.empty());
574  }
575 
576  // Clear any per-instruction data. We only have to do this for BadMBB itself
577  // because the instructions in that block may change. Other blocks may be
578  // invalidated, but their instructions will stay the same, so there is no
579  // need to erase the Cycle entries. They will be overwritten when we
580  // recompute.
581  for (const auto &I : *BadMBB)
582  Cycles.erase(&I);
583 }
584 
585 void MachineTraceMetrics::Ensemble::verify() const {
586 #ifndef NDEBUG
587  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
588  "Outdated BlockInfo size");
589  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
590  const TraceBlockInfo &TBI = BlockInfo[Num];
591  if (TBI.hasValidDepth() && TBI.Pred) {
592  const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
593  assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
594  assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
595  "Trace is broken, depth should have been invalidated.");
596  const MachineLoop *Loop = getLoopFor(MBB);
597  assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
598  }
599  if (TBI.hasValidHeight() && TBI.Succ) {
600  const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
601  assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
602  assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
603  "Trace is broken, height should have been invalidated.");
604  const MachineLoop *Loop = getLoopFor(MBB);
605  const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
606  assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
607  "Trace contains backedge");
608  }
609  }
610 #endif
611 }
612 
613 //===----------------------------------------------------------------------===//
614 // Data Dependencies
615 //===----------------------------------------------------------------------===//
616 //
617 // Compute the depth and height of each instruction based on data dependencies
618 // and instruction latencies. These cycle numbers assume that the CPU can issue
619 // an infinite number of instructions per cycle as long as their dependencies
620 // are ready.
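
// Illustrative example (editor's addition, not from the LLVM sources): under
// this model, a chain
//   %a = ...            (latency 2)
//   %b = op %a, ...     (latency 3)
//   %c = op %b, ...
// gets Depth(%a) = 0, Depth(%b) = 2, and Depth(%c) = 5, no matter how many
// independent instructions issue in the same cycles; only data dependencies
// and latencies constrain the cycle numbers.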
621 
622 // A data dependency is represented as a defining MI and operand numbers on the
623 // defining and using MI.
624 namespace {
625 
626 struct DataDep {
627  const MachineInstr *DefMI;
628  unsigned DefOp;
629  unsigned UseOp;
630 
631  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
632  : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
633 
634  /// Create a DataDep from an SSA form virtual register.
635  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
636  : UseOp(UseOp) {
637  assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
638  MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
639  assert(!DefI.atEnd() && "Register has no defs");
640  DefMI = DefI->getParent();
641  DefOp = DefI.getOperandNo();
642  assert((++DefI).atEnd() && "Register has multiple defs");
643  }
644 };
645 
646 } // end anonymous namespace
647 
648 // Get the input data dependencies that must be ready before UseMI can issue.
649 // Return true if UseMI has any physreg operands.
650 static bool getDataDeps(const MachineInstr &UseMI,
651  SmallVectorImpl<DataDep> &Deps,
652  const MachineRegisterInfo *MRI) {
653  // Debug values should not be included in any calculations.
654  if (UseMI.isDebugValue())
655  return false;
656 
657  bool HasPhysRegs = false;
658  for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
659  E = UseMI.operands_end(); I != E; ++I) {
660  const MachineOperand &MO = *I;
661  if (!MO.isReg())
662  continue;
663  unsigned Reg = MO.getReg();
664  if (!Reg)
665  continue;
666  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
667  HasPhysRegs = true;
668  continue;
669  }
670  // Collect virtual register reads.
671  if (MO.readsReg())
672  Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
673  }
674  return HasPhysRegs;
675 }
676 
677 // Get the input data dependencies of a PHI instruction, using Pred as the
678 // preferred predecessor.
679 // This will add at most one dependency to Deps.
680 static void getPHIDeps(const MachineInstr &UseMI,
681  SmallVectorImpl<DataDep> &Deps,
682  const MachineBasicBlock *Pred,
683  const MachineRegisterInfo *MRI) {
684  // No predecessor at the beginning of a trace. Ignore dependencies.
685  if (!Pred)
686  return;
687  assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
688  for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
689  if (UseMI.getOperand(i + 1).getMBB() == Pred) {
690  unsigned Reg = UseMI.getOperand(i).getReg();
691  Deps.push_back(DataDep(MRI, Reg, i));
692  return;
693  }
694  }
695 }
696 
697 // Keep track of physreg data dependencies by recording each live register unit.
698 // Associate each regunit with an instruction operand. Depending on the
699 // direction instructions are scanned, it could be the operand that defined the
700 // regunit, or the highest operand to read the regunit.
701 namespace {
702 
703 struct LiveRegUnit {
704  unsigned RegUnit;
705  unsigned Cycle = 0;
706  const MachineInstr *MI = nullptr;
707  unsigned Op = 0;
708 
709  unsigned getSparseSetIndex() const { return RegUnit; }
710 
711  LiveRegUnit(unsigned RU) : RegUnit(RU) {}
712 };
713 
714 } // end anonymous namespace
715 
716 // Identify physreg dependencies for UseMI, and update the live regunit
717 // tracking set when scanning instructions downwards.
718 static void updatePhysDepsDownwards(const MachineInstr *UseMI,
719  SmallVectorImpl<DataDep> &Deps,
720  SparseSet<LiveRegUnit> &RegUnits,
721  const TargetRegisterInfo *TRI) {
722  SmallVector<unsigned, 8> Kills;
723  SmallVector<unsigned, 8> LiveDefOps;
724 
725  for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
726  ME = UseMI->operands_end(); MI != ME; ++MI) {
727  const MachineOperand &MO = *MI;
728  if (!MO.isReg())
729  continue;
730  unsigned Reg = MO.getReg();
731  if (!TargetRegisterInfo::isPhysicalRegister(Reg))
732  continue;
733  // Track live defs and kills for updating RegUnits.
734  if (MO.isDef()) {
735  if (MO.isDead())
736  Kills.push_back(Reg);
737  else
738  LiveDefOps.push_back(UseMI->getOperandNo(MI));
739  } else if (MO.isKill())
740  Kills.push_back(Reg);
741  // Identify dependencies.
742  if (!MO.readsReg())
743  continue;
744  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
745  SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
746  if (I == RegUnits.end())
747  continue;
748  Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
749  break;
750  }
751  }
752 
753  // Update RegUnits to reflect live registers after UseMI.
754  // First kills.
755  for (unsigned Kill : Kills)
756  for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
757  RegUnits.erase(*Units);
758 
759  // Second, live defs.
760  for (unsigned DefOp : LiveDefOps) {
761  for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
762  Units.isValid(); ++Units) {
763  LiveRegUnit &LRU = RegUnits[*Units];
764  LRU.MI = UseMI;
765  LRU.Op = DefOp;
766  }
767  }
768 }
769 
770 /// The length of the critical path through a trace is the maximum of two path
771 /// lengths:
772 ///
773 /// 1. The maximum height+depth over all instructions in the trace center block.
774 ///
775 /// 2. The longest cross-block dependency chain. For small blocks, it is
776 /// possible that the critical path through the trace doesn't include any
777 /// instructions in the block.
778 ///
779 /// This function computes the second number from the live-in list of the
780 /// center block.
781 unsigned MachineTraceMetrics::Ensemble::
782 computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
783  assert(TBI.HasValidInstrDepths && "Missing depth info");
784  assert(TBI.HasValidInstrHeights && "Missing height info");
785  unsigned MaxLen = 0;
786  for (const LiveInReg &LIR : TBI.LiveIns) {
787  if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
788  continue;
789  const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
790  // Ignore dependencies outside the current trace.
791  const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
792  if (!DefTBI.isUsefulDominator(TBI))
793  continue;
794  unsigned Len = LIR.Height + Cycles[DefMI].Depth;
795  MaxLen = std::max(MaxLen, Len);
796  }
797  return MaxLen;
798 }
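
// Illustrative example (editor's addition, not from the LLVM sources): if the
// deepest instruction in the trace center block has Depth 7 and Height 5
// (7 + 5 = 12), while a live-in virtual register has Height 4 and its def,
// further up the trace, has Depth 9 (4 + 9 = 13), then the critical path of
// the trace is max(12, 13) = 13 cycles; the second term is the cross-block
// chain computed above.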
799 
800 /// Compute instruction depths for all instructions above or in MBB in its
801 /// trace. This assumes that the trace through MBB has already been computed.
802 void MachineTraceMetrics::Ensemble::
803 computeInstrDepths(const MachineBasicBlock *MBB) {
804  // The top of the trace may already be computed, and HasValidInstrDepths
805  // implies Head->HasValidInstrDepths, so we only need to start from the first
806  // block in the trace that needs to be recomputed.
807  SmallVector<const MachineBasicBlock*, 8> Stack;
808  do {
809  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
810  assert(TBI.hasValidDepth() && "Incomplete trace");
811  if (TBI.HasValidInstrDepths)
812  break;
813  Stack.push_back(MBB);
814  MBB = TBI.Pred;
815  } while (MBB);
816 
817  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
818  // in the trace. We should track any live-out physregs that were defined in
819  // the trace. This is quite rare in SSA form, typically created by CSE
820  // hoisting a compare.
821  SparseSet<LiveRegUnit> RegUnits;
822  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
823 
824  // Go through trace blocks in top-down order, stopping after the center block.
825  SmallVector<DataDep, 8> Deps;
826  while (!Stack.empty()) {
827  MBB = Stack.pop_back_val();
828  DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
829  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
830  TBI.HasValidInstrDepths = true;
831  TBI.CriticalPath = 0;
832 
833  // Print out resource depths here as well.
834  DEBUG({
835  dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
836  ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
837  for (unsigned K = 0; K != PRDepths.size(); ++K)
838  if (PRDepths[K]) {
839  unsigned Factor = MTM.SchedModel.getResourceFactor(K);
840  dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
841  << MTM.SchedModel.getProcResource(K)->Name << " ("
842  << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
843  }
844  });
845 
846  // Also compute the critical path length through MBB when possible.
847  if (TBI.HasValidInstrHeights)
848  TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
849 
850  for (const auto &UseMI : *MBB) {
851  // Collect all data dependencies.
852  Deps.clear();
853  if (UseMI.isPHI())
854  getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
855  else if (getDataDeps(UseMI, Deps, MTM.MRI))
856  updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
857 
858  // Filter and process dependencies, computing the earliest issue cycle.
859  unsigned Cycle = 0;
860  for (const DataDep &Dep : Deps) {
861  const TraceBlockInfo&DepTBI =
862  BlockInfo[Dep.DefMI->getParent()->getNumber()];
863  // Ignore dependencies from outside the current trace.
864  if (!DepTBI.isUsefulDominator(TBI))
865  continue;
866  assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
867  unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
868  // Add latency if DefMI is a real instruction. Transients get latency 0.
869  if (!Dep.DefMI->isTransient())
870  DepCycle += MTM.SchedModel
871  .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
872  Cycle = std::max(Cycle, DepCycle);
873  }
874  // Remember the instruction depth.
875  InstrCycles &MICycles = Cycles[&UseMI];
876  MICycles.Depth = Cycle;
877 
878  if (!TBI.HasValidInstrHeights) {
879  DEBUG(dbgs() << Cycle << '\t' << UseMI);
880  continue;
881  }
882  // Update critical path length.
883  TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
884  DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
885  }
886  }
887 }
888 
889 // Identify physreg dependencies for MI when scanning instructions upwards.
890 // Return the issue height of MI after considering any live regunits.
891 // Height is the issue height computed from virtual register dependencies alone.
892 static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
893  SparseSet<LiveRegUnit> &RegUnits,
894  const TargetSchedModel &SchedModel,
895  const TargetInstrInfo *TII,
896  const TargetRegisterInfo *TRI) {
897  SmallVector<unsigned, 8> ReadOps;
898 
899  for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
900  MOE = MI.operands_end();
901  MOI != MOE; ++MOI) {
902  const MachineOperand &MO = *MOI;
903  if (!MO.isReg())
904  continue;
905  unsigned Reg = MO.getReg();
906  if (!TargetRegisterInfo::isPhysicalRegister(Reg))
907  continue;
908  if (MO.readsReg())
909  ReadOps.push_back(MI.getOperandNo(MOI));
910  if (!MO.isDef())
911  continue;
912  // This is a def of Reg. Remove corresponding entries from RegUnits, and
913  // update MI Height to consider the physreg dependencies.
914  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
915  SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
916  if (I == RegUnits.end())
917  continue;
918  unsigned DepHeight = I->Cycle;
919  if (!MI.isTransient()) {
920  // We may not know the UseMI of this dependency, if it came from the
921  // live-in list. SchedModel can handle a NULL UseMI.
922  DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
923  I->MI, I->Op);
924  }
925  Height = std::max(Height, DepHeight);
926  // This regunit is dead above MI.
927  RegUnits.erase(I);
928  }
929  }
930 
931  // Now we know the height of MI. Update any regunits read.
932  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
933  unsigned Reg = MI.getOperand(ReadOps[i]).getReg();
934  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
935  LiveRegUnit &LRU = RegUnits[*Units];
936  // Set the height to the highest reader of the unit.
937  if (LRU.Cycle <= Height && LRU.MI != &MI) {
938  LRU.Cycle = Height;
939  LRU.MI = &MI;
940  LRU.Op = ReadOps[i];
941  }
942  }
943  }
944 
945  return Height;
946 }
947 
948 using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
949 
950 // Push the height of DefMI upwards if required to match UseMI.
951 // Return true if this is the first time DefMI was seen.
952 static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
953  unsigned UseHeight, MIHeightMap &Heights,
954  const TargetSchedModel &SchedModel,
955  const TargetInstrInfo *TII) {
956  // Adjust height by Dep.DefMI latency.
957  if (!Dep.DefMI->isTransient())
958  UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
959  Dep.UseOp);
960 
961  // Update Heights[DefMI] to be the maximum height seen.
962  MIHeightMap::iterator I;
963  bool New;
964  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
965  if (New)
966  return true;
967 
968  // DefMI has been pushed before. Give it the max height.
969  if (I->second < UseHeight)
970  I->second = UseHeight;
971  return false;
972 }
973 
974 /// Assuming that the virtual register defined by DefMI:DefOp was used by
975 /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
976 /// when reaching the block that contains DefMI.
977 void MachineTraceMetrics::Ensemble::
978 addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
979  ArrayRef<const MachineBasicBlock*> Trace) {
980  assert(!Trace.empty() && "Trace should contain at least one block");
981  unsigned Reg = DefMI->getOperand(DefOp).getReg();
982  assert(TargetRegisterInfo::isVirtualRegister(Reg));
983  const MachineBasicBlock *DefMBB = DefMI->getParent();
984 
985  // Reg is live-in to all blocks in Trace that follow DefMBB.
986  for (unsigned i = Trace.size(); i; --i) {
987  const MachineBasicBlock *MBB = Trace[i-1];
988  if (MBB == DefMBB)
989  return;
990  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
991  // Just add the register. The height will be updated later.
992  TBI.LiveIns.push_back(Reg);
993  }
994 }
995 
996 /// Compute instruction heights in the trace through MBB. This updates MBB and
997 /// the blocks below it in the trace. It is assumed that the trace has already
998 /// been computed.
999 void MachineTraceMetrics::Ensemble::
1000 computeInstrHeights(const MachineBasicBlock *MBB) {
1001  // The bottom of the trace may already be computed.
1002  // Find the blocks that need updating.
1003  SmallVector<const MachineBasicBlock*, 8> Stack;
1004  do {
1005  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1006  assert(TBI.hasValidHeight() && "Incomplete trace");
1007  if (TBI.HasValidInstrHeights)
1008  break;
1009  Stack.push_back(MBB);
1010  TBI.LiveIns.clear();
1011  MBB = TBI.Succ;
1012  } while (MBB);
1013 
1014  // As we move upwards in the trace, keep track of instructions that are
1015  // required by deeper trace instructions. Map MI -> height required so far.
1016  MIHeightMap Heights;
1017 
1018  // For physregs, the def isn't known when we see the use.
1019  // Instead, keep track of the highest use of each regunit.
1020  SparseSet<LiveRegUnit> RegUnits;
1021  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1022 
1023  // If the bottom of the trace was already precomputed, initialize heights
1024  // from its live-in list.
1025  // MBB is the highest precomputed block in the trace.
1026  if (MBB) {
1027  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1028  for (LiveInReg &LI : TBI.LiveIns) {
1029  if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
1030  // For virtual registers, the def latency is included.
1031  unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
1032  if (Height < LI.Height)
1033  Height = LI.Height;
1034  } else {
1035  // For register units, the def latency is not included because we don't
1036  // know the def yet.
1037  RegUnits[LI.Reg].Cycle = LI.Height;
1038  }
1039  }
1040  }
1041 
1042  // Go through the trace blocks in bottom-up order.
1043  SmallVector<DataDep, 8> Deps;
1044  for (;!Stack.empty(); Stack.pop_back()) {
1045  MBB = Stack.back();
1046  DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
1047  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1048  TBI.HasValidInstrHeights = true;
1049  TBI.CriticalPath = 0;
1050 
1051  DEBUG({
1052  dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1053  ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1054  for (unsigned K = 0; K != PRHeights.size(); ++K)
1055  if (PRHeights[K]) {
1056  unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1057  dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1058  << MTM.SchedModel.getProcResource(K)->Name << " ("
1059  << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1060  }
1061  });
1062 
1063  // Get dependencies from PHIs in the trace successor.
1064  const MachineBasicBlock *Succ = TBI.Succ;
1065  // If MBB is the last block in the trace, and it has a back-edge to the
1066  // loop header, get loop-carried dependencies from PHIs in the header. For
1067  // that purpose, pretend that all the loop header PHIs have height 0.
1068  if (!Succ)
1069  if (const MachineLoop *Loop = getLoopFor(MBB))
1070  if (MBB->isSuccessor(Loop->getHeader()))
1071  Succ = Loop->getHeader();
1072 
1073  if (Succ) {
1074  for (const auto &PHI : *Succ) {
1075  if (!PHI.isPHI())
1076  break;
1077  Deps.clear();
1078  getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1079  if (!Deps.empty()) {
1080  // Loop header PHI heights are all 0.
1081  unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1082  DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1083  if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1084  MTM.TII))
1085  addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1086  }
1087  }
1088  }
1089 
1090  // Go through the block backwards.
1091  for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
1092  BI != BB;) {
1093  const MachineInstr &MI = *--BI;
1094 
1095  // Find the MI height as determined by virtual register uses in the
1096  // trace below.
1097  unsigned Cycle = 0;
1098  MIHeightMap::iterator HeightI = Heights.find(&MI);
1099  if (HeightI != Heights.end()) {
1100  Cycle = HeightI->second;
1101  // We won't be seeing any more MI uses.
1102  Heights.erase(HeightI);
1103  }
1104 
1105  // Don't process PHI deps. They depend on the specific predecessor, and
1106  // we'll get them when visiting the predecessor.
1107  Deps.clear();
1108  bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1109 
1110  // There may also be regunit dependencies to include in the height.
1111  if (HasPhysRegs)
1112  Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1113  MTM.TII, MTM.TRI);
1114 
1115  // Update the required height of any virtual registers read by MI.
1116  for (const DataDep &Dep : Deps)
1117  if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1118  addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1119 
1120  InstrCycles &MICycles = Cycles[&MI];
1121  MICycles.Height = Cycle;
1122  if (!TBI.HasValidInstrDepths) {
1123  DEBUG(dbgs() << Cycle << '\t' << MI);
1124  continue;
1125  }
1126  // Update critical path length.
1127  TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1128  DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1129  }
1130 
1131  // Update virtual live-in heights. They were added by addLiveIns() with a 0
1132  // height because the final height isn't known until now.
1133  DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
1134  for (LiveInReg &LIR : TBI.LiveIns) {
1135  const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
1136  LIR.Height = Heights.lookup(DefMI);
1137  DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
1138  }
1139 
1140  // Transfer the live regunits to the live-in list.
1141  for (SparseSet<LiveRegUnit>::const_iterator
1142  RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
1143  TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
1144  DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
1145  << '@' << RI->Cycle);
1146  }
1147  DEBUG(dbgs() << '\n');
1148 
1149  if (!TBI.HasValidInstrDepths)
1150  continue;
1151  // Add live-ins to the critical path length.
1152  TBI.CriticalPath = std::max(TBI.CriticalPath,
1153  computeCrossBlockCriticalPath(TBI));
1154  DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1155  }
1156 }
1157 
1158 MachineTraceMetrics::Trace
1159 MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
1160  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1161 
1162  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1163  computeTrace(MBB);
1164  if (!TBI.HasValidInstrDepths)
1165  computeInstrDepths(MBB);
1166  if (!TBI.HasValidInstrHeights)
1167  computeInstrHeights(MBB);
1168 
1169  return Trace(*this, TBI);
1170 }
1171 
1172 unsigned
1173 MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
1174  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1175  "MI must be in the trace center block");
1176  InstrCycles Cyc = getInstrCycles(MI);
1177  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1178 }
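
// Illustrative example (editor's addition, not from the LLVM sources): with a
// trace critical path of 13 cycles, an instruction in the center block with
// Depth 4 and Height 6 has slack 13 - (4 + 6) = 3 cycles, i.e. it can be
// delayed by up to 3 cycles without lengthening the trace.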
1179 
1180 unsigned
1181 MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
1182  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1183  SmallVector<DataDep, 1> Deps;
1184  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1185  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1186  DataDep &Dep = Deps.front();
1187  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1188  // Add latency if DefMI is a real instruction. Transients get latency 0.
1189  if (!Dep.DefMI->isTransient())
1190  DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1191  &PHI, Dep.UseOp);
1192  return DepCycle;
1193 }
1194 
1195 /// When Bottom is set, include the instructions in the current block in the estimate.
1196 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
1197  // Find the limiting processor resource.
1198  // Numbers have been pre-scaled to be comparable.
1199  unsigned PRMax = 0;
1200  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1201  if (Bottom) {
1202  ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
1203  for (unsigned K = 0; K != PRDepths.size(); ++K)
1204  PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1205  } else {
1206  for (unsigned K = 0; K != PRDepths.size(); ++K)
1207  PRMax = std::max(PRMax, PRDepths[K]);
1208  }
1209  // Convert to cycle count.
1210  PRMax = TE.MTM.getCycles(PRMax);
1211 
1212  /// All instructions before current block
1213  unsigned Instrs = TBI.InstrDepth;
1214  // plus instructions in current block
1215  if (Bottom)
1216  Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1217  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1218  Instrs /= IW;
1219  // Assume issue width 1 without a schedule model.
1220  return std::max(Instrs, PRMax);
1221 }
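
// Illustrative example (editor's addition, not from the LLVM sources): with
// InstrDepth = 18, 6 instructions in the center block, an issue width of 2,
// and a scaled per-resource maximum that converts to 10 cycles, the resource
// depth at the bottom of the block is max((18 + 6) / 2, 10) = 12 cycles.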
1222 
1223 unsigned MachineTraceMetrics::Trace::getResourceLength(
1224  ArrayRef<const MachineBasicBlock *> Extrablocks,
1225  ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
1226  ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1227  // Add up resources above and below the center block.
1228  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1229  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1230  unsigned PRMax = 0;
1231 
1232  // Compute the scaled cycles consumed on a given resource by a set of extra instructions.
1233  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1234  unsigned ResourceIdx)
1235  ->unsigned {
1236  unsigned Cycles = 0;
1237  for (const MCSchedClassDesc *SC : Instrs) {
1238  if (!SC->isValid())
1239  continue;
1240  for (TargetSchedModel::ProcResIter
1241  PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1242  PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1243  PI != PE; ++PI) {
1244  if (PI->ProcResourceIdx != ResourceIdx)
1245  continue;
1246  Cycles +=
1247  (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1248  }
1249  }
1250  return Cycles;
1251  };
1252 
1253  for (unsigned K = 0; K != PRDepths.size(); ++K) {
1254  unsigned PRCycles = PRDepths[K] + PRHeights[K];
1255  for (const MachineBasicBlock *MBB : Extrablocks)
1256  PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
1257  PRCycles += extraCycles(ExtraInstrs, K);
1258  PRCycles -= extraCycles(RemoveInstrs, K);
1259  PRMax = std::max(PRMax, PRCycles);
1260  }
1261  // Convert to cycle count.
1262  PRMax = TE.MTM.getCycles(PRMax);
1263 
1264  // Instrs: #instructions in current trace outside current block.
1265  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1266  // Add instruction count from the extra blocks.
1267  for (const MachineBasicBlock *MBB : Extrablocks)
1268  Instrs += TE.MTM.getResources(MBB)->InstrCount;
1269  Instrs += ExtraInstrs.size();
1270  Instrs -= RemoveInstrs.size();
1271  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1272  Instrs /= IW;
1273  // Assume issue width 1 without a schedule model.
1274  return std::max(Instrs, PRMax);
1275 }
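
// Editor's note (addition, not from the LLVM sources): the length estimate
// mirrors getResourceDepth() above but sums resources and instructions both
// above and below the center block. Callers can model hypothetical changes:
// each MCSchedClassDesc in ExtraInstrs adds its scaled resource cycles and one
// instruction to the totals, RemoveInstrs subtracts them, and the result is
// the maximum of the instruction-count estimate and the per-resource estimate.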
1276 
1277 bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
1278  const MachineInstr &UseMI) const {
1279  if (DefMI.getParent() == UseMI.getParent())
1280  return true;
1281 
1282  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1283  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1284 
1285  return DepTBI.isUsefulDominator(TBI);
1286 }
1287 
1288 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
1289  OS << getName() << " ensemble:\n";
1290  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1291  OS << " BB#" << i << '\t';
1292  BlockInfo[i].print(OS);
1293  OS << '\n';
1294  }
1295 }
1296 
1297 void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
1298  if (hasValidDepth()) {
1299  OS << "depth=" << InstrDepth;
1300  if (Pred)
1301  OS << " pred=BB#" << Pred->getNumber();
1302  else
1303  OS << " pred=null";
1304  OS << " head=BB#" << Head;
1305  if (HasValidInstrDepths)
1306  OS << " +instrs";
1307  } else
1308  OS << "depth invalid";
1309  OS << ", ";
1310  if (hasValidHeight()) {
1311  OS << "height=" << InstrHeight;
1312  if (Succ)
1313  OS << " succ=BB#" << Succ->getNumber();
1314  else
1315  OS << " succ=null";
1316  OS << " tail=BB#" << Tail;
1317  if (HasValidInstrHeights)
1318  OS << " +instrs";
1319  } else
1320  OS << "height invalid";
1321  if (HasValidInstrDepths && HasValidInstrHeights)
1322  OS << ", crit=" << CriticalPath;
1323 }
1324 
1325 void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
1326  unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1327 
1328  OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
1329  << " --> BB#" << TBI.Tail << ':';
1330  if (TBI.hasValidHeight() && TBI.hasValidDepth())
1331  OS << ' ' << getInstrCount() << " instrs.";
1332  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1333  OS << ' ' << TBI.CriticalPath << " cycles.";
1334 
1335  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
1336  OS << "\nBB#" << MBBNum;
1337  while (Block->hasValidDepth() && Block->Pred) {
1338  unsigned Num = Block->Pred->getNumber();
1339  OS << " <- BB#" << Num;
1340  Block = &TE.BlockInfo[Num];
1341  }
1342 
1343  Block = &TBI;
1344  OS << "\n ";
1345  while (Block->hasValidHeight() && Block->Succ) {
1346  unsigned Num = Block->Succ->getNumber();
1347  OS << " -> BB#" << Num;
1348  Block = &TE.BlockInfo[Num];
1349  }
1350  OS << '\n';
1351 }