LLVM  8.0.0svn
MachineTraceMetrics.cpp
1 //===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/CodeGen/MachineTraceMetrics.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/ADT/Optional.h"
14 #include "llvm/ADT/PostOrderIterator.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/SparseSet.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstr.h"
22 #include "llvm/CodeGen/MachineLoopInfo.h"
23 #include "llvm/CodeGen/MachineOperand.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/TargetRegisterInfo.h"
26 #include "llvm/CodeGen/TargetSchedule.h"
27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/Format.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <algorithm>
35 #include <cassert>
36 #include <iterator>
37 #include <tuple>
38 #include <utility>
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "machine-trace-metrics"
43 
44 
45 char MachineTraceMetrics::ID = 0;
46 
47 char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
48 INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
49  "Machine Trace Metrics", false, true)
50 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
51 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
52 INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
53  "Machine Trace Metrics", false, true)
54 
55 MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
56  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
57 }
58 
59 void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
60  AU.setPreservesAll();
61  AU.addRequired<MachineBranchProbabilityInfo>();
62  AU.addRequired<MachineLoopInfo>();
63  MachineFunctionPass::getAnalysisUsage(AU);
64 }
65 
66 bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
67  MF = &Func;
68  const TargetSubtargetInfo &ST = MF->getSubtarget();
69  TII = ST.getInstrInfo();
70  TRI = ST.getRegisterInfo();
71  MRI = &MF->getRegInfo();
72  Loops = &getAnalysis<MachineLoopInfo>();
73  SchedModel.init(&ST);
74  BlockInfo.resize(MF->getNumBlockIDs());
75  ProcResourceCycles.resize(MF->getNumBlockIDs() *
76  SchedModel.getNumProcResourceKinds());
77  return false;
78 }
79 
80 void MachineTraceMetrics::releaseMemory() {
81  MF = nullptr;
82  BlockInfo.clear();
83  for (unsigned i = 0; i != TS_NumStrategies; ++i) {
84  delete Ensembles[i];
85  Ensembles[i] = nullptr;
86  }
87 }
88 
89 //===----------------------------------------------------------------------===//
90 // Fixed block information
91 //===----------------------------------------------------------------------===//
92 //
93 // The number of instructions in a basic block and the CPU resources used by
94 // those instructions don't depend on any given trace strategy.
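//
// A minimal query sketch (MTM is assumed to be a computed MachineTraceMetrics
// analysis and MBB a block of the current function):
//
//   const MachineTraceMetrics::FixedBlockInfo *FBI = MTM.getResources(MBB);
//   unsigned NonTrivialInstrs = FBI->InstrCount;
//   ArrayRef<unsigned> PRC = MTM.getProcResourceCycles(MBB->getNumber());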
95 
96 /// Compute the resource usage in basic block MBB.
97 const MachineTraceMetrics::FixedBlockInfo*
98 MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
99  assert(MBB && "No basic block");
100  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
101  if (FBI->hasResources())
102  return FBI;
103 
104  // Compute resource usage in the block.
105  FBI->HasCalls = false;
106  unsigned InstrCount = 0;
107 
108  // Add up per-processor resource cycles as well.
109  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
110  SmallVector<unsigned, 32> PRCycles(PRKinds);
111 
112  for (const auto &MI : *MBB) {
113  if (MI.isTransient())
114  continue;
115  ++InstrCount;
116  if (MI.isCall())
117  FBI->HasCalls = true;
118 
119  // Count processor resources used.
120  if (!SchedModel.hasInstrSchedModel())
121  continue;
122  const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
123  if (!SC->isValid())
124  continue;
125 
126  for (TargetSchedModel::ProcResIter
127  PI = SchedModel.getWriteProcResBegin(SC),
128  PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
129  assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
130  PRCycles[PI->ProcResourceIdx] += PI->Cycles;
131  }
132  }
133  FBI->InstrCount = InstrCount;
134 
135  // Scale the resource cycles so they are comparable.
136  unsigned PROffset = MBB->getNumber() * PRKinds;
137  for (unsigned K = 0; K != PRKinds; ++K)
138  ProcResourceCycles[PROffset + K] =
139  PRCycles[K] * SchedModel.getResourceFactor(K);
140 
141  return FBI;
142 }
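//
// Scaling example (illustrative numbers, not from any real schedule model):
// with 2 units of an ALU resource and 1 unit of a divide resource,
// getResourceFactor() would be 1 for the ALU kind and 2 for the divider, so
// 4 ALU cycles (4 * 1) and 2 divider cycles (2 * 2) both contribute the same
// scaled pressure of 4.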
143 
144 ArrayRef<unsigned>
145 MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
146  assert(BlockInfo[MBBNum].hasResources() &&
147  "getResources() must be called before getProcResourceCycles()");
148  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
149  assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
150  return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
151 }
152 
153 //===----------------------------------------------------------------------===//
154 // Ensemble utility functions
155 //===----------------------------------------------------------------------===//
156 
157 MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
158  : MTM(*ct) {
159  BlockInfo.resize(MTM.BlockInfo.size());
160  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
161  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
162  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
163 }
164 
165 // Virtual destructor serves as an anchor.
166 MachineTraceMetrics::Ensemble::~Ensemble() = default;
167 
168 const MachineLoop*
169 MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
170  return MTM.Loops->getLoopFor(MBB);
171 }
172 
173 // Update resource-related information in the TraceBlockInfo for MBB.
174 // Only update resources related to the trace above MBB.
175 void MachineTraceMetrics::Ensemble::
176 computeDepthResources(const MachineBasicBlock *MBB) {
177  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
178  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
179  unsigned PROffset = MBB->getNumber() * PRKinds;
180 
181  // Compute resources from trace above. The top block is simple.
182  if (!TBI->Pred) {
183  TBI->InstrDepth = 0;
184  TBI->Head = MBB->getNumber();
185  std::fill(ProcResourceDepths.begin() + PROffset,
186  ProcResourceDepths.begin() + PROffset + PRKinds, 0);
187  return;
188  }
189 
190  // Compute from the block above. A post-order traversal ensures the
191  // predecessor is always computed first.
192  unsigned PredNum = TBI->Pred->getNumber();
193  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
194  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
195  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
196  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
197  TBI->Head = PredTBI->Head;
198 
199  // Compute per-resource depths.
200  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
201  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
202  for (unsigned K = 0; K != PRKinds; ++K)
203  ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
204 }
205 
206 // Update resource-related information in the TraceBlockInfo for MBB.
207 // Only update resources related to the trace below MBB.
208 void MachineTraceMetrics::Ensemble::
209 computeHeightResources(const MachineBasicBlock *MBB) {
210  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
211  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
212  unsigned PROffset = MBB->getNumber() * PRKinds;
213 
214  // Compute resources for the current block.
215  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
216  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
217 
218  // The trace tail is done.
219  if (!TBI->Succ) {
220  TBI->Tail = MBB->getNumber();
221  std::copy(PRCycles.begin(), PRCycles.end(),
222  ProcResourceHeights.begin() + PROffset);
223  return;
224  }
225 
226  // Compute from the block below. A post-order traversal ensures the
227 // successor is always computed first.
228  unsigned SuccNum = TBI->Succ->getNumber();
229  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
230  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
231  TBI->InstrHeight += SuccTBI->InstrHeight;
232  TBI->Tail = SuccTBI->Tail;
233 
234  // Compute per-resource heights.
235  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
236  for (unsigned K = 0; K != PRKinds; ++K)
237  ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
238 }
239 
240 // Check if depth resources for MBB are valid and return the TBI.
241 // Return NULL if the resources have been invalidated.
242 const MachineTraceMetrics::TraceBlockInfo*
243 MachineTraceMetrics::Ensemble::
244 getDepthResources(const MachineBasicBlock *MBB) const {
245  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
246  return TBI->hasValidDepth() ? TBI : nullptr;
247 }
248 
249 // Check if height resources for MBB are valid and return the TBI.
250 // Return NULL if the resources have been invalidated.
251 const MachineTraceMetrics::TraceBlockInfo*
252 MachineTraceMetrics::Ensemble::
253 getHeightResources(const MachineBasicBlock *MBB) const {
254  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
255  return TBI->hasValidHeight() ? TBI : nullptr;
256 }
257 
258 /// Get an array of processor resource depths for MBB. Indexed by processor
259 /// resource kind, this array contains the scaled processor resources consumed
260 /// by all blocks preceding MBB in its trace. It does not include instructions
261 /// in MBB.
262 ///
263 /// Compare TraceBlockInfo::InstrDepth.
264 ArrayRef<unsigned>
265 MachineTraceMetrics::Ensemble::
266 getProcResourceDepths(unsigned MBBNum) const {
267  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
268  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
269  return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
270 }
271 
272 /// Get an array of processor resource heights for MBB. Indexed by processor
273 /// resource kind, this array contains the scaled processor resources consumed
274 /// by this block and all blocks following it in its trace.
275 ///
276 /// Compare TraceBlockInfo::InstrHeight.
277 ArrayRef<unsigned>
278 MachineTraceMetrics::Ensemble::
279 getProcResourceHeights(unsigned MBBNum) const {
280  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
281  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
282  return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
283 }
284 
285 //===----------------------------------------------------------------------===//
286 // Trace Selection Strategies
287 //===----------------------------------------------------------------------===//
288 //
289 // A trace selection strategy is implemented as a sub-class of Ensemble. The
290 // trace through a block B is computed by two DFS traversals of the CFG
291 // starting from B. One upwards, and one downwards. During the upwards DFS,
292 // pickTracePred() is called on the post-ordered blocks. During the downwards
293 // DFS, pickTraceSucc() is called in a post-order.
294 //
295 
296 // We never allow traces that leave loops, but we do allow traces to enter
297 // nested loops. We also never allow traces to contain back-edges.
298 //
299 // This means that a loop header can never appear above the center block of a
300 // trace, except as the trace head. Below the center block, loop exiting edges
301 // are banned.
302 //
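// For example, given a natural loop {B1, B2, B3} with header B1, back-edge
// B3->B1, and an exit block B4 reached from B2, a trace centered on B2 may
// include B1 above it only as the trace head, may continue downward into B3,
// but may not follow the back-edge B3->B1 or the exiting edge B2->B4 below
// the center block.
//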
303 // Return true if an edge from the From loop to the To loop is leaving a loop.
304 // Either of To and From can be null.
305 static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
306  return From && !From->contains(To);
307 }
308 
309 // MinInstrCountEnsemble - Pick the trace that executes the least number of
310 // instructions.
311 namespace {
312 
313 class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
314  const char *getName() const override { return "MinInstr"; }
315  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
316  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
317 
318 public:
319  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
320  : MachineTraceMetrics::Ensemble(mtm) {}
321 };
322 
323 } // end anonymous namespace
324 
325 // Select the preferred predecessor for MBB.
326 const MachineBasicBlock*
327 MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
328  if (MBB->pred_empty())
329  return nullptr;
330  const MachineLoop *CurLoop = getLoopFor(MBB);
331  // Don't leave loops, and never follow back-edges.
332  if (CurLoop && MBB == CurLoop->getHeader())
333  return nullptr;
334  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
335  const MachineBasicBlock *Best = nullptr;
336  unsigned BestDepth = 0;
337  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
338  const MachineTraceMetrics::TraceBlockInfo *PredTBI =
339  getDepthResources(Pred);
340  // Ignore cycles that aren't natural loops.
341  if (!PredTBI)
342  continue;
343  // Pick the predecessor that would give this block the smallest InstrDepth.
344  unsigned Depth = PredTBI->InstrDepth + CurCount;
345  if (!Best || Depth < BestDepth) {
346  Best = Pred;
347  BestDepth = Depth;
348  }
349  }
350  return Best;
351 }
352 
353 // Select the preferred successor for MBB.
354 const MachineBasicBlock*
355 MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
356  if (MBB->pred_empty())
357  return nullptr;
358  const MachineLoop *CurLoop = getLoopFor(MBB);
359  const MachineBasicBlock *Best = nullptr;
360  unsigned BestHeight = 0;
361  for (const MachineBasicBlock *Succ : MBB->successors()) {
362  // Don't consider back-edges.
363  if (CurLoop && Succ == CurLoop->getHeader())
364  continue;
365  // Don't consider successors exiting CurLoop.
366  if (isExitingLoop(CurLoop, getLoopFor(Succ)))
367  continue;
368  const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
369  getHeightResources(Succ);
370  // Ignore cycles that aren't natural loops.
371  if (!SuccTBI)
372  continue;
373  // Pick the successor that would give this block the smallest InstrHeight.
374  unsigned Height = SuccTBI->InstrHeight;
375  if (!Best || Height < BestHeight) {
376  Best = Succ;
377  BestHeight = Height;
378  }
379  }
380  return Best;
381 }
382 
383 // Get an Ensemble sub-class for the requested trace strategy.
384 MachineTraceMetrics::Ensemble *
385 MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
386  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
387  Ensemble *&E = Ensembles[strategy];
388  if (E)
389  return E;
390 
391  // Allocate new Ensemble on demand.
392  switch (strategy) {
393  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
394  default: llvm_unreachable("Invalid trace strategy enum");
395  }
396 }
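//
// A minimal usage sketch from a client pass (the enclosing pass and its MBB
// variable are assumed; MachineCombiner and early if-conversion follow
// essentially this pattern):
//
//   MachineTraceMetrics &MTM = getAnalysis<MachineTraceMetrics>();
//   MachineTraceMetrics::Ensemble *MinInstr =
//       MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
//   MachineTraceMetrics::Trace T = MinInstr->getTrace(MBB);
//   unsigned CritPath = T.getCriticalPath();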
397 
398 void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
399  LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
400  << '\n');
401  BlockInfo[MBB->getNumber()].invalidate();
402  for (unsigned i = 0; i != TS_NumStrategies; ++i)
403  if (Ensembles[i])
404  Ensembles[i]->invalidate(MBB);
405 }
406 
407 void MachineTraceMetrics::verifyAnalysis() const {
408  if (!MF)
409  return;
410 #ifndef NDEBUG
411  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
412  for (unsigned i = 0; i != TS_NumStrategies; ++i)
413  if (Ensembles[i])
414  Ensembles[i]->verify();
415 #endif
416 }
417 
418 //===----------------------------------------------------------------------===//
419 // Trace building
420 //===----------------------------------------------------------------------===//
421 //
422 // Traces are built by two CFG traversals. To avoid recomputing too much, use a
423 // set abstraction that confines the search to the current loop, and doesn't
424 // revisit blocks.
425 
426 namespace {
427 
428 struct LoopBounds {
429  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
430  SmallPtrSet<const MachineBasicBlock *, 8> Visited;
431  const MachineLoopInfo *Loops;
432  bool Downward = false;
433 
434  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
435  const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
436 };
437 
438 } // end anonymous namespace
439 
440 // Specialize po_iterator_storage in order to prune the post-order traversal so
441 // it is limited to the current loop and doesn't traverse the loop back edges.
442 namespace llvm {
443 
444 template<>
445 class po_iterator_storage<LoopBounds, true> {
446  LoopBounds &LB;
447 
448 public:
449  po_iterator_storage(LoopBounds &lb) : LB(lb) {}
450 
451  void finishPostorder(const MachineBasicBlock*) {}
452 
453  bool insertEdge(Optional<const MachineBasicBlock *> From,
454  const MachineBasicBlock *To) {
455  // Skip already visited To blocks.
456  MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
457  if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
458  return false;
459  // From is null once when To is the trace center block.
460  if (From) {
461  if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
462  // Don't follow backedges, don't leave FromLoop when going upwards.
463  if ((LB.Downward ? To : *From) == FromLoop->getHeader())
464  return false;
465  // Don't leave FromLoop.
466  if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
467  return false;
468  }
469  }
470  // To is a new block. Mark the block as visited in case the CFG has cycles
471  // that MachineLoopInfo didn't recognize as a natural loop.
472  return LB.Visited.insert(To).second;
473  }
474 };
475 
476 } // end namespace llvm
477 
478 /// Compute the trace through MBB.
479 void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
480  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
481  << printMBBReference(*MBB) << '\n');
482  // Set up loop bounds for the backwards post-order traversal.
483  LoopBounds Bounds(BlockInfo, MTM.Loops);
484 
485  // Run an upwards post-order search for the trace start.
486  Bounds.Downward = false;
487  Bounds.Visited.clear();
488  for (auto I : inverse_post_order_ext(MBB, Bounds)) {
489  LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
490  TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
491  // All the predecessors have been visited, pick the preferred one.
492  TBI.Pred = pickTracePred(I);
493  LLVM_DEBUG({
494  if (TBI.Pred)
495  dbgs() << printMBBReference(*TBI.Pred) << '\n';
496  else
497  dbgs() << "null\n";
498  });
499  // The trace leading to I is now known, compute the depth resources.
500  computeDepthResources(I);
501  }
502 
503  // Run a downwards post-order search for the trace end.
504  Bounds.Downward = true;
505  Bounds.Visited.clear();
506  for (auto I : post_order_ext(MBB, Bounds)) {
507  LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
508  TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
509  // All the successors have been visited, pick the preferred one.
510  TBI.Succ = pickTraceSucc(I);
511  LLVM_DEBUG({
512  if (TBI.Succ)
513  dbgs() << printMBBReference(*TBI.Succ) << '\n';
514  else
515  dbgs() << "null\n";
516  });
517  // The trace leaving I is now known, compute the height resources.
518  computeHeightResources(I);
519  }
520 }
521 
522 /// Invalidate traces through BadMBB.
523 void
524 MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
525  SmallVector<const MachineBasicBlock *, 16> WorkList;
526  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
527 
528  // Invalidate height resources of blocks above MBB.
529  if (BadTBI.hasValidHeight()) {
530  BadTBI.invalidateHeight();
531  WorkList.push_back(BadMBB);
532  do {
533  const MachineBasicBlock *MBB = WorkList.pop_back_val();
534  LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
535  << getName() << " height.\n");
536  // Find any MBB predecessors that have MBB as their preferred successor.
537  // They are the only ones that need to be invalidated.
538  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
539  TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
540  if (!TBI.hasValidHeight())
541  continue;
542  if (TBI.Succ == MBB) {
543  TBI.invalidateHeight();
544  WorkList.push_back(Pred);
545  continue;
546  }
547  // Verify that TBI.Succ is actually a successor of Pred.
548  assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
549  }
550  } while (!WorkList.empty());
551  }
552 
553  // Invalidate depth resources of blocks below MBB.
554  if (BadTBI.hasValidDepth()) {
555  BadTBI.invalidateDepth();
556  WorkList.push_back(BadMBB);
557  do {
558  const MachineBasicBlock *MBB = WorkList.pop_back_val();
559  LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
560  << getName() << " depth.\n");
561  // Find any MBB successors that have MBB as their preferred predecessor.
562  // They are the only ones that need to be invalidated.
563  for (const MachineBasicBlock *Succ : MBB->successors()) {
564  TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
565  if (!TBI.hasValidDepth())
566  continue;
567  if (TBI.Pred == MBB) {
568  TBI.invalidateDepth();
569  WorkList.push_back(Succ);
570  continue;
571  }
572  // Verify that TBI.Pred is actually a predecessor of Succ.
573  assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
574  }
575  } while (!WorkList.empty());
576  }
577 
578  // Clear any per-instruction data. We only have to do this for BadMBB itself
579  // because the instructions in that block may change. Other blocks may be
580  // invalidated, but their instructions will stay the same, so there is no
581  // need to erase the Cycle entries. They will be overwritten when we
582  // recompute.
583  for (const auto &I : *BadMBB)
584  Cycles.erase(&I);
585 }
586 
587 void MachineTraceMetrics::Ensemble::verify() const {
588 #ifndef NDEBUG
589  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
590  "Outdated BlockInfo size");
591  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
592  const TraceBlockInfo &TBI = BlockInfo[Num];
593  if (TBI.hasValidDepth() && TBI.Pred) {
594  const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
595  assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
596  assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
597  "Trace is broken, depth should have been invalidated.");
598  const MachineLoop *Loop = getLoopFor(MBB);
599  assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
600  }
601  if (TBI.hasValidHeight() && TBI.Succ) {
602  const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
603  assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
604  assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
605  "Trace is broken, height should have been invalidated.");
606  const MachineLoop *Loop = getLoopFor(MBB);
607  const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
608  assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
609  "Trace contains backedge");
610  }
611  }
612 #endif
613 }
614 
615 //===----------------------------------------------------------------------===//
616 // Data Dependencies
617 //===----------------------------------------------------------------------===//
618 //
619 // Compute the depth and height of each instruction based on data dependencies
620 // and instruction latencies. These cycle numbers assume that the CPU can issue
621 // an infinite number of instructions per cycle as long as their dependencies
622 // are ready.
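//
// For example, for a dependency chain with assumed operand latencies of 2 and
// 3 cycles and no other users,
//
//   %1 = A          ; latency 2
//   %2 = B %1       ; latency 3
//   %3 = C %2
//
// the depths are 0, 2 and 5 cycles and the heights are 5, 3 and 0 cycles,
// regardless of the CPU's actual issue width.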
623 
624 // A data dependency is represented as a defining MI and operand numbers on the
625 // defining and using MI.
626 namespace {
627 
628 struct DataDep {
629  const MachineInstr *DefMI;
630  unsigned DefOp;
631  unsigned UseOp;
632 
633  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
634  : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
635 
636  /// Create a DataDep from an SSA form virtual register.
637  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
638  : UseOp(UseOp) {
639  assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
640  MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
641  assert(!DefI.atEnd() && "Register has no defs");
642  DefMI = DefI->getParent();
643  DefOp = DefI.getOperandNo();
644  assert((++DefI).atEnd() && "Register has multiple defs");
645  }
646 };
647 
648 } // end anonymous namespace
649 
650 // Get the input data dependencies that must be ready before UseMI can issue.
651 // Return true if UseMI has any physreg operands.
652 static bool getDataDeps(const MachineInstr &UseMI,
653  SmallVectorImpl<DataDep> &Deps,
654  const MachineRegisterInfo *MRI) {
655  // Debug values should not be included in any calculations.
656  if (UseMI.isDebugInstr())
657  return false;
658 
659  bool HasPhysRegs = false;
660  for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
661  E = UseMI.operands_end(); I != E; ++I) {
662  const MachineOperand &MO = *I;
663  if (!MO.isReg())
664  continue;
665  unsigned Reg = MO.getReg();
666  if (!Reg)
667  continue;
668  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
669  HasPhysRegs = true;
670  continue;
671  }
672  // Collect virtual register reads.
673  if (MO.readsReg())
674  Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
675  }
676  return HasPhysRegs;
677 }
678 
679 // Get the input data dependencies of a PHI instruction, using Pred as the
680 // preferred predecessor.
681 // This will add at most one dependency to Deps.
682 static void getPHIDeps(const MachineInstr &UseMI,
683  SmallVectorImpl<DataDep> &Deps,
684  const MachineBasicBlock *Pred,
685  const MachineRegisterInfo *MRI) {
686  // No predecessor at the beginning of a trace. Ignore dependencies.
687  if (!Pred)
688  return;
689  assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
690  for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
691  if (UseMI.getOperand(i + 1).getMBB() == Pred) {
692  unsigned Reg = UseMI.getOperand(i).getReg();
693  Deps.push_back(DataDep(MRI, Reg, i));
694  return;
695  }
696  }
697 }
698 
699 // Identify physreg dependencies for UseMI, and update the live regunit
700 // tracking set when scanning instructions downwards.
701 static void updatePhysDepsDownwards(const MachineInstr *UseMI,
702  SmallVectorImpl<DataDep> &Deps,
703  SparseSet<LiveRegUnit> &RegUnits,
704  const TargetRegisterInfo *TRI) {
705  SmallVector<unsigned, 8> Kills;
706  SmallVector<unsigned, 8> LiveDefOps;
707 
708  for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
709  ME = UseMI->operands_end(); MI != ME; ++MI) {
710  const MachineOperand &MO = *MI;
711  if (!MO.isReg())
712  continue;
713  unsigned Reg = MO.getReg();
714  if (!TargetRegisterInfo::isPhysicalRegister(Reg))
715  continue;
716  // Track live defs and kills for updating RegUnits.
717  if (MO.isDef()) {
718  if (MO.isDead())
719  Kills.push_back(Reg);
720  else
721  LiveDefOps.push_back(UseMI->getOperandNo(MI));
722  } else if (MO.isKill())
723  Kills.push_back(Reg);
724  // Identify dependencies.
725  if (!MO.readsReg())
726  continue;
727  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
728  SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
729  if (I == RegUnits.end())
730  continue;
731  Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
732  break;
733  }
734  }
735 
736  // Update RegUnits to reflect live registers after UseMI.
737  // First kills.
738  for (unsigned Kill : Kills)
739  for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
740  RegUnits.erase(*Units);
741 
742  // Second, live defs.
743  for (unsigned DefOp : LiveDefOps) {
744  for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
745  Units.isValid(); ++Units) {
746  LiveRegUnit &LRU = RegUnits[*Units];
747  LRU.MI = UseMI;
748  LRU.Op = DefOp;
749  }
750  }
751 }
752 
753 /// The length of the critical path through a trace is the maximum of two path
754 /// lengths:
755 ///
756 /// 1. The maximum height+depth over all instructions in the trace center block.
757 ///
758 /// 2. The longest cross-block dependency chain. For small blocks, it is
759 /// possible that the critical path through the trace doesn't include any
760 /// instructions in the block.
761 ///
762 /// This function computes the second number from the live-in list of the
763 /// center block.
764 unsigned MachineTraceMetrics::Ensemble::
765 computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
766  assert(TBI.HasValidInstrDepths && "Missing depth info");
767  assert(TBI.HasValidInstrHeights && "Missing height info");
768  unsigned MaxLen = 0;
769  for (const LiveInReg &LIR : TBI.LiveIns) {
770  if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
771  continue;
772  const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
773  // Ignore dependencies outside the current trace.
774  const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
775  if (!DefTBI.isUsefulDominator(TBI))
776  continue;
777  unsigned Len = LIR.Height + Cycles[DefMI].Depth;
778  MaxLen = std::max(MaxLen, Len);
779  }
780  return MaxLen;
781 }
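//
// Combined with the per-instruction term, the critical path through the
// center block is effectively
//
//   CriticalPath = max(max over MI in center block of depth(MI) + height(MI),
//                      max over live-in LIR of depth(DefMI(LIR)) + LIR.Height)
//
// and this function supplies the second, cross-block term.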
782 
783 void MachineTraceMetrics::Ensemble::
784 updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
785  SparseSet<LiveRegUnit> &RegUnits) {
786  SmallVector<DataDep, 8> Deps;
787  // Collect all data dependencies.
788  if (UseMI.isPHI())
789  getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
790  else if (getDataDeps(UseMI, Deps, MTM.MRI))
791  updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
792 
793  // Filter and process dependencies, computing the earliest issue cycle.
794  unsigned Cycle = 0;
795  for (const DataDep &Dep : Deps) {
796  const TraceBlockInfo&DepTBI =
797  BlockInfo[Dep.DefMI->getParent()->getNumber()];
798  // Ignore dependencies from outside the current trace.
799  if (!DepTBI.isUsefulDominator(TBI))
800  continue;
801  assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
802  unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
803  // Add latency if DefMI is a real instruction. Transients get latency 0.
804  if (!Dep.DefMI->isTransient())
805  DepCycle += MTM.SchedModel
806  .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
807  Cycle = std::max(Cycle, DepCycle);
808  }
809  // Remember the instruction depth.
810  InstrCycles &MICycles = Cycles[&UseMI];
811  MICycles.Depth = Cycle;
812 
813  if (TBI.HasValidInstrHeights) {
814  // Update critical path length.
815  TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
816  LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
817  } else {
818  LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
819  }
820 }
821 
822 void MachineTraceMetrics::Ensemble::
823 updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
824  SparseSet<LiveRegUnit> &RegUnits) {
825  updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
826 }
827 
828 void MachineTraceMetrics::Ensemble::
829 updateDepths(MachineBasicBlock::iterator Start,
830  MachineBasicBlock::iterator End,
831  SparseSet<LiveRegUnit> &RegUnits) {
832  for (; Start != End; Start++)
833  updateDepth(Start->getParent(), *Start, RegUnits);
834 }
835 
836 /// Compute instruction depths for all instructions above or in MBB in its
837 /// trace. This assumes that the trace through MBB has already been computed.
838 void MachineTraceMetrics::Ensemble::
839 computeInstrDepths(const MachineBasicBlock *MBB) {
840  // The top of the trace may already be computed, and HasValidInstrDepths
841  // implies Head->HasValidInstrDepths, so we only need to start from the first
842  // block in the trace that needs to be recomputed.
843  SmallVector<const MachineBasicBlock *, 8> Stack;
844  do {
845  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
846  assert(TBI.hasValidDepth() && "Incomplete trace");
847  if (TBI.HasValidInstrDepths)
848  break;
849  Stack.push_back(MBB);
850  MBB = TBI.Pred;
851  } while (MBB);
852 
853  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
854  // in the trace. We should track any live-out physregs that were defined in
855  // the trace. This is quite rare in SSA form, typically created by CSE
856  // hoisting a compare.
857  SparseSet<LiveRegUnit> RegUnits;
858  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
859 
860  // Go through trace blocks in top-down order, stopping after the center block.
861  while (!Stack.empty()) {
862  MBB = Stack.pop_back_val();
863  LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
864  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
865  TBI.HasValidInstrDepths = true;
866  TBI.CriticalPath = 0;
867 
868  // Print out resource depths here as well.
869  LLVM_DEBUG({
870  dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
871  ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
872  for (unsigned K = 0; K != PRDepths.size(); ++K)
873  if (PRDepths[K]) {
874  unsigned Factor = MTM.SchedModel.getResourceFactor(K);
875  dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
876  << MTM.SchedModel.getProcResource(K)->Name << " ("
877  << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
878  }
879  });
880 
881  // Also compute the critical path length through MBB when possible.
882  if (TBI.HasValidInstrHeights)
883  TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
884 
885  for (const auto &UseMI : *MBB) {
886  updateDepth(TBI, UseMI, RegUnits);
887  }
888  }
889 }
890 
891 // Identify physreg dependencies for MI when scanning instructions upwards.
892 // Return the issue height of MI after considering any live regunits.
893 // Height is the issue height computed from virtual register dependencies alone.
894 static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
895  SparseSet<LiveRegUnit> &RegUnits,
896  const TargetSchedModel &SchedModel,
897  const TargetInstrInfo *TII,
898  const TargetRegisterInfo *TRI) {
899  SmallVector<unsigned, 8> ReadOps;
900 
901  for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
902  MOE = MI.operands_end();
903  MOI != MOE; ++MOI) {
904  const MachineOperand &MO = *MOI;
905  if (!MO.isReg())
906  continue;
907  unsigned Reg = MO.getReg();
908  if (!TargetRegisterInfo::isPhysicalRegister(Reg))
909  continue;
910  if (MO.readsReg())
911  ReadOps.push_back(MI.getOperandNo(MOI));
912  if (!MO.isDef())
913  continue;
914  // This is a def of Reg. Remove corresponding entries from RegUnits, and
915  // update MI Height to consider the physreg dependencies.
916  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
917  SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
918  if (I == RegUnits.end())
919  continue;
920  unsigned DepHeight = I->Cycle;
921  if (!MI.isTransient()) {
922  // We may not know the UseMI of this dependency, if it came from the
923  // live-in list. SchedModel can handle a NULL UseMI.
924  DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
925  I->MI, I->Op);
926  }
927  Height = std::max(Height, DepHeight);
928  // This regunit is dead above MI.
929  RegUnits.erase(I);
930  }
931  }
932 
933  // Now we know the height of MI. Update any regunits read.
934  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
935  unsigned Reg = MI.getOperand(ReadOps[i]).getReg();
936  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
937  LiveRegUnit &LRU = RegUnits[*Units];
938  // Set the height to the highest reader of the unit.
939  if (LRU.Cycle <= Height && LRU.MI != &MI) {
940  LRU.Cycle = Height;
941  LRU.MI = &MI;
942  LRU.Op = ReadOps[i];
943  }
944  }
945  }
946 
947  return Height;
948 }
949 
950 using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
951 
952 // Push the height of DefMI upwards if required to match UseMI.
953 // Return true if this is the first time DefMI was seen.
954 static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
955  unsigned UseHeight, MIHeightMap &Heights,
956  const TargetSchedModel &SchedModel,
957  const TargetInstrInfo *TII) {
958  // Adjust height by Dep.DefMI latency.
959  if (!Dep.DefMI->isTransient())
960  UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
961  Dep.UseOp);
962 
963  // Update Heights[DefMI] to be the maximum height seen.
964  MIHeightMap::iterator I;
965  bool New;
966  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
967  if (New)
968  return true;
969 
970  // DefMI has been pushed before. Give it the max height.
971  if (I->second < UseHeight)
972  I->second = UseHeight;
973  return false;
974 }
975 
976 /// Assuming that the virtual register defined by DefMI:DefOp was used by
977 /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
978 /// when reaching the block that contains DefMI.
979 void MachineTraceMetrics::Ensemble::
980 addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
981  ArrayRef<const MachineBasicBlock *> Trace) {
982  assert(!Trace.empty() && "Trace should contain at least one block");
983  unsigned Reg = DefMI->getOperand(DefOp).getReg();
984  assert(TargetRegisterInfo::isVirtualRegister(Reg));
985  const MachineBasicBlock *DefMBB = DefMI->getParent();
986 
987  // Reg is live-in to all blocks in Trace that follow DefMBB.
988  for (unsigned i = Trace.size(); i; --i) {
989  const MachineBasicBlock *MBB = Trace[i-1];
990  if (MBB == DefMBB)
991  return;
992  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
993  // Just add the register. The height will be updated later.
994  TBI.LiveIns.push_back(Reg);
995  }
996 }
997 
998 /// Compute instruction heights in the trace through MBB. This updates MBB and
999 /// the blocks below it in the trace. It is assumed that the trace has already
1000 /// been computed.
1001 void MachineTraceMetrics::Ensemble::
1002 computeInstrHeights(const MachineBasicBlock *MBB) {
1003  // The bottom of the trace may already be computed.
1004  // Find the blocks that need updating.
1005  SmallVector<const MachineBasicBlock *, 8> Stack;
1006  do {
1007  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1008  assert(TBI.hasValidHeight() && "Incomplete trace");
1009  if (TBI.HasValidInstrHeights)
1010  break;
1011  Stack.push_back(MBB);
1012  TBI.LiveIns.clear();
1013  MBB = TBI.Succ;
1014  } while (MBB);
1015 
1016  // As we move upwards in the trace, keep track of instructions that are
1017  // required by deeper trace instructions. Map MI -> height required so far.
1018  MIHeightMap Heights;
1019 
1020  // For physregs, the def isn't known when we see the use.
1021  // Instead, keep track of the highest use of each regunit.
1022  SparseSet<LiveRegUnit> RegUnits;
1023  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1024 
1025  // If the bottom of the trace was already precomputed, initialize heights
1026  // from its live-in list.
1027  // MBB is the highest precomputed block in the trace.
1028  if (MBB) {
1029  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1030  for (LiveInReg &LI : TBI.LiveIns) {
1031  if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
1032  // For virtual registers, the def latency is included.
1033  unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
1034  if (Height < LI.Height)
1035  Height = LI.Height;
1036  } else {
1037  // For register units, the def latency is not included because we don't
1038  // know the def yet.
1039  RegUnits[LI.Reg].Cycle = LI.Height;
1040  }
1041  }
1042  }
1043 
1044  // Go through the trace blocks in bottom-up order.
1045  SmallVector<DataDep, 8> Deps;
1046  for (;!Stack.empty(); Stack.pop_back()) {
1047  MBB = Stack.back();
1048  LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
1049  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1050  TBI.HasValidInstrHeights = true;
1051  TBI.CriticalPath = 0;
1052 
1053  LLVM_DEBUG({
1054  dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1055  ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1056  for (unsigned K = 0; K != PRHeights.size(); ++K)
1057  if (PRHeights[K]) {
1058  unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1059  dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1060  << MTM.SchedModel.getProcResource(K)->Name << " ("
1061  << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1062  }
1063  });
1064 
1065  // Get dependencies from PHIs in the trace successor.
1066  const MachineBasicBlock *Succ = TBI.Succ;
1067  // If MBB is the last block in the trace, and it has a back-edge to the
1068  // loop header, get loop-carried dependencies from PHIs in the header. For
1069  // that purpose, pretend that all the loop header PHIs have height 0.
1070  if (!Succ)
1071  if (const MachineLoop *Loop = getLoopFor(MBB))
1072  if (MBB->isSuccessor(Loop->getHeader()))
1073  Succ = Loop->getHeader();
1074 
1075  if (Succ) {
1076  for (const auto &PHI : *Succ) {
1077  if (!PHI.isPHI())
1078  break;
1079  Deps.clear();
1080  getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1081  if (!Deps.empty()) {
1082  // Loop header PHI heights are all 0.
1083  unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1084  LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1085  if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1086  MTM.TII))
1087  addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1088  }
1089  }
1090  }
1091 
1092  // Go through the block backwards.
1093  for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
1094  BI != BB;) {
1095  const MachineInstr &MI = *--BI;
1096 
1097  // Find the MI height as determined by virtual register uses in the
1098  // trace below.
1099  unsigned Cycle = 0;
1100  MIHeightMap::iterator HeightI = Heights.find(&MI);
1101  if (HeightI != Heights.end()) {
1102  Cycle = HeightI->second;
1103  // We won't be seeing any more MI uses.
1104  Heights.erase(HeightI);
1105  }
1106 
1107  // Don't process PHI deps. They depend on the specific predecessor, and
1108  // we'll get them when visiting the predecessor.
1109  Deps.clear();
1110  bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1111 
1112  // There may also be regunit dependencies to include in the height.
1113  if (HasPhysRegs)
1114  Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1115  MTM.TII, MTM.TRI);
1116 
1117  // Update the required height of any virtual registers read by MI.
1118  for (const DataDep &Dep : Deps)
1119  if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1120  addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1121 
1122  InstrCycles &MICycles = Cycles[&MI];
1123  MICycles.Height = Cycle;
1124  if (!TBI.HasValidInstrDepths) {
1125  LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
1126  continue;
1127  }
1128  // Update critical path length.
1129  TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1130  LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1131  }
1132 
1133  // Update virtual live-in heights. They were added by addLiveIns() with a 0
1134  // height because the final height isn't known until now.
1135  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
1136  for (LiveInReg &LIR : TBI.LiveIns) {
1137  const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
1138  LIR.Height = Heights.lookup(DefMI);
1139  LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
1140  }
1141 
1142  // Transfer the live regunits to the live-in list.
1143  for (SparseSet<LiveRegUnit>::const_iterator
1144  RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
1145  TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
1146  LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@'
1147  << RI->Cycle);
1148  }
1149  LLVM_DEBUG(dbgs() << '\n');
1150 
1151  if (!TBI.HasValidInstrDepths)
1152  continue;
1153  // Add live-ins to the critical path length.
1154  TBI.CriticalPath = std::max(TBI.CriticalPath,
1155  computeCrossBlockCriticalPath(TBI));
1156  LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1157  }
1158 }
1159 
1160 MachineTraceMetrics::Trace
1161 MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
1162  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1163 
1164  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1165  computeTrace(MBB);
1166  if (!TBI.HasValidInstrDepths)
1167  computeInstrDepths(MBB);
1168  if (!TBI.HasValidInstrHeights)
1169  computeInstrHeights(MBB);
1170 
1171  return Trace(*this, TBI);
1172 }
1173 
1174 unsigned
1175 MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
1176  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1177  "MI must be in the trace center block");
1178  InstrCycles Cyc = getInstrCycles(MI);
1179  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1180 }
1181 
1182 unsigned
1183 MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
1184  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1185  SmallVector<DataDep, 1> Deps;
1186  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1187  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1188  DataDep &Dep = Deps.front();
1189  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1190  // Add latency if DefMI is a real instruction. Transients get latency 0.
1191  if (!Dep.DefMI->isTransient())
1192  DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1193  &PHI, Dep.UseOp);
1194  return DepCycle;
1195 }
1196 
1197 /// When Bottom is set, include instructions in the current block in the estimate.
1198 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
1199  // Find the limiting processor resource.
1200  // Numbers have been pre-scaled to be comparable.
1201  unsigned PRMax = 0;
1202  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1203  if (Bottom) {
1204  ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
1205  for (unsigned K = 0; K != PRDepths.size(); ++K)
1206  PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1207  } else {
1208  for (unsigned K = 0; K != PRDepths.size(); ++K)
1209  PRMax = std::max(PRMax, PRDepths[K]);
1210  }
1211  // Convert to cycle count.
1212  PRMax = TE.MTM.getCycles(PRMax);
1213 
1214  /// All instructions before current block
1215  unsigned Instrs = TBI.InstrDepth;
1216  // plus instructions in current block
1217  if (Bottom)
1218  Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1219  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1220  Instrs /= IW;
1221  // Assume issue width 1 without a schedule model.
1222  return std::max(Instrs, PRMax);
1223 }
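//
// Worked example (illustrative numbers): with InstrDepth = 12, an issue width
// of 4, and a scaled per-resource maximum of 5 cycles, the estimate above is
// max(12 / 4, 5) = 5 cycles.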
1224 
1225 unsigned MachineTraceMetrics::Trace::getResourceLength(
1226  ArrayRef<const MachineBasicBlock *> Extrablocks,
1227  ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
1228  ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1229  // Add up resources above and below the center block.
1230  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1231  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1232  unsigned PRMax = 0;
1233 
1234  // Capture computing cycles from extra instructions
1235  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1236  unsigned ResourceIdx)
1237  ->unsigned {
1238  unsigned Cycles = 0;
1239  for (const MCSchedClassDesc *SC : Instrs) {
1240  if (!SC->isValid())
1241  continue;
1242  for (TargetSchedModel::ProcResIter
1243  PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1244  PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1245  PI != PE; ++PI) {
1246  if (PI->ProcResourceIdx != ResourceIdx)
1247  continue;
1248  Cycles +=
1249  (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1250  }
1251  }
1252  return Cycles;
1253  };
1254 
1255  for (unsigned K = 0; K != PRDepths.size(); ++K) {
1256  unsigned PRCycles = PRDepths[K] + PRHeights[K];
1257  for (const MachineBasicBlock *MBB : Extrablocks)
1258  PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
1259  PRCycles += extraCycles(ExtraInstrs, K);
1260  PRCycles -= extraCycles(RemoveInstrs, K);
1261  PRMax = std::max(PRMax, PRCycles);
1262  }
1263  // Convert to cycle count.
1264  PRMax = TE.MTM.getCycles(PRMax);
1265 
1266  // Instrs: #instructions in current trace outside current block.
1267  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1268  // Add instruction count from the extra blocks.
1269  for (const MachineBasicBlock *MBB : Extrablocks)
1270  Instrs += TE.MTM.getResources(MBB)->InstrCount;
1271  Instrs += ExtraInstrs.size();
1272  Instrs -= RemoveInstrs.size();
1273  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1274  Instrs /= IW;
1275  // Assume issue width 1 without a schedule model.
1276  return std::max(Instrs, PRMax);
1277 }
1278 
1279 bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
1280  const MachineInstr &UseMI) const {
1281  if (DefMI.getParent() == UseMI.getParent())
1282  return true;
1283 
1284  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1285  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1286 
1287  return DepTBI.isUsefulDominator(TBI);
1288 }
1289 
1290 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
1291  OS << getName() << " ensemble:\n";
1292  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1293  OS << " %bb." << i << '\t';
1294  BlockInfo[i].print(OS);
1295  OS << '\n';
1296  }
1297 }
1298 
1299 void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
1300  if (hasValidDepth()) {
1301  OS << "depth=" << InstrDepth;
1302  if (Pred)
1303  OS << " pred=" << printMBBReference(*Pred);
1304  else
1305  OS << " pred=null";
1306  OS << " head=%bb." << Head;
1307  if (HasValidInstrDepths)
1308  OS << " +instrs";
1309  } else
1310  OS << "depth invalid";
1311  OS << ", ";
1312  if (hasValidHeight()) {
1313  OS << "height=" << InstrHeight;
1314  if (Succ)
1315  OS << " succ=" << printMBBReference(*Succ);
1316  else
1317  OS << " succ=null";
1318  OS << " tail=%bb." << Tail;
1319  if (HasValidInstrHeights)
1320  OS << " +instrs";
1321  } else
1322  OS << "height invalid";
1323  if (HasValidInstrDepths && HasValidInstrHeights)
1324  OS << ", crit=" << CriticalPath;
1325 }
1326 
1327 void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
1328  unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1329 
1330  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1331  << " --> %bb." << TBI.Tail << ':';
1332  if (TBI.hasValidHeight() && TBI.hasValidDepth())
1333  OS << ' ' << getInstrCount() << " instrs.";
1334  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1335  OS << ' ' << TBI.CriticalPath << " cycles.";
1336 
1337  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
1338  OS << "\n%bb." << MBBNum;
1339  while (Block->hasValidDepth() && Block->Pred) {
1340  unsigned Num = Block->Pred->getNumber();
1341  OS << " <- " << printMBBReference(*Block->Pred);
1342  Block = &TE.BlockInfo[Num];
1343  }
1344 
1345  Block = &TBI;
1346  OS << "\n ";
1347  while (Block->hasValidHeight() && Block->Succ) {
1348  unsigned Num = Block->Succ->getNumber();
1349  OS << " -> " << printMBBReference(*Block->Succ);
1350  Block = &TE.BlockInfo[Num];
1351  }
1352  OS << '\n';
1353 }
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
mop_iterator operands_end()
Definition: MachineInstr.h:448
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
MachineBasicBlock * getMBB() const
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
iterator begin() const
Definition: ArrayRef.h:137
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of the instructions from Start to End.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID&#39;s allocated.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:503
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool HasCalls
True when the block contains calls.
unsigned Reg
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
A trace ensemble is a collection of traces selected using the same strategy, for example &#39;minimum res...
block Block Frequency true
static unsigned InstrCount
bool isPHI() const
Definition: MachineInstr.h:995
unsigned getNumRegUnits() const
Return the number of (native) register units in the target.
unsigned InstrCount
The number of non-trivial instructions in the block.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
iterator_range< succ_iterator > successors()
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Hexagon Hardware Loops
bool hasResources() const
Returns true when resource information for this block has been computed.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:406
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Strategy
Strategies for selecting traces.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
const MachineInstr * MI
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by a analysis pass to check state of analysis infor...
def_iterator def_begin(unsigned RegNo) const
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
ArrayRef< unsigned > getProcResourceCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
BlockT * getHeader() const
Definition: LoopInfo.h:100
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block...
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
virtual const MachineBasicBlock * pickTraceSucc(const MachineBasicBlock *)=0
Select the trace through a block that has the fewest instructions.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs)
Adds registers contained in LiveRegs to the block live-in list of MBB.
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:363
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
virtual const TargetInstrInfo * getInstrInfo() const
bool isValid() const
Definition: MCSchedule.h:127
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:286
TargetInstrInfo - Interface to description of machine instruction set.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:146
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
#define DEBUG_TYPE
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
unsigned CriticalPath
Critical path length.
bool erase(const KeyT &Val)
Definition: DenseMap.h:268
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:291
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().
typename DenseT::iterator iterator
Definition: SparseSet.h:172
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
Ensemble * getEnsemble(Strategy)
Get the trace ensemble representing the given trace selection strategy.
typename DenseT::const_iterator const_iterator
Definition: SparseSet.h:173
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
MachineInstrBuilder & UseMI
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
Machine Trace Metrics
Per-basic block information that doesn&#39;t depend on the trace through the block.
bool isPredecessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a predecessor of this block.
Represent the analysis usage information of a pass.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
Default po_iterator_storage implementation with an internal set object.
bool atEnd() const
atEnd - return true if this iterator is equal to reg_end() on the value.
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:156
iterator_range< pred_iterator > predecessors()
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
virtual const char * getName() const =0
virtual const MachineBasicBlock * pickTracePred(const MachineBasicBlock *)=0
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
size_t size() const
Definition: SmallVector.h:53
const_iterator end() const
Definition: SparseSet.h:176
bool isDebugInstr() const
Definition: MachineInstr.h:985
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl< DataDep > &Deps, SparseSet< LiveRegUnit > &RegUnits, const TargetRegisterInfo *TRI)
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:110
MachineOperand class - Representation of each machine instruction operand.
unsigned Height
Minimum number of cycles from this instruction is issued to the end of the trace, as determined by data d...
const_iterator begin() const
Definition: SparseSet.h:175
iterator end() const
Definition: ArrayRef.h:138
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
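SmallVector::pop_back_val() is commonly paired with push_back() to drive a worklist, which is how this file walks dependence chains. A minimal sketch of that idiom; the Node type and visitReachable function are hypothetical and exist only for illustration:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

// Hypothetical graph node type used only for this example.
struct Node { llvm::SmallVector<Node *, 4> Succs; };

static void visitReachable(Node *Root) {
  llvm::SmallVector<Node *, 8> Worklist;
  llvm::SmallPtrSet<Node *, 16> Visited;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    // pop_back_val() removes and returns the last element in one step.
    Node *N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue;                        // Already handled.
    for (Node *S : N->Succs)
      Worklist.push_back(S);
  }
}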
iterator_range< po_ext_iterator< T, SetType > > post_order_ext(const T &G, SetType &S)
char & MachineTraceMetricsID
MachineTraceMetrics - This pass computes critical path and CPU resource usage in an ensemble of trace...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Per-basic block information that relates to a specific trace through the block.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void setPreservesAll()
Set by analyses that do not transform their input at all.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
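A rough illustration of querying a resolved scheduling class, assuming a TargetSchedModel that has already been init()'ed for the target; the microOpsFor helper is hypothetical, not part of this pass:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCSchedule.h"

// Return the number of micro-ops the scheduling model reports for MI, or 0
// when no per-instruction scheduling data is available.
static unsigned microOpsFor(const llvm::TargetSchedModel &SchedModel,
                            const llvm::MachineInstr &MI) {
  if (!SchedModel.hasInstrSchedModel())
    return 0;
  const llvm::MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
  if (!SC || !SC->isValid())
    return 0;
  return SC->NumMicroOps;
}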
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI, unsigned UseHeight, MIHeightMap &Heights, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII)
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:248
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
iterator_range< ipo_ext_iterator< T, SetType > > inverse_post_order_ext(const T &G, SetType &S)
bool insertEdge(Optional< const MachineBasicBlock *> From, const MachineBasicBlock *To)
static void getPHIDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineBasicBlock *Pred, const MachineRegisterInfo *MRI)
Representation of each machine instruction.
Definition: MachineInstr.h:64
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:149
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys...
Definition: SparseSet.h:124
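SparseSet requires setUniverse() to be called before any insertions, and keys must be small unsigned integers below the universe size. A brief usage sketch; the sparseSetDemo function and the constants in it are illustrative only:

#include "llvm/ADT/SparseSet.h"

static void sparseSetDemo() {
  // For SparseSet<unsigned>, each value is its own key.
  llvm::SparseSet<unsigned> Live;
  Live.setUniverse(32);              // Keys must be < 32 from here on.
  Live.insert(3);
  Live.insert(7);
  if (Live.count(3))
    Live.erase(Live.find(3));        // Locate by key, erase by iterator.
}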
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE, "Machine Trace Metrics", false, true) INITIALIZE_PASS_END(MachineTraceMetrics
iterator find(const KeyT &Key)
find - Find an element by its key.
Definition: SparseSet.h:225
InstrCycles represents the cycle height and depth of an instruction in a trace.
iterator end()
Definition: DenseMap.h:79
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To)
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
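DenseMap::lookup() returns a default-constructed value for a missing key, which avoids an explicit find()/end() check whenever a zero default is acceptable. A small sketch with hypothetical key/value choices:

#include "llvm/ADT/DenseMap.h"

static void denseMapDemo() {
  llvm::DenseMap<unsigned, unsigned> HeightOf;
  HeightOf[5] = 12;
  unsigned H = HeightOf.lookup(5);   // 12: key present.
  unsigned M = HeightOf.lookup(9);   // 0: default-constructed, key absent.
  (void)H; (void)M;
  HeightOf.erase(5);                 // erase(Key) returns true if the key existed.
}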
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
mop_iterator operands_begin()
Definition: MachineInstr.h:447
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
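getLoopFor() and MachineLoop::contains() are typically combined to decide whether a CFG edge leaves a loop. A hedged sketch of that pattern; edgeLeavesLoop is an illustrative helper, not the pass's own:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

// Return true if the edge From -> To exits the innermost loop containing From.
static bool edgeLeavesLoop(const llvm::MachineLoopInfo &Loops,
                           const llvm::MachineBasicBlock *From,
                           const llvm::MachineBasicBlock *To) {
  const llvm::MachineLoop *FromLoop = Loops.getLoopFor(From);
  if (!FromLoop)
    return false;                    // From is not inside any loop.
  const llvm::MachineLoop *ToLoop = Loops.getLoopFor(To);
  // The edge exits FromLoop if To is not (transitively) contained in it.
  return !ToLoop || !FromLoop->contains(ToLoop);
}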
unsigned getResourceLength(ArrayRef< const MachineBasicBlock *> Extrablocks=None, ArrayRef< const MCSchedClassDesc *> ExtraInstrs=None, ArrayRef< const MCSchedClassDesc *> RemoveInstrs=None) const
Return the resource length of the trace.
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
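The returned ArrayRef is indexed by processor resource kind, and the entries are already scaled so they are comparable across resources. A hedged sketch of scanning such an array for its largest entry; the maxResourceDepth helper is illustrative, and PRDepths is assumed to come from a call like Ensemble::getProcResourceDepths(MBB->getNumber()):

#include "llvm/ADT/ArrayRef.h"
#include <algorithm>

// Return the largest per-resource depth in the (already scaled) array.
static unsigned maxResourceDepth(llvm::ArrayRef<unsigned> PRDepths) {
  unsigned Max = 0;
  for (unsigned Cycles : PRDepths)
    Max = std::max(Max, Cycles);
  return Max;
}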
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
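Slack relates an instruction's depth and height to the critical path of its trace: an instruction whose Depth + Height equals the critical path has no slack and cannot be delayed without lengthening the trace. A rough sketch of that relationship, assuming Trace::getInstrCycles() and Trace::getCriticalPath(); approxSlack is an illustrative helper, not this pass's implementation:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include <algorithm>

static unsigned approxSlack(const llvm::MachineTraceMetrics::Trace &Tr,
                            const llvm::MachineInstr &MI) {
  unsigned CritPath = Tr.getCriticalPath();
  unsigned Cyc = Tr.getInstrCycles(MI).Depth + Tr.getInstrCycles(MI).Height;
  return CritPath - std::min(CritPath, Cyc);   // Clamp at zero.
}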
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:408
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1082
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
static bool getDataDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineRegisterInfo *MRI)
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths...
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, SparseSet< LiveRegUnit > &RegUnits, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
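A client pass would typically obtain an Ensemble for one strategy and then query the Trace through a block. A hedged sketch of that flow, assuming the minimum-instruction-count strategy (TS_MinInstrCount) and an analysis result fetched elsewhere via getAnalysis<MachineTraceMetrics>(); describeTrace is an illustrative helper:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/Support/raw_ostream.h"

// Print a few summary numbers for the trace through MBB.
static void describeTrace(llvm::MachineTraceMetrics &MTM,
                          const llvm::MachineBasicBlock *MBB) {
  llvm::MachineTraceMetrics::Ensemble *MinInstr =
      MTM.getEnsemble(llvm::MachineTraceMetrics::TS_MinInstrCount);
  llvm::MachineTraceMetrics::Trace Tr = MinInstr->getTrace(MBB);
  llvm::dbgs() << "critical path: " << Tr.getCriticalPath()
               << " cycles, resource length: " << Tr.getResourceLength()
               << " cycles\n";
}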
void resize(size_type N)
Definition: SmallVector.h:351
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.