MachineTraceMetrics.cpp (LLVM 3.7.0)
//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

#define DEBUG_TYPE "machine-trace-metrics"

char MachineTraceMetrics::ID = 0;
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;

INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
                  "machine-trace-metrics", "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineTraceMetrics,
                  "machine-trace-metrics", "Machine Trace Metrics", false, true)
MachineTraceMetrics::MachineTraceMetrics()
  : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
    MRI(nullptr), Loops(nullptr) {
  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}

void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineBranchProbabilityInfo>();
  AU.addRequired<MachineLoopInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  const TargetSubtargetInfo &ST = MF->getSubtarget();
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &getAnalysis<MachineLoopInfo>();
  SchedModel.init(ST.getSchedModel(), &ST, TII);
  BlockInfo.resize(MF->getNumBlockIDs());
  ProcResourceCycles.resize(MF->getNumBlockIDs() *
                            SchedModel.getNumProcResourceKinds());
  return false;
}
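
// ProcResourceCycles is a flat getNumBlockIDs() x getNumProcResourceKinds()
// array: the scaled cycles for block MBBNum occupy the slice
// [MBBNum * PRKinds, (MBBNum + 1) * PRKinds), which is what
// getProcResourceCycles() below hands back as an ArrayRef.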

void MachineTraceMetrics::releaseMemory() {
  MF = nullptr;
  BlockInfo.clear();
  for (unsigned i = 0; i != TS_NumStrategies; ++i) {
    delete Ensembles[i];
    Ensembles[i] = nullptr;
  }
}
//===----------------------------------------------------------------------===//
//                               Fixed block information
//===----------------------------------------------------------------------===//
//
// The number of instructions in a basic block and the CPU resources used by
// those instructions don't depend on any given trace strategy.

/// Compute the resource usage in basic block MBB.
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  if (FBI->hasResources())
    return FBI;

  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;

  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);

  for (const auto &MI : *MBB) {
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;

    // Count processor resources used.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;

    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->Cycles;
    }
  }
  FBI->InstrCount = InstrCount;

  // Scale the resource cycles so they are comparable.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceCycles[PROffset + K] =
      PRCycles[K] * SchedModel.getResourceFactor(K);

  return FBI;
}

ArrayRef<unsigned>
MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
  assert(BlockInfo[MBBNum].hasResources() &&
         "getResources() must be called before getProcResourceCycles()");
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
  return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}

//===----------------------------------------------------------------------===//
//                         Ensemble utility functions
//===----------------------------------------------------------------------===//

MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  : MTM(*ct) {
  BlockInfo.resize(MTM.BlockInfo.size());
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}

// Virtual destructor serves as an anchor.
MachineTraceMetrics::Ensemble::~Ensemble() {}

const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources from trace above. The top block is simple.
  if (!TBI->Pred) {
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }

  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;

  // Compute per-resource depths.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources for the current block.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());

  // The trace tail is done.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    std::copy(PRCycles.begin(), PRCycles.end(),
              ProcResourceHeights.begin() + PROffset);
    return;
  }

  // Compute from the block below. A post-order traversal ensures the
  // successor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;

  // Compute per-resource heights.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}

// Check if depth resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidDepth() ? TBI : nullptr;
}

// Check if height resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getHeightResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidHeight() ? TBI : nullptr;
}

/// Get an array of processor resource depths for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by all blocks preceding MBB in its trace. It does not include instructions
/// in MBB.
///
/// Compare TraceBlockInfo::InstrDepth.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}

/// Get an array of processor resource heights for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by this block and all blocks following it in its trace.
///
/// Compare TraceBlockInfo::InstrHeight.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}
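
// Note that the per-resource depths for a block exclude the block itself while
// the per-resource heights include it, so summing ProcResourceDepths[K] and
// ProcResourceHeights[K] for the center block counts every block in the trace
// exactly once per resource kind; getResourceLength() later in this file
// relies on that.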

//===----------------------------------------------------------------------===//
//                         Trace Selection Strategies
//===----------------------------------------------------------------------===//
//
// A trace selection strategy is implemented as a sub-class of Ensemble. The
// trace through a block B is computed by two DFS traversals of the CFG
// starting from B. One upwards, and one downwards. During the upwards DFS,
// pickTracePred() is called on the post-ordered blocks. During the downwards
// DFS, pickTraceSucc() is called in a post-order.
//

// We never allow traces that leave loops, but we do allow traces to enter
// nested loops. We also never allow traces to contain back-edges.
//
// This means that a loop header can never appear above the center block of a
// trace, except as the trace head. Below the center block, loop exiting edges
// are banned.
//
// Return true if an edge from the From loop to the To loop is leaving a loop.
// Either of To and From can be null.
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  return From && !From->contains(To);
}

// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
    : MachineTraceMetrics::Ensemble(mtm) {}
};
}

// Select the preferred predecessor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
  if (MBB->pred_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  // Don't leave loops, and never follow back-edges.
  if (CurLoop && MBB == CurLoop->getHeader())
    return nullptr;
  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
  const MachineBasicBlock *Best = nullptr;
  unsigned BestDepth = 0;
  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
      getDepthResources(Pred);
    // Ignore cycles that aren't natural loops.
    if (!PredTBI)
      continue;
    // Pick the predecessor that would give this block the smallest InstrDepth.
    unsigned Depth = PredTBI->InstrDepth + CurCount;
    if (!Best || Depth < BestDepth)
      Best = Pred, BestDepth = Depth;
  }
  return Best;
}

// Select the preferred successor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
  if (MBB->pred_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  const MachineBasicBlock *Best = nullptr;
  unsigned BestHeight = 0;
  for (const MachineBasicBlock *Succ : MBB->successors()) {
    // Don't consider back-edges.
    if (CurLoop && Succ == CurLoop->getHeader())
      continue;
    // Don't consider successors exiting CurLoop.
    if (isExitingLoop(CurLoop, getLoopFor(Succ)))
      continue;
    const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
      getHeightResources(Succ);
    // Ignore cycles that aren't natural loops.
    if (!SuccTBI)
      continue;
    // Pick the successor that would give this block the smallest InstrHeight.
    unsigned Height = SuccTBI->InstrHeight;
    if (!Best || Height < BestHeight)
      Best = Succ, BestHeight = Height;
  }
  return Best;
}

// Get an Ensemble sub-class for the requested trace strategy.
MachineTraceMetrics::Ensemble *
MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
  Ensemble *&E = Ensembles[strategy];
  if (E)
    return E;

  // Allocate new Ensemble on demand.
  switch (strategy) {
  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
  default: llvm_unreachable("Invalid trace strategy enum");
  }
}
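
// Typical client usage, sketched here for reference (the identifiers Traces
// and MBB stand for the client pass's own variables):
//
//   MachineTraceMetrics::Ensemble *MinInstr =
//       Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
//   MachineTraceMetrics::Trace T = MinInstr->getTrace(MBB);
//   unsigned CritPath = T.getCriticalPath();
//   unsigned Depth = T.getResourceDepth(/*Bottom=*/false);
//
// where Traces would normally come from getAnalysis<MachineTraceMetrics>() in
// a pass that declared the dependency in its getAnalysisUsage().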

void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
  BlockInfo[MBB->getNumber()].invalidate();
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->invalidate(MBB);
}

void MachineTraceMetrics::verifyAnalysis() const {
  if (!MF)
    return;
#ifndef NDEBUG
  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->verify();
#endif
}

//===----------------------------------------------------------------------===//
//                               Trace building
//===----------------------------------------------------------------------===//
//
// Traces are built by two CFG traversals. To avoid recomputing too much, use a
// set abstraction that confines the search to the current loop, and doesn't
// revisit blocks.

namespace {
struct LoopBounds {
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  bool Downward;
  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops)
    : Blocks(blocks), Loops(loops), Downward(false) {}
};
}

// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {
template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;
public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}
  void finishPostorder(const MachineBasicBlock*) {}

  bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};
}

/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
               << MBB->getNumber() << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (auto I : inverse_post_order_ext(MBB, Bounds)) {
    DEBUG(dbgs() << "  pred for BB#" << I->getNumber() << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    DEBUG({
      if (TBI.Pred)
        dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (auto I : post_order_ext(MBB, Bounds)) {
    DEBUG(dbgs() << "  succ for BB#" << I->getNumber() << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    DEBUG({
      if (TBI.Succ)
        dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}

/// Invalidate traces through BadMBB.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
            << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(Pred);
          continue;
        }
        // Verify that TBI.Succ is actually a Pred successor.
        assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Invalidate depth resources of blocks below MBB.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
            << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Succ : MBB->successors()) {
        TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(Succ);
          continue;
        }
        // Verify that TBI.Pred is actually a Succ predecessor.
        assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycles entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}

void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}

//===----------------------------------------------------------------------===//
//                             Data Dependencies
//===----------------------------------------------------------------------===//
//
// Compute the depth and height of each instruction based on data dependencies
// and instruction latencies. These cycle numbers assume that the CPU can issue
// an infinite number of instructions per cycle as long as their dependencies
// are ready.

// A data dependency is represented as a defining MI and operand numbers on the
// defining and using MI.
namespace {
struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
    : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
    : UseOp(UseOp) {
    assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};
}

// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
static bool getDataDeps(const MachineInstr *UseMI,
                        SmallVectorImpl<DataDep> &Deps,
                        const MachineRegisterInfo *MRI) {
  // Debug values should not be included in any calculations.
  if (UseMI->isDebugValue())
    return false;

  bool HasPhysRegs = false;
  for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(),
       E = UseMI->operands_end(); I != E; ++I) {
    const MachineOperand &MO = *I;
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      HasPhysRegs = true;
      continue;
    }
    // Collect virtual register reads.
    if (MO.readsReg())
      Deps.push_back(DataDep(MRI, Reg, UseMI->getOperandNo(I)));
  }
  return HasPhysRegs;
}

// Get the input data dependencies of a PHI instruction, using Pred as the
// preferred predecessor.
// This will add at most one dependency to Deps.
static void getPHIDeps(const MachineInstr *UseMI,
                       SmallVectorImpl<DataDep> &Deps,
                       const MachineBasicBlock *Pred,
                       const MachineRegisterInfo *MRI) {
  // No predecessor at the beginning of a trace. Ignore dependencies.
  if (!Pred)
    return;
  assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
  for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
    if (UseMI->getOperand(i + 1).getMBB() == Pred) {
      unsigned Reg = UseMI->getOperand(i).getReg();
      Deps.push_back(DataDep(MRI, Reg, i));
      return;
    }
  }
}

// Keep track of physreg data dependencies by recording each live register unit.
// Associate each regunit with an instruction operand. Depending on the
// direction instructions are scanned, it could be the operand that defined the
// regunit, or the highest operand to read the regunit.
namespace {
struct LiveRegUnit {
  unsigned RegUnit;
  unsigned Cycle;
  const MachineInstr *MI;
  unsigned Op;

  unsigned getSparseSetIndex() const { return RegUnit; }

  LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
};
}

// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> Kills;
  SmallVector<unsigned, 8> LiveDefOps;

  for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
       ME = UseMI->operands_end(); MI != ME; ++MI) {
    const MachineOperand &MO = *MI;
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;
    // Track live defs and kills for updating RegUnits.
    if (MO.isDef()) {
      if (MO.isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(UseMI->getOperandNo(MI));
    } else if (MO.isKill())
      Kills.push_back(Reg);
    // Identify dependencies.
    if (!MO.readsReg())
      continue;
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills.
  for (unsigned i = 0, e = Kills.size(); i != e; ++i)
    for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
      RegUnits.erase(*Units);

  // Second, live defs.
  for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
    unsigned DefOp = LiveDefOps[i];
    for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
         Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}

/// The length of the critical path through a trace is the maximum of two path
/// lengths:
///
/// 1. The maximum height+depth over all instructions in the trace center block.
///
/// 2. The longest cross-block dependency chain. For small blocks, it is
///    possible that the critical path through the trace doesn't include any
///    instructions in the block.
///
/// This function computes the second number from the live-in list of the
/// center block.
unsigned MachineTraceMetrics::Ensemble::
computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
  assert(TBI.HasValidInstrDepths && "Missing depth info");
  assert(TBI.HasValidInstrHeights && "Missing height info");
  unsigned MaxLen = 0;
  for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
    const LiveInReg &LIR = TBI.LiveIns[i];
    if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
      continue;
    const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
    // Ignore dependencies outside the current trace.
    const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
    if (!DefTBI.isUsefulDominator(TBI))
      continue;
    unsigned Len = LIR.Height + Cycles[DefMI].Depth;
    MaxLen = std::max(MaxLen, Len);
  }
  return MaxLen;
}

/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  SmallVector<DataDep, 8> Deps;
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      // Collect all data dependencies.
      Deps.clear();
      if (UseMI.isPHI())
        getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI);
      else if (getDataDeps(&UseMI, Deps, MTM.MRI))
        updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

      // Filter and process dependencies, computing the earliest issue cycle.
      unsigned Cycle = 0;
      for (const DataDep &Dep : Deps) {
        const TraceBlockInfo &DepTBI =
          BlockInfo[Dep.DefMI->getParent()->getNumber()];
        // Ignore dependencies from outside the current trace.
        if (!DepTBI.isUsefulDominator(TBI))
          continue;
        assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
        unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
        // Add latency if DefMI is a real instruction. Transients get latency 0.
        if (!Dep.DefMI->isTransient())
          DepCycle += MTM.SchedModel
            .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
        Cycle = std::max(Cycle, DepCycle);
      }
      // Remember the instruction depth.
      InstrCycles &MICycles = Cycles[&UseMI];
      MICycles.Depth = Cycle;

      if (!TBI.HasValidInstrHeights) {
        DEBUG(dbgs() << Cycle << '\t' << UseMI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
    }
  }
}

// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> ReadOps;

  for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
       MOE = MI->operands_end(); MOI != MOE; ++MOI) {
    const MachineOperand &MO = *MOI;
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;
    if (MO.readsReg())
      ReadOps.push_back(MI->getOperandNo(MOI));
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI->isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel
          .computeOperandLatency(MI, MI->getOperandNo(MOI), I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
    unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != MI) {
        LRU.Cycle = Height;
        LRU.MI = MI;
        LRU.Op = ReadOps[i];
      }
    }
  }

  return Height;
}

typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;

// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
static bool pushDepHeight(const DataDep &Dep,
                          const MachineInstr *UseMI, unsigned UseHeight,
                          MIHeightMap &Heights,
                          const TargetSchedModel &SchedModel,
                          const TargetInstrInfo *TII) {
  // Adjust height by Dep.DefMI latency.
  if (!Dep.DefMI->isTransient())
    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
                                                  UseMI, Dep.UseOp);

  // Update Heights[DefMI] to be the maximum height seen.
  MIHeightMap::iterator I;
  bool New;
  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  if (New)
    return true;

  // DefMI has been pushed before. Give it the max height.
  if (I->second < UseHeight)
    I->second = UseHeight;
  return false;
}

/// Assuming that the virtual register defined by DefMI:DefOp was used by
/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
           ArrayRef<const MachineBasicBlock*> Trace) {
  assert(!Trace.empty() && "Trace should contain at least one block");
  unsigned Reg = DefMI->getOperand(DefOp).getReg();
  assert(TargetRegisterInfo::isVirtualRegister(Reg));
  const MachineBasicBlock *DefMBB = DefMI->getParent();

  // Reg is live-in to all blocks in Trace that follow DefMBB.
  for (unsigned i = Trace.size(); i; --i) {
    const MachineBasicBlock *MBB = Trace[i-1];
    if (MBB == DefMBB)
      return;
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Just add the register. The height will be updated later.
    TBI.LiveIns.push_back(Reg);
  }
}

/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }
  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(&PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), &PHI, Height,
                            Heights, MTM.SchedModel, MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
         BI != BB;) {
      const MachineInstr *MI = --BI;

      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
                                      MTM.SchedModel, MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        DEBUG(dbgs() << Cycle << '\t' << *MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (SparseSet<LiveRegUnit>::const_iterator
         RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
      TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
      DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
                   << '@' << RI->Cycle);
    }
    DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}

MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];

  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
    computeTrace(MBB);
  if (!TBI.HasValidInstrDepths)
    computeInstrDepths(MBB);
  if (!TBI.HasValidInstrHeights)
    computeInstrHeights(MBB);

  return Trace(*this, TBI);
}

unsigned
MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
  assert(MI && "Not an instruction.");
  assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
         "MI must be in the trace center block");
  InstrCycles Cyc = getInstrCycles(MI);
  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
}
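
// For example, on a trace with a 10-cycle critical path, an instruction with
// depth 3 and height 4 has 10 - (3 + 4) = 3 cycles of slack.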

unsigned
MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  SmallVector<DataDep, 1> Deps;
  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  DataDep &Dep = Deps.front();
  unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
  // Add latency if DefMI is a real instruction. Transients get latency 0.
  if (!Dep.DefMI->isTransient())
    DepCycle += TE.MTM.SchedModel
      .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp);
  return DepCycle;
}

/// When bottom is set include instructions in current block in estimate.
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  // Find the limiting processor resource.
  // Numbers have been pre-scaled to be comparable.
  unsigned PRMax = 0;
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  if (Bottom) {
    ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  } else {
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K]);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  /// All instructions before current block
  unsigned Instrs = TBI.InstrDepth;
  // plus instructions in current block
  if (Bottom)
    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
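
// Illustrative numbers only: with InstrDepth = 12, an issue width of 4, and a
// scaled resource maximum converting to 5 cycles, the estimate above is
// max(12 / 4, 5) = 5 cycles.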

unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
    ->unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
           PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
           PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles +=
            (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}

bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
                                              const MachineInstr *UseMI) const {
  if (DefMI->getParent() == UseMI->getParent())
    return true;

  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];

  return DepTBI.isUsefulDominator(TBI);
}

void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    OS << "  BB#" << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
  }
}

void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=BB#" << Pred->getNumber();
    else
      OS << " pred=null";
    OS << " head=BB#" << Head;
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=BB#" << Succ->getNumber();
    else
      OS << " succ=null";
    OS << " tail=BB#" << Tail;
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
}

void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
     << " --> BB#" << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  const TraceBlockInfo *Block = &TBI;
  OS << "\nBB#" << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- BB#" << Num;
    Block = &TE.BlockInfo[Num];
  }

  Block = &TBI;
  OS << "\n    ";
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> BB#" << Num;
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}