LLVM 4.0.0
MachineTraceMetrics.cpp
1 //===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/CodeGen/MachineTraceMetrics.h"
11 #include "llvm/ADT/PostOrderIterator.h"
12 #include "llvm/ADT/SparseSet.h"
13 #include "llvm/CodeGen/MachineBasicBlock.h"
14 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
15 #include "llvm/CodeGen/MachineLoopInfo.h"
16 #include "llvm/CodeGen/MachineRegisterInfo.h"
17 #include "llvm/CodeGen/Passes.h"
18 #include "llvm/MC/MCSubtargetInfo.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Format.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/Target/TargetInstrInfo.h"
23 #include "llvm/Target/TargetRegisterInfo.h"
24 #include "llvm/Target/TargetSubtargetInfo.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "machine-trace-metrics"
29 
30 char MachineTraceMetrics::ID = 0;
31 char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
32 
33 INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
34  "machine-trace-metrics", "Machine Trace Metrics", false, true)
35 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
36 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
37 INITIALIZE_PASS_END(MachineTraceMetrics,
38  "machine-trace-metrics", "Machine Trace Metrics", false, true)
39 
40 MachineTraceMetrics::MachineTraceMetrics()
41  : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
42  MRI(nullptr), Loops(nullptr) {
43  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
44 }
45 
46 void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
47  AU.setPreservesAll();
48  AU.addRequired<MachineBranchProbabilityInfo>();
49  AU.addRequired<MachineLoopInfo>();
50  MachineFunctionPass::getAnalysisUsage(AU);
51 }
52 
53 bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
54  MF = &Func;
55  const TargetSubtargetInfo &ST = MF->getSubtarget();
56  TII = ST.getInstrInfo();
57  TRI = ST.getRegisterInfo();
58  MRI = &MF->getRegInfo();
59  Loops = &getAnalysis<MachineLoopInfo>();
60  SchedModel.init(ST.getSchedModel(), &ST, TII);
61  BlockInfo.resize(MF->getNumBlockIDs());
62  ProcResourceCycles.resize(MF->getNumBlockIDs() *
63  SchedModel.getNumProcResourceKinds());
64  return false;
65 }
66 
67 void MachineTraceMetrics::releaseMemory() {
68  MF = nullptr;
69  BlockInfo.clear();
70  for (unsigned i = 0; i != TS_NumStrategies; ++i) {
71  delete Ensembles[i];
72  Ensembles[i] = nullptr;
73  }
74 }
75 
76 //===----------------------------------------------------------------------===//
77 // Fixed block information
78 //===----------------------------------------------------------------------===//
79 //
80 // The number of instructions in a basic block and the CPU resources used by
81 // those instructions don't depend on any given trace strategy.
82 
83 /// Compute the resource usage in basic block MBB.
84 const MachineTraceMetrics::FixedBlockInfo*
85 MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
86  assert(MBB && "No basic block");
87  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
88  if (FBI->hasResources())
89  return FBI;
90 
91  // Compute resource usage in the block.
92  FBI->HasCalls = false;
93  unsigned InstrCount = 0;
94 
95  // Add up per-processor resource cycles as well.
96  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
97  SmallVector<unsigned, 32> PRCycles(PRKinds);
98 
99  for (const auto &MI : *MBB) {
100  if (MI.isTransient())
101  continue;
102  ++InstrCount;
103  if (MI.isCall())
104  FBI->HasCalls = true;
105 
106  // Count processor resources used.
107  if (!SchedModel.hasInstrSchedModel())
108  continue;
109  const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
110  if (!SC->isValid())
111  continue;
112 
113  for (TargetSchedModel::ProcResIter
114  PI = SchedModel.getWriteProcResBegin(SC),
115  PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
116  assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
117  PRCycles[PI->ProcResourceIdx] += PI->Cycles;
118  }
119  }
120  FBI->InstrCount = InstrCount;
121 
122  // Scale the resource cycles so they are comparable.
123  unsigned PROffset = MBB->getNumber() * PRKinds;
124  for (unsigned K = 0; K != PRKinds; ++K)
125  ProcResourceCycles[PROffset + K] =
126  PRCycles[K] * SchedModel.getResourceFactor(K);
127 
128  return FBI;
129 }
130 
131 ArrayRef<unsigned>
132 MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
133  assert(BlockInfo[MBBNum].hasResources() &&
134  "getResources() must be called before getProcResourceCycles()");
135  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
136  assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
137  return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
138 }
139 
140 
141 //===----------------------------------------------------------------------===//
142 // Ensemble utility functions
143 //===----------------------------------------------------------------------===//
144 
145 MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
146  : MTM(*ct) {
147  BlockInfo.resize(MTM.BlockInfo.size());
148  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
149  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
150  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
151 }
152 
153 // Virtual destructor serves as an anchor.
154 MachineTraceMetrics::Ensemble::~Ensemble() {}
155 
156 const MachineLoop*
157 MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
158  return MTM.Loops->getLoopFor(MBB);
159 }
160 
161 // Update resource-related information in the TraceBlockInfo for MBB.
162 // Only update resources related to the trace above MBB.
163 void MachineTraceMetrics::Ensemble::
164 computeDepthResources(const MachineBasicBlock *MBB) {
165  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
166  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
167  unsigned PROffset = MBB->getNumber() * PRKinds;
168 
169  // Compute resources from trace above. The top block is simple.
170  if (!TBI->Pred) {
171  TBI->InstrDepth = 0;
172  TBI->Head = MBB->getNumber();
173  std::fill(ProcResourceDepths.begin() + PROffset,
174  ProcResourceDepths.begin() + PROffset + PRKinds, 0);
175  return;
176  }
177 
178  // Compute from the block above. A post-order traversal ensures the
179  // predecessor is always computed first.
180  unsigned PredNum = TBI->Pred->getNumber();
181  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
182  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
183  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
184  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
185  TBI->Head = PredTBI->Head;
186 
187  // Compute per-resource depths.
188  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
189  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
190  for (unsigned K = 0; K != PRKinds; ++K)
191  ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
192 }
193 
194 // Update resource-related information in the TraceBlockInfo for MBB.
195 // Only update resources related to the trace below MBB.
196 void MachineTraceMetrics::Ensemble::
197 computeHeightResources(const MachineBasicBlock *MBB) {
198  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
199  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
200  unsigned PROffset = MBB->getNumber() * PRKinds;
201 
202  // Compute resources for the current block.
203  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
204  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
205 
206  // The trace tail is done.
207  if (!TBI->Succ) {
208  TBI->Tail = MBB->getNumber();
209  std::copy(PRCycles.begin(), PRCycles.end(),
210  ProcResourceHeights.begin() + PROffset);
211  return;
212  }
213 
214  // Compute from the block below. A post-order traversal ensures the
215 // successor is always computed first.
216  unsigned SuccNum = TBI->Succ->getNumber();
217  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
218  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
219  TBI->InstrHeight += SuccTBI->InstrHeight;
220  TBI->Tail = SuccTBI->Tail;
221 
222  // Compute per-resource heights.
223  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
224  for (unsigned K = 0; K != PRKinds; ++K)
225  ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
226 }
227 
228 // Check if depth resources for MBB are valid and return the TBI.
229 // Return NULL if the resources have been invalidated.
230 const MachineTraceMetrics::TraceBlockInfo*
231 MachineTraceMetrics::Ensemble::
232 getDepthResources(const MachineBasicBlock *MBB) const {
233  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
234  return TBI->hasValidDepth() ? TBI : nullptr;
235 }
236 
237 // Check if height resources for MBB are valid and return the TBI.
238 // Return NULL if the resources have been invalidated.
239 const MachineTraceMetrics::TraceBlockInfo*
240 MachineTraceMetrics::Ensemble::
241 getHeightResources(const MachineBasicBlock *MBB) const {
242  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
243  return TBI->hasValidHeight() ? TBI : nullptr;
244 }
245 
246 /// Get an array of processor resource depths for MBB. Indexed by processor
247 /// resource kind, this array contains the scaled processor resources consumed
248 /// by all blocks preceding MBB in its trace. It does not include instructions
249 /// in MBB.
250 ///
251 /// Compare TraceBlockInfo::InstrDepth.
252 ArrayRef<unsigned>
253 MachineTraceMetrics::Ensemble::
254 getProcResourceDepths(unsigned MBBNum) const {
255  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
256  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
257  return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
258 }
259 
260 /// Get an array of processor resource heights for MBB. Indexed by processor
261 /// resource kind, this array contains the scaled processor resources consumed
262 /// by this block and all blocks following it in its trace.
263 ///
264 /// Compare TraceBlockInfo::InstrHeight.
265 ArrayRef<unsigned>
266 MachineTraceMetrics::Ensemble::
267 getProcResourceHeights(unsigned MBBNum) const {
268  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
269  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
270  return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
271 }
272 
273 //===----------------------------------------------------------------------===//
274 // Trace Selection Strategies
275 //===----------------------------------------------------------------------===//
276 //
277 // A trace selection strategy is implemented as a sub-class of Ensemble. The
278 // trace through a block B is computed by two DFS traversals of the CFG
279 // starting from B. One upwards, and one downwards. During the upwards DFS,
280 // pickTracePred() is called on the post-ordered blocks. During the downwards
281 // DFS, pickTraceSucc() is called in a post-order.
282 //
283 
284 // We never allow traces that leave loops, but we do allow traces to enter
285 // nested loops. We also never allow traces to contain back-edges.
286 //
287 // This means that a loop header can never appear above the center block of a
288 // trace, except as the trace head. Below the center block, loop exiting edges
289 // are banned.
290 //
291 // Return true if an edge from the From loop to the To loop is leaving a loop.
292 // Either of To and From can be null.
293 static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
294  return From && !From->contains(To);
295 }
296 
297 // MinInstrCountEnsemble - Pick the trace that executes the least number of
298 // instructions.
299 namespace {
300 class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
301  const char *getName() const override { return "MinInstr"; }
302  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
303  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
304 
305 public:
306  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
307  : MachineTraceMetrics::Ensemble(mtm) {}
308 };
309 }
310 
311 // Select the preferred predecessor for MBB.
312 const MachineBasicBlock*
313 MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
314  if (MBB->pred_empty())
315  return nullptr;
316  const MachineLoop *CurLoop = getLoopFor(MBB);
317  // Don't leave loops, and never follow back-edges.
318  if (CurLoop && MBB == CurLoop->getHeader())
319  return nullptr;
320  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
321  const MachineBasicBlock *Best = nullptr;
322  unsigned BestDepth = 0;
323  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
324  const MachineTraceMetrics::TraceBlockInfo *PredTBI =
325  getDepthResources(Pred);
326  // Ignore cycles that aren't natural loops.
327  if (!PredTBI)
328  continue;
329  // Pick the predecessor that would give this block the smallest InstrDepth.
330  unsigned Depth = PredTBI->InstrDepth + CurCount;
331  if (!Best || Depth < BestDepth) {
332  Best = Pred;
333  BestDepth = Depth;
334  }
335  }
336  return Best;
337 }
338 
339 // Select the preferred successor for MBB.
340 const MachineBasicBlock*
341 MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
342  if (MBB->pred_empty())
343  return nullptr;
344  const MachineLoop *CurLoop = getLoopFor(MBB);
345  const MachineBasicBlock *Best = nullptr;
346  unsigned BestHeight = 0;
347  for (const MachineBasicBlock *Succ : MBB->successors()) {
348  // Don't consider back-edges.
349  if (CurLoop && Succ == CurLoop->getHeader())
350  continue;
351  // Don't consider successors exiting CurLoop.
352  if (isExitingLoop(CurLoop, getLoopFor(Succ)))
353  continue;
354  const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
355  getHeightResources(Succ);
356  // Ignore cycles that aren't natural loops.
357  if (!SuccTBI)
358  continue;
359  // Pick the successor that would give this block the smallest InstrHeight.
360  unsigned Height = SuccTBI->InstrHeight;
361  if (!Best || Height < BestHeight) {
362  Best = Succ;
363  BestHeight = Height;
364  }
365  }
366  return Best;
367 }
368 
369 // Get an Ensemble sub-class for the requested trace strategy.
370 MachineTraceMetrics::Ensemble *
371 MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
372  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
373  Ensemble *&E = Ensembles[strategy];
374  if (E)
375  return E;
376 
377  // Allocate new Ensemble on demand.
378  switch (strategy) {
379  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
380  default: llvm_unreachable("Invalid trace strategy enum");
381  }
382 }
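// --- Editor's illustrative sketch (not part of the original LLVM source) ---
// Roughly how a client pass obtains an ensemble: it declares
// AU.addRequired<MachineTraceMetrics>() in its getAnalysisUsage(), then asks
// for the strategy it wants. The helper name and the pass parameter are
// assumptions made for the example only.
static MachineTraceMetrics::Ensemble *
exampleGetMinInstrEnsemble(MachineFunctionPass &P) {
  MachineTraceMetrics &MTM = P.getAnalysis<MachineTraceMetrics>();
  // Ensembles are created lazily above and stay owned by the analysis pass.
  return MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
}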
383 
384 void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
385  DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
386  BlockInfo[MBB->getNumber()].invalidate();
387  for (unsigned i = 0; i != TS_NumStrategies; ++i)
388  if (Ensembles[i])
389  Ensembles[i]->invalidate(MBB);
390 }
391 
392 void MachineTraceMetrics::verifyAnalysis() const {
393  if (!MF)
394  return;
395 #ifndef NDEBUG
396  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
397  for (unsigned i = 0; i != TS_NumStrategies; ++i)
398  if (Ensembles[i])
399  Ensembles[i]->verify();
400 #endif
401 }
402 
403 //===----------------------------------------------------------------------===//
404 // Trace building
405 //===----------------------------------------------------------------------===//
406 //
407 // Traces are built by two CFG traversals. To avoid recomputing too much, use a
408 // set abstraction that confines the search to the current loop, and doesn't
409 // revisit blocks.
410 
411 namespace {
412 struct LoopBounds {
413  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
414  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
415  const MachineLoopInfo *Loops;
416  bool Downward;
417  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
418  const MachineLoopInfo *loops)
419  : Blocks(blocks), Loops(loops), Downward(false) {}
420 };
421 }
422 
423 // Specialize po_iterator_storage in order to prune the post-order traversal so
424 // it is limited to the current loop and doesn't traverse the loop back edges.
425 namespace llvm {
426 template<>
427 class po_iterator_storage<LoopBounds, true> {
428  LoopBounds &LB;
429 public:
430  po_iterator_storage(LoopBounds &lb) : LB(lb) {}
431  void finishPostorder(const MachineBasicBlock*) {}
432 
433  bool insertEdge(Optional<const MachineBasicBlock *> From,
434  const MachineBasicBlock *To) {
435  // Skip already visited To blocks.
436  MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
437  if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
438  return false;
439  // From is null once when To is the trace center block.
440  if (From) {
441  if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
442  // Don't follow backedges, don't leave FromLoop when going upwards.
443  if ((LB.Downward ? To : *From) == FromLoop->getHeader())
444  return false;
445  // Don't leave FromLoop.
446  if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
447  return false;
448  }
449  }
450  // To is a new block. Mark the block as visited in case the CFG has cycles
451  // that MachineLoopInfo didn't recognize as a natural loop.
452  return LB.Visited.insert(To).second;
453  }
454 };
455 }
456 
457 /// Compute the trace through MBB.
458 void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
459  DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
460  << MBB->getNumber() << '\n');
461  // Set up loop bounds for the backwards post-order traversal.
462  LoopBounds Bounds(BlockInfo, MTM.Loops);
463 
464  // Run an upwards post-order search for the trace start.
465  Bounds.Downward = false;
466  Bounds.Visited.clear();
467  for (auto I : inverse_post_order_ext(MBB, Bounds)) {
468  DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
469  TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
470  // All the predecessors have been visited, pick the preferred one.
471  TBI.Pred = pickTracePred(I);
472  DEBUG({
473  if (TBI.Pred)
474  dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
475  else
476  dbgs() << "null\n";
477  });
478  // The trace leading to I is now known, compute the depth resources.
479  computeDepthResources(I);
480  }
481 
482  // Run a downwards post-order search for the trace end.
483  Bounds.Downward = true;
484  Bounds.Visited.clear();
485  for (auto I : post_order_ext(MBB, Bounds)) {
486  DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
487  TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
488  // All the successors have been visited, pick the preferred one.
489  TBI.Succ = pickTraceSucc(I);
490  DEBUG({
491  if (TBI.Succ)
492  dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
493  else
494  dbgs() << "null\n";
495  });
496  // The trace leaving I is now known, compute the height resources.
497  computeHeightResources(I);
498  }
499 }
500 
501 /// Invalidate traces through BadMBB.
502 void
503 MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
504  SmallVector<const MachineBasicBlock*, 16> WorkList;
505  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
506 
507  // Invalidate height resources of blocks above MBB.
508  if (BadTBI.hasValidHeight()) {
509  BadTBI.invalidateHeight();
510  WorkList.push_back(BadMBB);
511  do {
512  const MachineBasicBlock *MBB = WorkList.pop_back_val();
513  DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
514  << " height.\n");
515  // Find any MBB predecessors that have MBB as their preferred successor.
516  // They are the only ones that need to be invalidated.
517  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
518  TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
519  if (!TBI.hasValidHeight())
520  continue;
521  if (TBI.Succ == MBB) {
522  TBI.invalidateHeight();
523  WorkList.push_back(Pred);
524  continue;
525  }
526  // Verify that TBI.Succ is actually a Pred successor.
527  assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
528  }
529  } while (!WorkList.empty());
530  }
531 
532  // Invalidate depth resources of blocks below MBB.
533  if (BadTBI.hasValidDepth()) {
534  BadTBI.invalidateDepth();
535  WorkList.push_back(BadMBB);
536  do {
537  const MachineBasicBlock *MBB = WorkList.pop_back_val();
538  DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
539  << " depth.\n");
540  // Find any MBB successors that have MBB as their preferred predecessor.
541  // They are the only ones that need to be invalidated.
542  for (const MachineBasicBlock *Succ : MBB->successors()) {
543  TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
544  if (!TBI.hasValidDepth())
545  continue;
546  if (TBI.Pred == MBB) {
547  TBI.invalidateDepth();
548  WorkList.push_back(Succ);
549  continue;
550  }
551  // Verify that TBI.Pred is actually a Succ predecessor.
552  assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
553  }
554  } while (!WorkList.empty());
555  }
556 
557  // Clear any per-instruction data. We only have to do this for BadMBB itself
558  // because the instructions in that block may change. Other blocks may be
559  // invalidated, but their instructions will stay the same, so there is no
560  // need to erase the Cycle entries. They will be overwritten when we
561  // recompute.
562  for (const auto &I : *BadMBB)
563  Cycles.erase(&I);
564 }
565 
566 void MachineTraceMetrics::Ensemble::verify() const {
567 #ifndef NDEBUG
568  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
569  "Outdated BlockInfo size");
570  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
571  const TraceBlockInfo &TBI = BlockInfo[Num];
572  if (TBI.hasValidDepth() && TBI.Pred) {
573  const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
574  assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
575  assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
576  "Trace is broken, depth should have been invalidated.");
577  const MachineLoop *Loop = getLoopFor(MBB);
578  assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
579  }
580  if (TBI.hasValidHeight() && TBI.Succ) {
581  const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
582  assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
583  assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
584  "Trace is broken, height should have been invalidated.");
585  const MachineLoop *Loop = getLoopFor(MBB);
586  const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
587  assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
588  "Trace contains backedge");
589  }
590  }
591 #endif
592 }
593 
594 //===----------------------------------------------------------------------===//
595 // Data Dependencies
596 //===----------------------------------------------------------------------===//
597 //
598 // Compute the depth and height of each instruction based on data dependencies
599 // and instruction latencies. These cycle numbers assume that the CPU can issue
600 // an infinite number of instructions per cycle as long as their dependencies
601 // are ready.
602 
603 // A data dependency is represented as a defining MI and operand numbers on the
604 // defining and using MI.
605 namespace {
606 struct DataDep {
607  const MachineInstr *DefMI;
608  unsigned DefOp;
609  unsigned UseOp;
610 
611  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
612  : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
613 
614  /// Create a DataDep from an SSA form virtual register.
615  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
616  : UseOp(UseOp) {
617  assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
618  MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
619  assert(!DefI.atEnd() && "Register has no defs");
620  DefMI = DefI->getParent();
621  DefOp = DefI.getOperandNo();
622  assert((++DefI).atEnd() && "Register has multiple defs");
623  }
624 };
625 }
626 
627 // Get the input data dependencies that must be ready before UseMI can issue.
628 // Return true if UseMI has any physreg operands.
629 static bool getDataDeps(const MachineInstr &UseMI,
630  SmallVectorImpl<DataDep> &Deps,
631  const MachineRegisterInfo *MRI) {
632  // Debug values should not be included in any calculations.
633  if (UseMI.isDebugValue())
634  return false;
635 
636  bool HasPhysRegs = false;
637  for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
638  E = UseMI.operands_end(); I != E; ++I) {
639  const MachineOperand &MO = *I;
640  if (!MO.isReg())
641  continue;
642  unsigned Reg = MO.getReg();
643  if (!Reg)
644  continue;
645  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
646  HasPhysRegs = true;
647  continue;
648  }
649  // Collect virtual register reads.
650  if (MO.readsReg())
651  Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
652  }
653  return HasPhysRegs;
654 }
655 
656 // Get the input data dependencies of a PHI instruction, using Pred as the
657 // preferred predecessor.
658 // This will add at most one dependency to Deps.
659 static void getPHIDeps(const MachineInstr &UseMI,
660  SmallVectorImpl<DataDep> &Deps,
661  const MachineBasicBlock *Pred,
662  const MachineRegisterInfo *MRI) {
663  // No predecessor at the beginning of a trace. Ignore dependencies.
664  if (!Pred)
665  return;
666  assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
667  for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
668  if (UseMI.getOperand(i + 1).getMBB() == Pred) {
669  unsigned Reg = UseMI.getOperand(i).getReg();
670  Deps.push_back(DataDep(MRI, Reg, i));
671  return;
672  }
673  }
674 }
675 
676 // Keep track of physreg data dependencies by recording each live register unit.
677 // Associate each regunit with an instruction operand. Depending on the
678 // direction instructions are scanned, it could be the operand that defined the
679 // regunit, or the highest operand to read the regunit.
680 namespace {
681 struct LiveRegUnit {
682  unsigned RegUnit;
683  unsigned Cycle;
684  const MachineInstr *MI;
685  unsigned Op;
686 
687  unsigned getSparseSetIndex() const { return RegUnit; }
688 
689  LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
690 };
691 }
692 
693 // Identify physreg dependencies for UseMI, and update the live regunit
694 // tracking set when scanning instructions downwards.
695 static void updatePhysDepsDownwards(const MachineInstr *UseMI,
696  SmallVectorImpl<DataDep> &Deps,
697  SparseSet<LiveRegUnit> &RegUnits,
698  const TargetRegisterInfo *TRI) {
699  SmallVector<unsigned, 8> Kills;
700  SmallVector<unsigned, 8> LiveDefOps;
701 
702  for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
703  ME = UseMI->operands_end(); MI != ME; ++MI) {
704  const MachineOperand &MO = *MI;
705  if (!MO.isReg())
706  continue;
707  unsigned Reg = MO.getReg();
708  if (!TargetRegisterInfo::isPhysicalRegister(Reg))
709  continue;
710  // Track live defs and kills for updating RegUnits.
711  if (MO.isDef()) {
712  if (MO.isDead())
713  Kills.push_back(Reg);
714  else
715  LiveDefOps.push_back(UseMI->getOperandNo(MI));
716  } else if (MO.isKill())
717  Kills.push_back(Reg);
718  // Identify dependencies.
719  if (!MO.readsReg())
720  continue;
721  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
722  SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
723  if (I == RegUnits.end())
724  continue;
725  Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
726  break;
727  }
728  }
729 
730  // Update RegUnits to reflect live registers after UseMI.
731  // First kills.
732  for (unsigned Kill : Kills)
733  for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
734  RegUnits.erase(*Units);
735 
736  // Second, live defs.
737  for (unsigned DefOp : LiveDefOps) {
738  for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
739  Units.isValid(); ++Units) {
740  LiveRegUnit &LRU = RegUnits[*Units];
741  LRU.MI = UseMI;
742  LRU.Op = DefOp;
743  }
744  }
745 }
746 
747 /// The length of the critical path through a trace is the maximum of two path
748 /// lengths:
749 ///
750 /// 1. The maximum height+depth over all instructions in the trace center block.
751 ///
752 /// 2. The longest cross-block dependency chain. For small blocks, it is
753 /// possible that the critical path through the trace doesn't include any
754 /// instructions in the block.
755 ///
756 /// This function computes the second number from the live-in list of the
757 /// center block.
758 unsigned MachineTraceMetrics::Ensemble::
759 computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
760  assert(TBI.HasValidInstrDepths && "Missing depth info");
761  assert(TBI.HasValidInstrHeights && "Missing height info");
762  unsigned MaxLen = 0;
763  for (const LiveInReg &LIR : TBI.LiveIns) {
764  if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
765  continue;
766  const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
767  // Ignore dependencies outside the current trace.
768  const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
769  if (!DefTBI.isUsefulDominator(TBI))
770  continue;
771  unsigned Len = LIR.Height + Cycles[DefMI].Depth;
772  MaxLen = std::max(MaxLen, Len);
773  }
774  return MaxLen;
775 }
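// --- Editor's illustrative sketch (not part of the original LLVM source) ---
// The other contribution to the critical path (point 1 in the comment above)
// is the maximum height+depth over the center block's own instructions. Using
// the public Trace interface that term reads roughly like this; the helper
// name and parameters are assumptions made for the example only.
static unsigned exampleCenterBlockPathLength(const MachineTraceMetrics::Trace &T,
                                             const MachineBasicBlock &MBB) {
  unsigned Len = 0;
  for (const MachineInstr &MI : MBB) {
    MachineTraceMetrics::InstrCycles Cyc = T.getInstrCycles(MI);
    Len = std::max(Len, Cyc.Depth + Cyc.Height);
  }
  return Len;
}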
776 
777 /// Compute instruction depths for all instructions above or in MBB in its
778 /// trace. This assumes that the trace through MBB has already been computed.
779 void MachineTraceMetrics::Ensemble::
780 computeInstrDepths(const MachineBasicBlock *MBB) {
781  // The top of the trace may already be computed, and HasValidInstrDepths
782  // implies Head->HasValidInstrDepths, so we only need to start from the first
783  // block in the trace that needs to be recomputed.
784  SmallVector<const MachineBasicBlock*, 8> Stack;
785  do {
786  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
787  assert(TBI.hasValidDepth() && "Incomplete trace");
788  if (TBI.HasValidInstrDepths)
789  break;
790  Stack.push_back(MBB);
791  MBB = TBI.Pred;
792  } while (MBB);
793 
794  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
795  // in the trace. We should track any live-out physregs that were defined in
796  // the trace. This is quite rare in SSA form, typically created by CSE
797  // hoisting a compare.
798  SparseSet<LiveRegUnit> RegUnits;
799  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
800 
801  // Go through trace blocks in top-down order, stopping after the center block.
802  SmallVector<DataDep, 8> Deps;
803  while (!Stack.empty()) {
804  MBB = Stack.pop_back_val();
805  DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
806  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
807  TBI.HasValidInstrDepths = true;
808  TBI.CriticalPath = 0;
809 
810  // Print out resource depths here as well.
811  DEBUG({
812  dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
813  ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
814  for (unsigned K = 0; K != PRDepths.size(); ++K)
815  if (PRDepths[K]) {
816  unsigned Factor = MTM.SchedModel.getResourceFactor(K);
817  dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
818  << MTM.SchedModel.getProcResource(K)->Name << " ("
819  << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
820  }
821  });
822 
823  // Also compute the critical path length through MBB when possible.
824  if (TBI.HasValidInstrHeights)
825  TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
826 
827  for (const auto &UseMI : *MBB) {
828  // Collect all data dependencies.
829  Deps.clear();
830  if (UseMI.isPHI())
831  getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
832  else if (getDataDeps(UseMI, Deps, MTM.MRI))
833  updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
834 
835  // Filter and process dependencies, computing the earliest issue cycle.
836  unsigned Cycle = 0;
837  for (const DataDep &Dep : Deps) {
838  const TraceBlockInfo&DepTBI =
839  BlockInfo[Dep.DefMI->getParent()->getNumber()];
840  // Ignore dependencies from outside the current trace.
841  if (!DepTBI.isUsefulDominator(TBI))
842  continue;
843  assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
844  unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
845  // Add latency if DefMI is a real instruction. Transients get latency 0.
846  if (!Dep.DefMI->isTransient())
847  DepCycle += MTM.SchedModel
848  .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
849  Cycle = std::max(Cycle, DepCycle);
850  }
851  // Remember the instruction depth.
852  InstrCycles &MICycles = Cycles[&UseMI];
853  MICycles.Depth = Cycle;
854 
855  if (!TBI.HasValidInstrHeights) {
856  DEBUG(dbgs() << Cycle << '\t' << UseMI);
857  continue;
858  }
859  // Update critical path length.
860  TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
861  DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
862  }
863  }
864 }
865 
866 // Identify physreg dependencies for MI when scanning instructions upwards.
867 // Return the issue height of MI after considering any live regunits.
868 // Height is the issue height computed from virtual register dependencies alone.
869 static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
870  SparseSet<LiveRegUnit> &RegUnits,
871  const TargetSchedModel &SchedModel,
872  const TargetInstrInfo *TII,
873  const TargetRegisterInfo *TRI) {
874  SmallVector<unsigned, 8> ReadOps;
875 
876  for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
877  MOE = MI.operands_end();
878  MOI != MOE; ++MOI) {
879  const MachineOperand &MO = *MOI;
880  if (!MO.isReg())
881  continue;
882  unsigned Reg = MO.getReg();
883  if (!TargetRegisterInfo::isPhysicalRegister(Reg))
884  continue;
885  if (MO.readsReg())
886  ReadOps.push_back(MI.getOperandNo(MOI));
887  if (!MO.isDef())
888  continue;
889  // This is a def of Reg. Remove corresponding entries from RegUnits, and
890  // update MI Height to consider the physreg dependencies.
891  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
892  SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
893  if (I == RegUnits.end())
894  continue;
895  unsigned DepHeight = I->Cycle;
896  if (!MI.isTransient()) {
897  // We may not know the UseMI of this dependency, if it came from the
898  // live-in list. SchedModel can handle a NULL UseMI.
899  DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
900  I->MI, I->Op);
901  }
902  Height = std::max(Height, DepHeight);
903  // This regunit is dead above MI.
904  RegUnits.erase(I);
905  }
906  }
907 
908  // Now we know the height of MI. Update any regunits read.
909  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
910  unsigned Reg = MI.getOperand(ReadOps[i]).getReg();
911  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
912  LiveRegUnit &LRU = RegUnits[*Units];
913  // Set the height to the highest reader of the unit.
914  if (LRU.Cycle <= Height && LRU.MI != &MI) {
915  LRU.Cycle = Height;
916  LRU.MI = &MI;
917  LRU.Op = ReadOps[i];
918  }
919  }
920  }
921 
922  return Height;
923 }
924 
925 
926 typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
927 
928 // Push the height of DefMI upwards if required to match UseMI.
929 // Return true if this is the first time DefMI was seen.
930 static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
931  unsigned UseHeight, MIHeightMap &Heights,
932  const TargetSchedModel &SchedModel,
933  const TargetInstrInfo *TII) {
934  // Adjust height by Dep.DefMI latency.
935  if (!Dep.DefMI->isTransient())
936  UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
937  Dep.UseOp);
938 
939  // Update Heights[DefMI] to be the maximum height seen.
940  MIHeightMap::iterator I;
941  bool New;
942  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
943  if (New)
944  return true;
945 
946  // DefMI has been pushed before. Give it the max height.
947  if (I->second < UseHeight)
948  I->second = UseHeight;
949  return false;
950 }
951 
952 /// Assuming that the virtual register defined by DefMI:DefOp was used by
953 /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
954 /// when reaching the block that contains DefMI.
955 void MachineTraceMetrics::Ensemble::
956 addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
957  ArrayRef<const MachineBasicBlock*> Trace) {
958  assert(!Trace.empty() && "Trace should contain at least one block");
959  unsigned Reg = DefMI->getOperand(DefOp).getReg();
960  assert(TargetRegisterInfo::isVirtualRegister(Reg));
961  const MachineBasicBlock *DefMBB = DefMI->getParent();
962 
963  // Reg is live-in to all blocks in Trace that follow DefMBB.
964  for (unsigned i = Trace.size(); i; --i) {
965  const MachineBasicBlock *MBB = Trace[i-1];
966  if (MBB == DefMBB)
967  return;
968  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
969  // Just add the register. The height will be updated later.
970  TBI.LiveIns.push_back(Reg);
971  }
972 }
973 
974 /// Compute instruction heights in the trace through MBB. This updates MBB and
975 /// the blocks below it in the trace. It is assumed that the trace has already
976 /// been computed.
977 void MachineTraceMetrics::Ensemble::
978 computeInstrHeights(const MachineBasicBlock *MBB) {
979  // The bottom of the trace may already be computed.
980  // Find the blocks that need updating.
981  SmallVector<const MachineBasicBlock*, 8> Stack;
982  do {
983  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
984  assert(TBI.hasValidHeight() && "Incomplete trace");
985  if (TBI.HasValidInstrHeights)
986  break;
987  Stack.push_back(MBB);
988  TBI.LiveIns.clear();
989  MBB = TBI.Succ;
990  } while (MBB);
991 
992  // As we move upwards in the trace, keep track of instructions that are
993  // required by deeper trace instructions. Map MI -> height required so far.
994  MIHeightMap Heights;
995 
996  // For physregs, the def isn't known when we see the use.
997  // Instead, keep track of the highest use of each regunit.
998  SparseSet<LiveRegUnit> RegUnits;
999  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1000 
1001  // If the bottom of the trace was already precomputed, initialize heights
1002  // from its live-in list.
1003  // MBB is the highest precomputed block in the trace.
1004  if (MBB) {
1005  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1006  for (LiveInReg &LI : TBI.LiveIns) {
1007  if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
1008  // For virtual registers, the def latency is included.
1009  unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
1010  if (Height < LI.Height)
1011  Height = LI.Height;
1012  } else {
1013  // For register units, the def latency is not included because we don't
1014  // know the def yet.
1015  RegUnits[LI.Reg].Cycle = LI.Height;
1016  }
1017  }
1018  }
1019 
1020  // Go through the trace blocks in bottom-up order.
1021  SmallVector<DataDep, 8> Deps;
1022  for (;!Stack.empty(); Stack.pop_back()) {
1023  MBB = Stack.back();
1024  DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
1025  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1026  TBI.HasValidInstrHeights = true;
1027  TBI.CriticalPath = 0;
1028 
1029  DEBUG({
1030  dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1031  ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1032  for (unsigned K = 0; K != PRHeights.size(); ++K)
1033  if (PRHeights[K]) {
1034  unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1035  dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1036  << MTM.SchedModel.getProcResource(K)->Name << " ("
1037  << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1038  }
1039  });
1040 
1041  // Get dependencies from PHIs in the trace successor.
1042  const MachineBasicBlock *Succ = TBI.Succ;
1043  // If MBB is the last block in the trace, and it has a back-edge to the
1044  // loop header, get loop-carried dependencies from PHIs in the header. For
1045  // that purpose, pretend that all the loop header PHIs have height 0.
1046  if (!Succ)
1047  if (const MachineLoop *Loop = getLoopFor(MBB))
1048  if (MBB->isSuccessor(Loop->getHeader()))
1049  Succ = Loop->getHeader();
1050 
1051  if (Succ) {
1052  for (const auto &PHI : *Succ) {
1053  if (!PHI.isPHI())
1054  break;
1055  Deps.clear();
1056  getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1057  if (!Deps.empty()) {
1058  // Loop header PHI heights are all 0.
1059  unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1060  DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1061  if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1062  MTM.TII))
1063  addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1064  }
1065  }
1066  }
1067 
1068  // Go through the block backwards.
1069  for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
1070  BI != BB;) {
1071  const MachineInstr &MI = *--BI;
1072 
1073  // Find the MI height as determined by virtual register uses in the
1074  // trace below.
1075  unsigned Cycle = 0;
1076  MIHeightMap::iterator HeightI = Heights.find(&MI);
1077  if (HeightI != Heights.end()) {
1078  Cycle = HeightI->second;
1079  // We won't be seeing any more MI uses.
1080  Heights.erase(HeightI);
1081  }
1082 
1083  // Don't process PHI deps. They depend on the specific predecessor, and
1084  // we'll get them when visiting the predecessor.
1085  Deps.clear();
1086  bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1087 
1088  // There may also be regunit dependencies to include in the height.
1089  if (HasPhysRegs)
1090  Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1091  MTM.TII, MTM.TRI);
1092 
1093  // Update the required height of any virtual registers read by MI.
1094  for (const DataDep &Dep : Deps)
1095  if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1096  addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1097 
1098  InstrCycles &MICycles = Cycles[&MI];
1099  MICycles.Height = Cycle;
1100  if (!TBI.HasValidInstrDepths) {
1101  DEBUG(dbgs() << Cycle << '\t' << MI);
1102  continue;
1103  }
1104  // Update critical path length.
1105  TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1106  DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1107  }
1108 
1109  // Update virtual live-in heights. They were added by addLiveIns() with a 0
1110  // height because the final height isn't known until now.
1111  DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
1112  for (LiveInReg &LIR : TBI.LiveIns) {
1113  const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
1114  LIR.Height = Heights.lookup(DefMI);
1115  DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
1116  }
1117 
1118  // Transfer the live regunits to the live-in list.
1119  for (SparseSet<LiveRegUnit>::const_iterator
1120  RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
1121  TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
1122  DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
1123  << '@' << RI->Cycle);
1124  }
1125  DEBUG(dbgs() << '\n');
1126 
1127  if (!TBI.HasValidInstrDepths)
1128  continue;
1129  // Add live-ins to the critical path length.
1130  TBI.CriticalPath = std::max(TBI.CriticalPath,
1131  computeCrossBlockCriticalPath(TBI));
1132  DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1133  }
1134 }
1135 
1136 MachineTraceMetrics::Trace
1137 MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
1138  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1139 
1140  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1141  computeTrace(MBB);
1142  if (!TBI.HasValidInstrDepths)
1143  computeInstrDepths(MBB);
1144  if (!TBI.HasValidInstrHeights)
1145  computeInstrHeights(MBB);
1146 
1147  return Trace(*this, TBI);
1148 }
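// --- Editor's illustrative sketch (not part of the original LLVM source) ---
// A typical querying sequence once a client holds an Ensemble (see
// getEnsemble() above): getTrace() lazily runs the three compute steps and
// returns a lightweight handle. Names and parameters are assumptions made for
// the example only.
static void exampleQueryTrace(MachineTraceMetrics::Ensemble &E,
                              const MachineBasicBlock &MBB) {
  MachineTraceMetrics::Trace T = E.getTrace(&MBB);
  unsigned CritPath = T.getCriticalPath();       // cycles along the whole trace
  unsigned TopDepth = T.getResourceDepth(false); // issue cycles above MBB
  unsigned BotDepth = T.getResourceDepth(true);  // same, including MBB itself
  (void)CritPath; (void)TopDepth; (void)BotDepth;
}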
1149 
1150 unsigned
1151 MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
1152  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1153  "MI must be in the trace center block");
1154  InstrCycles Cyc = getInstrCycles(MI);
1155  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1156 }
1157 
1158 unsigned
1159 MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
1160  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1161  SmallVector<DataDep, 1> Deps;
1162  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1163  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1164  DataDep &Dep = Deps.front();
1165  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1166  // Add latency if DefMI is a real instruction. Transients get latency 0.
1167  if (!Dep.DefMI->isTransient())
1168  DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1169  &PHI, Dep.UseOp);
1170  return DepCycle;
1171 }
1172 
1173 /// When Bottom is set, include instructions in the current block in the estimate.
1174 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
1175  // Find the limiting processor resource.
1176  // Numbers have been pre-scaled to be comparable.
1177  unsigned PRMax = 0;
1178  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1179  if (Bottom) {
1180  ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
1181  for (unsigned K = 0; K != PRDepths.size(); ++K)
1182  PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1183  } else {
1184  for (unsigned K = 0; K != PRDepths.size(); ++K)
1185  PRMax = std::max(PRMax, PRDepths[K]);
1186  }
1187  // Convert to cycle count.
1188  PRMax = TE.MTM.getCycles(PRMax);
1189 
1190  /// All instructions before current block
1191  unsigned Instrs = TBI.InstrDepth;
1192  // plus instructions in current block
1193  if (Bottom)
1194  Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1195  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1196  Instrs /= IW;
1197  // Assume issue width 1 without a schedule model.
1198  return std::max(Instrs, PRMax);
1199 }
1200 
1201 unsigned MachineTraceMetrics::Trace::getResourceLength(
1202  ArrayRef<const MachineBasicBlock *> Extrablocks,
1203  ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
1204  ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1205  // Add up resources above and below the center block.
1206  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1207  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1208  unsigned PRMax = 0;
1209 
1210  // Capture computing cycles from extra instructions
1211  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1212  unsigned ResourceIdx)
1213  ->unsigned {
1214  unsigned Cycles = 0;
1215  for (const MCSchedClassDesc *SC : Instrs) {
1216  if (!SC->isValid())
1217  continue;
1218  for (TargetSchedModel::ProcResIter
1219  PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1220  PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1221  PI != PE; ++PI) {
1222  if (PI->ProcResourceIdx != ResourceIdx)
1223  continue;
1224  Cycles +=
1225  (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1226  }
1227  }
1228  return Cycles;
1229  };
1230 
1231  for (unsigned K = 0; K != PRDepths.size(); ++K) {
1232  unsigned PRCycles = PRDepths[K] + PRHeights[K];
1233  for (const MachineBasicBlock *MBB : Extrablocks)
1234  PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
1235  PRCycles += extraCycles(ExtraInstrs, K);
1236  PRCycles -= extraCycles(RemoveInstrs, K);
1237  PRMax = std::max(PRMax, PRCycles);
1238  }
1239  // Convert to cycle count.
1240  PRMax = TE.MTM.getCycles(PRMax);
1241 
1242  // Instrs: #instructions in current trace outside current block.
1243  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1244  // Add instruction count from the extra blocks.
1245  for (const MachineBasicBlock *MBB : Extrablocks)
1246  Instrs += TE.MTM.getResources(MBB)->InstrCount;
1247  Instrs += ExtraInstrs.size();
1248  Instrs -= RemoveInstrs.size();
1249  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1250  Instrs /= IW;
1251  // Assume issue width 1 without a schedule model.
1252  return std::max(Instrs, PRMax);
1253 }
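// --- Editor's illustrative sketch (not part of the original LLVM source) ---
// This mirrors how a combining pass can ask whether replacing instructions
// would grow the trace's resource length: pass the scheduling classes of the
// instructions it would insert and remove. NewSCs/OldSCs are assumed to have
// been resolved by the caller (e.g. via resolveSchedClass()).
static bool exampleResourceLengthDoesNotGrow(
    const MachineTraceMetrics::Trace &T,
    ArrayRef<const MCSchedClassDesc *> NewSCs,
    ArrayRef<const MCSchedClassDesc *> OldSCs) {
  unsigned LenBefore = T.getResourceLength();
  unsigned LenAfter = T.getResourceLength(None, NewSCs, OldSCs);
  return LenAfter <= LenBefore;
}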
1254 
1255 bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
1256  const MachineInstr &UseMI) const {
1257  if (DefMI.getParent() == UseMI.getParent())
1258  return true;
1259 
1260  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1261  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1262 
1263  return DepTBI.isUsefulDominator(TBI);
1264 }
1265 
1266 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
1267  OS << getName() << " ensemble:\n";
1268  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1269  OS << " BB#" << i << '\t';
1270  BlockInfo[i].print(OS);
1271  OS << '\n';
1272  }
1273 }
1274 
1275 void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
1276  if (hasValidDepth()) {
1277  OS << "depth=" << InstrDepth;
1278  if (Pred)
1279  OS << " pred=BB#" << Pred->getNumber();
1280  else
1281  OS << " pred=null";
1282  OS << " head=BB#" << Head;
1283  if (HasValidInstrDepths)
1284  OS << " +instrs";
1285  } else
1286  OS << "depth invalid";
1287  OS << ", ";
1288  if (hasValidHeight()) {
1289  OS << "height=" << InstrHeight;
1290  if (Succ)
1291  OS << " succ=BB#" << Succ->getNumber();
1292  else
1293  OS << " succ=null";
1294  OS << " tail=BB#" << Tail;
1295  if (HasValidInstrHeights)
1296  OS << " +instrs";
1297  } else
1298  OS << "height invalid";
1299  if (HasValidInstrDepths && HasValidInstrHeights)
1300  OS << ", crit=" << CriticalPath;
1301 }
1302 
1303 void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
1304  unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1305 
1306  OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
1307  << " --> BB#" << TBI.Tail << ':';
1308  if (TBI.hasValidHeight() && TBI.hasValidDepth())
1309  OS << ' ' << getInstrCount() << " instrs.";
1310  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1311  OS << ' ' << TBI.CriticalPath << " cycles.";
1312 
1313  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
1314  OS << "\nBB#" << MBBNum;
1315  while (Block->hasValidDepth() && Block->Pred) {
1316  unsigned Num = Block->Pred->getNumber();
1317  OS << " <- BB#" << Num;
1318  Block = &TE.BlockInfo[Num];
1319  }
1320 
1321  Block = &TBI;
1322  OS << "\n ";
1323  while (Block->hasValidHeight() && Block->Succ) {
1324  unsigned Num = Block->Succ->getNumber();
1325  OS << " -> BB#" << Num;
1326  Block = &TE.BlockInfo[Num];
1327  }
1328  OS << '\n';
1329 }