LLVM 22.0.0git
MachineTraceMetrics.cpp
Go to the documentation of this file.
1//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/SparseSet.h"
26#include "llvm/Pass.h"
27#include "llvm/Support/Debug.h"
29#include "llvm/Support/Format.h"
31#include <algorithm>
32#include <cassert>
33#include <tuple>
34#include <utility>
35
36using namespace llvm;
37
38#define DEBUG_TYPE "machine-trace-metrics"
39
// Identity key registering MachineTraceMetricsAnalysis with the new pass
// manager's analysis caching machinery.
AnalysisKey MachineTraceMetricsAnalysis::Key;
41
47
54
56
58
60 "Machine Trace Metrics", false, true)
63 "Machine Trace Metrics", false, true)
64
67
73
75 const MachineLoopInfo &LI) {
76 MF = &Func;
77 const TargetSubtargetInfo &ST = MF->getSubtarget();
78 TII = ST.getInstrInfo();
79 TRI = ST.getRegisterInfo();
80 MRI = &MF->getRegInfo();
81 Loops = &LI;
82 SchedModel.init(&ST);
83 BlockInfo.resize(MF->getNumBlockIDs());
84 ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
85 SchedModel.getNumProcResourceKinds());
86}
87
92
94
96 MF = nullptr;
97 BlockInfo.clear();
98 for (auto &E : Ensembles)
99 E.reset();
100}
101
102//===----------------------------------------------------------------------===//
103// Fixed block information
104//===----------------------------------------------------------------------===//
105//
106// The number of instructions in a basic block and the CPU resources used by
107// those instructions don't depend on any given trace strategy.
108
109/// Compute the resource usage in basic block MBB.
112 assert(MBB && "No basic block");
113 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
114 if (FBI->hasResources())
115 return FBI;
116
117 // Compute resource usage in the block.
118 FBI->HasCalls = false;
119 unsigned InstrCount = 0;
120
121 // Add up per-processor resource cycles as well.
122 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
123 SmallVector<unsigned, 32> PRCycles(PRKinds);
124
125 for (const auto &MI : *MBB) {
126 if (MI.isTransient())
127 continue;
128 ++InstrCount;
129 if (MI.isCall())
130 FBI->HasCalls = true;
131
132 // Count processor resources used.
133 if (!SchedModel.hasInstrSchedModel())
134 continue;
135 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
136 if (!SC->isValid())
137 continue;
138
140 PI = SchedModel.getWriteProcResBegin(SC),
141 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
142 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
143 PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
144 }
145 }
146 FBI->InstrCount = InstrCount;
147
148 // Scale the resource cycles so they are comparable.
149 unsigned PROffset = MBB->getNumber() * PRKinds;
150 for (unsigned K = 0; K != PRKinds; ++K)
151 ProcReleaseAtCycles[PROffset + K] =
152 PRCycles[K] * SchedModel.getResourceFactor(K);
153
154 return FBI;
155}
156
159 assert(BlockInfo[MBBNum].hasResources() &&
160 "getResources() must be called before getProcReleaseAtCycles()");
161 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
162 assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
163 return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
164}
165
166//===----------------------------------------------------------------------===//
167// Ensemble utility functions
168//===----------------------------------------------------------------------===//
169
171 : MTM(*ct) {
172 BlockInfo.resize(MTM.BlockInfo.size());
173 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
174 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
175 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
176}
177
178// Virtual destructor serves as an anchor.
180
181const MachineLoop*
183 return MTM.Loops->getLoopFor(MBB);
184}
185
186// Update resource-related information in the TraceBlockInfo for MBB.
187// Only update resources related to the trace above MBB.
188void MachineTraceMetrics::Ensemble::
189computeDepthResources(const MachineBasicBlock *MBB) {
190 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
191 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
192 unsigned PROffset = MBB->getNumber() * PRKinds;
193
194 // Compute resources from trace above. The top block is simple.
195 if (!TBI->Pred) {
196 TBI->InstrDepth = 0;
197 TBI->Head = MBB->getNumber();
198 std::fill(ProcResourceDepths.begin() + PROffset,
199 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
200 return;
201 }
202
203 // Compute from the block above. A post-order traversal ensures the
204 // predecessor is always computed first.
205 unsigned PredNum = TBI->Pred->getNumber();
206 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
207 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
208 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
209 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
210 TBI->Head = PredTBI->Head;
211
212 // Compute per-resource depths.
213 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
214 ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
215 for (unsigned K = 0; K != PRKinds; ++K)
216 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
217}
218
219// Update resource-related information in the TraceBlockInfo for MBB.
220// Only update resources related to the trace below MBB.
221void MachineTraceMetrics::Ensemble::
222computeHeightResources(const MachineBasicBlock *MBB) {
223 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
224 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
225 unsigned PROffset = MBB->getNumber() * PRKinds;
226
227 // Compute resources for the current block.
228 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
229 ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
230
231 // The trace tail is done.
232 if (!TBI->Succ) {
233 TBI->Tail = MBB->getNumber();
234 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
235 return;
236 }
237
238 // Compute from the block below. A post-order traversal ensures the
239 // predecessor is always computed first.
240 unsigned SuccNum = TBI->Succ->getNumber();
241 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
242 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
243 TBI->InstrHeight += SuccTBI->InstrHeight;
244 TBI->Tail = SuccTBI->Tail;
245
246 // Compute per-resource heights.
247 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
248 for (unsigned K = 0; K != PRKinds; ++K)
249 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
250}
251
252// Check if depth resources for MBB are valid and return the TBI.
253// Return NULL if the resources have been invalidated.
254const MachineTraceMetrics::TraceBlockInfo*
257 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
258 return TBI->hasValidDepth() ? TBI : nullptr;
259}
260
261// Check if height resources for MBB are valid and return the TBI.
262// Return NULL if the resources have been invalidated.
266 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
267 return TBI->hasValidHeight() ? TBI : nullptr;
268}
269
270/// Get an array of processor resource depths for MBB. Indexed by processor
271/// resource kind, this array contains the scaled processor resources consumed
272/// by all blocks preceding MBB in its trace. It does not include instructions
273/// in MBB.
274///
275/// Compare TraceBlockInfo::InstrDepth.
278getProcResourceDepths(unsigned MBBNum) const {
279 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
280 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
281 return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
282}
283
284/// Get an array of processor resource heights for MBB. Indexed by processor
285/// resource kind, this array contains the scaled processor resources consumed
286/// by this block and all blocks following it in its trace.
287///
288/// Compare TraceBlockInfo::InstrHeight.
291getProcResourceHeights(unsigned MBBNum) const {
292 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
293 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
294 return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
295}
296
297//===----------------------------------------------------------------------===//
298// Trace Selection Strategies
299//===----------------------------------------------------------------------===//
300//
301// A trace selection strategy is implemented as a sub-class of Ensemble. The
302// trace through a block B is computed by two DFS traversals of the CFG
303// starting from B. One upwards, and one downwards. During the upwards DFS,
304// pickTracePred() is called on the post-ordered blocks. During the downwards
305// DFS, pickTraceSucc() is called in a post-order.
306//
307
308// We never allow traces that leave loops, but we do allow traces to enter
309// nested loops. We also never allow traces to contain back-edges.
310//
311// This means that a loop header can never appear above the center block of a
312// trace, except as the trace head. Below the center block, loop exiting edges
313// are banned.
314//
315// Return true if an edge from the From loop to the To loop is leaving a loop.
316// Either of To and From can be null.
317static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
318 return From && !From->contains(To);
319}
320
321// MinInstrCountEnsemble - Pick the trace that executes the least number of
322// instructions.
323namespace {
324
325class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
326 const char *getName() const override { return "MinInstr"; }
327 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
328 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
329
330public:
331 MinInstrCountEnsemble(MachineTraceMetrics *mtm)
332 : MachineTraceMetrics::Ensemble(mtm) {}
333};
334
335/// Pick only the current basic block for the trace and do not choose any
336/// predecessors/successors.
337class LocalEnsemble : public MachineTraceMetrics::Ensemble {
338 const char *getName() const override { return "Local"; }
339 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
340 return nullptr;
341 };
342 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
343 return nullptr;
344 };
345
346public:
347 LocalEnsemble(MachineTraceMetrics *MTM)
348 : MachineTraceMetrics::Ensemble(MTM) {}
349};
350} // end anonymous namespace
351
352// Select the preferred predecessor for MBB.
353const MachineBasicBlock*
354MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
355 if (MBB->pred_empty())
356 return nullptr;
357 const MachineLoop *CurLoop = getLoopFor(MBB);
358 // Don't leave loops, and never follow back-edges.
359 if (CurLoop && MBB == CurLoop->getHeader())
360 return nullptr;
361 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
362 const MachineBasicBlock *Best = nullptr;
363 unsigned BestDepth = 0;
364 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
365 const MachineTraceMetrics::TraceBlockInfo *PredTBI =
366 getDepthResources(Pred);
367 // Ignore cycles that aren't natural loops.
368 if (!PredTBI)
369 continue;
370 // Pick the predecessor that would give this block the smallest InstrDepth.
371 unsigned Depth = PredTBI->InstrDepth + CurCount;
372 if (!Best || Depth < BestDepth) {
373 Best = Pred;
374 BestDepth = Depth;
375 }
376 }
377 return Best;
378}
379
380// Select the preferred successor for MBB.
381const MachineBasicBlock*
382MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
383 if (MBB->succ_empty())
384 return nullptr;
385 const MachineLoop *CurLoop = getLoopFor(MBB);
386 const MachineBasicBlock *Best = nullptr;
387 unsigned BestHeight = 0;
388 for (const MachineBasicBlock *Succ : MBB->successors()) {
389 // Don't consider back-edges.
390 if (CurLoop && Succ == CurLoop->getHeader())
391 continue;
392 // Don't consider successors exiting CurLoop.
393 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
394 continue;
395 const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
396 getHeightResources(Succ);
397 // Ignore cycles that aren't natural loops.
398 if (!SuccTBI)
399 continue;
400 // Pick the successor that would give this block the smallest InstrHeight.
401 unsigned Height = SuccTBI->InstrHeight;
402 if (!Best || Height < BestHeight) {
403 Best = Succ;
404 BestHeight = Height;
405 }
406 }
407 return Best;
408}
409
410// Get an Ensemble sub-class for the requested trace strategy.
414 "Invalid trace strategy enum");
415 std::unique_ptr<MachineTraceMetrics::Ensemble> &E =
416 Ensembles[static_cast<size_t>(strategy)];
417 if (E)
418 return E.get();
419
420 // Allocate new Ensemble on demand.
421 switch (strategy) {
423 E = std::make_unique<MinInstrCountEnsemble>(MinInstrCountEnsemble(this));
424 break;
426 E = std::make_unique<LocalEnsemble>(LocalEnsemble(this));
427 break;
428 default: llvm_unreachable("Invalid trace strategy enum");
429 }
430 return E.get();
431}
432
434 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
435 << '\n');
436 BlockInfo[MBB->getNumber()].invalidate();
437 for (auto &E : Ensembles)
438 if (E)
439 E->invalidate(MBB);
440}
441
444 MachineFunctionAnalysisManager::Invalidator &) {
445 // Check whether the analysis, all analyses on machine functions, or the
446 // machine function's CFG have been preserved.
448 return !PAC.preserved() &&
449 !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
450 !PAC.preservedSet<CFGAnalyses>();
451}
452
454 if (!MF)
455 return;
456#ifndef NDEBUG
457 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
458 for (auto &E : Ensembles)
459 if (E)
460 E->verify();
461#endif
462}
463
464//===----------------------------------------------------------------------===//
465// Trace building
466//===----------------------------------------------------------------------===//
467//
468// Traces are built by two CFG traversals. To avoid recomputing too much, use a
469// set abstraction that confines the search to the current loop, and doesn't
470// revisit blocks.
471
472namespace {
473
474struct LoopBounds {
477 const MachineLoopInfo *Loops;
478 bool Downward = false;
479
481 const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
482};
483
484} // end anonymous namespace
485
486// Specialize po_iterator_storage in order to prune the post-order traversal so
487// it is limited to the current loop and doesn't traverse the loop back edges.
488template <> class llvm::po_iterator_storage<LoopBounds, true> {
489 LoopBounds &LB;
490
491public:
492 po_iterator_storage(LoopBounds &lb) : LB(lb) {}
493
495
496 bool insertEdge(std::optional<const MachineBasicBlock *> From,
497 const MachineBasicBlock *To) {
498 // Skip already visited To blocks.
499 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
500 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
501 return false;
502 // From is null once when To is the trace center block.
503 if (From) {
504 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
505 // Don't follow backedges, don't leave FromLoop when going upwards.
506 if ((LB.Downward ? To : *From) == FromLoop->getHeader())
507 return false;
508 // Don't leave FromLoop.
509 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
510 return false;
511 }
512 }
513 // To is a new block. Mark the block as visited in case the CFG has cycles
514 // that MachineLoopInfo didn't recognize as a natural loop.
515 return LB.Visited.insert(To).second;
516 }
517};
518
519/// Compute the trace through MBB.
520void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
521 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
522 << printMBBReference(*MBB) << '\n');
523 // Set up loop bounds for the backwards post-order traversal.
524 LoopBounds Bounds(BlockInfo, MTM.Loops);
525
526 // Run an upwards post-order search for the trace start.
527 Bounds.Downward = false;
528 Bounds.Visited.clear();
529 for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
530 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
531 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
532 // All the predecessors have been visited, pick the preferred one.
533 TBI.Pred = pickTracePred(I);
534 LLVM_DEBUG({
535 if (TBI.Pred)
536 dbgs() << printMBBReference(*TBI.Pred) << '\n';
537 else
538 dbgs() << "null\n";
539 });
540 // The trace leading to I is now known, compute the depth resources.
541 computeDepthResources(I);
542 }
543
544 // Run a downwards post-order search for the trace end.
545 Bounds.Downward = true;
546 Bounds.Visited.clear();
547 for (const auto *I : post_order_ext(MBB, Bounds)) {
548 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
549 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
550 // All the successors have been visited, pick the preferred one.
551 TBI.Succ = pickTraceSucc(I);
552 LLVM_DEBUG({
553 if (TBI.Succ)
554 dbgs() << printMBBReference(*TBI.Succ) << '\n';
555 else
556 dbgs() << "null\n";
557 });
558 // The trace leaving I is now known, compute the height resources.
559 computeHeightResources(I);
560 }
561}
562
563/// Invalidate traces through BadMBB.
564void
567 TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
568
569 // Invalidate height resources of blocks above MBB.
570 if (BadTBI.hasValidHeight()) {
571 BadTBI.invalidateHeight();
572 WorkList.push_back(BadMBB);
573 do {
574 const MachineBasicBlock *MBB = WorkList.pop_back_val();
575 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
576 << getName() << " height.\n");
577 // Find any MBB predecessors that have MBB as their preferred successor.
578 // They are the only ones that need to be invalidated.
579 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
580 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
581 if (!TBI.hasValidHeight())
582 continue;
583 if (TBI.Succ == MBB) {
584 TBI.invalidateHeight();
585 WorkList.push_back(Pred);
586 continue;
587 }
588 // Verify that TBI.Succ is actually a *I successor.
589 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
590 }
591 } while (!WorkList.empty());
592 }
593
594 // Invalidate depth resources of blocks below MBB.
595 if (BadTBI.hasValidDepth()) {
596 BadTBI.invalidateDepth();
597 WorkList.push_back(BadMBB);
598 do {
599 const MachineBasicBlock *MBB = WorkList.pop_back_val();
600 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
601 << getName() << " depth.\n");
602 // Find any MBB successors that have MBB as their preferred predecessor.
603 // They are the only ones that need to be invalidated.
604 for (const MachineBasicBlock *Succ : MBB->successors()) {
605 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
606 if (!TBI.hasValidDepth())
607 continue;
608 if (TBI.Pred == MBB) {
609 TBI.invalidateDepth();
610 WorkList.push_back(Succ);
611 continue;
612 }
613 // Verify that TBI.Pred is actually a *I predecessor.
614 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
615 }
616 } while (!WorkList.empty());
617 }
618
619 // Clear any per-instruction data. We only have to do this for BadMBB itself
620 // because the instructions in that block may change. Other blocks may be
621 // invalidated, but their instructions will stay the same, so there is no
622 // need to erase the Cycle entries. They will be overwritten when we
623 // recompute.
624 for (const auto &I : *BadMBB)
625 Cycles.erase(&I);
626}
627
629#ifndef NDEBUG
630 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
631 "Outdated BlockInfo size");
632 for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
633 const TraceBlockInfo &TBI = BlockInfo[Num];
634 if (TBI.hasValidDepth() && TBI.Pred) {
635 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
636 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
637 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
638 "Trace is broken, depth should have been invalidated.");
639 const MachineLoop *Loop = getLoopFor(MBB);
640 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
641 }
642 if (TBI.hasValidHeight() && TBI.Succ) {
643 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
644 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
645 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
646 "Trace is broken, height should have been invalidated.");
647 const MachineLoop *Loop = getLoopFor(MBB);
648 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
649 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
650 "Trace contains backedge");
651 }
652 }
653#endif
654}
655
656//===----------------------------------------------------------------------===//
657// Data Dependencies
658//===----------------------------------------------------------------------===//
659//
660// Compute the depth and height of each instruction based on data dependencies
661// and instruction latencies. These cycle numbers assume that the CPU can issue
662// an infinite number of instructions per cycle as long as their dependencies
663// are ready.
664
665// A data dependency is represented as a defining MI and operand numbers on the
666// defining and using MI.
namespace {

// A single data dependency: operand DefOp of DefMI defines the value read by
// operand UseOp of the using instruction.
struct DataDep {
  const MachineInstr *DefMI;   // Defining instruction.
  unsigned DefOp;              // Operand index of the def on DefMI.
  unsigned UseOp;              // Operand index of the use on the user.

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
      : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  /// Asserts that VirtReg has exactly one def (SSA invariant).
  DataDep(const MachineRegisterInfo *MRI, Register VirtReg, unsigned UseOp)
      : UseOp(UseOp) {
    assert(VirtReg.isVirtual());
    MachineOperand *DefMO = MRI->getOneDef(VirtReg);
    assert(DefMO && "Register does not have unique def");
    DefMI = DefMO->getParent();
    DefOp = DefMO->getOperandNo();
  }
};

} // end anonymous namespace
689
690// Get the input data dependencies that must be ready before UseMI can issue.
691// Return true if UseMI has any physreg operands.
692static bool getDataDeps(const MachineInstr &UseMI,
694 const MachineRegisterInfo *MRI) {
695 // Debug values should not be included in any calculations.
696 if (UseMI.isDebugInstr())
697 return false;
698
699 bool HasPhysRegs = false;
700 for (const MachineOperand &MO : UseMI.operands()) {
701 if (!MO.isReg())
702 continue;
703 Register Reg = MO.getReg();
704 if (!Reg)
705 continue;
706 if (Reg.isPhysical()) {
707 HasPhysRegs = true;
708 continue;
709 }
710 // Collect virtual register reads.
711 if (MO.readsReg())
712 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
713 }
714 return HasPhysRegs;
715}
716
717// Get the input data dependencies of a PHI instruction, using Pred as the
718// preferred predecessor.
719// This will add at most one dependency to Deps.
720static void getPHIDeps(const MachineInstr &UseMI,
722 const MachineBasicBlock *Pred,
723 const MachineRegisterInfo *MRI) {
724 // No predecessor at the beginning of a trace. Ignore dependencies.
725 if (!Pred)
726 return;
727 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
728 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
729 if (UseMI.getOperand(i + 1).getMBB() == Pred) {
730 Register Reg = UseMI.getOperand(i).getReg();
731 Deps.push_back(DataDep(MRI, Reg, i));
732 return;
733 }
734 }
735}
736
737// Identify physreg dependencies for UseMI, and update the live regunit
738// tracking set when scanning instructions downwards.
741 SparseSet<LiveRegUnit> &RegUnits,
742 const TargetRegisterInfo *TRI) {
744 SmallVector<unsigned, 8> LiveDefOps;
745
746 for (const MachineOperand &MO : UseMI->operands()) {
747 if (!MO.isReg() || !MO.getReg().isPhysical())
748 continue;
749 MCRegister Reg = MO.getReg().asMCReg();
750 // Track live defs and kills for updating RegUnits.
751 if (MO.isDef()) {
752 if (MO.isDead())
753 Kills.push_back(Reg);
754 else
755 LiveDefOps.push_back(MO.getOperandNo());
756 } else if (MO.isKill())
757 Kills.push_back(Reg);
758 // Identify dependencies.
759 if (!MO.readsReg())
760 continue;
761 for (MCRegUnit Unit : TRI->regunits(Reg)) {
762 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
763 if (I == RegUnits.end())
764 continue;
765 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
766 break;
767 }
768 }
769
770 // Update RegUnits to reflect live registers after UseMI.
771 // First kills.
772 for (MCRegister Kill : Kills)
773 for (MCRegUnit Unit : TRI->regunits(Kill))
774 RegUnits.erase(Unit);
775
776 // Second, live defs.
777 for (unsigned DefOp : LiveDefOps) {
778 for (MCRegUnit Unit :
779 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
780 LiveRegUnit &LRU = RegUnits[Unit];
781 LRU.MI = UseMI;
782 LRU.Op = DefOp;
783 }
784 }
785}
786
787/// The length of the critical path through a trace is the maximum of two path
788/// lengths:
789///
790/// 1. The maximum height+depth over all instructions in the trace center block.
791///
792/// 2. The longest cross-block dependency chain. For small blocks, it is
793/// possible that the critical path through the trace doesn't include any
794/// instructions in the block.
795///
796/// This function computes the second number from the live-in list of the
797/// center block.
798unsigned MachineTraceMetrics::Ensemble::
799computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
800 assert(TBI.HasValidInstrDepths && "Missing depth info");
801 assert(TBI.HasValidInstrHeights && "Missing height info");
802 unsigned MaxLen = 0;
803 for (const LiveInReg &LIR : TBI.LiveIns) {
804 if (!LIR.Reg.isVirtual())
805 continue;
806 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
807 // Ignore dependencies outside the current trace.
808 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
809 if (!DefTBI.isUsefulDominator(TBI))
810 continue;
811 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
812 MaxLen = std::max(MaxLen, Len);
813 }
814 return MaxLen;
815}
816
819 SparseSet<LiveRegUnit> &RegUnits) {
821 // Collect all data dependencies.
822 if (UseMI.isPHI())
823 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
824 else if (getDataDeps(UseMI, Deps, MTM.MRI))
825 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
826
827 // Filter and process dependencies, computing the earliest issue cycle.
828 unsigned Cycle = 0;
829 for (const DataDep &Dep : Deps) {
830 const TraceBlockInfo&DepTBI =
831 BlockInfo[Dep.DefMI->getParent()->getNumber()];
832 // Ignore dependencies from outside the current trace.
833 if (!DepTBI.isUsefulDominator(TBI))
834 continue;
835 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
836 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
837 // Add latency if DefMI is a real instruction. Transients get latency 0.
838 if (!Dep.DefMI->isTransient())
839 DepCycle += MTM.SchedModel
840 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
841 Cycle = std::max(Cycle, DepCycle);
842 }
843 // Remember the instruction depth.
844 InstrCycles &MICycles = Cycles[&UseMI];
845 MICycles.Depth = Cycle;
846
847 if (TBI.HasValidInstrHeights) {
848 // Update critical path length.
849 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
850 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
851 } else {
852 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
853 }
854}
855
858 SparseSet<LiveRegUnit> &RegUnits) {
859 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
860}
861
865 SparseSet<LiveRegUnit> &RegUnits) {
866 for (; Start != End; Start++)
867 updateDepth(Start->getParent(), *Start, RegUnits);
868}
869
870/// Compute instruction depths for all instructions above or in MBB in its
871/// trace. This assumes that the trace through MBB has already been computed.
872void MachineTraceMetrics::Ensemble::
873computeInstrDepths(const MachineBasicBlock *MBB) {
874 // The top of the trace may already be computed, and HasValidInstrDepths
875 // implies Head->HasValidInstrDepths, so we only need to start from the first
876 // block in the trace that needs to be recomputed.
878 do {
879 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
880 assert(TBI.hasValidDepth() && "Incomplete trace");
881 if (TBI.HasValidInstrDepths)
882 break;
883 Stack.push_back(MBB);
884 MBB = TBI.Pred;
885 } while (MBB);
886
887 // FIXME: If MBB is non-null at this point, it is the last pre-computed block
888 // in the trace. We should track any live-out physregs that were defined in
889 // the trace. This is quite rare in SSA form, typically created by CSE
890 // hoisting a compare.
891 SparseSet<LiveRegUnit> RegUnits;
892 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
893
894 // Go through trace blocks in top-down order, stopping after the center block.
895 while (!Stack.empty()) {
896 MBB = Stack.pop_back_val();
897 LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
898 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
899 TBI.HasValidInstrDepths = true;
900 TBI.CriticalPath = 0;
901
902 // Print out resource depths here as well.
903 LLVM_DEBUG({
904 dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
905 ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
906 for (unsigned K = 0; K != PRDepths.size(); ++K)
907 if (PRDepths[K]) {
908 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
909 dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
910 << MTM.SchedModel.getProcResource(K)->Name << " ("
911 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
912 }
913 });
914
915 // Also compute the critical path length through MBB when possible.
916 if (TBI.HasValidInstrHeights)
917 TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
918
919 for (const auto &UseMI : *MBB) {
920 updateDepth(TBI, UseMI, RegUnits);
921 }
922 }
923}
924
925// Identify physreg dependencies for MI when scanning instructions upwards.
926// Return the issue height of MI after considering any live regunits.
927// Height is the issue height computed from virtual register dependencies alone.
928static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
929 SparseSet<LiveRegUnit> &RegUnits,
930 const TargetSchedModel &SchedModel,
931 const TargetInstrInfo *TII,
932 const TargetRegisterInfo *TRI) {
934
935 for (const MachineOperand &MO : MI.operands()) {
936 if (!MO.isReg())
937 continue;
938 Register Reg = MO.getReg();
939 if (!Reg.isPhysical())
940 continue;
941 if (MO.readsReg())
942 ReadOps.push_back(MO.getOperandNo());
943 if (!MO.isDef())
944 continue;
945 // This is a def of Reg. Remove corresponding entries from RegUnits, and
946 // update MI Height to consider the physreg dependencies.
947 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
948 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
949 if (I == RegUnits.end())
950 continue;
951 unsigned DepHeight = I->Cycle;
952 if (!MI.isTransient()) {
953 // We may not know the UseMI of this dependency, if it came from the
954 // live-in list. SchedModel can handle a NULL UseMI.
955 DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
956 I->MI, I->Op);
957 }
958 Height = std::max(Height, DepHeight);
959 // This regunit is dead above MI.
960 RegUnits.erase(I);
961 }
962 }
963
964 // Now we know the height of MI. Update any regunits read.
965 for (unsigned Op : ReadOps) {
966 MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
967 for (MCRegUnit Unit : TRI->regunits(Reg)) {
968 LiveRegUnit &LRU = RegUnits[Unit];
969 // Set the height to the highest reader of the unit.
970 if (LRU.Cycle <= Height && LRU.MI != &MI) {
971 LRU.Cycle = Height;
972 LRU.MI = &MI;
973 LRU.Op = Op;
974 }
975 }
976 }
977
978 return Height;
979}
980
982
983// Push the height of DefMI upwards if required to match UseMI.
984// Return true if this is the first time DefMI was seen.
985static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
986 unsigned UseHeight, MIHeightMap &Heights,
987 const TargetSchedModel &SchedModel,
988 const TargetInstrInfo *TII) {
989 // Adjust height by Dep.DefMI latency.
990 if (!Dep.DefMI->isTransient())
991 UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
992 Dep.UseOp);
993
994 // Update Heights[DefMI] to be the maximum height seen.
996 bool New;
997 std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
998 if (New)
999 return true;
1000
1001 // DefMI has been pushed before. Give it the max height.
1002 if (I->second < UseHeight)
1003 I->second = UseHeight;
1004 return false;
1005}
1006
1007/// Assuming that the virtual register defined by DefMI:DefOp was used by
1008/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
1009/// when reaching the block that contains DefMI.
1010void MachineTraceMetrics::Ensemble::
1011addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
1013 assert(!Trace.empty() && "Trace should contain at least one block");
1014 Register Reg = DefMI->getOperand(DefOp).getReg();
1015 assert(Reg.isVirtual());
1016 const MachineBasicBlock *DefMBB = DefMI->getParent();
1017
1018 // Reg is live-in to all blocks in Trace that follow DefMBB.
1019 for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
1020 if (MBB == DefMBB)
1021 return;
1022 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1023 // Just add the register. The height will be updated later.
1024 TBI.LiveIns.push_back(Reg);
1025 }
1026}
1027
1028/// Compute instruction heights in the trace through MBB. This updates MBB and
1029/// the blocks below it in the trace. It is assumed that the trace has already
1030/// been computed.
1031void MachineTraceMetrics::Ensemble::
1032computeInstrHeights(const MachineBasicBlock *MBB) {
1033 // The bottom of the trace may already be computed.
1034 // Find the blocks that need updating.
1036 do {
1037 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1038 assert(TBI.hasValidHeight() && "Incomplete trace");
1039 if (TBI.HasValidInstrHeights)
1040 break;
1041 Stack.push_back(MBB);
1042 TBI.LiveIns.clear();
1043 MBB = TBI.Succ;
1044 } while (MBB);
1045
1046 // As we move upwards in the trace, keep track of instructions that are
1047 // required by deeper trace instructions. Map MI -> height required so far.
1048 MIHeightMap Heights;
1049
1050 // For physregs, the def isn't known when we see the use.
1051 // Instead, keep track of the highest use of each regunit.
1052 SparseSet<LiveRegUnit> RegUnits;
1053 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1054
1055 // If the bottom of the trace was already precomputed, initialize heights
1056 // from its live-in list.
1057 // MBB is the highest precomputed block in the trace.
1058 if (MBB) {
1059 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1060 for (LiveInReg &LI : TBI.LiveIns) {
1061 if (LI.Reg.isVirtual()) {
1062 // For virtual registers, the def latency is included.
1063 unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
1064 if (Height < LI.Height)
1065 Height = LI.Height;
1066 } else {
1067 // For register units, the def latency is not included because we don't
1068 // know the def yet.
1069 RegUnits[LI.Reg.id()].Cycle = LI.Height;
1070 }
1071 }
1072 }
1073
1074 // Go through the trace blocks in bottom-up order.
1076 for (;!Stack.empty(); Stack.pop_back()) {
1077 MBB = Stack.back();
1078 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
1079 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1080 TBI.HasValidInstrHeights = true;
1081 TBI.CriticalPath = 0;
1082
1083 LLVM_DEBUG({
1084 dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1085 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1086 for (unsigned K = 0; K != PRHeights.size(); ++K)
1087 if (PRHeights[K]) {
1088 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1089 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1090 << MTM.SchedModel.getProcResource(K)->Name << " ("
1091 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1092 }
1093 });
1094
1095 // Get dependencies from PHIs in the trace successor.
1096 const MachineBasicBlock *Succ = TBI.Succ;
1097 // If MBB is the last block in the trace, and it has a back-edge to the
1098 // loop header, get loop-carried dependencies from PHIs in the header. For
1099 // that purpose, pretend that all the loop header PHIs have height 0.
1100 if (!Succ)
1101 if (const MachineLoop *Loop = getLoopFor(MBB))
1102 if (MBB->isSuccessor(Loop->getHeader()))
1103 Succ = Loop->getHeader();
1104
1105 if (Succ) {
1106 for (const auto &PHI : *Succ) {
1107 if (!PHI.isPHI())
1108 break;
1109 Deps.clear();
1110 getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1111 if (!Deps.empty()) {
1112 // Loop header PHI heights are all 0.
1113 unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1114 LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1115 if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1116 MTM.TII))
1117 addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1118 }
1119 }
1120 }
1121
1122 // Go through the block backwards.
1123 for (const MachineInstr &MI : reverse(*MBB)) {
1124 // Find the MI height as determined by virtual register uses in the
1125 // trace below.
1126 unsigned Cycle = 0;
1127 MIHeightMap::iterator HeightI = Heights.find(&MI);
1128 if (HeightI != Heights.end()) {
1129 Cycle = HeightI->second;
1130 // We won't be seeing any more MI uses.
1131 Heights.erase(HeightI);
1132 }
1133
1134 // Don't process PHI deps. They depend on the specific predecessor, and
1135 // we'll get them when visiting the predecessor.
1136 Deps.clear();
1137 bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1138
1139 // There may also be regunit dependencies to include in the height.
1140 if (HasPhysRegs)
1141 Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1142 MTM.TII, MTM.TRI);
1143
1144 // Update the required height of any virtual registers read by MI.
1145 for (const DataDep &Dep : Deps)
1146 if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1147 addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1148
1149 InstrCycles &MICycles = Cycles[&MI];
1150 MICycles.Height = Cycle;
1151 if (!TBI.HasValidInstrDepths) {
1152 LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
1153 continue;
1154 }
1155 // Update critical path length.
1156 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1157 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1158 }
1159
1160 // Update virtual live-in heights. They were added by addLiveIns() with a 0
1161 // height because the final height isn't known until now.
1162 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
1163 for (LiveInReg &LIR : TBI.LiveIns) {
1164 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
1165 LIR.Height = Heights.lookup(DefMI);
1166 LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
1167 }
1168
1169 // Transfer the live regunits to the live-in list.
1170 for (const LiveRegUnit &RU : RegUnits) {
1171 TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
1172 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
1173 << RU.Cycle);
1174 }
1175 LLVM_DEBUG(dbgs() << '\n');
1176
1177 if (!TBI.HasValidInstrDepths)
1178 continue;
1179 // Add live-ins to the critical path length.
1180 TBI.CriticalPath = std::max(TBI.CriticalPath,
1181 computeCrossBlockCriticalPath(TBI));
1182 LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1183 }
1184}
1185
1188 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1189
1190 if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1191 computeTrace(MBB);
1192 if (!TBI.HasValidInstrDepths)
1193 computeInstrDepths(MBB);
1194 if (!TBI.HasValidInstrHeights)
1195 computeInstrHeights(MBB);
1196
1197 return Trace(*this, TBI);
1198}
1199
1200unsigned
1202 assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1203 "MI must be in the trace center block");
1205 return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1206}
1207
1208unsigned
1210 const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1212 getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1213 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1214 DataDep &Dep = Deps.front();
1215 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1216 // Add latency if DefMI is a real instruction. Transients get latency 0.
1217 if (!Dep.DefMI->isTransient())
1218 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1219 &PHI, Dep.UseOp);
1220 return DepCycle;
1221}
1222
1223/// When bottom is set include instructions in current block in estimate.
1225 // Find the limiting processor resource.
1226 // Numbers have been pre-scaled to be comparable.
1227 unsigned PRMax = 0;
1228 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1229 if (Bottom) {
1230 ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
1231 for (unsigned K = 0; K != PRDepths.size(); ++K)
1232 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1233 } else {
1234 for (unsigned PRD : PRDepths)
1235 PRMax = std::max(PRMax, PRD);
1236 }
1237 // Convert to cycle count.
1238 PRMax = TE.MTM.getCycles(PRMax);
1239
1240 /// All instructions before current block
1241 unsigned Instrs = TBI.InstrDepth;
1242 // plus instructions in current block
1243 if (Bottom)
1244 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1245 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1246 Instrs /= IW;
1247 // Assume issue width 1 without a schedule model.
1248 return std::max(Instrs, PRMax);
1249}
1250
1254 ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1255 // Add up resources above and below the center block.
1256 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1257 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1258 unsigned PRMax = 0;
1259
1260 // Capture computing cycles from extra instructions
1261 auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1262 unsigned ResourceIdx)
1263 ->unsigned {
1264 unsigned Cycles = 0;
1265 for (const MCSchedClassDesc *SC : Instrs) {
1266 if (!SC->isValid())
1267 continue;
1269 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1270 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1271 PI != PE; ++PI) {
1272 if (PI->ProcResourceIdx != ResourceIdx)
1273 continue;
1274 Cycles += (PI->ReleaseAtCycle *
1275 TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1276 }
1277 }
1278 return Cycles;
1279 };
1280
1281 for (unsigned K = 0; K != PRDepths.size(); ++K) {
1282 unsigned PRCycles = PRDepths[K] + PRHeights[K];
1283 for (const MachineBasicBlock *MBB : Extrablocks)
1284 PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
1285 PRCycles += extraCycles(ExtraInstrs, K);
1286 PRCycles -= extraCycles(RemoveInstrs, K);
1287 PRMax = std::max(PRMax, PRCycles);
1288 }
1289 // Convert to cycle count.
1290 PRMax = TE.MTM.getCycles(PRMax);
1291
1292 // Instrs: #instructions in current trace outside current block.
1293 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1294 // Add instruction count from the extra blocks.
1295 for (const MachineBasicBlock *MBB : Extrablocks)
1296 Instrs += TE.MTM.getResources(MBB)->InstrCount;
1297 Instrs += ExtraInstrs.size();
1298 Instrs -= RemoveInstrs.size();
1299 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1300 Instrs /= IW;
1301 // Assume issue width 1 without a schedule model.
1302 return std::max(Instrs, PRMax);
1303}
1304
1306 const MachineInstr &UseMI) const {
1307 if (DefMI.getParent() == UseMI.getParent())
1308 return true;
1309
1310 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1311 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1312
1313 return DepTBI.isUsefulDominator(TBI);
1314}
1315
1317 OS << getName() << " ensemble:\n";
1318 for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1319 OS << " %bb." << i << '\t';
1320 BlockInfo[i].print(OS);
1321 OS << '\n';
1322 }
1323}
1324
1326 if (hasValidDepth()) {
1327 OS << "depth=" << InstrDepth;
1328 if (Pred)
1329 OS << " pred=" << printMBBReference(*Pred);
1330 else
1331 OS << " pred=null";
1332 OS << " head=%bb." << Head;
1334 OS << " +instrs";
1335 } else
1336 OS << "depth invalid";
1337 OS << ", ";
1338 if (hasValidHeight()) {
1339 OS << "height=" << InstrHeight;
1340 if (Succ)
1341 OS << " succ=" << printMBBReference(*Succ);
1342 else
1343 OS << " succ=null";
1344 OS << " tail=%bb." << Tail;
1346 OS << " +instrs";
1347 } else
1348 OS << "height invalid";
1350 OS << ", crit=" << CriticalPath;
1351}
1352
1354 unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1355
1356 OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1357 << " --> %bb." << TBI.Tail << ':';
1358 if (TBI.hasValidHeight() && TBI.hasValidDepth())
1359 OS << ' ' << getInstrCount() << " instrs.";
1360 if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1361 OS << ' ' << TBI.CriticalPath << " cycles.";
1362
1364 OS << "\n%bb." << MBBNum;
1365 while (Block->hasValidDepth() && Block->Pred) {
1366 unsigned Num = Block->Pred->getNumber();
1367 OS << " <- " << printMBBReference(*Block->Pred);
1368 Block = &TE.BlockInfo[Num];
1369 }
1370
1371 Block = &TBI;
1372 OS << "\n ";
1373 while (Block->hasValidHeight() && Block->Succ) {
1374 unsigned Num = Block->Succ->getNumber();
1375 OS << " -> " << printMBBReference(*Block->Succ);
1376 Block = &TE.BlockInfo[Num];
1377 }
1378 OS << '\n';
1379}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static unsigned InstrCount
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
loops
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI, unsigned UseHeight, MIHeightMap &Heights, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII)
static void getPHIDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineBasicBlock *Pred, const MachineRegisterInfo *MRI)
static bool getDataDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineRegisterInfo *MRI)
static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl< DataDep > &Deps, SparseSet< LiveRegUnit > &RegUnits, const TargetRegisterInfo *TRI)
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, SparseSet< LiveRegUnit > &RegUnits, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
DenseMap< const MachineInstr *, unsigned > MIHeightMap
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
#define LLVM_DEBUG(...)
Definition Debug.h:114
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
bool erase(const KeyT &Val)
Definition DenseMap.h:311
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI bool isPredecessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a predecessor of this block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
const MachineOperand & getOperand(unsigned i) const
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of the instructions from Start to End.
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getInstrCount() const
Compute the total number of instructions in the trace.
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks={}, ArrayRef< const MCSchedClassDesc * > ExtraInstrs={}, ArrayRef< const MCSchedClassDesc * > RemoveInstrs={}) const
Return the resource length of the trace.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getCriticalPath() const
Return the length of the (data dependency) critical path through the trace.
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
ArrayRef< unsigned > getProcReleaseAtCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
void init(MachineFunction &Func, const MachineLoopInfo &LI)
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition SparseSet.h:120
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition SparseSet.h:291
typename DenseT::iterator iterator
Definition SparseSet.h:171
const_iterator end() const
Definition SparseSet.h:175
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition SparseSet.h:156
iterator find(const KeyT &Key)
find - Find an element by its key.
Definition SparseSet.h:225
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
const MCWriteProcResEntry * ProcResIter
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool insertEdge(std::optional< const MachineBasicBlock * > From, const MachineBasicBlock *To)
Default po_iterator_storage implementation with an internal set object.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
iterator_range< po_ext_iterator< T, SetType > > post_order_ext(const T &G, SetType &S)
iterator_range< ipo_ext_iterator< T, SetType > > inverse_post_order_ext(const T &G, SetType &S)
LLVM_ABI Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
unsigned MCRegUnit
Register units are used to compute register aliasing.
Definition MCRegister.h:30
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
LLVM_ABI char & MachineTraceMetricsID
MachineTraceMetrics - This pass computes critical path and CPU resource usage in an ensemble of trace...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1835
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs)
Adds registers contained in LiveRegs to the block live-in list of MBB.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
const MachineInstr * MI
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Per-basic block information that doesn't depend on the trace through the block.
bool hasResources() const
Returns true when resource information for this block has been computed.
unsigned InstrCount
The number of non-trivial instructions in the block.
bool HasCalls
True when the block contains calls.
InstrCycles represents the cycle height and depth of an instruction in a trace.
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
A virtual register or regunit required by a basic block or its trace successors.
Per-basic block information that relates to a specific trace through the block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
SmallVector< LiveInReg, 4 > LiveIns
Live-in registers.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block.
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().