LLVM 20.0.0git
MachineTraceMetrics.cpp
Go to the documentation of this file.
1//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/SparseSet.h"
26#include "llvm/Pass.h"
27#include "llvm/Support/Debug.h"
29#include "llvm/Support/Format.h"
31#include <algorithm>
32#include <cassert>
33#include <tuple>
34#include <utility>
35
36using namespace llvm;
37
38#define DEBUG_TYPE "machine-trace-metrics"
39
40AnalysisKey MachineTraceMetricsAnalysis::Key;
41
45 return Result(MF, MFAM.getResult<MachineLoopAnalysis>(MF));
46}
47
51 MFAM.getResult<MachineTraceMetricsAnalysis>(MF).verifyAnalysis();
53}
54
56
58
60 "Machine Trace Metrics", false, true)
64
67
69 AU.setPreservesAll();
72}
73
75 const MachineLoopInfo &LI) {
76 MF = &Func;
77 const TargetSubtargetInfo &ST = MF->getSubtarget();
78 TII = ST.getInstrInfo();
79 TRI = ST.getRegisterInfo();
80 MRI = &MF->getRegInfo();
81 Loops = &LI;
82 SchedModel.init(&ST);
83 BlockInfo.resize(MF->getNumBlockIDs());
84 ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
85 SchedModel.getNumProcResourceKinds());
86}
87
89 MTM.init(MF, getAnalysis<MachineLoopInfoWrapperPass>().getLI());
90 return false;
91}
92
94
96 MF = nullptr;
97 BlockInfo.clear();
98 for (auto &E : Ensembles)
99 E.reset();
100}
101
102//===----------------------------------------------------------------------===//
103// Fixed block information
104//===----------------------------------------------------------------------===//
105//
106// The number of instructions in a basic block and the CPU resources used by
107// those instructions don't depend on any given trace strategy.
108
109/// Compute the resource usage in basic block MBB.
112 assert(MBB && "No basic block");
113 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
114 if (FBI->hasResources())
115 return FBI;
116
117 // Compute resource usage in the block.
118 FBI->HasCalls = false;
119 unsigned InstrCount = 0;
120
121 // Add up per-processor resource cycles as well.
122 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
123 SmallVector<unsigned, 32> PRCycles(PRKinds);
124
125 for (const auto &MI : *MBB) {
126 if (MI.isTransient())
127 continue;
128 ++InstrCount;
129 if (MI.isCall())
130 FBI->HasCalls = true;
131
132 // Count processor resources used.
133 if (!SchedModel.hasInstrSchedModel())
134 continue;
135 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
136 if (!SC->isValid())
137 continue;
138
140 PI = SchedModel.getWriteProcResBegin(SC),
141 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
142 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
143 PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
144 }
145 }
146 FBI->InstrCount = InstrCount;
147
148 // Scale the resource cycles so they are comparable.
149 unsigned PROffset = MBB->getNumber() * PRKinds;
150 for (unsigned K = 0; K != PRKinds; ++K)
151 ProcReleaseAtCycles[PROffset + K] =
152 PRCycles[K] * SchedModel.getResourceFactor(K);
153
154 return FBI;
155}
156
159 assert(BlockInfo[MBBNum].hasResources() &&
160 "getResources() must be called before getProcReleaseAtCycles()");
161 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
162 assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
163 return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
164}
165
166//===----------------------------------------------------------------------===//
167// Ensemble utility functions
168//===----------------------------------------------------------------------===//
169
171 : MTM(*ct) {
172 BlockInfo.resize(MTM.BlockInfo.size());
173 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
174 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
175 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
176}
177
178// Virtual destructor serves as an anchor.
180
181const MachineLoop*
183 return MTM.Loops->getLoopFor(MBB);
184}
185
186// Update resource-related information in the TraceBlockInfo for MBB.
187// Only update resources related to the trace above MBB.
188void MachineTraceMetrics::Ensemble::
189computeDepthResources(const MachineBasicBlock *MBB) {
190 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
191 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
192 unsigned PROffset = MBB->getNumber() * PRKinds;
193
194 // Compute resources from trace above. The top block is simple.
195 if (!TBI->Pred) {
196 TBI->InstrDepth = 0;
197 TBI->Head = MBB->getNumber();
198 std::fill(ProcResourceDepths.begin() + PROffset,
199 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
200 return;
201 }
202
203 // Compute from the block above. A post-order traversal ensures the
204 // predecessor is always computed first.
205 unsigned PredNum = TBI->Pred->getNumber();
206 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
207 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
208 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
209 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
210 TBI->Head = PredTBI->Head;
211
212 // Compute per-resource depths.
213 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
214 ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
215 for (unsigned K = 0; K != PRKinds; ++K)
216 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
217}
218
219// Update resource-related information in the TraceBlockInfo for MBB.
220// Only update resources related to the trace below MBB.
221void MachineTraceMetrics::Ensemble::
222computeHeightResources(const MachineBasicBlock *MBB) {
223 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
224 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
225 unsigned PROffset = MBB->getNumber() * PRKinds;
226
227 // Compute resources for the current block.
228 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
229 ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
230
231 // The trace tail is done.
232 if (!TBI->Succ) {
233 TBI->Tail = MBB->getNumber();
234 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
235 return;
236 }
237
238 // Compute from the block below. A post-order traversal ensures the
239 // predecessor is always computed first.
240 unsigned SuccNum = TBI->Succ->getNumber();
241 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
242 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
243 TBI->InstrHeight += SuccTBI->InstrHeight;
244 TBI->Tail = SuccTBI->Tail;
245
246 // Compute per-resource heights.
247 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
248 for (unsigned K = 0; K != PRKinds; ++K)
249 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
250}
251
252// Check if depth resources for MBB are valid and return the TBI.
253// Return NULL if the resources have been invalidated.
257 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
258 return TBI->hasValidDepth() ? TBI : nullptr;
259}
260
261// Check if height resources for MBB are valid and return the TBI.
262// Return NULL if the resources have been invalidated.
266 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
267 return TBI->hasValidHeight() ? TBI : nullptr;
268}
269
270/// Get an array of processor resource depths for MBB. Indexed by processor
271/// resource kind, this array contains the scaled processor resources consumed
272/// by all blocks preceding MBB in its trace. It does not include instructions
273/// in MBB.
274///
275/// Compare TraceBlockInfo::InstrDepth.
278getProcResourceDepths(unsigned MBBNum) const {
279 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
280 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
281 return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
282}
283
284/// Get an array of processor resource heights for MBB. Indexed by processor
285/// resource kind, this array contains the scaled processor resources consumed
286/// by this block and all blocks following it in its trace.
287///
288/// Compare TraceBlockInfo::InstrHeight.
291getProcResourceHeights(unsigned MBBNum) const {
292 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
293 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
294 return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
295}
296
297//===----------------------------------------------------------------------===//
298// Trace Selection Strategies
299//===----------------------------------------------------------------------===//
300//
301// A trace selection strategy is implemented as a sub-class of Ensemble. The
302// trace through a block B is computed by two DFS traversals of the CFG
303// starting from B. One upwards, and one downwards. During the upwards DFS,
304// pickTracePred() is called on the post-ordered blocks. During the downwards
305// DFS, pickTraceSucc() is called in a post-order.
306//
307
308// We never allow traces that leave loops, but we do allow traces to enter
309// nested loops. We also never allow traces to contain back-edges.
310//
311// This means that a loop header can never appear above the center block of a
312// trace, except as the trace head. Below the center block, loop exiting edges
313// are banned.
314//
315// Return true if an edge from the From loop to the To loop is leaving a loop.
316// Either of To and From can be null.
317static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
318 return From && !From->contains(To);
319}
320
321// MinInstrCountEnsemble - Pick the trace that executes the least number of
322// instructions.
323namespace {
324
325class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
326 const char *getName() const override { return "MinInstr"; }
327 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
328 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
329
330public:
331 MinInstrCountEnsemble(MachineTraceMetrics *mtm)
332 : MachineTraceMetrics::Ensemble(mtm) {}
333};
334
335/// Pick only the current basic block for the trace and do not choose any
336/// predecessors/successors.
337class LocalEnsemble : public MachineTraceMetrics::Ensemble {
338 const char *getName() const override { return "Local"; }
339 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
340 return nullptr;
341 };
342 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
343 return nullptr;
344 };
345
346public:
347 LocalEnsemble(MachineTraceMetrics *MTM)
348 : MachineTraceMetrics::Ensemble(MTM) {}
349};
350} // end anonymous namespace
351
352// Select the preferred predecessor for MBB.
// Returns the CFG predecessor that would give MBB the smallest InstrDepth,
// or null when MBB has no predecessors or is a loop header (traces never
// follow back-edges or leave loops).
// NOTE(review): this listing elides original line 353 (the
// `const MachineBasicBlock *` return type) and line 365 (the declaration
// binding getDepthResources(Pred) to `PredTBI`) — confirm against the full
// source file.
354MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
355 if (MBB->pred_empty())
356 return nullptr;
357 const MachineLoop *CurLoop = getLoopFor(MBB);
358 // Don't leave loops, and never follow back-edges.
359 if (CurLoop && MBB == CurLoop->getHeader())
360 return nullptr;
// MBB's own instruction count; added to every candidate's depth so the
// comparison below reflects the depth MBB itself would end up with.
361 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
362 const MachineBasicBlock *Best = nullptr;
363 unsigned BestDepth = 0;
364 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
366 getDepthResources(Pred);
367 // Ignore cycles that aren't natural loops.
368 if (!PredTBI)
369 continue;
370 // Pick the predecessor that would give this block the smallest InstrDepth.
371 unsigned Depth = PredTBI->InstrDepth + CurCount;
372 if (!Best || Depth < BestDepth) {
373 Best = Pred;
374 BestDepth = Depth;
375 }
376 }
377 return Best;
378}
379
380// Select the preferred successor for MBB.
// Returns the CFG successor that would give MBB the smallest InstrHeight,
// or null when MBB has no successors or every successor is rejected
// (back-edge, loop exit, or an unnatural CFG cycle).
// NOTE(review): this listing elides original line 381 (the
// `const MachineBasicBlock *` return type) and line 395 (the declaration
// binding getHeightResources(Succ) to `SuccTBI`) — confirm against the full
// source file.
382MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
383 if (MBB->succ_empty())
384 return nullptr;
385 const MachineLoop *CurLoop = getLoopFor(MBB);
386 const MachineBasicBlock *Best = nullptr;
387 unsigned BestHeight = 0;
388 for (const MachineBasicBlock *Succ : MBB->successors()) {
389 // Don't consider back-edges.
390 if (CurLoop && Succ == CurLoop->getHeader())
391 continue;
392 // Don't consider successors exiting CurLoop.
393 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
394 continue;
396 getHeightResources(Succ);
397 // Ignore cycles that aren't natural loops.
398 if (!SuccTBI)
399 continue;
400 // Pick the successor that would give this block the smallest InstrHeight.
401 unsigned Height = SuccTBI->InstrHeight;
402 if (!Best || Height < BestHeight) {
403 Best = Succ;
404 BestHeight = Height;
405 }
406 }
407 return Best;
408}
409
410// Get an Ensemble sub-class for the requested trace strategy.
414 "Invalid trace strategy enum");
415 std::unique_ptr<MachineTraceMetrics::Ensemble> &E =
416 Ensembles[static_cast<size_t>(strategy)];
417 if (E)
418 return E.get();
419
420 // Allocate new Ensemble on demand.
421 switch (strategy) {
423 E = std::make_unique<MinInstrCountEnsemble>(MinInstrCountEnsemble(this));
424 break;
426 E = std::make_unique<LocalEnsemble>(LocalEnsemble(this));
427 break;
428 default: llvm_unreachable("Invalid trace strategy enum");
429 }
430 return E.get();
431}
432
434 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
435 << '\n');
436 BlockInfo[MBB->getNumber()].invalidate();
437 for (auto &E : Ensembles)
438 if (E)
439 E->invalidate(MBB);
440}
441
445 // Check whether the analysis, all analyses on machine functions, or the
446 // machine function's CFG have been preserved.
448 return !PAC.preserved() &&
449 !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
450 !PAC.preservedSet<CFGAnalyses>();
451}
452
454 if (!MF)
455 return;
456#ifndef NDEBUG
457 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
458 for (auto &E : Ensembles)
459 if (E)
460 E->verify();
461#endif
462}
463
464//===----------------------------------------------------------------------===//
465// Trace building
466//===----------------------------------------------------------------------===//
467//
468// Traces are built by two CFG traversals. To avoid recomputing too much, use a
469// set abstraction that confines the search to the current loop, and doesn't
470// revisit blocks.
471
472namespace {
473
474struct LoopBounds {
477 const MachineLoopInfo *Loops;
478 bool Downward = false;
479
482};
483
484} // end anonymous namespace
485
486// Specialize po_iterator_storage in order to prune the post-order traversal so
487// it is limited to the current loop and doesn't traverse the loop back edges.
488namespace llvm {
489
490template<>
491class po_iterator_storage<LoopBounds, true> {
492 LoopBounds &LB;
493
494public:
495 po_iterator_storage(LoopBounds &lb) : LB(lb) {}
496
498
499 bool insertEdge(std::optional<const MachineBasicBlock *> From,
500 const MachineBasicBlock *To) {
501 // Skip already visited To blocks.
502 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
503 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
504 return false;
505 // From is null once when To is the trace center block.
506 if (From) {
507 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
508 // Don't follow backedges, don't leave FromLoop when going upwards.
509 if ((LB.Downward ? To : *From) == FromLoop->getHeader())
510 return false;
511 // Don't leave FromLoop.
512 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
513 return false;
514 }
515 }
516 // To is a new block. Mark the block as visited in case the CFG has cycles
517 // that MachineLoopInfo didn't recognize as a natural loop.
518 return LB.Visited.insert(To).second;
519 }
520};
521
522} // end namespace llvm
523
524/// Compute the trace through MBB.
525void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
526 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
527 << printMBBReference(*MBB) << '\n');
528 // Set up loop bounds for the backwards post-order traversal.
529 LoopBounds Bounds(BlockInfo, MTM.Loops);
530
531 // Run an upwards post-order search for the trace start.
532 Bounds.Downward = false;
533 Bounds.Visited.clear();
534 for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
535 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
536 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
537 // All the predecessors have been visited, pick the preferred one.
538 TBI.Pred = pickTracePred(I);
539 LLVM_DEBUG({
540 if (TBI.Pred)
541 dbgs() << printMBBReference(*TBI.Pred) << '\n';
542 else
543 dbgs() << "null\n";
544 });
545 // The trace leading to I is now known, compute the depth resources.
546 computeDepthResources(I);
547 }
548
549 // Run a downwards post-order search for the trace end.
550 Bounds.Downward = true;
551 Bounds.Visited.clear();
552 for (const auto *I : post_order_ext(MBB, Bounds)) {
553 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
554 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
555 // All the successors have been visited, pick the preferred one.
556 TBI.Succ = pickTraceSucc(I);
557 LLVM_DEBUG({
558 if (TBI.Succ)
559 dbgs() << printMBBReference(*TBI.Succ) << '\n';
560 else
561 dbgs() << "null\n";
562 });
563 // The trace leaving I is now known, compute the height resources.
564 computeHeightResources(I);
565 }
566}
567
568/// Invalidate traces through BadMBB.
569void
572 TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
573
574 // Invalidate height resources of blocks above MBB.
575 if (BadTBI.hasValidHeight()) {
576 BadTBI.invalidateHeight();
577 WorkList.push_back(BadMBB);
578 do {
579 const MachineBasicBlock *MBB = WorkList.pop_back_val();
580 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
581 << getName() << " height.\n");
582 // Find any MBB predecessors that have MBB as their preferred successor.
583 // They are the only ones that need to be invalidated.
584 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
585 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
586 if (!TBI.hasValidHeight())
587 continue;
588 if (TBI.Succ == MBB) {
589 TBI.invalidateHeight();
590 WorkList.push_back(Pred);
591 continue;
592 }
593 // Verify that TBI.Succ is actually a *I successor.
594 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
595 }
596 } while (!WorkList.empty());
597 }
598
599 // Invalidate depth resources of blocks below MBB.
600 if (BadTBI.hasValidDepth()) {
601 BadTBI.invalidateDepth();
602 WorkList.push_back(BadMBB);
603 do {
604 const MachineBasicBlock *MBB = WorkList.pop_back_val();
605 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
606 << getName() << " depth.\n");
607 // Find any MBB successors that have MBB as their preferred predecessor.
608 // They are the only ones that need to be invalidated.
609 for (const MachineBasicBlock *Succ : MBB->successors()) {
610 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
611 if (!TBI.hasValidDepth())
612 continue;
613 if (TBI.Pred == MBB) {
614 TBI.invalidateDepth();
615 WorkList.push_back(Succ);
616 continue;
617 }
618 // Verify that TBI.Pred is actually a *I predecessor.
619 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
620 }
621 } while (!WorkList.empty());
622 }
623
624 // Clear any per-instruction data. We only have to do this for BadMBB itself
625 // because the instructions in that block may change. Other blocks may be
626 // invalidated, but their instructions will stay the same, so there is no
627 // need to erase the Cycle entries. They will be overwritten when we
628 // recompute.
629 for (const auto &I : *BadMBB)
630 Cycles.erase(&I);
631}
632
634#ifndef NDEBUG
635 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
636 "Outdated BlockInfo size");
637 for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
638 const TraceBlockInfo &TBI = BlockInfo[Num];
639 if (TBI.hasValidDepth() && TBI.Pred) {
640 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
641 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
642 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
643 "Trace is broken, depth should have been invalidated.");
644 const MachineLoop *Loop = getLoopFor(MBB);
645 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
646 }
647 if (TBI.hasValidHeight() && TBI.Succ) {
648 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
649 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
650 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
651 "Trace is broken, height should have been invalidated.");
652 const MachineLoop *Loop = getLoopFor(MBB);
653 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
654 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
655 "Trace contains backedge");
656 }
657 }
658#endif
659}
660
661//===----------------------------------------------------------------------===//
662// Data Dependencies
663//===----------------------------------------------------------------------===//
664//
665// Compute the depth and height of each instruction based on data dependencies
666// and instruction latencies. These cycle numbers assume that the CPU can issue
667// an infinite number of instructions per cycle as long as their dependencies
668// are ready.
669
670// A data dependency is represented as a defining MI and operand numbers on the
671// defining and using MI.
672namespace {
673
674struct DataDep {
675 const MachineInstr *DefMI;
676 unsigned DefOp;
677 unsigned UseOp;
678
679 DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
680 : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
681
682 /// Create a DataDep from an SSA form virtual register.
683 DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
684 : UseOp(UseOp) {
686 MachineOperand *DefMO = MRI->getOneDef(VirtReg);
687 assert(DefMO && "Register does not have unique def");
688 DefMI = DefMO->getParent();
689 DefOp = DefMO->getOperandNo();
690 }
691};
692
693} // end anonymous namespace
694
695// Get the input data dependencies that must be ready before UseMI can issue.
696// Return true if UseMI has any physreg operands.
697static bool getDataDeps(const MachineInstr &UseMI,
699 const MachineRegisterInfo *MRI) {
700 // Debug values should not be included in any calculations.
701 if (UseMI.isDebugInstr())
702 return false;
703
704 bool HasPhysRegs = false;
705 for (const MachineOperand &MO : UseMI.operands()) {
706 if (!MO.isReg())
707 continue;
708 Register Reg = MO.getReg();
709 if (!Reg)
710 continue;
711 if (Reg.isPhysical()) {
712 HasPhysRegs = true;
713 continue;
714 }
715 // Collect virtual register reads.
716 if (MO.readsReg())
717 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
718 }
719 return HasPhysRegs;
720}
721
722// Get the input data dependencies of a PHI instruction, using Pred as the
723// preferred predecessor.
724// This will add at most one dependency to Deps.
725static void getPHIDeps(const MachineInstr &UseMI,
727 const MachineBasicBlock *Pred,
728 const MachineRegisterInfo *MRI) {
729 // No predecessor at the beginning of a trace. Ignore dependencies.
730 if (!Pred)
731 return;
732 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
733 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
734 if (UseMI.getOperand(i + 1).getMBB() == Pred) {
735 Register Reg = UseMI.getOperand(i).getReg();
736 Deps.push_back(DataDep(MRI, Reg, i));
737 return;
738 }
739 }
740}
741
742// Identify physreg dependencies for UseMI, and update the live regunit
743// tracking set when scanning instructions downwards.
746 SparseSet<LiveRegUnit> &RegUnits,
747 const TargetRegisterInfo *TRI) {
749 SmallVector<unsigned, 8> LiveDefOps;
750
751 for (const MachineOperand &MO : UseMI->operands()) {
752 if (!MO.isReg() || !MO.getReg().isPhysical())
753 continue;
754 MCRegister Reg = MO.getReg().asMCReg();
755 // Track live defs and kills for updating RegUnits.
756 if (MO.isDef()) {
757 if (MO.isDead())
758 Kills.push_back(Reg);
759 else
760 LiveDefOps.push_back(MO.getOperandNo());
761 } else if (MO.isKill())
762 Kills.push_back(Reg);
763 // Identify dependencies.
764 if (!MO.readsReg())
765 continue;
766 for (MCRegUnit Unit : TRI->regunits(Reg)) {
767 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
768 if (I == RegUnits.end())
769 continue;
770 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
771 break;
772 }
773 }
774
775 // Update RegUnits to reflect live registers after UseMI.
776 // First kills.
777 for (MCRegister Kill : Kills)
778 for (MCRegUnit Unit : TRI->regunits(Kill))
779 RegUnits.erase(Unit);
780
781 // Second, live defs.
782 for (unsigned DefOp : LiveDefOps) {
783 for (MCRegUnit Unit :
784 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
785 LiveRegUnit &LRU = RegUnits[Unit];
786 LRU.MI = UseMI;
787 LRU.Op = DefOp;
788 }
789 }
790}
791
792/// The length of the critical path through a trace is the maximum of two path
793/// lengths:
794///
795/// 1. The maximum height+depth over all instructions in the trace center block.
796///
797/// 2. The longest cross-block dependency chain. For small blocks, it is
798/// possible that the critical path through the trace doesn't include any
799/// instructions in the block.
800///
801/// This function computes the second number from the live-in list of the
802/// center block.
803unsigned MachineTraceMetrics::Ensemble::
804computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
805 assert(TBI.HasValidInstrDepths && "Missing depth info");
806 assert(TBI.HasValidInstrHeights && "Missing height info");
807 unsigned MaxLen = 0;
808 for (const LiveInReg &LIR : TBI.LiveIns) {
809 if (!LIR.Reg.isVirtual())
810 continue;
811 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
812 // Ignore dependencies outside the current trace.
813 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
814 if (!DefTBI.isUsefulDominator(TBI))
815 continue;
816 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
817 MaxLen = std::max(MaxLen, Len);
818 }
819 return MaxLen;
820}
821
824 SparseSet<LiveRegUnit> &RegUnits) {
826 // Collect all data dependencies.
827 if (UseMI.isPHI())
828 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
829 else if (getDataDeps(UseMI, Deps, MTM.MRI))
830 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
831
832 // Filter and process dependencies, computing the earliest issue cycle.
833 unsigned Cycle = 0;
834 for (const DataDep &Dep : Deps) {
835 const TraceBlockInfo&DepTBI =
836 BlockInfo[Dep.DefMI->getParent()->getNumber()];
837 // Ignore dependencies from outside the current trace.
838 if (!DepTBI.isUsefulDominator(TBI))
839 continue;
840 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
841 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
842 // Add latency if DefMI is a real instruction. Transients get latency 0.
843 if (!Dep.DefMI->isTransient())
844 DepCycle += MTM.SchedModel
845 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
846 Cycle = std::max(Cycle, DepCycle);
847 }
848 // Remember the instruction depth.
849 InstrCycles &MICycles = Cycles[&UseMI];
850 MICycles.Depth = Cycle;
851
852 if (TBI.HasValidInstrHeights) {
853 // Update critical path length.
854 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
855 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
856 } else {
857 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
858 }
859}
860
863 SparseSet<LiveRegUnit> &RegUnits) {
864 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
865}
866
870 SparseSet<LiveRegUnit> &RegUnits) {
871 for (; Start != End; Start++)
872 updateDepth(Start->getParent(), *Start, RegUnits);
873}
874
875/// Compute instruction depths for all instructions above or in MBB in its
876/// trace. This assumes that the trace through MBB has already been computed.
877void MachineTraceMetrics::Ensemble::
878computeInstrDepths(const MachineBasicBlock *MBB) {
879 // The top of the trace may already be computed, and HasValidInstrDepths
880 // implies Head->HasValidInstrDepths, so we only need to start from the first
881 // block in the trace that needs to be recomputed.
883 do {
884 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
885 assert(TBI.hasValidDepth() && "Incomplete trace");
886 if (TBI.HasValidInstrDepths)
887 break;
888 Stack.push_back(MBB);
889 MBB = TBI.Pred;
890 } while (MBB);
891
892 // FIXME: If MBB is non-null at this point, it is the last pre-computed block
893 // in the trace. We should track any live-out physregs that were defined in
894 // the trace. This is quite rare in SSA form, typically created by CSE
895 // hoisting a compare.
896 SparseSet<LiveRegUnit> RegUnits;
897 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
898
899 // Go through trace blocks in top-down order, stopping after the center block.
900 while (!Stack.empty()) {
901 MBB = Stack.pop_back_val();
902 LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
903 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
904 TBI.HasValidInstrDepths = true;
905 TBI.CriticalPath = 0;
906
907 // Print out resource depths here as well.
908 LLVM_DEBUG({
909 dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
910 ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
911 for (unsigned K = 0; K != PRDepths.size(); ++K)
912 if (PRDepths[K]) {
913 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
914 dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
915 << MTM.SchedModel.getProcResource(K)->Name << " ("
916 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
917 }
918 });
919
920 // Also compute the critical path length through MBB when possible.
921 if (TBI.HasValidInstrHeights)
922 TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
923
924 for (const auto &UseMI : *MBB) {
925 updateDepth(TBI, UseMI, RegUnits);
926 }
927 }
928}
929
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  // NOTE(review): the declaration of ReadOps (the list of reading operand
  // indices collected below) is elided in this rendering — confirm against
  // the upstream source.

  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    // Only physical registers are tracked here; virtual register
    // dependencies are handled separately via getDataDeps().
    if (!Reg.isPhysical())
      continue;
    // Remember reads; their regunit entries can only be updated after the
    // final height of MI is known (second loop below).
    if (MO.readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      // Transient instructions (copies etc.) contribute no latency.
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
                                                      I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned Op : ReadOps) {
    MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      LiveRegUnit &LRU = RegUnits[Unit];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = Op;
      }
    }
  }

  return Height;
}
985
987
// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
                          unsigned UseHeight, MIHeightMap &Heights,
                          const TargetSchedModel &SchedModel,
                          const TargetInstrInfo *TII) {
  // Adjust height by Dep.DefMI latency.
  // Transient instructions are treated as having zero latency.
  if (!Dep.DefMI->isTransient())
    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
                                                  Dep.UseOp);

  // Update Heights[DefMI] to be the maximum height seen.
  // NOTE(review): the declaration of the map iterator I is elided in this
  // rendering — confirm against the upstream source.
  bool New;
  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  if (New)
    return true;

  // DefMI has been pushed before. Give it the max height.
  if (I->second < UseHeight)
    I->second = UseHeight;
  return false;
}
1011
/// Assuming that the virtual register defined by DefMI:DefOp was used by
/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
/// when reaching the block that contains DefMI.
/// NOTE(review): the final parameter line (the Trace block list) is elided in
/// this rendering — confirm against the upstream source.
void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
  assert(!Trace.empty() && "Trace should contain at least one block");
  Register Reg = DefMI->getOperand(DefOp).getReg();
  // Only virtual registers are added this way; physregs are tracked as
  // regunits instead.
  assert(Reg.isVirtual());
  const MachineBasicBlock *DefMBB = DefMI->getParent();

  // Reg is live-in to all blocks in Trace that follow DefMBB.
  // Walk the trace bottom-up; the defining block itself does not need the
  // live-in entry, so stop there.
  for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
    if (MBB == DefMBB)
      return;
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Just add the register. The height will be updated later.
    TBI.LiveIns.push_back(Reg);
  }
}
1032
/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  // NOTE(review): the declaration of Stack (the worklist of trace blocks
  // whose heights must be recomputed) is elided in this rendering — confirm
  // against the upstream source.
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    // Live-ins are rediscovered while recomputing heights below.
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (LI.Reg.isVirtual()) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  // NOTE(review): the declaration of Deps (a small vector of DataDep reused
  // across iterations) is elided in this rendering — confirm against the
  // upstream source.
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    // Reset and recompute the critical path contribution of this block.
    TBI.CriticalPath = 0;

    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      // PHIs are grouped at the top of the block; stop at the first non-PHI.
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
                            MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (const MachineInstr &MI : reverse(*MBB)) {
      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(&MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
                                      MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[&MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (const LiveRegUnit &RU : RegUnits) {
      TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
                        << RU.Cycle);
    }
    LLVM_DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}
1190
  // NOTE(review): the signature of Ensemble::getTrace(const MachineBasicBlock
  // *MBB) is elided in this rendering — confirm against the upstream source.
  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];

  // Lazily materialize the missing pieces: the block chain first, then the
  // per-instruction depths and heights through the trace.
  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
    computeTrace(MBB);
  if (!TBI.HasValidInstrDepths)
    computeInstrDepths(MBB);
  if (!TBI.HasValidInstrHeights)
    computeInstrHeights(MBB);

  return Trace(*this, TBI);
}
1204
unsigned
// NOTE(review): the declarator line
// (MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const)
// is elided in this rendering — confirm against the upstream source.
  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
         "MI must be in the trace center block");
  // Slack = critical path length minus the longest path through MI itself
  // (its depth from the trace top plus its height to the trace bottom).
  InstrCycles Cyc = getInstrCycles(MI);
  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
}
1212
unsigned
// NOTE(review): the declarator line
// (MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const)
// and the local Deps vector declaration are elided in this rendering —
// confirm against the upstream source.
  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  // A PHI in the trace successor has exactly one incoming value from the
  // trace center block MBB.
  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  DataDep &Dep = Deps.front();
  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
  // Add latency if DefMI is a real instruction. Transients get latency 0.
  if (!Dep.DefMI->isTransient())
    DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
                                                        &PHI, Dep.UseOp);
  return DepCycle;
}
1227
/// When bottom is set include instructions in current block in estimate.
// NOTE(review): the declarator line
// (MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const) is
// elided in this rendering — confirm against the upstream source.
  // Find the limiting processor resource.
  // Numbers have been pre-scaled to be comparable.
  unsigned PRMax = 0;
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  if (Bottom) {
    // Include the current block's own resource usage.
    ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  } else {
    for (unsigned PRD : PRDepths)
      PRMax = std::max(PRMax, PRD);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  /// All instructions before current block
  unsigned Instrs = TBI.InstrDepth;
  // plus instructions in current block
  if (Bottom)
    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  // The depth is limited by whichever is larger: issue bandwidth or the
  // most-contended processor resource.
  return std::max(Instrs, PRMax);
}
1255
// NOTE(review): the first lines of the signature
// (MachineTraceMetrics::Trace::getResourceLength(ArrayRef<const
// MachineBasicBlock *> Extrablocks, ArrayRef<const MCSchedClassDesc *>
// ExtraInstrs, ...)) are elided in this rendering — confirm upstream.
                        ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
      ->unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      // Sum the scaled release cycles this sched class spends on the
      // requested resource kind.
      // NOTE(review): the for-statement header
      // (for (TargetSchedModel::ProcResIter ...) is elided in this rendering.
          PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
          PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
          PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles += (PI->ReleaseAtCycle *
                   TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  // For each resource kind, combine trace usage, extra blocks, and the
  // hypothetical added/removed instructions, keeping the maximum.
  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
1309
// NOTE(review): the first signature line
// (bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,)
// is elided in this rendering — confirm against the upstream source.
                                      const MachineInstr &UseMI) const {
  // A dependence within a single basic block is trivially part of the trace.
  if (DefMI.getParent() == UseMI.getParent())
    return true;

  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];

  // Otherwise the defining block must be a dominator carrying useful
  // instruction depths for the using block's trace.
  return DepTBI.isUsefulDominator(TBI);
}
1320
// NOTE(review): the signature (MachineTraceMetrics::Ensemble::print taking a
// raw_ostream &OS) is elided in this rendering — confirm upstream.
  // Dump the per-block trace info for every block in the function.
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    OS << "  %bb." << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
  }
}
1329
// NOTE(review): the signature (MachineTraceMetrics::TraceBlockInfo::print
// taking a raw_ostream &OS) is elided in this rendering — confirm upstream.
  // Depth half: only meaningful once the trace above this block is computed.
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=" << printMBBReference(*Pred);
    else
      OS << " pred=null";
    OS << " head=%bb." << Head;
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  // Height half: only meaningful once the trace below this block is computed.
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=" << printMBBReference(*Succ);
    else
      OS << " succ=null";
    OS << " tail=%bb." << Tail;
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  // The critical path requires both instruction depths and heights.
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
}
1357
// NOTE(review): the signature (MachineTraceMetrics::Trace::print taking a
// raw_ostream &OS) is elided in this rendering — confirm upstream.
  // Recover this trace's center block number from TBI's position within the
  // ensemble's BlockInfo array.
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
     << " --> %bb." << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  // Walk up the trace through the Pred links.
  // NOTE(review): the declaration of the cursor Block (a const TraceBlockInfo
  // pointer, presumably initialized to &TBI) is elided in this rendering —
  // confirm against the upstream source.
  OS << "\n%bb." << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- " << printMBBReference(*Block->Pred);
    Block = &TE.BlockInfo[Num];
  }

  // Walk down the trace through the Succ links.
  Block = &TBI;
  OS << "\n ";
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> " << printMBBReference(*Block->Succ);
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
basic Basic Alias true
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
BlockVerifier::State From
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390
static unsigned InstrCount
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
#define DEBUG_TYPE
Hexagon Hardware Loops
IRTranslator LLVM IR MI
loops
Definition: LoopInfo.cpp:1209
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI, unsigned UseHeight, MIHeightMap &Heights, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII)
static void getPHIDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineBasicBlock *Pred, const MachineRegisterInfo *MRI)
static bool getDataDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineRegisterInfo *MRI)
static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl< DataDep > &Deps, SparseSet< LiveRegUnit > &RegUnits, const TargetRegisterInfo *TRI)
Machine Trace Metrics
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, SparseSet< LiveRegUnit > &RegUnits, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
A possibly irreducible generalization of a Loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
bool isPredecessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a predecessor of this block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:691
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of the instructions from Start to End.
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks={}, ArrayRef< const MCSchedClassDesc * > ExtraInstrs={}, ArrayRef< const MCSchedClassDesc * > RemoveInstrs={}) const
Return the resource length of the trace.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
ArrayRef< unsigned > getProcReleaseAtCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
void init(MachineFunction &Func, const MachineLoopInfo &LI)
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:293
typename DenseT::iterator iterator
Definition: SparseSet.h:175
const_iterator end() const
Definition: SparseSet.h:179
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:160
iterator find(const KeyT &Key)
find - Find an element by its key.
Definition: SparseSet.h:229
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
TargetSubtargetInfo - Generic base class for all target subtargets.
bool empty() const
Definition: Trace.h:96
bool insertEdge(std::optional< const MachineBasicBlock * > From, const MachineBasicBlock *To)
Default po_iterator_storage implementation with an internal set object.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
iterator_range< po_ext_iterator< T, SetType > > post_order_ext(const T &G, SetType &S)
iterator_range< ipo_ext_iterator< T, SetType > > inverse_post_order_ext(const T &G, SetType &S)
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
char & MachineTraceMetricsID
MachineTraceMetrics - This pass computes critical path and CPU resource usage in an ensemble of trace...
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
DWARFExpression::Operation Op
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1841
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs)
Adds registers contained in LiveRegs to the block live-in list of MBB.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
const MachineInstr * MI
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:121
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:66
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Per-basic block information that doesn't depend on the trace through the block.
bool hasResources() const
Returns true when resource information for this block has been computed.
unsigned InstrCount
The number of non-trivial instructions in the block.
bool HasCalls
True when the block contains calls.
InstrCycles represents the cycle height and depth of an instruction in a trace.
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
Per-basic block information that relates to a specific trace through the block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned CriticalPath
Critical path length.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block.
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().