LLVM 20.0.0git
MachineTraceMetrics.cpp
Go to the documentation of this file.
1//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/SparseSet.h"
26#include "llvm/Pass.h"
27#include "llvm/Support/Debug.h"
29#include "llvm/Support/Format.h"
31#include <algorithm>
32#include <cassert>
33#include <tuple>
34#include <utility>
35
36using namespace llvm;
37
38#define DEBUG_TYPE "machine-trace-metrics"
39
40AnalysisKey MachineTraceMetricsAnalysis::Key;
41
45 return Result(MF, MFAM.getResult<MachineLoopAnalysis>(MF));
46}
47
51 MFAM.getResult<MachineTraceMetricsAnalysis>(MF).verifyAnalysis();
53}
54
56
58
60 "Machine Trace Metrics", false, true)
64
67
69 AU.setPreservesAll();
72}
73
75 const MachineLoopInfo &LI) {
76 MF = &Func;
77 const TargetSubtargetInfo &ST = MF->getSubtarget();
78 TII = ST.getInstrInfo();
79 TRI = ST.getRegisterInfo();
80 MRI = &MF->getRegInfo();
81 Loops = &LI;
82 SchedModel.init(&ST);
83 BlockInfo.resize(MF->getNumBlockIDs());
84 ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
85 SchedModel.getNumProcResourceKinds());
86}
87
89 MTM.init(MF, getAnalysis<MachineLoopInfoWrapperPass>().getLI());
90 return false;
91}
92
94
96 MF = nullptr;
97 BlockInfo.clear();
98 for (auto &E : Ensembles)
99 E.reset();
100}
101
102//===----------------------------------------------------------------------===//
103// Fixed block information
104//===----------------------------------------------------------------------===//
105//
106// The number of instructions in a basic block and the CPU resources used by
107// those instructions don't depend on any given trace strategy.
108
109/// Compute the resource usage in basic block MBB.
112 assert(MBB && "No basic block");
113 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
114 if (FBI->hasResources())
115 return FBI;
116
117 // Compute resource usage in the block.
118 FBI->HasCalls = false;
119 unsigned InstrCount = 0;
120
121 // Add up per-processor resource cycles as well.
122 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
123 SmallVector<unsigned, 32> PRCycles(PRKinds);
124
125 for (const auto &MI : *MBB) {
126 if (MI.isTransient())
127 continue;
128 ++InstrCount;
129 if (MI.isCall())
130 FBI->HasCalls = true;
131
132 // Count processor resources used.
133 if (!SchedModel.hasInstrSchedModel())
134 continue;
135 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
136 if (!SC->isValid())
137 continue;
138
140 PI = SchedModel.getWriteProcResBegin(SC),
141 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
142 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
143 PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
144 }
145 }
146 FBI->InstrCount = InstrCount;
147
148 // Scale the resource cycles so they are comparable.
149 unsigned PROffset = MBB->getNumber() * PRKinds;
150 for (unsigned K = 0; K != PRKinds; ++K)
151 ProcReleaseAtCycles[PROffset + K] =
152 PRCycles[K] * SchedModel.getResourceFactor(K);
153
154 return FBI;
155}
156
159 assert(BlockInfo[MBBNum].hasResources() &&
160 "getResources() must be called before getProcReleaseAtCycles()");
161 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
162 assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
163 return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
164}
165
166//===----------------------------------------------------------------------===//
167// Ensemble utility functions
168//===----------------------------------------------------------------------===//
169
171 : MTM(*ct) {
172 BlockInfo.resize(MTM.BlockInfo.size());
173 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
174 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
175 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
176}
177
178// Virtual destructor serves as an anchor.
180
181const MachineLoop*
183 return MTM.Loops->getLoopFor(MBB);
184}
185
186// Update resource-related information in the TraceBlockInfo for MBB.
187// Only update resources related to the trace above MBB.
188void MachineTraceMetrics::Ensemble::
189computeDepthResources(const MachineBasicBlock *MBB) {
190 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
191 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
192 unsigned PROffset = MBB->getNumber() * PRKinds;
193
194 // Compute resources from trace above. The top block is simple.
195 if (!TBI->Pred) {
196 TBI->InstrDepth = 0;
197 TBI->Head = MBB->getNumber();
198 std::fill(ProcResourceDepths.begin() + PROffset,
199 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
200 return;
201 }
202
203 // Compute from the block above. A post-order traversal ensures the
204 // predecessor is always computed first.
205 unsigned PredNum = TBI->Pred->getNumber();
206 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
207 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
208 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
209 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
210 TBI->Head = PredTBI->Head;
211
212 // Compute per-resource depths.
213 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
214 ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
215 for (unsigned K = 0; K != PRKinds; ++K)
216 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
217}
218
219// Update resource-related information in the TraceBlockInfo for MBB.
220// Only update resources related to the trace below MBB.
221void MachineTraceMetrics::Ensemble::
222computeHeightResources(const MachineBasicBlock *MBB) {
223 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
224 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
225 unsigned PROffset = MBB->getNumber() * PRKinds;
226
227 // Compute resources for the current block.
228 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
229 ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
230
231 // The trace tail is done.
232 if (!TBI->Succ) {
233 TBI->Tail = MBB->getNumber();
234 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
235 return;
236 }
237
238 // Compute from the block below. A post-order traversal ensures the
239 // predecessor is always computed first.
240 unsigned SuccNum = TBI->Succ->getNumber();
241 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
242 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
243 TBI->InstrHeight += SuccTBI->InstrHeight;
244 TBI->Tail = SuccTBI->Tail;
245
246 // Compute per-resource heights.
247 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
248 for (unsigned K = 0; K != PRKinds; ++K)
249 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
250}
251
252// Check if depth resources for MBB are valid and return the TBI.
253// Return NULL if the resources have been invalidated.
257 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
258 return TBI->hasValidDepth() ? TBI : nullptr;
259}
260
261// Check if height resources for MBB are valid and return the TBI.
262// Return NULL if the resources have been invalidated.
266 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
267 return TBI->hasValidHeight() ? TBI : nullptr;
268}
269
270/// Get an array of processor resource depths for MBB. Indexed by processor
271/// resource kind, this array contains the scaled processor resources consumed
272/// by all blocks preceding MBB in its trace. It does not include instructions
273/// in MBB.
274///
275/// Compare TraceBlockInfo::InstrDepth.
278getProcResourceDepths(unsigned MBBNum) const {
279 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
280 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
281 return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
282}
283
284/// Get an array of processor resource heights for MBB. Indexed by processor
285/// resource kind, this array contains the scaled processor resources consumed
286/// by this block and all blocks following it in its trace.
287///
288/// Compare TraceBlockInfo::InstrHeight.
291getProcResourceHeights(unsigned MBBNum) const {
292 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
293 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
294 return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
295}
296
297//===----------------------------------------------------------------------===//
298// Trace Selection Strategies
299//===----------------------------------------------------------------------===//
300//
301// A trace selection strategy is implemented as a sub-class of Ensemble. The
302// trace through a block B is computed by two DFS traversals of the CFG
303// starting from B. One upwards, and one downwards. During the upwards DFS,
304// pickTracePred() is called on the post-ordered blocks. During the downwards
305// DFS, pickTraceSucc() is called in a post-order.
306//
307
308// We never allow traces that leave loops, but we do allow traces to enter
309// nested loops. We also never allow traces to contain back-edges.
310//
311// This means that a loop header can never appear above the center block of a
312// trace, except as the trace head. Below the center block, loop exiting edges
313// are banned.
314//
315// Return true if an edge from the From loop to the To loop is leaving a loop.
316// Either of To and From can be null.
317static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
318 return From && !From->contains(To);
319}
320
321// MinInstrCountEnsemble - Pick the trace that executes the least number of
322// instructions.
323namespace {
324
/// Trace selection strategy that extends the trace through the
/// predecessor/successor yielding the fewest executed instructions.
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  // Pick the predecessor that gives this block the smallest InstrDepth.
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  // Pick the successor that gives this block the smallest InstrHeight.
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
      : MachineTraceMetrics::Ensemble(mtm) {}
};
334
335/// Pick only the current basic block for the trace and do not choose any
336/// predecessors/successors.
/// Pick only the current basic block for the trace and do not choose any
/// predecessors/successors.
class LocalEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "Local"; }
  // Never extend the trace upwards: a local trace has no predecessor.
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
    return nullptr;
  };
  // Never extend the trace downwards either.
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
    return nullptr;
  };

public:
  LocalEnsemble(MachineTraceMetrics *MTM)
      : MachineTraceMetrics::Ensemble(MTM) {}
};
350} // end anonymous namespace
351
352// Select the preferred predecessor for MBB.
354MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
355 if (MBB->pred_empty())
356 return nullptr;
357 const MachineLoop *CurLoop = getLoopFor(MBB);
358 // Don't leave loops, and never follow back-edges.
359 if (CurLoop && MBB == CurLoop->getHeader())
360 return nullptr;
361 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
362 const MachineBasicBlock *Best = nullptr;
363 unsigned BestDepth = 0;
364 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
366 getDepthResources(Pred);
367 // Ignore cycles that aren't natural loops.
368 if (!PredTBI)
369 continue;
370 // Pick the predecessor that would give this block the smallest InstrDepth.
371 unsigned Depth = PredTBI->InstrDepth + CurCount;
372 if (!Best || Depth < BestDepth) {
373 Best = Pred;
374 BestDepth = Depth;
375 }
376 }
377 return Best;
378}
379
380// Select the preferred successor for MBB.
382MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
383 if (MBB->succ_empty())
384 return nullptr;
385 const MachineLoop *CurLoop = getLoopFor(MBB);
386 const MachineBasicBlock *Best = nullptr;
387 unsigned BestHeight = 0;
388 for (const MachineBasicBlock *Succ : MBB->successors()) {
389 // Don't consider back-edges.
390 if (CurLoop && Succ == CurLoop->getHeader())
391 continue;
392 // Don't consider successors exiting CurLoop.
393 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
394 continue;
396 getHeightResources(Succ);
397 // Ignore cycles that aren't natural loops.
398 if (!SuccTBI)
399 continue;
400 // Pick the successor that would give this block the smallest InstrHeight.
401 unsigned Height = SuccTBI->InstrHeight;
402 if (!Best || Height < BestHeight) {
403 Best = Succ;
404 BestHeight = Height;
405 }
406 }
407 return Best;
408}
409
410// Get an Ensemble sub-class for the requested trace strategy.
414 "Invalid trace strategy enum");
415 std::unique_ptr<MachineTraceMetrics::Ensemble> &E =
416 Ensembles[static_cast<size_t>(strategy)];
417 if (E)
418 return E.get();
419
420 // Allocate new Ensemble on demand.
421 switch (strategy) {
423 E = std::make_unique<MinInstrCountEnsemble>(MinInstrCountEnsemble(this));
424 break;
426 E = std::make_unique<LocalEnsemble>(LocalEnsemble(this));
427 break;
428 default: llvm_unreachable("Invalid trace strategy enum");
429 }
430 return E.get();
431}
432
434 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
435 << '\n');
436 BlockInfo[MBB->getNumber()].invalidate();
437 for (auto &E : Ensembles)
438 if (E)
439 E->invalidate(MBB);
440}
441
445 // Check whether the analysis, all analyses on machine functions, or the
446 // machine function's CFG have been preserved.
448 return !PAC.preserved() &&
449 !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
450 !PAC.preservedSet<CFGAnalyses>();
451}
452
454 if (!MF)
455 return;
456#ifndef NDEBUG
457 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
458 for (auto &E : Ensembles)
459 if (E)
460 E->verify();
461#endif
462}
463
464//===----------------------------------------------------------------------===//
465// Trace building
466//===----------------------------------------------------------------------===//
467//
468// Traces are built by two CFG traversals. To avoid recomputing too much, use a
469// set abstraction that confines the search to the current loop, and doesn't
470// revisit blocks.
471
472namespace {
473
// Shared state between computeTrace() and the po_iterator_storage
// specialization below; it bounds both CFG traversals to the current loop.
// NOTE(review): some members appear to be declared on lines not visible in
// this view (computeTrace() constructs it with (BlockInfo, MTM.Loops) and
// insertEdge() uses Blocks/Visited) — confirm against the full source.
struct LoopBounds {
  const MachineLoopInfo *Loops;
  // Direction of the current traversal: insertEdge() prunes on valid heights
  // when walking downwards and on valid depths when walking upwards.
  bool Downward = false;

};
483
484} // end anonymous namespace
485
486// Specialize po_iterator_storage in order to prune the post-order traversal so
487// it is limited to the current loop and doesn't traverse the loop back edges.
488namespace llvm {
489
// Specialized visited-set storage for the ext post-order iterators used in
// computeTrace(). insertEdge() doubles as the pruning predicate.
template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;

public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}

  // Decide whether the traversal may follow the CFG edge From->To.
  // Returning false prunes To (or the edge) from the search.
  bool insertEdge(std::optional<const MachineBasicBlock *> From,
                  const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : *From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};
521
522} // end namespace llvm
523
/// Compute the trace through MBB.
/// Runs two pruned post-order traversals rooted at MBB: one over the inverse
/// CFG picking each reached block's preferred predecessor (accumulating depth
/// resources), and one over the forward CFG picking preferred successors
/// (accumulating height resources).
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
                    << printMBBReference(*MBB) << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    LLVM_DEBUG({
      if (TBI.Pred)
        dbgs() << printMBBReference(*TBI.Pred) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (const auto *I : post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    LLVM_DEBUG({
      if (TBI.Succ)
        dbgs() << printMBBReference(*TBI.Succ) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}
567
568/// Invalidate traces through BadMBB.
569void
572 TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
573
574 // Invalidate height resources of blocks above MBB.
575 if (BadTBI.hasValidHeight()) {
576 BadTBI.invalidateHeight();
577 WorkList.push_back(BadMBB);
578 do {
579 const MachineBasicBlock *MBB = WorkList.pop_back_val();
580 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
581 << getName() << " height.\n");
582 // Find any MBB predecessors that have MBB as their preferred successor.
583 // They are the only ones that need to be invalidated.
584 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
585 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
586 if (!TBI.hasValidHeight())
587 continue;
588 if (TBI.Succ == MBB) {
589 TBI.invalidateHeight();
590 WorkList.push_back(Pred);
591 continue;
592 }
593 // Verify that TBI.Succ is actually a *I successor.
594 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
595 }
596 } while (!WorkList.empty());
597 }
598
599 // Invalidate depth resources of blocks below MBB.
600 if (BadTBI.hasValidDepth()) {
601 BadTBI.invalidateDepth();
602 WorkList.push_back(BadMBB);
603 do {
604 const MachineBasicBlock *MBB = WorkList.pop_back_val();
605 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
606 << getName() << " depth.\n");
607 // Find any MBB successors that have MBB as their preferred predecessor.
608 // They are the only ones that need to be invalidated.
609 for (const MachineBasicBlock *Succ : MBB->successors()) {
610 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
611 if (!TBI.hasValidDepth())
612 continue;
613 if (TBI.Pred == MBB) {
614 TBI.invalidateDepth();
615 WorkList.push_back(Succ);
616 continue;
617 }
618 // Verify that TBI.Pred is actually a *I predecessor.
619 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
620 }
621 } while (!WorkList.empty());
622 }
623
624 // Clear any per-instruction data. We only have to do this for BadMBB itself
625 // because the instructions in that block may change. Other blocks may be
626 // invalidated, but their instructions will stay the same, so there is no
627 // need to erase the Cycle entries. They will be overwritten when we
628 // recompute.
629 for (const auto &I : *BadMBB)
630 Cycles.erase(&I);
631}
632
634#ifndef NDEBUG
635 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
636 "Outdated BlockInfo size");
637 for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
638 const TraceBlockInfo &TBI = BlockInfo[Num];
639 if (TBI.hasValidDepth() && TBI.Pred) {
640 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
641 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
642 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
643 "Trace is broken, depth should have been invalidated.");
644 const MachineLoop *Loop = getLoopFor(MBB);
645 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
646 }
647 if (TBI.hasValidHeight() && TBI.Succ) {
648 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
649 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
650 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
651 "Trace is broken, height should have been invalidated.");
652 const MachineLoop *Loop = getLoopFor(MBB);
653 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
654 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
655 "Trace contains backedge");
656 }
657 }
658#endif
659}
660
661//===----------------------------------------------------------------------===//
662// Data Dependencies
663//===----------------------------------------------------------------------===//
664//
665// Compute the depth and height of each instruction based on data dependencies
666// and instruction latencies. These cycle numbers assume that the CPU can issue
667// an infinite number of instructions per cycle as long as their dependencies
668// are ready.
669
670// A data dependency is represented as a defining MI and operand numbers on the
671// defining and using MI.
672namespace {
673
/// A single data dependency edge: UseMI's operand UseOp reads the value that
/// DefMI's operand DefOp defines.
struct DataDep {
  const MachineInstr *DefMI; // Defining instruction.
  unsigned DefOp;            // Operand index of the def on DefMI.
  unsigned UseOp;            // Operand index of the use on the consumer.

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
      : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  /// Looks up the single SSA def of VirtReg via MRI.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
      : UseOp(UseOp) {
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    // SSA form: exactly one def is allowed.
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};
693
694} // end anonymous namespace
695
696// Get the input data dependencies that must be ready before UseMI can issue.
697// Return true if UseMI has any physreg operands.
698static bool getDataDeps(const MachineInstr &UseMI,
700 const MachineRegisterInfo *MRI) {
701 // Debug values should not be included in any calculations.
702 if (UseMI.isDebugInstr())
703 return false;
704
705 bool HasPhysRegs = false;
706 for (const MachineOperand &MO : UseMI.operands()) {
707 if (!MO.isReg())
708 continue;
709 Register Reg = MO.getReg();
710 if (!Reg)
711 continue;
712 if (Reg.isPhysical()) {
713 HasPhysRegs = true;
714 continue;
715 }
716 // Collect virtual register reads.
717 if (MO.readsReg())
718 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
719 }
720 return HasPhysRegs;
721}
722
723// Get the input data dependencies of a PHI instruction, using Pred as the
724// preferred predecessor.
725// This will add at most one dependency to Deps.
726static void getPHIDeps(const MachineInstr &UseMI,
728 const MachineBasicBlock *Pred,
729 const MachineRegisterInfo *MRI) {
730 // No predecessor at the beginning of a trace. Ignore dependencies.
731 if (!Pred)
732 return;
733 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
734 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
735 if (UseMI.getOperand(i + 1).getMBB() == Pred) {
736 Register Reg = UseMI.getOperand(i).getReg();
737 Deps.push_back(DataDep(MRI, Reg, i));
738 return;
739 }
740 }
741}
742
743// Identify physreg dependencies for UseMI, and update the live regunit
744// tracking set when scanning instructions downwards.
747 SparseSet<LiveRegUnit> &RegUnits,
748 const TargetRegisterInfo *TRI) {
750 SmallVector<unsigned, 8> LiveDefOps;
751
752 for (const MachineOperand &MO : UseMI->operands()) {
753 if (!MO.isReg() || !MO.getReg().isPhysical())
754 continue;
755 MCRegister Reg = MO.getReg().asMCReg();
756 // Track live defs and kills for updating RegUnits.
757 if (MO.isDef()) {
758 if (MO.isDead())
759 Kills.push_back(Reg);
760 else
761 LiveDefOps.push_back(MO.getOperandNo());
762 } else if (MO.isKill())
763 Kills.push_back(Reg);
764 // Identify dependencies.
765 if (!MO.readsReg())
766 continue;
767 for (MCRegUnit Unit : TRI->regunits(Reg)) {
768 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
769 if (I == RegUnits.end())
770 continue;
771 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
772 break;
773 }
774 }
775
776 // Update RegUnits to reflect live registers after UseMI.
777 // First kills.
778 for (MCRegister Kill : Kills)
779 for (MCRegUnit Unit : TRI->regunits(Kill))
780 RegUnits.erase(Unit);
781
782 // Second, live defs.
783 for (unsigned DefOp : LiveDefOps) {
784 for (MCRegUnit Unit :
785 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
786 LiveRegUnit &LRU = RegUnits[Unit];
787 LRU.MI = UseMI;
788 LRU.Op = DefOp;
789 }
790 }
791}
792
793/// The length of the critical path through a trace is the maximum of two path
794/// lengths:
795///
796/// 1. The maximum height+depth over all instructions in the trace center block.
797///
798/// 2. The longest cross-block dependency chain. For small blocks, it is
799/// possible that the critical path through the trace doesn't include any
800/// instructions in the block.
801///
802/// This function computes the second number from the live-in list of the
803/// center block.
804unsigned MachineTraceMetrics::Ensemble::
805computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
806 assert(TBI.HasValidInstrDepths && "Missing depth info");
807 assert(TBI.HasValidInstrHeights && "Missing height info");
808 unsigned MaxLen = 0;
809 for (const LiveInReg &LIR : TBI.LiveIns) {
810 if (!LIR.Reg.isVirtual())
811 continue;
812 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
813 // Ignore dependencies outside the current trace.
814 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
815 if (!DefTBI.isUsefulDominator(TBI))
816 continue;
817 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
818 MaxLen = std::max(MaxLen, Len);
819 }
820 return MaxLen;
821}
822
825 SparseSet<LiveRegUnit> &RegUnits) {
827 // Collect all data dependencies.
828 if (UseMI.isPHI())
829 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
830 else if (getDataDeps(UseMI, Deps, MTM.MRI))
831 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
832
833 // Filter and process dependencies, computing the earliest issue cycle.
834 unsigned Cycle = 0;
835 for (const DataDep &Dep : Deps) {
836 const TraceBlockInfo&DepTBI =
837 BlockInfo[Dep.DefMI->getParent()->getNumber()];
838 // Ignore dependencies from outside the current trace.
839 if (!DepTBI.isUsefulDominator(TBI))
840 continue;
841 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
842 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
843 // Add latency if DefMI is a real instruction. Transients get latency 0.
844 if (!Dep.DefMI->isTransient())
845 DepCycle += MTM.SchedModel
846 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
847 Cycle = std::max(Cycle, DepCycle);
848 }
849 // Remember the instruction depth.
850 InstrCycles &MICycles = Cycles[&UseMI];
851 MICycles.Depth = Cycle;
852
853 if (TBI.HasValidInstrHeights) {
854 // Update critical path length.
855 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
856 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
857 } else {
858 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
859 }
860}
861
864 SparseSet<LiveRegUnit> &RegUnits) {
865 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
866}
867
871 SparseSet<LiveRegUnit> &RegUnits) {
872 for (; Start != End; Start++)
873 updateDepth(Start->getParent(), *Start, RegUnits);
874}
875
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  // Walk up the trace collecting the blocks to (re)compute; they are then
  // processed top-down off the stack.
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    // Program order ensures each instruction's operands were already visited.
    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
    }
  }
}
930
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {

  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isPhysical())
      continue;
    // Remember physreg reads; their regunit entries are refreshed below once
    // the final height of MI is known.
    if (MO.readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
                                                      I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned Op : ReadOps) {
    MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      LiveRegUnit &LRU = RegUnits[Unit];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = Op;
      }
    }
  }

  return Height;
}
986
988
989// Push the height of DefMI upwards if required to match UseMI.
990// Return true if this is the first time DefMI was seen.
991static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
992 unsigned UseHeight, MIHeightMap &Heights,
993 const TargetSchedModel &SchedModel,
994 const TargetInstrInfo *TII) {
995 // Adjust height by Dep.DefMI latency.
996 if (!Dep.DefMI->isTransient())
997 UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
998 Dep.UseOp);
999
1000 // Update Heights[DefMI] to be the maximum height seen.
1002 bool New;
1003 std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
1004 if (New)
1005 return true;
1006
1007 // DefMI has been pushed before. Give it the max height.
1008 if (I->second < UseHeight)
1009 I->second = UseHeight;
1010 return false;
1011}
1012
1013/// Assuming that the virtual register defined by DefMI:DefOp was used by
1014/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
1015/// when reaching the block that contains DefMI.
1016void MachineTraceMetrics::Ensemble::
1017addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
1019 assert(!Trace.empty() && "Trace should contain at least one block");
1020 Register Reg = DefMI->getOperand(DefOp).getReg();
1021 assert(Reg.isVirtual());
1022 const MachineBasicBlock *DefMBB = DefMI->getParent();
1023
1024 // Reg is live-in to all blocks in Trace that follow DefMBB.
1025 for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
1026 if (MBB == DefMBB)
1027 return;
1028 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1029 // Just add the register. The height will be updated later.
1030 TBI.LiveIns.push_back(Reg);
1031 }
1032}
1033
1034/// Compute instruction heights in the trace through MBB. This updates MBB and
1035/// the blocks below it in the trace. It is assumed that the trace has already
1036/// been computed.
1037void MachineTraceMetrics::Ensemble::
1038computeInstrHeights(const MachineBasicBlock *MBB) {
1039 // The bottom of the trace may already be computed.
1040 // Find the blocks that need updating.
1042 do {
1043 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1044 assert(TBI.hasValidHeight() && "Incomplete trace");
1045 if (TBI.HasValidInstrHeights)
1046 break;
1047 Stack.push_back(MBB);
1048 TBI.LiveIns.clear();
1049 MBB = TBI.Succ;
1050 } while (MBB);
1051
1052 // As we move upwards in the trace, keep track of instructions that are
1053 // required by deeper trace instructions. Map MI -> height required so far.
1054 MIHeightMap Heights;
1055
1056 // For physregs, the def isn't known when we see the use.
1057 // Instead, keep track of the highest use of each regunit.
1058 SparseSet<LiveRegUnit> RegUnits;
1059 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1060
1061 // If the bottom of the trace was already precomputed, initialize heights
1062 // from its live-in list.
1063 // MBB is the highest precomputed block in the trace.
1064 if (MBB) {
1065 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1066 for (LiveInReg &LI : TBI.LiveIns) {
1067 if (LI.Reg.isVirtual()) {
1068 // For virtual registers, the def latency is included.
1069 unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
1070 if (Height < LI.Height)
1071 Height = LI.Height;
1072 } else {
1073 // For register units, the def latency is not included because we don't
1074 // know the def yet.
1075 RegUnits[LI.Reg].Cycle = LI.Height;
1076 }
1077 }
1078 }
1079
1080 // Go through the trace blocks in bottom-up order.
1082 for (;!Stack.empty(); Stack.pop_back()) {
1083 MBB = Stack.back();
1084 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
1085 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1086 TBI.HasValidInstrHeights = true;
1087 TBI.CriticalPath = 0;
1088
1089 LLVM_DEBUG({
1090 dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1091 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1092 for (unsigned K = 0; K != PRHeights.size(); ++K)
1093 if (PRHeights[K]) {
1094 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1095 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1096 << MTM.SchedModel.getProcResource(K)->Name << " ("
1097 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1098 }
1099 });
1100
1101 // Get dependencies from PHIs in the trace successor.
1102 const MachineBasicBlock *Succ = TBI.Succ;
1103 // If MBB is the last block in the trace, and it has a back-edge to the
1104 // loop header, get loop-carried dependencies from PHIs in the header. For
1105 // that purpose, pretend that all the loop header PHIs have height 0.
1106 if (!Succ)
1107 if (const MachineLoop *Loop = getLoopFor(MBB))
1108 if (MBB->isSuccessor(Loop->getHeader()))
1109 Succ = Loop->getHeader();
1110
1111 if (Succ) {
1112 for (const auto &PHI : *Succ) {
1113 if (!PHI.isPHI())
1114 break;
1115 Deps.clear();
1116 getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1117 if (!Deps.empty()) {
1118 // Loop header PHI heights are all 0.
1119 unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1120 LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1121 if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1122 MTM.TII))
1123 addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1124 }
1125 }
1126 }
1127
1128 // Go through the block backwards.
1129 for (const MachineInstr &MI : reverse(*MBB)) {
1130 // Find the MI height as determined by virtual register uses in the
1131 // trace below.
1132 unsigned Cycle = 0;
1133 MIHeightMap::iterator HeightI = Heights.find(&MI);
1134 if (HeightI != Heights.end()) {
1135 Cycle = HeightI->second;
1136 // We won't be seeing any more MI uses.
1137 Heights.erase(HeightI);
1138 }
1139
1140 // Don't process PHI deps. They depend on the specific predecessor, and
1141 // we'll get them when visiting the predecessor.
1142 Deps.clear();
1143 bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1144
1145 // There may also be regunit dependencies to include in the height.
1146 if (HasPhysRegs)
1147 Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1148 MTM.TII, MTM.TRI);
1149
1150 // Update the required height of any virtual registers read by MI.
1151 for (const DataDep &Dep : Deps)
1152 if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1153 addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1154
1155 InstrCycles &MICycles = Cycles[&MI];
1156 MICycles.Height = Cycle;
1157 if (!TBI.HasValidInstrDepths) {
1158 LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
1159 continue;
1160 }
1161 // Update critical path length.
1162 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1163 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1164 }
1165
1166 // Update virtual live-in heights. They were added by addLiveIns() with a 0
1167 // height because the final height isn't known until now.
1168 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
1169 for (LiveInReg &LIR : TBI.LiveIns) {
1170 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
1171 LIR.Height = Heights.lookup(DefMI);
1172 LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
1173 }
1174
1175 // Transfer the live regunits to the live-in list.
1176 for (const LiveRegUnit &RU : RegUnits) {
1177 TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
1178 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
1179 << RU.Cycle);
1180 }
1181 LLVM_DEBUG(dbgs() << '\n');
1182
1183 if (!TBI.HasValidInstrDepths)
1184 continue;
1185 // Add live-ins to the critical path length.
1186 TBI.CriticalPath = std::max(TBI.CriticalPath,
1187 computeCrossBlockCriticalPath(TBI));
1188 LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1189 }
1190}
1191
1194 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1195
1196 if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1197 computeTrace(MBB);
1198 if (!TBI.HasValidInstrDepths)
1199 computeInstrDepths(MBB);
1200 if (!TBI.HasValidInstrHeights)
1201 computeInstrHeights(MBB);
1202
1203 return Trace(*this, TBI);
1204}
1205
1206unsigned
1208 assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1209 "MI must be in the trace center block");
1210 InstrCycles Cyc = getInstrCycles(MI);
1211 return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1212}
1213
1214unsigned
1216 const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1218 getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1219 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1220 DataDep &Dep = Deps.front();
1221 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1222 // Add latency if DefMI is a real instruction. Transients get latency 0.
1223 if (!Dep.DefMI->isTransient())
1224 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1225 &PHI, Dep.UseOp);
1226 return DepCycle;
1227}
1228
1229/// When bottom is set include instructions in current block in estimate.
1231 // Find the limiting processor resource.
1232 // Numbers have been pre-scaled to be comparable.
1233 unsigned PRMax = 0;
1234 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1235 if (Bottom) {
1236 ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
1237 for (unsigned K = 0; K != PRDepths.size(); ++K)
1238 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1239 } else {
1240 for (unsigned PRD : PRDepths)
1241 PRMax = std::max(PRMax, PRD);
1242 }
1243 // Convert to cycle count.
1244 PRMax = TE.MTM.getCycles(PRMax);
1245
1246 /// All instructions before current block
1247 unsigned Instrs = TBI.InstrDepth;
1248 // plus instructions in current block
1249 if (Bottom)
1250 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1251 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1252 Instrs /= IW;
1253 // Assume issue width 1 without a schedule model.
1254 return std::max(Instrs, PRMax);
1255}
1256
1260 ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1261 // Add up resources above and below the center block.
1262 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1263 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1264 unsigned PRMax = 0;
1265
1266 // Capture computing cycles from extra instructions
1267 auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1268 unsigned ResourceIdx)
1269 ->unsigned {
1270 unsigned Cycles = 0;
1271 for (const MCSchedClassDesc *SC : Instrs) {
1272 if (!SC->isValid())
1273 continue;
1275 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1276 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1277 PI != PE; ++PI) {
1278 if (PI->ProcResourceIdx != ResourceIdx)
1279 continue;
1280 Cycles += (PI->ReleaseAtCycle *
1281 TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1282 }
1283 }
1284 return Cycles;
1285 };
1286
1287 for (unsigned K = 0; K != PRDepths.size(); ++K) {
1288 unsigned PRCycles = PRDepths[K] + PRHeights[K];
1289 for (const MachineBasicBlock *MBB : Extrablocks)
1290 PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
1291 PRCycles += extraCycles(ExtraInstrs, K);
1292 PRCycles -= extraCycles(RemoveInstrs, K);
1293 PRMax = std::max(PRMax, PRCycles);
1294 }
1295 // Convert to cycle count.
1296 PRMax = TE.MTM.getCycles(PRMax);
1297
1298 // Instrs: #instructions in current trace outside current block.
1299 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1300 // Add instruction count from the extra blocks.
1301 for (const MachineBasicBlock *MBB : Extrablocks)
1302 Instrs += TE.MTM.getResources(MBB)->InstrCount;
1303 Instrs += ExtraInstrs.size();
1304 Instrs -= RemoveInstrs.size();
1305 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1306 Instrs /= IW;
1307 // Assume issue width 1 without a schedule model.
1308 return std::max(Instrs, PRMax);
1309}
1310
1312 const MachineInstr &UseMI) const {
1313 if (DefMI.getParent() == UseMI.getParent())
1314 return true;
1315
1316 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1317 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1318
1319 return DepTBI.isUsefulDominator(TBI);
1320}
1321
1323 OS << getName() << " ensemble:\n";
1324 for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1325 OS << " %bb." << i << '\t';
1326 BlockInfo[i].print(OS);
1327 OS << '\n';
1328 }
1329}
1330
1332 if (hasValidDepth()) {
1333 OS << "depth=" << InstrDepth;
1334 if (Pred)
1335 OS << " pred=" << printMBBReference(*Pred);
1336 else
1337 OS << " pred=null";
1338 OS << " head=%bb." << Head;
1339 if (HasValidInstrDepths)
1340 OS << " +instrs";
1341 } else
1342 OS << "depth invalid";
1343 OS << ", ";
1344 if (hasValidHeight()) {
1345 OS << "height=" << InstrHeight;
1346 if (Succ)
1347 OS << " succ=" << printMBBReference(*Succ);
1348 else
1349 OS << " succ=null";
1350 OS << " tail=%bb." << Tail;
1351 if (HasValidInstrHeights)
1352 OS << " +instrs";
1353 } else
1354 OS << "height invalid";
1355 if (HasValidInstrDepths && HasValidInstrHeights)
1356 OS << ", crit=" << CriticalPath;
1357}
1358
1360 unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1361
1362 OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1363 << " --> %bb." << TBI.Tail << ':';
1364 if (TBI.hasValidHeight() && TBI.hasValidDepth())
1365 OS << ' ' << getInstrCount() << " instrs.";
1366 if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1367 OS << ' ' << TBI.CriticalPath << " cycles.";
1368
1370 OS << "\n%bb." << MBBNum;
1371 while (Block->hasValidDepth() && Block->Pred) {
1372 unsigned Num = Block->Pred->getNumber();
1373 OS << " <- " << printMBBReference(*Block->Pred);
1374 Block = &TE.BlockInfo[Num];
1375 }
1376
1377 Block = &TBI;
1378 OS << "\n ";
1379 while (Block->hasValidHeight() && Block->Succ) {
1380 unsigned Num = Block->Succ->getNumber();
1381 OS << " -> " << printMBBReference(*Block->Succ);
1382 Block = &TE.BlockInfo[Num];
1383 }
1384 OS << '\n';
1385}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
basic Basic Alias true
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
BlockVerifier::State From
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390
static unsigned InstrCount
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
#define DEBUG_TYPE
Hexagon Hardware Loops
IRTranslator LLVM IR MI
loops
Definition: LoopInfo.cpp:1209
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI, unsigned UseHeight, MIHeightMap &Heights, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII)
static void getPHIDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineBasicBlock *Pred, const MachineRegisterInfo *MRI)
static bool getDataDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineRegisterInfo *MRI)
static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl< DataDep > &Deps, SparseSet< LiveRegUnit > &RegUnits, const TargetRegisterInfo *TRI)
Machine Trace Metrics
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, SparseSet< LiveRegUnit > &RegUnits, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
A possibly irreducible generalization of a Loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
bool isPredecessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a predecessor of this block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:691
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
bool atEnd() const
atEnd - return true if this iterator is equal to reg_end() on the value.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of the instructions from Start to End.
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks={}, ArrayRef< const MCSchedClassDesc * > ExtraInstrs={}, ArrayRef< const MCSchedClassDesc * > RemoveInstrs={}) const
Return the resource length of the trace.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
ArrayRef< unsigned > getProcReleaseAtCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
void init(MachineFunction &Func, const MachineLoopInfo &LI)
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:293
typename DenseT::iterator iterator
Definition: SparseSet.h:175
const_iterator end() const
Definition: SparseSet.h:179
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:160
iterator find(const KeyT &Key)
find - Find an element by its key.
Definition: SparseSet.h:229
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
TargetSubtargetInfo - Generic base class for all target subtargets.
bool empty() const
Definition: Trace.h:96
bool insertEdge(std::optional< const MachineBasicBlock * > From, const MachineBasicBlock *To)
Default po_iterator_storage implementation with an internal set object.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
iterator_range< po_ext_iterator< T, SetType > > post_order_ext(const T &G, SetType &S)
iterator_range< ipo_ext_iterator< T, SetType > > inverse_post_order_ext(const T &G, SetType &S)
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
char & MachineTraceMetricsID
MachineTraceMetrics - This pass computes critical path and CPU resource usage in an ensemble of trace...
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
DWARFExpression::Operation Op
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1841
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs)
Adds registers contained in LiveRegs to the block live-in list of MBB.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
const MachineInstr * MI
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:121
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:66
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Per-basic block information that doesn't depend on the trace through the block.
bool hasResources() const
Returns true when resource information for this block has been computed.
unsigned InstrCount
The number of non-trivial instructions in the block.
bool HasCalls
True when the block contains calls.
InstrCycles represents the cycle height and depth of an instruction in a trace.
unsigned Height
Minimum number of cycles from when this instruction is issued to the end of the trace, as determined by data dependencies and instruction latencies.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
Per-basic block information that relates to a specific trace through the block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned CriticalPath
Critical path length.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block.
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().