LLVM 20.0.0git
MachineTraceMetrics.cpp
Go to the documentation of this file.
1//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "machine-trace-metrics"
36
38
40
42 "Machine Trace Metrics", false, true)
47
49 std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
50}
51
53 AU.setPreservesAll();
57}
58
60 MF = &Func;
61 const TargetSubtargetInfo &ST = MF->getSubtarget();
62 TII = ST.getInstrInfo();
63 TRI = ST.getRegisterInfo();
64 MRI = &MF->getRegInfo();
65 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
66 SchedModel.init(&ST);
67 BlockInfo.resize(MF->getNumBlockIDs());
68 ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
69 SchedModel.getNumProcResourceKinds());
70 return false;
71}
72
74 MF = nullptr;
75 BlockInfo.clear();
76 for (Ensemble *&E : Ensembles) {
77 delete E;
78 E = nullptr;
79 }
80}
81
82//===----------------------------------------------------------------------===//
83// Fixed block information
84//===----------------------------------------------------------------------===//
85//
86// The number of instructions in a basic block and the CPU resources used by
87// those instructions don't depend on any given trace strategy.
88
89/// Compute the resource usage in basic block MBB.
92 assert(MBB && "No basic block");
93 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
94 if (FBI->hasResources())
95 return FBI;
96
97 // Compute resource usage in the block.
98 FBI->HasCalls = false;
99 unsigned InstrCount = 0;
100
101 // Add up per-processor resource cycles as well.
102 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
103 SmallVector<unsigned, 32> PRCycles(PRKinds);
104
105 for (const auto &MI : *MBB) {
106 if (MI.isTransient())
107 continue;
108 ++InstrCount;
109 if (MI.isCall())
110 FBI->HasCalls = true;
111
112 // Count processor resources used.
113 if (!SchedModel.hasInstrSchedModel())
114 continue;
115 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
116 if (!SC->isValid())
117 continue;
118
120 PI = SchedModel.getWriteProcResBegin(SC),
121 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
122 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
123 PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
124 }
125 }
126 FBI->InstrCount = InstrCount;
127
128 // Scale the resource cycles so they are comparable.
129 unsigned PROffset = MBB->getNumber() * PRKinds;
130 for (unsigned K = 0; K < PRKinds; ++K)
131 ProcReleaseAtCycles[PROffset + K] =
132 PRCycles[K] * SchedModel.getResourceFactor(K);
133
134 return FBI;
135}
136
139 assert(BlockInfo[MBBNum].hasResources() &&
140 "getResources() must be called before getProcReleaseAtCycles()");
141 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
142 assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
143 return ArrayRef{ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds};
144}
145
146//===----------------------------------------------------------------------===//
147// Ensemble utility functions
148//===----------------------------------------------------------------------===//
149
151 BlockInfo.resize(MTM.BlockInfo.size());
152 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
153 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
154 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
155}
156
157// Virtual destructor serves as an anchor.
159
160const MachineLoop*
162 return MTM.Loops->getLoopFor(MBB);
163}
164
165// Update resource-related information in the TraceBlockInfo for MBB.
166// Only update resources related to the trace above MBB.
167void MachineTraceMetrics::Ensemble::
168computeDepthResources(const MachineBasicBlock *MBB) {
169 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
170 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
171 unsigned PROffset = MBB->getNumber() * PRKinds;
172
173 // Compute resources from trace above. The top block is simple.
174 if (!TBI->Pred) {
175 TBI->InstrDepth = 0;
176 TBI->Head = MBB->getNumber();
177 std::fill(ProcResourceDepths.begin() + PROffset,
178 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
179 return;
180 }
181
182 // Compute from the block above. A post-order traversal ensures the
183 // predecessor is always computed first.
184 unsigned PredNum = TBI->Pred->getNumber();
185 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
186 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
187 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
188 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
189 TBI->Head = PredTBI->Head;
190
191 // Compute per-resource depths.
192 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
193 ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
194 for (unsigned K = 0; K < PRKinds; ++K)
195 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
196}
197
198// Update resource-related information in the TraceBlockInfo for MBB.
199// Only update resources related to the trace below MBB.
200void MachineTraceMetrics::Ensemble::
201computeHeightResources(const MachineBasicBlock *MBB) {
202 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
203 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
204 unsigned PROffset = MBB->getNumber() * PRKinds;
205
206 // Compute resources for the current block.
207 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
208 ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
209
210 // The trace tail is done.
211 if (!TBI->Succ) {
212 TBI->Tail = MBB->getNumber();
213 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
214 return;
215 }
216
217 // Compute from the block below. A post-order traversal ensures the
218 // predecessor is always computed first.
219 unsigned SuccNum = TBI->Succ->getNumber();
220 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
221 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
222 TBI->InstrHeight += SuccTBI->InstrHeight;
223 TBI->Tail = SuccTBI->Tail;
224
225 // Compute per-resource heights.
226 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
227 for (unsigned K = 0; K < PRKinds; ++K)
228 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
229}
230
231// Check if depth resources for MBB are valid and return the TBI.
232// Return NULL if the resources have been invalidated.
236 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
237 return TBI->hasValidDepth() ? TBI : nullptr;
238}
239
240// Check if height resources for MBB are valid and return the TBI.
241// Return NULL if the resources have been invalidated.
245 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
246 return TBI->hasValidHeight() ? TBI : nullptr;
247}
248
249/// Get an array of processor resource depths for MBB. Indexed by processor
250/// resource kind, this array contains the scaled processor resources consumed
251/// by all blocks preceding MBB in its trace. It does not include instructions
252/// in MBB.
253///
254/// Compare TraceBlockInfo::InstrDepth.
257getProcResourceDepths(unsigned MBBNum) const {
258 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
259 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
260 return ArrayRef{ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds};
261}
262
263/// Get an array of processor resource heights for MBB. Indexed by processor
264/// resource kind, this array contains the scaled processor resources consumed
265/// by this block and all blocks following it in its trace.
266///
267/// Compare TraceBlockInfo::InstrHeight.
270getProcResourceHeights(unsigned MBBNum) const {
271 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
272 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
273 return ArrayRef{ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds};
274}
275
276//===----------------------------------------------------------------------===//
277// Trace Selection Strategies
278//===----------------------------------------------------------------------===//
279//
280// A trace selection strategy is implemented as a sub-class of Ensemble. The
281// trace through a block B is computed by two DFS traversals of the CFG
282// starting from B. One upwards, and one downwards. During the upwards DFS,
283// pickTracePred() is called on the post-ordered blocks. During the downwards
284// DFS, pickTraceSucc() is called in a post-order.
285//
286
287// We never allow traces that leave loops, but we do allow traces to enter
288// nested loops. We also never allow traces to contain back-edges.
289//
290// This means that a loop header can never appear above the center block of a
291// trace, except as the trace head. Below the center block, loop exiting edges
292// are banned.
293//
294// Return true if an edge from the From loop to the To loop is leaving a loop.
295// Either of To and From can be null.
296static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
297 return From && !From->contains(To);
298}
299
300// MinInstrCountEnsemble - Pick the trace that executes the least number of
301// instructions.
302namespace {
303
/// Trace selection strategy that greedily minimizes the total number of
/// instructions executed by the trace. Selection logic is in the
/// pickTracePred/pickTraceSucc definitions below.
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  // Prefer the predecessor that yields the smallest InstrDepth.
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  // Prefer the successor that yields the smallest InstrHeight.
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *MTM)
      : MachineTraceMetrics::Ensemble(MTM) {}
};
313
314/// Pick only the current basic block for the trace and do not choose any
315/// predecessors/successors.
316class LocalEnsemble : public MachineTraceMetrics::Ensemble {
317 const char *getName() const override { return "Local"; }
318 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
319 return nullptr;
320 };
321 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
322 return nullptr;
323 };
324
325public:
326 LocalEnsemble(MachineTraceMetrics *MTM)
327 : MachineTraceMetrics::Ensemble(MTM) {}
328};
329} // end anonymous namespace
330
331// Select the preferred predecessor for MBB.
333MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
334 if (MBB->pred_empty())
335 return nullptr;
336 const MachineLoop *CurLoop = getLoopFor(MBB);
337 // Don't leave loops, and never follow back-edges.
338 if (CurLoop && MBB == CurLoop->getHeader())
339 return nullptr;
340 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
341 const MachineBasicBlock *Best = nullptr;
342 unsigned BestDepth = 0;
343 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
345 getDepthResources(Pred);
346 // Ignore cycles that aren't natural loops.
347 if (!PredTBI)
348 continue;
349 // Pick the predecessor that would give this block the smallest InstrDepth.
350 unsigned Depth = PredTBI->InstrDepth + CurCount;
351 if (!Best || Depth < BestDepth) {
352 Best = Pred;
353 BestDepth = Depth;
354 }
355 }
356 return Best;
357}
358
359// Select the preferred successor for MBB.
361MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
362 if (MBB->succ_empty())
363 return nullptr;
364 const MachineLoop *CurLoop = getLoopFor(MBB);
365 const MachineBasicBlock *Best = nullptr;
366 unsigned BestHeight = 0;
367 for (const MachineBasicBlock *Succ : MBB->successors()) {
368 // Don't consider back-edges.
369 if (CurLoop && Succ == CurLoop->getHeader())
370 continue;
371 // Don't consider successors exiting CurLoop.
372 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
373 continue;
375 getHeightResources(Succ);
376 // Ignore cycles that aren't natural loops.
377 if (!SuccTBI)
378 continue;
379 // Pick the successor that would give this block the smallest InstrHeight.
380 unsigned Height = SuccTBI->InstrHeight;
381 if (!Best || Height < BestHeight) {
382 Best = Succ;
383 BestHeight = Height;
384 }
385 }
386 return Best;
387}
388
389// Get an Ensemble sub-class for the requested trace strategy.
393 "Invalid trace strategy enum");
394 Ensemble *&E = Ensembles[static_cast<size_t>(Strategy)];
395 if (E)
396 return E;
397
398 // Allocate new Ensemble on demand.
399 switch (Strategy) {
401 return (E = new MinInstrCountEnsemble(this));
403 return (E = new LocalEnsemble(this));
404 default: llvm_unreachable("Invalid trace strategy enum");
405 }
406}
407
409 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
410 << '\n');
411 BlockInfo[MBB->getNumber()].invalidate();
412 for (Ensemble *E : Ensembles)
413 if (E)
414 E->invalidate(MBB);
415}
416
418 if (!MF)
419 return;
420#ifndef NDEBUG
421 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
422 for (Ensemble *E : Ensembles)
423 if (E)
424 E->verify();
425#endif
426}
427
428//===----------------------------------------------------------------------===//
429// Trace building
430//===----------------------------------------------------------------------===//
431//
432// Traces are built by two CFG traversals. To avoid recomputing too much, use a
433// set abstraction that confines the search to the current loop, and doesn't
434// revisit blocks.
435
436namespace {
437
438struct LoopBounds {
441 const MachineLoopInfo *Loops;
442 bool Downward = false;
443
445 const MachineLoopInfo *Loops)
446 : Blocks(Blocks), Loops(Loops) {}
447};
448
449} // end anonymous namespace
450
451// Specialize po_iterator_storage in order to prune the post-order traversal so
452// it is limited to the current loop and doesn't traverse the loop back edges.
453namespace llvm {
454
455template<>
456class po_iterator_storage<LoopBounds, true> {
457 LoopBounds &LB;
458
459public:
460 po_iterator_storage(LoopBounds &LB) : LB(LB) {}
461
463
464 bool insertEdge(std::optional<const MachineBasicBlock *> From,
465 const MachineBasicBlock *To) {
466 // Skip already visited To blocks.
467 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
468 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
469 return false;
470 // From is null once when To is the trace center block.
471 if (From) {
472 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
473 // Don't follow backedges, don't leave FromLoop when going upwards.
474 if ((LB.Downward ? To : *From) == FromLoop->getHeader())
475 return false;
476 // Don't leave FromLoop.
477 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
478 return false;
479 }
480 }
481 // To is a new block. Mark the block as visited in case the CFG has cycles
482 // that MachineLoopInfo didn't recognize as a natural loop.
483 return LB.Visited.insert(To).second;
484 }
485};
486
487} // end namespace llvm
488
489/// Compute the trace through MBB.
490void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
491 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
492 << printMBBReference(*MBB) << '\n');
493 // Set up loop bounds for the backwards post-order traversal.
494 LoopBounds Bounds(BlockInfo, MTM.Loops);
495
496 // Run an upwards post-order search for the trace start.
497 Bounds.Downward = false;
498 Bounds.Visited.clear();
499 for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
500 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
501 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
502 // All the predecessors have been visited, pick the preferred one.
503 TBI.Pred = pickTracePred(I);
504 LLVM_DEBUG({
505 if (TBI.Pred)
506 dbgs() << printMBBReference(*TBI.Pred) << '\n';
507 else
508 dbgs() << "null\n";
509 });
510 // The trace leading to I is now known, compute the depth resources.
511 computeDepthResources(I);
512 }
513
514 // Run a downwards post-order search for the trace end.
515 Bounds.Downward = true;
516 Bounds.Visited.clear();
517 for (const auto *I : post_order_ext(MBB, Bounds)) {
518 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
519 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
520 // All the successors have been visited, pick the preferred one.
521 TBI.Succ = pickTraceSucc(I);
522 LLVM_DEBUG({
523 if (TBI.Succ)
524 dbgs() << printMBBReference(*TBI.Succ) << '\n';
525 else
526 dbgs() << "null\n";
527 });
528 // The trace leaving I is now known, compute the height resources.
529 computeHeightResources(I);
530 }
531}
532
533/// Invalidate traces through BadMBB.
534void
537 TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
538
539 // Invalidate height resources of blocks above MBB.
540 if (BadTBI.hasValidHeight()) {
541 BadTBI.invalidateHeight();
542 WorkList.push_back(BadMBB);
543 while (!WorkList.empty()) {
544 const MachineBasicBlock *MBB = WorkList.pop_back_val();
545 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
546 << getName() << " height.\n");
547 // Find any MBB predecessors that have MBB as their preferred successor.
548 // They are the only ones that need to be invalidated.
549 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
550 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
551 if (!TBI.hasValidHeight())
552 continue;
553 if (TBI.Succ == MBB) {
554 TBI.invalidateHeight();
555 WorkList.push_back(Pred);
556 continue;
557 }
558 // Verify that TBI.Succ is actually a *I successor.
559 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
560 }
561 }
562 }
563
564 // Invalidate depth resources of blocks below MBB.
565 if (BadTBI.hasValidDepth()) {
566 BadTBI.invalidateDepth();
567 WorkList.push_back(BadMBB);
568 while (!WorkList.empty()) {
569 const MachineBasicBlock *MBB = WorkList.pop_back_val();
570 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
571 << getName() << " depth.\n");
572 // Find any MBB successors that have MBB as their preferred predecessor.
573 // They are the only ones that need to be invalidated.
574 for (const MachineBasicBlock *Succ : MBB->successors()) {
575 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
576 if (!TBI.hasValidDepth())
577 continue;
578 if (TBI.Pred == MBB) {
579 TBI.invalidateDepth();
580 WorkList.push_back(Succ);
581 continue;
582 }
583 // Verify that TBI.Pred is actually a *I predecessor.
584 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
585 }
586 }
587 }
588
589 // Clear any per-instruction data. We only have to do this for BadMBB itself
590 // because the instructions in that block may change. Other blocks may be
591 // invalidated, but their instructions will stay the same, so there is no
592 // need to erase the Cycle entries. They will be overwritten when we
593 // recompute.
594 for (const auto &I : *BadMBB)
595 Cycles.erase(&I);
596}
597
599#ifndef NDEBUG
600 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
601 "Outdated BlockInfo size");
602 for (unsigned Num = 0; Num < BlockInfo.size(); ++Num) {
603 const TraceBlockInfo &TBI = BlockInfo[Num];
604 if (TBI.hasValidDepth() && TBI.Pred) {
605 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
606 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
607 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
608 "Trace is broken, depth should have been invalidated.");
609 const MachineLoop *Loop = getLoopFor(MBB);
610 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
611 }
612 if (TBI.hasValidHeight() && TBI.Succ) {
613 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
614 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
615 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
616 "Trace is broken, height should have been invalidated.");
617 const MachineLoop *Loop = getLoopFor(MBB);
618 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
619 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
620 "Trace contains backedge");
621 }
622 }
623#endif
624}
625
626//===----------------------------------------------------------------------===//
627// Data Dependencies
628//===----------------------------------------------------------------------===//
629//
630// Compute the depth and height of each instruction based on data dependencies
631// and instruction latencies. These cycle numbers assume that the CPU can issue
632// an infinite number of instructions per cycle as long as their dependencies
633// are ready.
634
635// A data dependency is represented as a defining MI and operand numbers on the
636// defining and using MI.
637namespace {
638
/// A data dependency: the value defined by operand DefOp of DefMI is read by
/// operand UseOp of some using instruction.
struct DataDep {
  const MachineInstr *DefMI; // Defining instruction.
  unsigned DefOp;            // Operand number of the def on DefMI.
  unsigned UseOp;            // Operand number of the use on the user.

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
      : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  /// In SSA form the register has exactly one def, which is found via MRI.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
      : UseOp(UseOp) {
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};
658
659} // end anonymous namespace
660
661// Get the input data dependencies that must be ready before UseMI can issue.
662// Return true if UseMI has any physreg operands.
663static bool getDataDeps(const MachineInstr &UseMI,
665 const MachineRegisterInfo *MRI) {
666 // Debug values should not be included in any calculations.
667 if (UseMI.isDebugInstr())
668 return false;
669
670 bool HasPhysRegs = false;
671 for (const MachineOperand &MO : UseMI.operands()) {
672 if (!MO.isReg())
673 continue;
674 Register Reg = MO.getReg();
675 if (!Reg)
676 continue;
677 if (Reg.isPhysical()) {
678 HasPhysRegs = true;
679 continue;
680 }
681 // Collect virtual register reads.
682 if (MO.readsReg())
683 Deps.emplace_back(MRI, Reg, MO.getOperandNo());
684 }
685 return HasPhysRegs;
686}
687
688// Get the input data dependencies of a PHI instruction, using Pred as the
689// preferred predecessor.
690// This will add at most one dependency to Deps.
691static void getPHIDeps(const MachineInstr &UseMI,
693 const MachineBasicBlock *Pred,
694 const MachineRegisterInfo *MRI) {
695 // No predecessor at the beginning of a trace. Ignore dependencies.
696 if (!Pred)
697 return;
698 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
699 for (unsigned Idx = 1; Idx < UseMI.getNumOperands(); Idx += 2) {
700 if (UseMI.getOperand(Idx + 1).getMBB() == Pred) {
701 Register Reg = UseMI.getOperand(Idx).getReg();
702 Deps.emplace_back(MRI, Reg, Idx);
703 return;
704 }
705 }
706}
707
708// Identify physreg dependencies for UseMI, and update the live regunit
709// tracking set when scanning instructions downwards.
712 SparseSet<LiveRegUnit> &RegUnits,
713 const TargetRegisterInfo *TRI) {
715 SmallVector<unsigned, 8> LiveDefOps;
716
717 for (const MachineOperand &MO : UseMI->operands()) {
718 if (!MO.isReg() || !MO.getReg().isPhysical())
719 continue;
720 MCRegister Reg = MO.getReg().asMCReg();
721 // Track live defs and kills for updating RegUnits.
722 if (MO.isDef()) {
723 if (MO.isDead())
724 Kills.push_back(Reg);
725 else
726 LiveDefOps.push_back(MO.getOperandNo());
727 } else if (MO.isKill())
728 Kills.push_back(Reg);
729 // Identify dependencies.
730 if (!MO.readsReg())
731 continue;
732 for (MCRegUnit Unit : TRI->regunits(Reg)) {
733 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
734 if (I == RegUnits.end())
735 continue;
736 Deps.emplace_back(I->MI, I->Op, MO.getOperandNo());
737 break;
738 }
739 }
740
741 // Update RegUnits to reflect live registers after UseMI.
742 // First kills.
743 for (MCRegister Kill : Kills)
744 for (MCRegUnit Unit : TRI->regunits(Kill))
745 RegUnits.erase(Unit);
746
747 // Second, live defs.
748 for (unsigned DefOp : LiveDefOps) {
749 for (MCRegUnit Unit :
750 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
751 LiveRegUnit &LRU = RegUnits[Unit];
752 LRU.MI = UseMI;
753 LRU.Op = DefOp;
754 }
755 }
756}
757
758/// The length of the critical path through a trace is the maximum of two path
759/// lengths:
760///
761/// 1. The maximum height+depth over all instructions in the trace center block.
762///
763/// 2. The longest cross-block dependency chain. For small blocks, it is
764/// possible that the critical path through the trace doesn't include any
765/// instructions in the block.
766///
767/// This function computes the second number from the live-in list of the
768/// center block.
769unsigned MachineTraceMetrics::Ensemble::
770computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
771 assert(TBI.HasValidInstrDepths && "Missing depth info");
772 assert(TBI.HasValidInstrHeights && "Missing height info");
773 unsigned MaxLen = 0;
774 for (const LiveInReg &LIR : TBI.LiveIns) {
775 if (!LIR.Reg.isVirtual())
776 continue;
777 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
778 // Ignore dependencies outside the current trace.
779 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
780 if (!DefTBI.isUsefulDominator(TBI))
781 continue;
782 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
783 MaxLen = std::max(MaxLen, Len);
784 }
785 return MaxLen;
786}
787
790 SparseSet<LiveRegUnit> &RegUnits) {
792 // Collect all data dependencies.
793 if (UseMI.isPHI())
794 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
795 else if (getDataDeps(UseMI, Deps, MTM.MRI))
796 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
797
798 // Filter and process dependencies, computing the earliest issue cycle.
799 unsigned Cycle = 0;
800 for (const DataDep &Dep : Deps) {
801 const TraceBlockInfo&DepTBI =
802 BlockInfo[Dep.DefMI->getParent()->getNumber()];
803 // Ignore dependencies from outside the current trace.
804 if (!DepTBI.isUsefulDominator(TBI))
805 continue;
806 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
807 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
808 // Add latency if DefMI is a real instruction. Transients get latency 0.
809 if (!Dep.DefMI->isTransient())
810 DepCycle += MTM.SchedModel
811 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
812 Cycle = std::max(Cycle, DepCycle);
813 }
814 // Remember the instruction depth.
815 InstrCycles &MICycles = Cycles[&UseMI];
816 MICycles.Depth = Cycle;
817
818 if (TBI.HasValidInstrHeights) {
819 // Update critical path length.
820 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
821 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
822 } else {
823 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
824 }
825}
826
829 SparseSet<LiveRegUnit> &RegUnits) {
830 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
831}
832
836 SparseSet<LiveRegUnit> &RegUnits) {
837 for (; Start != End; Start++)
838 updateDepth(Start->getParent(), *Start, RegUnits);
839}
840
841/// Compute instruction depths for all instructions above or in MBB in its
842/// trace. This assumes that the trace through MBB has already been computed.
843void MachineTraceMetrics::Ensemble::
844computeInstrDepths(const MachineBasicBlock *MBB) {
845 // The top of the trace may already be computed, and HasValidInstrDepths
846 // implies Head->HasValidInstrDepths, so we only need to start from the first
847 // block in the trace that needs to be recomputed.
849 while (MBB) {
850 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
851 assert(TBI.hasValidDepth() && "Incomplete trace");
852 if (TBI.HasValidInstrDepths)
853 break;
854 Stack.push_back(MBB);
855 MBB = TBI.Pred;
856 }
857
858 // FIXME: If MBB is non-null at this point, it is the last pre-computed block
859 // in the trace. We should track any live-out physregs that were defined in
860 // the trace. This is quite rare in SSA form, typically created by CSE
861 // hoisting a compare.
862 SparseSet<LiveRegUnit> RegUnits;
863 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
864
865 // Go through trace blocks in top-down order, stopping after the center block.
866 while (!Stack.empty()) {
867 MBB = Stack.pop_back_val();
868 LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
869 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
870 TBI.HasValidInstrDepths = true;
871 TBI.CriticalPath = 0;
872
873 // Print out resource depths here as well.
874 LLVM_DEBUG({
875 dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
876 ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
877 for (unsigned K = 0; K < PRDepths.size(); ++K)
878 if (PRDepths[K]) {
879 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
880 dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
881 << MTM.SchedModel.getProcResource(K)->Name << " ("
882 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
883 }
884 });
885
886 // Also compute the critical path length through MBB when possible.
887 if (TBI.HasValidInstrHeights)
888 TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
889
890 for (const auto &UseMI : *MBB) {
891 updateDepth(TBI, UseMI, RegUnits);
892 }
893 }
894}
895
896// Identify physreg dependencies for MI when scanning instructions upwards.
897// Return the issue height of MI after considering any live regunits.
898// Height is the issue height computed from virtual register dependencies alone.
899static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
900 SparseSet<LiveRegUnit> &RegUnits,
901 const TargetSchedModel &SchedModel,
902 const TargetInstrInfo *TII,
903 const TargetRegisterInfo *TRI) {
905
906 for (const MachineOperand &MO : MI.operands()) {
907 if (!MO.isReg())
908 continue;
909 Register Reg = MO.getReg();
910 if (!Reg.isPhysical())
911 continue;
912 if (MO.readsReg())
913 ReadOps.push_back(MO.getOperandNo());
914 if (!MO.isDef())
915 continue;
916 // This is a def of Reg. Remove corresponding entries from RegUnits, and
917 // update MI Height to consider the physreg dependencies.
918 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
919 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
920 if (I == RegUnits.end())
921 continue;
922 unsigned DepHeight = I->Cycle;
923 if (!MI.isTransient()) {
924 // We may not know the UseMI of this dependency, if it came from the
925 // live-in list. SchedModel can handle a NULL UseMI.
926 DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
927 I->MI, I->Op);
928 }
929 Height = std::max(Height, DepHeight);
930 // This regunit is dead above MI.
931 RegUnits.erase(I);
932 }
933 }
934
935 // Now we know the height of MI. Update any regunits read.
936 for (unsigned Op : ReadOps) {
937 MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
938 for (MCRegUnit Unit : TRI->regunits(Reg)) {
939 LiveRegUnit &LRU = RegUnits[Unit];
940 // Set the height to the highest reader of the unit.
941 if (LRU.Cycle <= Height && LRU.MI != &MI) {
942 LRU.Cycle = Height;
943 LRU.MI = &MI;
944 LRU.Op = Op;
945 }
946 }
947 }
948
949 return Height;
950}
951
953
954// Push the height of DefMI upwards if required to match UseMI.
955// Return true if this is the first time DefMI was seen.
956static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
957 unsigned UseHeight, MIHeightMap &Heights,
958 const TargetSchedModel &SchedModel,
959 const TargetInstrInfo *TII) {
960 // Adjust height by Dep.DefMI latency.
961 if (!Dep.DefMI->isTransient())
962 UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
963 Dep.UseOp);
964
965 // Update Heights[DefMI] to be the maximum height seen.
966 const auto &[I, Inserted] = Heights.insert({Dep.DefMI, UseHeight});
967 if (Inserted)
968 return true;
969
970 // DefMI has been pushed before. Give it the max height.
971 if (I->second < UseHeight)
972 I->second = UseHeight;
973 return false;
974}
975
976/// Assuming that the virtual register defined by DefMI:DefOp was used by
977/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
978/// when reaching the block that contains DefMI.
979void MachineTraceMetrics::Ensemble::
980addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
982 assert(!Trace.empty() && "Trace should contain at least one block");
983 Register Reg = DefMI->getOperand(DefOp).getReg();
984 assert(Reg.isVirtual());
985 const MachineBasicBlock *DefMBB = DefMI->getParent();
986
987 // Reg is live-in to all blocks in Trace that follow DefMBB.
988 for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
989 if (MBB == DefMBB)
990 return;
991 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
992 // Just add the register. The height will be updated later.
993 TBI.LiveIns.push_back(Reg);
994 }
995}
996
997/// Compute instruction heights in the trace through MBB. This updates MBB and
998/// the blocks below it in the trace. It is assumed that the trace has already
999/// been computed.
1000void MachineTraceMetrics::Ensemble::
1001computeInstrHeights(const MachineBasicBlock *MBB) {
1002 // The bottom of the trace may already be computed.
1003 // Find the blocks that need updating.
1005 while (MBB) {
1006 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1007 assert(TBI.hasValidHeight() && "Incomplete trace");
1008 if (TBI.HasValidInstrHeights)
1009 break;
1010 Stack.push_back(MBB);
1011 TBI.LiveIns.clear();
1012 MBB = TBI.Succ;
1013 }
1014
1015 // As we move upwards in the trace, keep track of instructions that are
1016 // required by deeper trace instructions. Map MI -> height required so far.
1017 MIHeightMap Heights;
1018
1019 // For physregs, the def isn't known when we see the use.
1020 // Instead, keep track of the highest use of each regunit.
1021 SparseSet<LiveRegUnit> RegUnits;
1022 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1023
1024 // If the bottom of the trace was already precomputed, initialize heights
1025 // from its live-in list.
1026 // MBB is the highest precomputed block in the trace.
1027 if (MBB) {
1028 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1029 for (LiveInReg &LI : TBI.LiveIns) {
1030 if (LI.Reg.isVirtual()) {
1031 // For virtual registers, the def latency is included.
1032 unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
1033 if (Height < LI.Height)
1034 Height = LI.Height;
1035 } else {
1036 // For register units, the def latency is not included because we don't
1037 // know the def yet.
1038 RegUnits[LI.Reg].Cycle = LI.Height;
1039 }
1040 }
1041 }
1042
1043 // Go through the trace blocks in bottom-up order.
1045 for (;!Stack.empty(); Stack.pop_back()) {
1046 MBB = Stack.back();
1047 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
1048 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1049 TBI.HasValidInstrHeights = true;
1050 TBI.CriticalPath = 0;
1051
1052 LLVM_DEBUG({
1053 dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1054 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1055 for (unsigned K = 0; K < PRHeights.size(); ++K)
1056 if (PRHeights[K]) {
1057 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1058 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1059 << MTM.SchedModel.getProcResource(K)->Name << " ("
1060 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1061 }
1062 });
1063
1064 // Get dependencies from PHIs in the trace successor.
1065 const MachineBasicBlock *Succ = TBI.Succ;
1066 // If MBB is the last block in the trace, and it has a back-edge to the
1067 // loop header, get loop-carried dependencies from PHIs in the header. For
1068 // that purpose, pretend that all the loop header PHIs have height 0.
1069 if (!Succ)
1070 if (const MachineLoop *Loop = getLoopFor(MBB))
1071 if (MBB->isSuccessor(Loop->getHeader()))
1072 Succ = Loop->getHeader();
1073
1074 if (Succ) {
1075 for (const auto &PHI : *Succ) {
1076 if (!PHI.isPHI())
1077 break;
1078 Deps.clear();
1079 getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1080 if (!Deps.empty()) {
1081 // Loop header PHI heights are all 0.
1082 unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1083 LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1084 if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1085 MTM.TII))
1086 addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1087 }
1088 }
1089 }
1090
1091 // Go through the block backwards.
1092 for (const MachineInstr &MI : reverse(*MBB)) {
1093 // Find the MI height as determined by virtual register uses in the
1094 // trace below.
1095 unsigned Cycle = 0;
1096 MIHeightMap::iterator HeightI = Heights.find(&MI);
1097 if (HeightI != Heights.end()) {
1098 Cycle = HeightI->second;
1099 // We won't be seeing any more MI uses.
1100 Heights.erase(HeightI);
1101 }
1102
1103 // Don't process PHI deps. They depend on the specific predecessor, and
1104 // we'll get them when visiting the predecessor.
1105 Deps.clear();
1106 bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1107
1108 // There may also be regunit dependencies to include in the height.
1109 if (HasPhysRegs)
1110 Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1111 MTM.TII, MTM.TRI);
1112
1113 // Update the required height of any virtual registers read by MI.
1114 for (const DataDep &Dep : Deps)
1115 if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1116 addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1117
1118 InstrCycles &MICycles = Cycles[&MI];
1119 MICycles.Height = Cycle;
1120 if (!TBI.HasValidInstrDepths) {
1121 LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
1122 continue;
1123 }
1124 // Update critical path length.
1125 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1126 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1127 }
1128
1129 // Update virtual live-in heights. They were added by addLiveIns() with a 0
1130 // height because the final height isn't known until now.
1131 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
1132 for (LiveInReg &LIR : TBI.LiveIns) {
1133 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
1134 LIR.Height = Heights.lookup(DefMI);
1135 LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
1136 }
1137
1138 // Transfer the live regunits to the live-in list.
1139 for (const LiveRegUnit &RU : RegUnits) {
1140 TBI.LiveIns.emplace_back(RU.RegUnit, RU.Cycle);
1141 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
1142 << RU.Cycle);
1143 }
1144 LLVM_DEBUG(dbgs() << '\n');
1145
1146 if (!TBI.HasValidInstrDepths)
1147 continue;
1148 // Add live-ins to the critical path length.
1149 TBI.CriticalPath = std::max(TBI.CriticalPath,
1150 computeCrossBlockCriticalPath(TBI));
1151 LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1152 }
1153}
1154
1157 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1158
1159 if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1160 computeTrace(MBB);
1161 if (!TBI.HasValidInstrDepths)
1162 computeInstrDepths(MBB);
1163 if (!TBI.HasValidInstrHeights)
1164 computeInstrHeights(MBB);
1165
1166 return Trace(*this, TBI);
1167}
1168
1169unsigned
1171 assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1172 "MI must be in the trace center block");
1173 InstrCycles Cyc = getInstrCycles(MI);
1174 return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1175}
1176
1177unsigned
1179 const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1181 getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1182 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1183 DataDep &Dep = Deps.front();
1184 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1185 // Add latency if DefMI is a real instruction. Transients get latency 0.
1186 if (!Dep.DefMI->isTransient())
1187 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1188 &PHI, Dep.UseOp);
1189 return DepCycle;
1190}
1191
1192/// When bottom is set include instructions in current block in estimate.
1194 // Find the limiting processor resource.
1195 // Numbers have been pre-scaled to be comparable.
1196 unsigned PRMax = 0;
1197 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1198 if (Bottom) {
1199 ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
1200 for (unsigned K = 0; K < PRDepths.size(); ++K)
1201 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1202 } else {
1203 for (unsigned PRD : PRDepths)
1204 PRMax = std::max(PRMax, PRD);
1205 }
1206 // Convert to cycle count.
1207 PRMax = TE.MTM.getCycles(PRMax);
1208
1209 /// All instructions before current block
1210 unsigned Instrs = TBI.InstrDepth;
1211 // plus instructions in current block
1212 if (Bottom)
1213 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1214 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1215 Instrs /= IW;
1216 // Assume issue width 1 without a schedule model.
1217 return std::max(Instrs, PRMax);
1218}
1219
1223 ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1224 // Add up resources above and below the center block.
1225 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1226 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1227 unsigned PRMax = 0;
1228
1229 // Capture computing cycles from extra instructions
1230 auto ExtraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1231 unsigned ResourceIdx) -> unsigned {
1232 unsigned Cycles = 0;
1233 for (const MCSchedClassDesc *SC : Instrs) {
1234 if (!SC->isValid())
1235 continue;
1237 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1238 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1239 PI != PE; ++PI) {
1240 if (PI->ProcResourceIdx != ResourceIdx)
1241 continue;
1242 Cycles += (PI->ReleaseAtCycle *
1243 TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1244 }
1245 }
1246 return Cycles;
1247 };
1248
1249 for (unsigned K = 0; K < PRDepths.size(); ++K) {
1250 unsigned PRCycles = PRDepths[K] + PRHeights[K];
1251 for (const MachineBasicBlock *MBB : Extrablocks)
1252 PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
1253 PRCycles += ExtraCycles(ExtraInstrs, K);
1254 PRCycles -= ExtraCycles(RemoveInstrs, K);
1255 PRMax = std::max(PRMax, PRCycles);
1256 }
1257 // Convert to cycle count.
1258 PRMax = TE.MTM.getCycles(PRMax);
1259
1260 // Instrs: #instructions in current trace outside current block.
1261 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1262 // Add instruction count from the extra blocks.
1263 for (const MachineBasicBlock *MBB : Extrablocks)
1264 Instrs += TE.MTM.getResources(MBB)->InstrCount;
1265 Instrs += ExtraInstrs.size();
1266 Instrs -= RemoveInstrs.size();
1267 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1268 Instrs /= IW;
1269 // Assume issue width 1 without a schedule model.
1270 return std::max(Instrs, PRMax);
1271}
1272
1274 const MachineInstr &UseMI) const {
1275 if (DefMI.getParent() == UseMI.getParent())
1276 return true;
1277
1278 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1279 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1280
1281 return DepTBI.isUsefulDominator(TBI);
1282}
1283
1285 OS << getName() << " ensemble:\n";
1286 for (unsigned Idx = 0; Idx < BlockInfo.size(); ++Idx) {
1287 OS << " %bb." << Idx << '\t';
1288 BlockInfo[Idx].print(OS);
1289 OS << '\n';
1290 }
1291}
1292
1294 if (hasValidDepth()) {
1295 OS << "depth=" << InstrDepth;
1296 if (Pred)
1297 OS << " pred=" << printMBBReference(*Pred);
1298 else
1299 OS << " pred=null";
1300 OS << " head=%bb." << Head;
1301 if (HasValidInstrDepths)
1302 OS << " +instrs";
1303 } else
1304 OS << "depth invalid";
1305 OS << ", ";
1306 if (hasValidHeight()) {
1307 OS << "height=" << InstrHeight;
1308 if (Succ)
1309 OS << " succ=" << printMBBReference(*Succ);
1310 else
1311 OS << " succ=null";
1312 OS << " tail=%bb." << Tail;
1313 if (HasValidInstrHeights)
1314 OS << " +instrs";
1315 } else
1316 OS << "height invalid";
1317 if (HasValidInstrDepths && HasValidInstrHeights)
1318 OS << ", crit=" << CriticalPath;
1319}
1320
1322 unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1323
1324 OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1325 << " --> %bb." << TBI.Tail << ':';
1326 if (TBI.hasValidHeight() && TBI.hasValidDepth())
1327 OS << ' ' << getInstrCount() << " instrs.";
1328 if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1329 OS << ' ' << TBI.CriticalPath << " cycles.";
1330
1332 OS << "\n%bb." << MBBNum;
1333 while (Block->hasValidDepth() && Block->Pred) {
1334 unsigned Num = Block->Pred->getNumber();
1335 OS << " <- " << printMBBReference(*Block->Pred);
1336 Block = &TE.BlockInfo[Num];
1337 }
1338
1339 Block = &TBI;
1340 OS << "\n ";
1341 while (Block->hasValidHeight() && Block->Succ) {
1342 unsigned Num = Block->Succ->getNumber();
1343 OS << " -> " << printMBBReference(*Block->Succ);
1344 Block = &TE.BlockInfo[Num];
1345 }
1346 OS << '\n';
1347}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
basic Basic Alias true
BlockVerifier::State From
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:371
static unsigned InstrCount
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Hardware Loops
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI, unsigned UseHeight, MIHeightMap &Heights, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII)
static void getPHIDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineBasicBlock *Pred, const MachineRegisterInfo *MRI)
static bool getDataDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineRegisterInfo *MRI)
static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl< DataDep > &Deps, SparseSet< LiveRegUnit > &RegUnits, const TargetRegisterInfo *TRI)
Machine Trace Metrics
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, SparseSet< LiveRegUnit > &RegUnits, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To)
#define DEBUG_TYPE
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
const T * data() const
Definition: ArrayRef.h:162
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:345
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
A possibly irreducible generalization of a Loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
bool isPredecessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a predecessor of this block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
MachineOperand class - Representation of each machine instruction operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
bool atEnd() const
atEnd - return true if this iterator is equal to reg_end() on the value.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of the instructions from Start to End.
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=std::nullopt, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=std::nullopt, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=std::nullopt) const
Return the resource length of the trace.
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by a analysis pass to check state of analysis infor...
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
ArrayRef< unsigned > getProcReleaseAtCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:289
typename DenseT::iterator iterator
Definition: SparseSet.h:171
const_iterator end() const
Definition: SparseSet.h:175
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
iterator find(const KeyT &Key)
find - Find an element by its key.
Definition: SparseSet.h:225
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
TargetSubtargetInfo - Generic base class for all target subtargets.
bool empty() const
Definition: Trace.h:96
bool insertEdge(std::optional< const MachineBasicBlock * > From, const MachineBasicBlock *To)
Default po_iterator_storage implementation with an internal set object.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
iterator_range< po_ext_iterator< T, SetType > > post_order_ext(const T &G, SetType &S)
iterator_range< ipo_ext_iterator< T, SetType > > inverse_post_order_ext(const T &G, SetType &S)
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
char & MachineTraceMetricsID
MachineTraceMetrics - This pass computes critical path and CPU resource usage in an ensemble of trace...
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
DWARFExpression::Operation Op
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs)
Adds registers contained in LiveRegs to the block live-in list of MBB.
const MachineInstr * MI
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
Per-basic block information that doesn't depend on the trace through the block.
bool hasResources() const
Returns true when resource information for this block has been computed.
unsigned InstrCount
The number of non-trivial instructions in the block.
bool HasCalls
True when the block contains calls.
InstrCycles represents the cycle height and depth of an instruction in a trace.
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
Per-basic block information that relates to a specific trace through the block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned CriticalPath
Critical path length.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block.
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().