LLVM 20.0.0git
MachineTraceMetrics.cpp
Go to the documentation of this file.
1//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/SparseSet.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/Debug.h"
31#include "llvm/Support/Format.h"
33#include <algorithm>
34#include <cassert>
35#include <iterator>
36#include <tuple>
37#include <utility>
38
39using namespace llvm;
40
41#define DEBUG_TYPE "machine-trace-metrics"
42
44
46
48 "Machine Trace Metrics", false, true)
53
55 std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
56}
57
59 AU.setPreservesAll();
63}
64
66 MF = &Func;
67 const TargetSubtargetInfo &ST = MF->getSubtarget();
68 TII = ST.getInstrInfo();
69 TRI = ST.getRegisterInfo();
70 MRI = &MF->getRegInfo();
71 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
72 SchedModel.init(&ST);
73 BlockInfo.resize(MF->getNumBlockIDs());
74 ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
75 SchedModel.getNumProcResourceKinds());
76 return false;
77}
78
80 MF = nullptr;
81 BlockInfo.clear();
82 for (Ensemble *&E : Ensembles) {
83 delete E;
84 E = nullptr;
85 }
86}
87
88//===----------------------------------------------------------------------===//
89// Fixed block information
90//===----------------------------------------------------------------------===//
91//
92// The number of instructions in a basic block and the CPU resources used by
93// those instructions don't depend on any given trace strategy.
94
95/// Compute the resource usage in basic block MBB.
98 assert(MBB && "No basic block");
99 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
100 if (FBI->hasResources())
101 return FBI;
102
103 // Compute resource usage in the block.
104 FBI->HasCalls = false;
105 unsigned InstrCount = 0;
106
107 // Add up per-processor resource cycles as well.
108 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
109 SmallVector<unsigned, 32> PRCycles(PRKinds);
110
111 for (const auto &MI : *MBB) {
112 if (MI.isTransient())
113 continue;
114 ++InstrCount;
115 if (MI.isCall())
116 FBI->HasCalls = true;
117
118 // Count processor resources used.
119 if (!SchedModel.hasInstrSchedModel())
120 continue;
121 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
122 if (!SC->isValid())
123 continue;
124
126 PI = SchedModel.getWriteProcResBegin(SC),
127 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
128 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
129 PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
130 }
131 }
132 FBI->InstrCount = InstrCount;
133
134 // Scale the resource cycles so they are comparable.
135 unsigned PROffset = MBB->getNumber() * PRKinds;
136 for (unsigned K = 0; K != PRKinds; ++K)
137 ProcReleaseAtCycles[PROffset + K] =
138 PRCycles[K] * SchedModel.getResourceFactor(K);
139
140 return FBI;
141}
142
145 assert(BlockInfo[MBBNum].hasResources() &&
146 "getResources() must be called before getProcReleaseAtCycles()");
147 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
148 assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
149 return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
150}
151
152//===----------------------------------------------------------------------===//
153// Ensemble utility functions
154//===----------------------------------------------------------------------===//
155
157 : MTM(*ct) {
158 BlockInfo.resize(MTM.BlockInfo.size());
159 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
160 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
161 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
162}
163
164// Virtual destructor serves as an anchor.
166
167const MachineLoop*
169 return MTM.Loops->getLoopFor(MBB);
170}
171
172// Update resource-related information in the TraceBlockInfo for MBB.
173// Only update resources related to the trace above MBB.
174void MachineTraceMetrics::Ensemble::
175computeDepthResources(const MachineBasicBlock *MBB) {
176 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
177 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
178 unsigned PROffset = MBB->getNumber() * PRKinds;
179
180 // Compute resources from trace above. The top block is simple.
181 if (!TBI->Pred) {
182 TBI->InstrDepth = 0;
183 TBI->Head = MBB->getNumber();
184 std::fill(ProcResourceDepths.begin() + PROffset,
185 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
186 return;
187 }
188
189 // Compute from the block above. A post-order traversal ensures the
190 // predecessor is always computed first.
191 unsigned PredNum = TBI->Pred->getNumber();
192 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
193 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
194 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
195 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
196 TBI->Head = PredTBI->Head;
197
198 // Compute per-resource depths.
199 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
200 ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
201 for (unsigned K = 0; K != PRKinds; ++K)
202 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
203}
204
205// Update resource-related information in the TraceBlockInfo for MBB.
206// Only update resources related to the trace below MBB.
207void MachineTraceMetrics::Ensemble::
208computeHeightResources(const MachineBasicBlock *MBB) {
209 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
210 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
211 unsigned PROffset = MBB->getNumber() * PRKinds;
212
213 // Compute resources for the current block.
214 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
215 ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
216
217 // The trace tail is done.
218 if (!TBI->Succ) {
219 TBI->Tail = MBB->getNumber();
220 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
221 return;
222 }
223
224 // Compute from the block below. A post-order traversal ensures the
225 // predecessor is always computed first.
226 unsigned SuccNum = TBI->Succ->getNumber();
227 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
228 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
229 TBI->InstrHeight += SuccTBI->InstrHeight;
230 TBI->Tail = SuccTBI->Tail;
231
232 // Compute per-resource heights.
233 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
234 for (unsigned K = 0; K != PRKinds; ++K)
235 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
236}
237
238// Check if depth resources for MBB are valid and return the TBI.
239// Return NULL if the resources have been invalidated.
243 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
244 return TBI->hasValidDepth() ? TBI : nullptr;
245}
246
247// Check if height resources for MBB are valid and return the TBI.
248// Return NULL if the resources have been invalidated.
252 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
253 return TBI->hasValidHeight() ? TBI : nullptr;
254}
255
256/// Get an array of processor resource depths for MBB. Indexed by processor
257/// resource kind, this array contains the scaled processor resources consumed
258/// by all blocks preceding MBB in its trace. It does not include instructions
259/// in MBB.
260///
261/// Compare TraceBlockInfo::InstrDepth.
264getProcResourceDepths(unsigned MBBNum) const {
265 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
266 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
267 return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
268}
269
270/// Get an array of processor resource heights for MBB. Indexed by processor
271/// resource kind, this array contains the scaled processor resources consumed
272/// by this block and all blocks following it in its trace.
273///
274/// Compare TraceBlockInfo::InstrHeight.
277getProcResourceHeights(unsigned MBBNum) const {
278 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
279 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
280 return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
281}
282
283//===----------------------------------------------------------------------===//
284// Trace Selection Strategies
285//===----------------------------------------------------------------------===//
286//
287// A trace selection strategy is implemented as a sub-class of Ensemble. The
288// trace through a block B is computed by two DFS traversals of the CFG
289// starting from B. One upwards, and one downwards. During the upwards DFS,
290// pickTracePred() is called on the post-ordered blocks. During the downwards
291// DFS, pickTraceSucc() is called in a post-order.
292//
293
294// We never allow traces that leave loops, but we do allow traces to enter
295// nested loops. We also never allow traces to contain back-edges.
296//
297// This means that a loop header can never appear above the center block of a
298// trace, except as the trace head. Below the center block, loop exiting edges
299// are banned.
300//
301// Return true if an edge from the From loop to the To loop is leaving a loop.
302// Either of To and From can be null.
303static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
304 return From && !From->contains(To);
305}
306
307// MinInstrCountEnsemble - Pick the trace that executes the least number of
308// instructions.
309namespace {
310
311class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
312 const char *getName() const override { return "MinInstr"; }
313 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
314 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
315
316public:
317 MinInstrCountEnsemble(MachineTraceMetrics *mtm)
318 : MachineTraceMetrics::Ensemble(mtm) {}
319};
320
321/// Pick only the current basic block for the trace and do not choose any
322/// predecessors/successors.
323class LocalEnsemble : public MachineTraceMetrics::Ensemble {
324 const char *getName() const override { return "Local"; }
325 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
326 return nullptr;
327 };
328 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
329 return nullptr;
330 };
331
332public:
333 LocalEnsemble(MachineTraceMetrics *MTM)
334 : MachineTraceMetrics::Ensemble(MTM) {}
335};
336} // end anonymous namespace
337
338// Select the preferred predecessor for MBB.
340MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
341 if (MBB->pred_empty())
342 return nullptr;
343 const MachineLoop *CurLoop = getLoopFor(MBB);
344 // Don't leave loops, and never follow back-edges.
345 if (CurLoop && MBB == CurLoop->getHeader())
346 return nullptr;
347 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
348 const MachineBasicBlock *Best = nullptr;
349 unsigned BestDepth = 0;
350 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
352 getDepthResources(Pred);
353 // Ignore cycles that aren't natural loops.
354 if (!PredTBI)
355 continue;
356 // Pick the predecessor that would give this block the smallest InstrDepth.
357 unsigned Depth = PredTBI->InstrDepth + CurCount;
358 if (!Best || Depth < BestDepth) {
359 Best = Pred;
360 BestDepth = Depth;
361 }
362 }
363 return Best;
364}
365
366// Select the preferred successor for MBB.
368MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
369 if (MBB->succ_empty())
370 return nullptr;
371 const MachineLoop *CurLoop = getLoopFor(MBB);
372 const MachineBasicBlock *Best = nullptr;
373 unsigned BestHeight = 0;
374 for (const MachineBasicBlock *Succ : MBB->successors()) {
375 // Don't consider back-edges.
376 if (CurLoop && Succ == CurLoop->getHeader())
377 continue;
378 // Don't consider successors exiting CurLoop.
379 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
380 continue;
382 getHeightResources(Succ);
383 // Ignore cycles that aren't natural loops.
384 if (!SuccTBI)
385 continue;
386 // Pick the successor that would give this block the smallest InstrHeight.
387 unsigned Height = SuccTBI->InstrHeight;
388 if (!Best || Height < BestHeight) {
389 Best = Succ;
390 BestHeight = Height;
391 }
392 }
393 return Best;
394}
395
396// Get an Ensemble sub-class for the requested trace strategy.
400 "Invalid trace strategy enum");
401 Ensemble *&E = Ensembles[static_cast<size_t>(strategy)];
402 if (E)
403 return E;
404
405 // Allocate new Ensemble on demand.
406 switch (strategy) {
408 return (E = new MinInstrCountEnsemble(this));
410 return (E = new LocalEnsemble(this));
411 default: llvm_unreachable("Invalid trace strategy enum");
412 }
413}
414
416 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
417 << '\n');
418 BlockInfo[MBB->getNumber()].invalidate();
419 for (Ensemble *E : Ensembles)
420 if (E)
421 E->invalidate(MBB);
422}
423
425 if (!MF)
426 return;
427#ifndef NDEBUG
428 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
429 for (Ensemble *E : Ensembles)
430 if (E)
431 E->verify();
432#endif
433}
434
435//===----------------------------------------------------------------------===//
436// Trace building
437//===----------------------------------------------------------------------===//
438//
439// Traces are built by two CFG traversals. To avoid recomputing too much, use a
440// set abstraction that confines the search to the current loop, and doesn't
441// revisit blocks.
442
443namespace {
444
445struct LoopBounds {
448 const MachineLoopInfo *Loops;
449 bool Downward = false;
450
453};
454
455} // end anonymous namespace
456
457// Specialize po_iterator_storage in order to prune the post-order traversal so
458// it is limited to the current loop and doesn't traverse the loop back edges.
459namespace llvm {
460
461template<>
462class po_iterator_storage<LoopBounds, true> {
463 LoopBounds &LB;
464
465public:
466 po_iterator_storage(LoopBounds &lb) : LB(lb) {}
467
469
470 bool insertEdge(std::optional<const MachineBasicBlock *> From,
471 const MachineBasicBlock *To) {
472 // Skip already visited To blocks.
473 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
474 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
475 return false;
476 // From is null once when To is the trace center block.
477 if (From) {
478 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
479 // Don't follow backedges, don't leave FromLoop when going upwards.
480 if ((LB.Downward ? To : *From) == FromLoop->getHeader())
481 return false;
482 // Don't leave FromLoop.
483 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
484 return false;
485 }
486 }
487 // To is a new block. Mark the block as visited in case the CFG has cycles
488 // that MachineLoopInfo didn't recognize as a natural loop.
489 return LB.Visited.insert(To).second;
490 }
491};
492
493} // end namespace llvm
494
495/// Compute the trace through MBB.
496void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
497 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
498 << printMBBReference(*MBB) << '\n');
499 // Set up loop bounds for the backwards post-order traversal.
500 LoopBounds Bounds(BlockInfo, MTM.Loops);
501
502 // Run an upwards post-order search for the trace start.
503 Bounds.Downward = false;
504 Bounds.Visited.clear();
505 for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
506 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
507 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
508 // All the predecessors have been visited, pick the preferred one.
509 TBI.Pred = pickTracePred(I);
510 LLVM_DEBUG({
511 if (TBI.Pred)
512 dbgs() << printMBBReference(*TBI.Pred) << '\n';
513 else
514 dbgs() << "null\n";
515 });
516 // The trace leading to I is now known, compute the depth resources.
517 computeDepthResources(I);
518 }
519
520 // Run a downwards post-order search for the trace end.
521 Bounds.Downward = true;
522 Bounds.Visited.clear();
523 for (const auto *I : post_order_ext(MBB, Bounds)) {
524 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
525 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
526 // All the successors have been visited, pick the preferred one.
527 TBI.Succ = pickTraceSucc(I);
528 LLVM_DEBUG({
529 if (TBI.Succ)
530 dbgs() << printMBBReference(*TBI.Succ) << '\n';
531 else
532 dbgs() << "null\n";
533 });
534 // The trace leaving I is now known, compute the height resources.
535 computeHeightResources(I);
536 }
537}
538
539/// Invalidate traces through BadMBB.
540void
543 TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
544
545 // Invalidate height resources of blocks above MBB.
546 if (BadTBI.hasValidHeight()) {
547 BadTBI.invalidateHeight();
548 WorkList.push_back(BadMBB);
549 do {
550 const MachineBasicBlock *MBB = WorkList.pop_back_val();
551 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
552 << getName() << " height.\n");
553 // Find any MBB predecessors that have MBB as their preferred successor.
554 // They are the only ones that need to be invalidated.
555 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
556 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
557 if (!TBI.hasValidHeight())
558 continue;
559 if (TBI.Succ == MBB) {
560 TBI.invalidateHeight();
561 WorkList.push_back(Pred);
562 continue;
563 }
564 // Verify that TBI.Succ is actually a *I successor.
565 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
566 }
567 } while (!WorkList.empty());
568 }
569
570 // Invalidate depth resources of blocks below MBB.
571 if (BadTBI.hasValidDepth()) {
572 BadTBI.invalidateDepth();
573 WorkList.push_back(BadMBB);
574 do {
575 const MachineBasicBlock *MBB = WorkList.pop_back_val();
576 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
577 << getName() << " depth.\n");
578 // Find any MBB successors that have MBB as their preferred predecessor.
579 // They are the only ones that need to be invalidated.
580 for (const MachineBasicBlock *Succ : MBB->successors()) {
581 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
582 if (!TBI.hasValidDepth())
583 continue;
584 if (TBI.Pred == MBB) {
585 TBI.invalidateDepth();
586 WorkList.push_back(Succ);
587 continue;
588 }
589 // Verify that TBI.Pred is actually a *I predecessor.
590 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
591 }
592 } while (!WorkList.empty());
593 }
594
595 // Clear any per-instruction data. We only have to do this for BadMBB itself
596 // because the instructions in that block may change. Other blocks may be
597 // invalidated, but their instructions will stay the same, so there is no
598 // need to erase the Cycle entries. They will be overwritten when we
599 // recompute.
600 for (const auto &I : *BadMBB)
601 Cycles.erase(&I);
602}
603
605#ifndef NDEBUG
606 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
607 "Outdated BlockInfo size");
608 for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
609 const TraceBlockInfo &TBI = BlockInfo[Num];
610 if (TBI.hasValidDepth() && TBI.Pred) {
611 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
612 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
613 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
614 "Trace is broken, depth should have been invalidated.");
615 const MachineLoop *Loop = getLoopFor(MBB);
616 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
617 }
618 if (TBI.hasValidHeight() && TBI.Succ) {
619 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
620 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
621 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
622 "Trace is broken, height should have been invalidated.");
623 const MachineLoop *Loop = getLoopFor(MBB);
624 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
625 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
626 "Trace contains backedge");
627 }
628 }
629#endif
630}
631
632//===----------------------------------------------------------------------===//
633// Data Dependencies
634//===----------------------------------------------------------------------===//
635//
636// Compute the depth and height of each instruction based on data dependencies
637// and instruction latencies. These cycle numbers assume that the CPU can issue
638// an infinite number of instructions per cycle as long as their dependencies
639// are ready.
640
641// A data dependency is represented as a defining MI and operand numbers on the
642// defining and using MI.
643namespace {
644
645struct DataDep {
646 const MachineInstr *DefMI;
647 unsigned DefOp;
648 unsigned UseOp;
649
650 DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
651 : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
652
653 /// Create a DataDep from an SSA form virtual register.
654 DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
655 : UseOp(UseOp) {
657 MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
658 assert(!DefI.atEnd() && "Register has no defs");
659 DefMI = DefI->getParent();
660 DefOp = DefI.getOperandNo();
661 assert((++DefI).atEnd() && "Register has multiple defs");
662 }
663};
664
665} // end anonymous namespace
666
667// Get the input data dependencies that must be ready before UseMI can issue.
668// Return true if UseMI has any physreg operands.
669static bool getDataDeps(const MachineInstr &UseMI,
671 const MachineRegisterInfo *MRI) {
672 // Debug values should not be included in any calculations.
673 if (UseMI.isDebugInstr())
674 return false;
675
676 bool HasPhysRegs = false;
677 for (const MachineOperand &MO : UseMI.operands()) {
678 if (!MO.isReg())
679 continue;
680 Register Reg = MO.getReg();
681 if (!Reg)
682 continue;
683 if (Reg.isPhysical()) {
684 HasPhysRegs = true;
685 continue;
686 }
687 // Collect virtual register reads.
688 if (MO.readsReg())
689 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
690 }
691 return HasPhysRegs;
692}
693
694// Get the input data dependencies of a PHI instruction, using Pred as the
695// preferred predecessor.
696// This will add at most one dependency to Deps.
697static void getPHIDeps(const MachineInstr &UseMI,
699 const MachineBasicBlock *Pred,
700 const MachineRegisterInfo *MRI) {
701 // No predecessor at the beginning of a trace. Ignore dependencies.
702 if (!Pred)
703 return;
704 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
705 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
706 if (UseMI.getOperand(i + 1).getMBB() == Pred) {
707 Register Reg = UseMI.getOperand(i).getReg();
708 Deps.push_back(DataDep(MRI, Reg, i));
709 return;
710 }
711 }
712}
713
714// Identify physreg dependencies for UseMI, and update the live regunit
715// tracking set when scanning instructions downwards.
718 SparseSet<LiveRegUnit> &RegUnits,
719 const TargetRegisterInfo *TRI) {
721 SmallVector<unsigned, 8> LiveDefOps;
722
723 for (const MachineOperand &MO : UseMI->operands()) {
724 if (!MO.isReg() || !MO.getReg().isPhysical())
725 continue;
726 MCRegister Reg = MO.getReg().asMCReg();
727 // Track live defs and kills for updating RegUnits.
728 if (MO.isDef()) {
729 if (MO.isDead())
730 Kills.push_back(Reg);
731 else
732 LiveDefOps.push_back(MO.getOperandNo());
733 } else if (MO.isKill())
734 Kills.push_back(Reg);
735 // Identify dependencies.
736 if (!MO.readsReg())
737 continue;
738 for (MCRegUnit Unit : TRI->regunits(Reg)) {
739 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
740 if (I == RegUnits.end())
741 continue;
742 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
743 break;
744 }
745 }
746
747 // Update RegUnits to reflect live registers after UseMI.
748 // First kills.
749 for (MCRegister Kill : Kills)
750 for (MCRegUnit Unit : TRI->regunits(Kill))
751 RegUnits.erase(Unit);
752
753 // Second, live defs.
754 for (unsigned DefOp : LiveDefOps) {
755 for (MCRegUnit Unit :
756 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
757 LiveRegUnit &LRU = RegUnits[Unit];
758 LRU.MI = UseMI;
759 LRU.Op = DefOp;
760 }
761 }
762}
763
764/// The length of the critical path through a trace is the maximum of two path
765/// lengths:
766///
767/// 1. The maximum height+depth over all instructions in the trace center block.
768///
769/// 2. The longest cross-block dependency chain. For small blocks, it is
770/// possible that the critical path through the trace doesn't include any
771/// instructions in the block.
772///
773/// This function computes the second number from the live-in list of the
774/// center block.
775unsigned MachineTraceMetrics::Ensemble::
776computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
777 assert(TBI.HasValidInstrDepths && "Missing depth info");
778 assert(TBI.HasValidInstrHeights && "Missing height info");
779 unsigned MaxLen = 0;
780 for (const LiveInReg &LIR : TBI.LiveIns) {
781 if (!LIR.Reg.isVirtual())
782 continue;
783 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
784 // Ignore dependencies outside the current trace.
785 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
786 if (!DefTBI.isUsefulDominator(TBI))
787 continue;
788 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
789 MaxLen = std::max(MaxLen, Len);
790 }
791 return MaxLen;
792}
793
796 SparseSet<LiveRegUnit> &RegUnits) {
798 // Collect all data dependencies.
799 if (UseMI.isPHI())
800 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
801 else if (getDataDeps(UseMI, Deps, MTM.MRI))
802 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
803
804 // Filter and process dependencies, computing the earliest issue cycle.
805 unsigned Cycle = 0;
806 for (const DataDep &Dep : Deps) {
807 const TraceBlockInfo&DepTBI =
808 BlockInfo[Dep.DefMI->getParent()->getNumber()];
809 // Ignore dependencies from outside the current trace.
810 if (!DepTBI.isUsefulDominator(TBI))
811 continue;
812 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
813 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
814 // Add latency if DefMI is a real instruction. Transients get latency 0.
815 if (!Dep.DefMI->isTransient())
816 DepCycle += MTM.SchedModel
817 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
818 Cycle = std::max(Cycle, DepCycle);
819 }
820 // Remember the instruction depth.
821 InstrCycles &MICycles = Cycles[&UseMI];
822 MICycles.Depth = Cycle;
823
824 if (TBI.HasValidInstrHeights) {
825 // Update critical path length.
826 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
827 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
828 } else {
829 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
830 }
831}
832
835 SparseSet<LiveRegUnit> &RegUnits) {
836 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
837}
838
842 SparseSet<LiveRegUnit> &RegUnits) {
843 for (; Start != End; Start++)
844 updateDepth(Start->getParent(), *Start, RegUnits);
845}
846
847/// Compute instruction depths for all instructions above or in MBB in its
848/// trace. This assumes that the trace through MBB has already been computed.
849void MachineTraceMetrics::Ensemble::
850computeInstrDepths(const MachineBasicBlock *MBB) {
851 // The top of the trace may already be computed, and HasValidInstrDepths
852 // implies Head->HasValidInstrDepths, so we only need to start from the first
853 // block in the trace that needs to be recomputed.
855 do {
856 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
857 assert(TBI.hasValidDepth() && "Incomplete trace");
858 if (TBI.HasValidInstrDepths)
859 break;
860 Stack.push_back(MBB);
861 MBB = TBI.Pred;
862 } while (MBB);
863
864 // FIXME: If MBB is non-null at this point, it is the last pre-computed block
865 // in the trace. We should track any live-out physregs that were defined in
866 // the trace. This is quite rare in SSA form, typically created by CSE
867 // hoisting a compare.
868 SparseSet<LiveRegUnit> RegUnits;
869 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
870
871 // Go through trace blocks in top-down order, stopping after the center block.
872 while (!Stack.empty()) {
873 MBB = Stack.pop_back_val();
874 LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
875 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
876 TBI.HasValidInstrDepths = true;
877 TBI.CriticalPath = 0;
878
879 // Print out resource depths here as well.
880 LLVM_DEBUG({
881 dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
882 ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
883 for (unsigned K = 0; K != PRDepths.size(); ++K)
884 if (PRDepths[K]) {
885 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
886 dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
887 << MTM.SchedModel.getProcResource(K)->Name << " ("
888 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
889 }
890 });
891
892 // Also compute the critical path length through MBB when possible.
893 if (TBI.HasValidInstrHeights)
894 TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
895
896 for (const auto &UseMI : *MBB) {
897 updateDepth(TBI, UseMI, RegUnits);
898 }
899 }
900}
901
902// Identify physreg dependencies for MI when scanning instructions upwards.
903// Return the issue height of MI after considering any live regunits.
904// Height is the issue height computed from virtual register dependencies alone.
905static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
906 SparseSet<LiveRegUnit> &RegUnits,
907 const TargetSchedModel &SchedModel,
908 const TargetInstrInfo *TII,
909 const TargetRegisterInfo *TRI) {
911
912 for (const MachineOperand &MO : MI.operands()) {
913 if (!MO.isReg())
914 continue;
915 Register Reg = MO.getReg();
916 if (!Reg.isPhysical())
917 continue;
918 if (MO.readsReg())
919 ReadOps.push_back(MO.getOperandNo());
920 if (!MO.isDef())
921 continue;
922 // This is a def of Reg. Remove corresponding entries from RegUnits, and
923 // update MI Height to consider the physreg dependencies.
924 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
925 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
926 if (I == RegUnits.end())
927 continue;
928 unsigned DepHeight = I->Cycle;
929 if (!MI.isTransient()) {
930 // We may not know the UseMI of this dependency, if it came from the
931 // live-in list. SchedModel can handle a NULL UseMI.
932 DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
933 I->MI, I->Op);
934 }
935 Height = std::max(Height, DepHeight);
936 // This regunit is dead above MI.
937 RegUnits.erase(I);
938 }
939 }
940
941 // Now we know the height of MI. Update any regunits read.
942 for (unsigned Op : ReadOps) {
943 MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
944 for (MCRegUnit Unit : TRI->regunits(Reg)) {
945 LiveRegUnit &LRU = RegUnits[Unit];
946 // Set the height to the highest reader of the unit.
947 if (LRU.Cycle <= Height && LRU.MI != &MI) {
948 LRU.Cycle = Height;
949 LRU.MI = &MI;
950 LRU.Op = Op;
951 }
952 }
953 }
954
955 return Height;
956}
957
959
960// Push the height of DefMI upwards if required to match UseMI.
961// Return true if this is the first time DefMI was seen.
962static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
963 unsigned UseHeight, MIHeightMap &Heights,
964 const TargetSchedModel &SchedModel,
965 const TargetInstrInfo *TII) {
966 // Adjust height by Dep.DefMI latency.
967 if (!Dep.DefMI->isTransient())
968 UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
969 Dep.UseOp);
970
971 // Update Heights[DefMI] to be the maximum height seen.
973 bool New;
974 std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
975 if (New)
976 return true;
977
978 // DefMI has been pushed before. Give it the max height.
979 if (I->second < UseHeight)
980 I->second = UseHeight;
981 return false;
982}
983
984/// Assuming that the virtual register defined by DefMI:DefOp was used by
985/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
986/// when reaching the block that contains DefMI.
987void MachineTraceMetrics::Ensemble::
988addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
990 assert(!Trace.empty() && "Trace should contain at least one block");
991 Register Reg = DefMI->getOperand(DefOp).getReg();
992 assert(Reg.isVirtual());
993 const MachineBasicBlock *DefMBB = DefMI->getParent();
994
995 // Reg is live-in to all blocks in Trace that follow DefMBB.
996 for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
997 if (MBB == DefMBB)
998 return;
999 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1000 // Just add the register. The height will be updated later.
1001 TBI.LiveIns.push_back(Reg);
1002 }
1003}
1004
1005/// Compute instruction heights in the trace through MBB. This updates MBB and
1006/// the blocks below it in the trace. It is assumed that the trace has already
1007/// been computed.
1008void MachineTraceMetrics::Ensemble::
1009computeInstrHeights(const MachineBasicBlock *MBB) {
1010 // The bottom of the trace may already be computed.
1011 // Find the blocks that need updating.
1013 do {
1014 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1015 assert(TBI.hasValidHeight() && "Incomplete trace");
1016 if (TBI.HasValidInstrHeights)
1017 break;
1018 Stack.push_back(MBB);
1019 TBI.LiveIns.clear();
1020 MBB = TBI.Succ;
1021 } while (MBB);
1022
1023 // As we move upwards in the trace, keep track of instructions that are
1024 // required by deeper trace instructions. Map MI -> height required so far.
1025 MIHeightMap Heights;
1026
1027 // For physregs, the def isn't known when we see the use.
1028 // Instead, keep track of the highest use of each regunit.
1029 SparseSet<LiveRegUnit> RegUnits;
1030 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1031
1032 // If the bottom of the trace was already precomputed, initialize heights
1033 // from its live-in list.
1034 // MBB is the highest precomputed block in the trace.
1035 if (MBB) {
1036 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1037 for (LiveInReg &LI : TBI.LiveIns) {
1038 if (LI.Reg.isVirtual()) {
1039 // For virtual registers, the def latency is included.
1040 unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
1041 if (Height < LI.Height)
1042 Height = LI.Height;
1043 } else {
1044 // For register units, the def latency is not included because we don't
1045 // know the def yet.
1046 RegUnits[LI.Reg].Cycle = LI.Height;
1047 }
1048 }
1049 }
1050
1051 // Go through the trace blocks in bottom-up order.
1053 for (;!Stack.empty(); Stack.pop_back()) {
1054 MBB = Stack.back();
1055 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
1056 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1057 TBI.HasValidInstrHeights = true;
1058 TBI.CriticalPath = 0;
1059
1060 LLVM_DEBUG({
1061 dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1062 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1063 for (unsigned K = 0; K != PRHeights.size(); ++K)
1064 if (PRHeights[K]) {
1065 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1066 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1067 << MTM.SchedModel.getProcResource(K)->Name << " ("
1068 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1069 }
1070 });
1071
1072 // Get dependencies from PHIs in the trace successor.
1073 const MachineBasicBlock *Succ = TBI.Succ;
1074 // If MBB is the last block in the trace, and it has a back-edge to the
1075 // loop header, get loop-carried dependencies from PHIs in the header. For
1076 // that purpose, pretend that all the loop header PHIs have height 0.
1077 if (!Succ)
1078 if (const MachineLoop *Loop = getLoopFor(MBB))
1079 if (MBB->isSuccessor(Loop->getHeader()))
1080 Succ = Loop->getHeader();
1081
1082 if (Succ) {
1083 for (const auto &PHI : *Succ) {
1084 if (!PHI.isPHI())
1085 break;
1086 Deps.clear();
1087 getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1088 if (!Deps.empty()) {
1089 // Loop header PHI heights are all 0.
1090 unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1091 LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1092 if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1093 MTM.TII))
1094 addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1095 }
1096 }
1097 }
1098
1099 // Go through the block backwards.
1100 for (const MachineInstr &MI : reverse(*MBB)) {
1101 // Find the MI height as determined by virtual register uses in the
1102 // trace below.
1103 unsigned Cycle = 0;
1104 MIHeightMap::iterator HeightI = Heights.find(&MI);
1105 if (HeightI != Heights.end()) {
1106 Cycle = HeightI->second;
1107 // We won't be seeing any more MI uses.
1108 Heights.erase(HeightI);
1109 }
1110
1111 // Don't process PHI deps. They depend on the specific predecessor, and
1112 // we'll get them when visiting the predecessor.
1113 Deps.clear();
1114 bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1115
1116 // There may also be regunit dependencies to include in the height.
1117 if (HasPhysRegs)
1118 Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1119 MTM.TII, MTM.TRI);
1120
1121 // Update the required height of any virtual registers read by MI.
1122 for (const DataDep &Dep : Deps)
1123 if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1124 addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1125
1126 InstrCycles &MICycles = Cycles[&MI];
1127 MICycles.Height = Cycle;
1128 if (!TBI.HasValidInstrDepths) {
1129 LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
1130 continue;
1131 }
1132 // Update critical path length.
1133 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1134 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1135 }
1136
1137 // Update virtual live-in heights. They were added by addLiveIns() with a 0
1138 // height because the final height isn't known until now.
1139 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
1140 for (LiveInReg &LIR : TBI.LiveIns) {
1141 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
1142 LIR.Height = Heights.lookup(DefMI);
1143 LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
1144 }
1145
1146 // Transfer the live regunits to the live-in list.
1147 for (const LiveRegUnit &RU : RegUnits) {
1148 TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
1149 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
1150 << RU.Cycle);
1151 }
1152 LLVM_DEBUG(dbgs() << '\n');
1153
1154 if (!TBI.HasValidInstrDepths)
1155 continue;
1156 // Add live-ins to the critical path length.
1157 TBI.CriticalPath = std::max(TBI.CriticalPath,
1158 computeCrossBlockCriticalPath(TBI));
1159 LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1160 }
1161}
1162
1165 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1166
1167 if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1168 computeTrace(MBB);
1169 if (!TBI.HasValidInstrDepths)
1170 computeInstrDepths(MBB);
1171 if (!TBI.HasValidInstrHeights)
1172 computeInstrHeights(MBB);
1173
1174 return Trace(*this, TBI);
1175}
1176
1177unsigned
1179 assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1180 "MI must be in the trace center block");
1181 InstrCycles Cyc = getInstrCycles(MI);
1182 return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1183}
1184
1185unsigned
1187 const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1189 getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1190 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1191 DataDep &Dep = Deps.front();
1192 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1193 // Add latency if DefMI is a real instruction. Transients get latency 0.
1194 if (!Dep.DefMI->isTransient())
1195 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1196 &PHI, Dep.UseOp);
1197 return DepCycle;
1198}
1199
1200/// When bottom is set include instructions in current block in estimate.
1202 // Find the limiting processor resource.
1203 // Numbers have been pre-scaled to be comparable.
1204 unsigned PRMax = 0;
1205 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1206 if (Bottom) {
1207 ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
1208 for (unsigned K = 0; K != PRDepths.size(); ++K)
1209 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1210 } else {
1211 for (unsigned PRD : PRDepths)
1212 PRMax = std::max(PRMax, PRD);
1213 }
1214 // Convert to cycle count.
1215 PRMax = TE.MTM.getCycles(PRMax);
1216
1217 /// All instructions before current block
1218 unsigned Instrs = TBI.InstrDepth;
1219 // plus instructions in current block
1220 if (Bottom)
1221 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1222 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1223 Instrs /= IW;
1224 // Assume issue width 1 without a schedule model.
1225 return std::max(Instrs, PRMax);
1226}
1227
1231 ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1232 // Add up resources above and below the center block.
1233 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1234 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1235 unsigned PRMax = 0;
1236
1237 // Capture computing cycles from extra instructions
1238 auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1239 unsigned ResourceIdx)
1240 ->unsigned {
1241 unsigned Cycles = 0;
1242 for (const MCSchedClassDesc *SC : Instrs) {
1243 if (!SC->isValid())
1244 continue;
1246 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1247 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1248 PI != PE; ++PI) {
1249 if (PI->ProcResourceIdx != ResourceIdx)
1250 continue;
1251 Cycles += (PI->ReleaseAtCycle *
1252 TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1253 }
1254 }
1255 return Cycles;
1256 };
1257
1258 for (unsigned K = 0; K != PRDepths.size(); ++K) {
1259 unsigned PRCycles = PRDepths[K] + PRHeights[K];
1260 for (const MachineBasicBlock *MBB : Extrablocks)
1261 PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
1262 PRCycles += extraCycles(ExtraInstrs, K);
1263 PRCycles -= extraCycles(RemoveInstrs, K);
1264 PRMax = std::max(PRMax, PRCycles);
1265 }
1266 // Convert to cycle count.
1267 PRMax = TE.MTM.getCycles(PRMax);
1268
1269 // Instrs: #instructions in current trace outside current block.
1270 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1271 // Add instruction count from the extra blocks.
1272 for (const MachineBasicBlock *MBB : Extrablocks)
1273 Instrs += TE.MTM.getResources(MBB)->InstrCount;
1274 Instrs += ExtraInstrs.size();
1275 Instrs -= RemoveInstrs.size();
1276 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1277 Instrs /= IW;
1278 // Assume issue width 1 without a schedule model.
1279 return std::max(Instrs, PRMax);
1280}
1281
1283 const MachineInstr &UseMI) const {
1284 if (DefMI.getParent() == UseMI.getParent())
1285 return true;
1286
1287 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1288 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1289
1290 return DepTBI.isUsefulDominator(TBI);
1291}
1292
1294 OS << getName() << " ensemble:\n";
1295 for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1296 OS << " %bb." << i << '\t';
1297 BlockInfo[i].print(OS);
1298 OS << '\n';
1299 }
1300}
1301
1303 if (hasValidDepth()) {
1304 OS << "depth=" << InstrDepth;
1305 if (Pred)
1306 OS << " pred=" << printMBBReference(*Pred);
1307 else
1308 OS << " pred=null";
1309 OS << " head=%bb." << Head;
1310 if (HasValidInstrDepths)
1311 OS << " +instrs";
1312 } else
1313 OS << "depth invalid";
1314 OS << ", ";
1315 if (hasValidHeight()) {
1316 OS << "height=" << InstrHeight;
1317 if (Succ)
1318 OS << " succ=" << printMBBReference(*Succ);
1319 else
1320 OS << " succ=null";
1321 OS << " tail=%bb." << Tail;
1322 if (HasValidInstrHeights)
1323 OS << " +instrs";
1324 } else
1325 OS << "height invalid";
1326 if (HasValidInstrDepths && HasValidInstrHeights)
1327 OS << ", crit=" << CriticalPath;
1328}
1329
1331 unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1332
1333 OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1334 << " --> %bb." << TBI.Tail << ':';
1335 if (TBI.hasValidHeight() && TBI.hasValidDepth())
1336 OS << ' ' << getInstrCount() << " instrs.";
1337 if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1338 OS << ' ' << TBI.CriticalPath << " cycles.";
1339
1341 OS << "\n%bb." << MBBNum;
1342 while (Block->hasValidDepth() && Block->Pred) {
1343 unsigned Num = Block->Pred->getNumber();
1344 OS << " <- " << printMBBReference(*Block->Pred);
1345 Block = &TE.BlockInfo[Num];
1346 }
1347
1348 Block = &TBI;
1349 OS << "\n ";
1350 while (Block->hasValidHeight() && Block->Succ) {
1351 unsigned Num = Block->Succ->getNumber();
1352 OS << " -> " << printMBBReference(*Block->Succ);
1353 Block = &TE.BlockInfo[Num];
1354 }
1355 OS << '\n';
1356}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
basic Basic Alias true
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
BlockVerifier::State From
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:371
static unsigned InstrCount
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Hardware Loops
IRTranslator LLVM IR MI
loops
Definition: LoopInfo.cpp:1209
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI, unsigned UseHeight, MIHeightMap &Heights, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII)
static void getPHIDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineBasicBlock *Pred, const MachineRegisterInfo *MRI)
static bool getDataDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineRegisterInfo *MRI)
static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl< DataDep > &Deps, SparseSet< LiveRegUnit > &RegUnits, const TargetRegisterInfo *TRI)
Machine Trace Metrics
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, SparseSet< LiveRegUnit > &RegUnits, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To)
#define DEBUG_TYPE
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:336
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
A possibly irreducible generalization of a Loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
bool isPredecessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a predecessor of this block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
MachineOperand class - Representation of each machine instruction operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
bool atEnd() const
atEnd - return true if this iterator is equal to reg_end() on the value.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of the instructions from Start to End.
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=std::nullopt, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=std::nullopt, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=std::nullopt) const
Return the resource length of the trace.
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by a analysis pass to check state of analysis infor...
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
ArrayRef< unsigned > getProcReleaseAtCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:289
typename DenseT::iterator iterator
Definition: SparseSet.h:171
const_iterator end() const
Definition: SparseSet.h:175
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
iterator find(const KeyT &Key)
find - Find an element by its key.
Definition: SparseSet.h:225
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
TargetSubtargetInfo - Generic base class for all target subtargets.
bool empty() const
Definition: Trace.h:96
bool insertEdge(std::optional< const MachineBasicBlock * > From, const MachineBasicBlock *To)
Default po_iterator_storage implementation with an internal set object.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
iterator_range< po_ext_iterator< T, SetType > > post_order_ext(const T &G, SetType &S)
iterator_range< ipo_ext_iterator< T, SetType > > inverse_post_order_ext(const T &G, SetType &S)
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
char & MachineTraceMetricsID
MachineTraceMetrics - This pass computes critical path and CPU resource usage in an ensemble of trace...
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
DWARFExpression::Operation Op
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs)
Adds registers contained in LiveRegs to the block live-in list of MBB.
const MachineInstr * MI
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
Per-basic block information that doesn't depend on the trace through the block.
bool hasResources() const
Returns true when resource information for this block has been computed.
unsigned InstrCount
The number of non-trivial instructions in the block.
bool HasCalls
True when the block contains calls.
InstrCycles represents the cycle height and depth of an instruction in a trace.
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
Per-basic block information that relates to a specific trace through the block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned CriticalPath
Critical path length.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block.
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().