MachineTraceMetrics.cpp
1//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/CodeGen/MachineTraceMetrics.h"
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/PostOrderIterator.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/SparseSet.h"
16#include "llvm/CodeGen/MachineBasicBlock.h"
17#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
18#include "llvm/CodeGen/MachineFunction.h"
19#include "llvm/CodeGen/MachineInstr.h"
20#include "llvm/CodeGen/MachineLoopInfo.h"
21#include "llvm/CodeGen/MachineOperand.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/TargetRegisterInfo.h"
24#include "llvm/CodeGen/TargetSchedule.h"
25#include "llvm/CodeGen/TargetSubtargetInfo.h"
26#include "llvm/InitializePasses.h"
27#include "llvm/MC/MCRegisterInfo.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/Format.h"
32#include "llvm/Support/raw_ostream.h"
33#include <algorithm>
34#include <cassert>
35#include <iterator>
36#include <tuple>
37#include <utility>
38
39using namespace llvm;
40
41#define DEBUG_TYPE "machine-trace-metrics"
42
43char MachineTraceMetrics::ID = 0;
44
45char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
46
47INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
48                      "Machine Trace Metrics", false, true)
49INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
50INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
51INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
52                    "Machine Trace Metrics", false, true)
53
54MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
55  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
56}
57
58void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
59  AU.setPreservesAll();
60  AU.addRequired<MachineBranchProbabilityInfo>();
61  AU.addRequired<MachineLoopInfo>();
62  MachineFunctionPass::getAnalysisUsage(AU);
63}
64
65bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
66  MF = &Func;
67 const TargetSubtargetInfo &ST = MF->getSubtarget();
68 TII = ST.getInstrInfo();
69 TRI = ST.getRegisterInfo();
70 MRI = &MF->getRegInfo();
71 Loops = &getAnalysis<MachineLoopInfo>();
72 SchedModel.init(&ST);
73 BlockInfo.resize(MF->getNumBlockIDs());
74 ProcResourceCycles.resize(MF->getNumBlockIDs() *
75 SchedModel.getNumProcResourceKinds());
76 return false;
77}
78
79void MachineTraceMetrics::releaseMemory() {
80  MF = nullptr;
81 BlockInfo.clear();
82 for (Ensemble *&E : Ensembles) {
83 delete E;
84 E = nullptr;
85 }
86}
87
88//===----------------------------------------------------------------------===//
89// Fixed block information
90//===----------------------------------------------------------------------===//
91//
92// The number of instructions in a basic block and the CPU resources used by
93// those instructions don't depend on any given trace strategy.
94
95/// Compute the resource usage in basic block MBB.
96const MachineTraceMetrics::FixedBlockInfo*
97MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
98  assert(MBB && "No basic block");
99 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
100 if (FBI->hasResources())
101 return FBI;
102
103 // Compute resource usage in the block.
104 FBI->HasCalls = false;
105 unsigned InstrCount = 0;
106
107 // Add up per-processor resource cycles as well.
108 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
109 SmallVector<unsigned, 32> PRCycles(PRKinds);
110
111 for (const auto &MI : *MBB) {
112 if (MI.isTransient())
113 continue;
114 ++InstrCount;
115 if (MI.isCall())
116 FBI->HasCalls = true;
117
118 // Count processor resources used.
119 if (!SchedModel.hasInstrSchedModel())
120 continue;
121 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
122 if (!SC->isValid())
123 continue;
124
125    for (TargetSchedModel::ProcResIter
126         PI = SchedModel.getWriteProcResBegin(SC),
127 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
128 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
129 PRCycles[PI->ProcResourceIdx] += PI->Cycles;
130 }
131 }
132 FBI->InstrCount = InstrCount;
133
134 // Scale the resource cycles so they are comparable.
135 unsigned PROffset = MBB->getNumber() * PRKinds;
136 for (unsigned K = 0; K != PRKinds; ++K)
137 ProcResourceCycles[PROffset + K] =
138 PRCycles[K] * SchedModel.getResourceFactor(K);
139
140 return FBI;
141}
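// For illustration only (a hypothetical machine model, not any real target):
// if resource A has 2 units and resource B has 3 units, getResourceFactor()
// normalizes both to a common denominator of roughly LCM(2, 3) = 6 units:
//
//   factor(A) = 6 / 2 = 3      factor(B) = 6 / 3 = 2
//   4 cycles on A -> 12 scaled cycles; 4 cycles on B -> 8 scaled cycles
//
// so A is correctly identified as the more contended resource.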
142
143ArrayRef<unsigned>
144MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
145  assert(BlockInfo[MBBNum].hasResources() &&
146 "getResources() must be called before getProcResourceCycles()");
147 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
148 assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
149 return ArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
150}
151
152//===----------------------------------------------------------------------===//
153// Ensemble utility functions
154//===----------------------------------------------------------------------===//
155
156MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
157  : MTM(*ct) {
158 BlockInfo.resize(MTM.BlockInfo.size());
159 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
160 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
161 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
162}
163
164// Virtual destructor serves as an anchor.
165MachineTraceMetrics::Ensemble::~Ensemble() = default;
166
167const MachineLoop*
168MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
169  return MTM.Loops->getLoopFor(MBB);
170}
171
172// Update resource-related information in the TraceBlockInfo for MBB.
173// Only update resources related to the trace above MBB.
174void MachineTraceMetrics::Ensemble::
175computeDepthResources(const MachineBasicBlock *MBB) {
176 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
177 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
178 unsigned PROffset = MBB->getNumber() * PRKinds;
179
180 // Compute resources from trace above. The top block is simple.
181 if (!TBI->Pred) {
182 TBI->InstrDepth = 0;
183 TBI->Head = MBB->getNumber();
184 std::fill(ProcResourceDepths.begin() + PROffset,
185 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
186 return;
187 }
188
189 // Compute from the block above. A post-order traversal ensures the
190 // predecessor is always computed first.
191 unsigned PredNum = TBI->Pred->getNumber();
192 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
193 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
194 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
195 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
196 TBI->Head = PredTBI->Head;
197
198 // Compute per-resource depths.
199 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
200 ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
201 for (unsigned K = 0; K != PRKinds; ++K)
202 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
203}
204
205// Update resource-related information in the TraceBlockInfo for MBB.
206// Only update resources related to the trace below MBB.
207void MachineTraceMetrics::Ensemble::
208computeHeightResources(const MachineBasicBlock *MBB) {
209 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
210 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
211 unsigned PROffset = MBB->getNumber() * PRKinds;
212
213 // Compute resources for the current block.
214 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
215 ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
216
217 // The trace tail is done.
218 if (!TBI->Succ) {
219 TBI->Tail = MBB->getNumber();
220 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
221 return;
222 }
223
224 // Compute from the block below. A post-order traversal ensures the
225 // successor is always computed first.
226 unsigned SuccNum = TBI->Succ->getNumber();
227 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
228 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
229 TBI->InstrHeight += SuccTBI->InstrHeight;
230 TBI->Tail = SuccTBI->Tail;
231
232 // Compute per-resource heights.
233 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
234 for (unsigned K = 0; K != PRKinds; ++K)
235 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
236}
237
238// Check if depth resources for MBB are valid and return the TBI.
239// Return NULL if the resources have been invalidated.
240const MachineTraceMetrics::TraceBlockInfo*
241MachineTraceMetrics::Ensemble::
242getDepthResources(const MachineBasicBlock *MBB) const {
243  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
244 return TBI->hasValidDepth() ? TBI : nullptr;
245}
246
247// Check if height resources for MBB are valid and return the TBI.
248// Return NULL if the resources have been invalidated.
249const MachineTraceMetrics::TraceBlockInfo*
250MachineTraceMetrics::Ensemble::
251getHeightResources(const MachineBasicBlock *MBB) const {
252  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
253 return TBI->hasValidHeight() ? TBI : nullptr;
254}
255
256/// Get an array of processor resource depths for MBB. Indexed by processor
257/// resource kind, this array contains the scaled processor resources consumed
258/// by all blocks preceding MBB in its trace. It does not include instructions
259/// in MBB.
260///
261/// Compare TraceBlockInfo::InstrDepth.
262ArrayRef<unsigned>
263MachineTraceMetrics::Ensemble::
264getProcResourceDepths(unsigned MBBNum) const {
265 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
266 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
267 return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
268}
269
270/// Get an array of processor resource heights for MBB. Indexed by processor
271/// resource kind, this array contains the scaled processor resources consumed
272/// by this block and all blocks following it in its trace.
273///
274/// Compare TraceBlockInfo::InstrHeight.
275ArrayRef<unsigned>
276MachineTraceMetrics::Ensemble::
277getProcResourceHeights(unsigned MBBNum) const {
278 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
279 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
280 return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
281}
282
283//===----------------------------------------------------------------------===//
284// Trace Selection Strategies
285//===----------------------------------------------------------------------===//
286//
287// A trace selection strategy is implemented as a sub-class of Ensemble. The
288// trace through a block B is computed by two DFS traversals of the CFG
289// starting from B. One upwards, and one downwards. During the upwards DFS,
290// pickTracePred() is called on the post-ordered blocks. During the downwards
291// DFS, pickTraceSucc() is called in a post-order.
292//
293
294// We never allow traces that leave loops, but we do allow traces to enter
295// nested loops. We also never allow traces to contain back-edges.
296//
297// This means that a loop header can never appear above the center block of a
298// trace, except as the trace head. Below the center block, loop exiting edges
299// are banned.
300//
301// Return true if an edge from the From loop to the To loop is leaving a loop.
302// Either of To and From can be null.
303static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
304 return From && !From->contains(To);
305}
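// For example (hypothetical CFG): with an outer loop L1 that contains an inner
// loop L2, isExitingLoop(L2, L1) is true because the inner loop does not
// contain the outer one, so the edge leaves L2; isExitingLoop(L1, L2) is false
// because L1 contains L2, so entering the nested loop is allowed. A null From
// (code outside any loop) never counts as exiting.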
306
307// MinInstrCountEnsemble - Pick the trace that executes the least number of
308// instructions.
309namespace {
310
311class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
312 const char *getName() const override { return "MinInstr"; }
313 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
314 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
315
316public:
317 MinInstrCountEnsemble(MachineTraceMetrics *mtm)
318 : MachineTraceMetrics::Ensemble(mtm) {}
319};
320
321/// Pick only the current basic block for the trace and do not choose any
322/// predecessors/successors.
323class LocalEnsemble : public MachineTraceMetrics::Ensemble {
324 const char *getName() const override { return "Local"; }
325 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
326 return nullptr;
327 };
328 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
329 return nullptr;
330 };
331
332public:
333 LocalEnsemble(MachineTraceMetrics *MTM)
334 : MachineTraceMetrics::Ensemble(MTM) {}
335};
336} // end anonymous namespace
337
338// Select the preferred predecessor for MBB.
339const MachineBasicBlock*
340MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
341 if (MBB->pred_empty())
342 return nullptr;
343 const MachineLoop *CurLoop = getLoopFor(MBB);
344 // Don't leave loops, and never follow back-edges.
345 if (CurLoop && MBB == CurLoop->getHeader())
346 return nullptr;
347 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
348 const MachineBasicBlock *Best = nullptr;
349 unsigned BestDepth = 0;
350 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
351    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
352      getDepthResources(Pred);
353 // Ignore cycles that aren't natural loops.
354 if (!PredTBI)
355 continue;
356 // Pick the predecessor that would give this block the smallest InstrDepth.
357 unsigned Depth = PredTBI->InstrDepth + CurCount;
358 if (!Best || Depth < BestDepth) {
359 Best = Pred;
360 BestDepth = Depth;
361 }
362 }
363 return Best;
364}
365
366// Select the preferred successor for MBB.
367const MachineBasicBlock*
368MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
369 if (MBB->succ_empty())
370 return nullptr;
371 const MachineLoop *CurLoop = getLoopFor(MBB);
372 const MachineBasicBlock *Best = nullptr;
373 unsigned BestHeight = 0;
374 for (const MachineBasicBlock *Succ : MBB->successors()) {
375 // Don't consider back-edges.
376 if (CurLoop && Succ == CurLoop->getHeader())
377 continue;
378 // Don't consider successors exiting CurLoop.
379 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
380 continue;
381    const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
382      getHeightResources(Succ);
383 // Ignore cycles that aren't natural loops.
384 if (!SuccTBI)
385 continue;
386 // Pick the successor that would give this block the smallest InstrHeight.
387 unsigned Height = SuccTBI->InstrHeight;
388 if (!Best || Height < BestHeight) {
389 Best = Succ;
390 BestHeight = Height;
391 }
392 }
393 return Best;
394}
395
396// Get an Ensemble sub-class for the requested trace strategy.
397MachineTraceMetrics::Ensemble *
398MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy) {
399  assert(strategy < MachineTraceStrategy::TS_NumStrategies &&
400         "Invalid trace strategy enum");
401 Ensemble *&E = Ensembles[static_cast<size_t>(strategy)];
402 if (E)
403 return E;
404
405 // Allocate new Ensemble on demand.
406  switch (strategy) {
407  case MachineTraceStrategy::TS_MinInstrCount:
408    return (E = new MinInstrCountEnsemble(this));
409  case MachineTraceStrategy::TS_Local:
410    return (E = new LocalEnsemble(this));
411 default: llvm_unreachable("Invalid trace strategy enum");
412 }
413}
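// A minimal usage sketch (hypothetical client code, not part of this pass):
// a transformation pass that declared a dependency on MachineTraceMetrics
// would typically query it roughly like this:
//
//   MachineTraceMetrics &MTM = getAnalysis<MachineTraceMetrics>();
//   MachineTraceMetrics::Ensemble *E =
//       MTM.getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
//   MachineTraceMetrics::Trace T = E->getTrace(MBB);
//   unsigned CritPathLen = T.getCriticalPath();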
414
415void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
416  LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
417 << '\n');
418 BlockInfo[MBB->getNumber()].invalidate();
419 for (Ensemble *E : Ensembles)
420 if (E)
421 E->invalidate(MBB);
422}
423
424void MachineTraceMetrics::verifyAnalysis() const {
425  if (!MF)
426 return;
427#ifndef NDEBUG
428 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
429 for (Ensemble *E : Ensembles)
430 if (E)
431 E->verify();
432#endif
433}
434
435//===----------------------------------------------------------------------===//
436// Trace building
437//===----------------------------------------------------------------------===//
438//
439// Traces are built by two CFG traversals. To avoid recomputing too much, use a
440// set abstraction that confines the search to the current loop, and doesn't
441// revisit blocks.
442
443namespace {
444
445struct LoopBounds {
446  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
447  SmallPtrSet<const MachineBasicBlock *, 8> Visited;
448  const MachineLoopInfo *Loops;
449  bool Downward = false;
450
451  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
452             const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
453};
454
455} // end anonymous namespace
456
457// Specialize po_iterator_storage in order to prune the post-order traversal so
458// it is limited to the current loop and doesn't traverse the loop back edges.
459namespace llvm {
460
461template<>
462class po_iterator_storage<LoopBounds, true> {
463 LoopBounds &LB;
464
465public:
466 po_iterator_storage(LoopBounds &lb) : LB(lb) {}
467
468  void finishPostorder(const MachineBasicBlock *) {}
469
470 bool insertEdge(std::optional<const MachineBasicBlock *> From,
471 const MachineBasicBlock *To) {
472 // Skip already visited To blocks.
473 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
474 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
475 return false;
476 // From is null once when To is the trace center block.
477 if (From) {
478 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
479 // Don't follow backedges, don't leave FromLoop when going upwards.
480 if ((LB.Downward ? To : *From) == FromLoop->getHeader())
481 return false;
482 // Don't leave FromLoop.
483 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
484 return false;
485 }
486 }
487 // To is a new block. Mark the block as visited in case the CFG has cycles
488 // that MachineLoopInfo didn't recognize as a natural loop.
489 return LB.Visited.insert(To).second;
490 }
491};
492
493} // end namespace llvm
494
495/// Compute the trace through MBB.
496void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
497 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
498 << printMBBReference(*MBB) << '\n');
499 // Set up loop bounds for the backwards post-order traversal.
500 LoopBounds Bounds(BlockInfo, MTM.Loops);
501
502 // Run an upwards post-order search for the trace start.
503 Bounds.Downward = false;
504 Bounds.Visited.clear();
505 for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
506 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
507 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
508 // All the predecessors have been visited, pick the preferred one.
509 TBI.Pred = pickTracePred(I);
510 LLVM_DEBUG({
511 if (TBI.Pred)
512 dbgs() << printMBBReference(*TBI.Pred) << '\n';
513 else
514 dbgs() << "null\n";
515 });
516 // The trace leading to I is now known, compute the depth resources.
517 computeDepthResources(I);
518 }
519
520 // Run a downwards post-order search for the trace end.
521 Bounds.Downward = true;
522 Bounds.Visited.clear();
523 for (const auto *I : post_order_ext(MBB, Bounds)) {
524 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
525 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
526 // All the successors have been visited, pick the preferred one.
527 TBI.Succ = pickTraceSucc(I);
528 LLVM_DEBUG({
529 if (TBI.Succ)
530 dbgs() << printMBBReference(*TBI.Succ) << '\n';
531 else
532 dbgs() << "null\n";
533 });
534 // The trace leaving I is now known, compute the height resources.
535 computeHeightResources(I);
536 }
537}
538
539/// Invalidate traces through BadMBB.
540void
541MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
542  SmallVector<const MachineBasicBlock *, 16> WorkList;
543  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
544
545 // Invalidate height resources of blocks above MBB.
546 if (BadTBI.hasValidHeight()) {
547 BadTBI.invalidateHeight();
548 WorkList.push_back(BadMBB);
549 do {
550 const MachineBasicBlock *MBB = WorkList.pop_back_val();
551 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
552 << getName() << " height.\n");
553 // Find any MBB predecessors that have MBB as their preferred successor.
554 // They are the only ones that need to be invalidated.
555 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
556 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
557 if (!TBI.hasValidHeight())
558 continue;
559 if (TBI.Succ == MBB) {
560 TBI.invalidateHeight();
561 WorkList.push_back(Pred);
562 continue;
563 }
564        // Verify that TBI.Succ is actually a successor of Pred.
565 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
566 }
567 } while (!WorkList.empty());
568 }
569
570 // Invalidate depth resources of blocks below MBB.
571 if (BadTBI.hasValidDepth()) {
572 BadTBI.invalidateDepth();
573 WorkList.push_back(BadMBB);
574 do {
575 const MachineBasicBlock *MBB = WorkList.pop_back_val();
576 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
577 << getName() << " depth.\n");
578 // Find any MBB successors that have MBB as their preferred predecessor.
579 // They are the only ones that need to be invalidated.
580 for (const MachineBasicBlock *Succ : MBB->successors()) {
581 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
582 if (!TBI.hasValidDepth())
583 continue;
584 if (TBI.Pred == MBB) {
585 TBI.invalidateDepth();
586 WorkList.push_back(Succ);
587 continue;
588 }
589        // Verify that TBI.Pred is actually a predecessor of Succ.
590 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
591 }
592 } while (!WorkList.empty());
593 }
594
595 // Clear any per-instruction data. We only have to do this for BadMBB itself
596 // because the instructions in that block may change. Other blocks may be
597 // invalidated, but their instructions will stay the same, so there is no
598 // need to erase the Cycle entries. They will be overwritten when we
599 // recompute.
600 for (const auto &I : *BadMBB)
601 Cycles.erase(&I);
602}
603
604void MachineTraceMetrics::Ensemble::verify() const {
605#ifndef NDEBUG
606 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
607 "Outdated BlockInfo size");
608 for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
609 const TraceBlockInfo &TBI = BlockInfo[Num];
610 if (TBI.hasValidDepth() && TBI.Pred) {
611 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
612 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
613 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
614 "Trace is broken, depth should have been invalidated.");
615 const MachineLoop *Loop = getLoopFor(MBB);
616 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
617 }
618 if (TBI.hasValidHeight() && TBI.Succ) {
619 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
620 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
621 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
622 "Trace is broken, height should have been invalidated.");
623 const MachineLoop *Loop = getLoopFor(MBB);
624 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
625 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
626 "Trace contains backedge");
627 }
628 }
629#endif
630}
631
632//===----------------------------------------------------------------------===//
633// Data Dependencies
634//===----------------------------------------------------------------------===//
635//
636// Compute the depth and height of each instruction based on data dependencies
637// and instruction latencies. These cycle numbers assume that the CPU can issue
638// an infinite number of instructions per cycle as long as their dependencies
639// are ready.
640
641// A data dependency is represented as a defining MI and operand numbers on the
642// defining and using MI.
643namespace {
644
645struct DataDep {
646 const MachineInstr *DefMI;
647 unsigned DefOp;
648 unsigned UseOp;
649
650 DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
651 : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
652
653 /// Create a DataDep from an SSA form virtual register.
654 DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
655 : UseOp(UseOp) {
656    assert(Register::isVirtualRegister(VirtReg));
657    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
658 assert(!DefI.atEnd() && "Register has no defs");
659 DefMI = DefI->getParent();
660 DefOp = DefI.getOperandNo();
661 assert((++DefI).atEnd() && "Register has multiple defs");
662 }
663};
664
665} // end anonymous namespace
666
667// Get the input data dependencies that must be ready before UseMI can issue.
668// Return true if UseMI has any physreg operands.
669static bool getDataDeps(const MachineInstr &UseMI,
670                        SmallVectorImpl<DataDep> &Deps,
671                        const MachineRegisterInfo *MRI) {
672 // Debug values should not be included in any calculations.
673 if (UseMI.isDebugInstr())
674 return false;
675
676 bool HasPhysRegs = false;
677 for (const MachineOperand &MO : UseMI.operands()) {
678 if (!MO.isReg())
679 continue;
680 Register Reg = MO.getReg();
681 if (!Reg)
682 continue;
683 if (Reg.isPhysical()) {
684 HasPhysRegs = true;
685 continue;
686 }
687 // Collect virtual register reads.
688 if (MO.readsReg())
689 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
690 }
691 return HasPhysRegs;
692}
693
694// Get the input data dependencies of a PHI instruction, using Pred as the
695// preferred predecessor.
696// This will add at most one dependency to Deps.
697static void getPHIDeps(const MachineInstr &UseMI,
698                       SmallVectorImpl<DataDep> &Deps,
699                       const MachineBasicBlock *Pred,
700 const MachineRegisterInfo *MRI) {
701 // No predecessor at the beginning of a trace. Ignore dependencies.
702 if (!Pred)
703 return;
704 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
705 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
706 if (UseMI.getOperand(i + 1).getMBB() == Pred) {
707 Register Reg = UseMI.getOperand(i).getReg();
708 Deps.push_back(DataDep(MRI, Reg, i));
709 return;
710 }
711 }
712}
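// For example (hypothetical MIR): a PHI such as
//   %x:gr32 = PHI %a, %bb.1, %b, %bb.2
// stores its incoming values at odd operand indices with the corresponding
// predecessor blocks right after them, so with Pred == %bb.2 the loop above
// adds a single dependency on the definition of %b (use operand index 3).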
713
714// Identify physreg dependencies for UseMI, and update the live regunit
715// tracking set when scanning instructions downwards.
716static void updatePhysDepsDownwards(const MachineInstr *UseMI,
717                                    SmallVectorImpl<DataDep> &Deps,
718                                    SparseSet<LiveRegUnit> &RegUnits,
719                                    const TargetRegisterInfo *TRI) {
720  SmallVector<MCRegister, 8> Kills;
721  SmallVector<unsigned, 8> LiveDefOps;
722
723 for (const MachineOperand &MO : UseMI->operands()) {
724 if (!MO.isReg() || !MO.getReg().isPhysical())
725 continue;
726 MCRegister Reg = MO.getReg().asMCReg();
727 // Track live defs and kills for updating RegUnits.
728 if (MO.isDef()) {
729 if (MO.isDead())
730 Kills.push_back(Reg);
731 else
732 LiveDefOps.push_back(MO.getOperandNo());
733 } else if (MO.isKill())
734 Kills.push_back(Reg);
735 // Identify dependencies.
736 if (!MO.readsReg())
737 continue;
738 for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
739 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
740 if (I == RegUnits.end())
741 continue;
742 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
743 break;
744 }
745 }
746
747 // Update RegUnits to reflect live registers after UseMI.
748 // First kills.
749 for (MCRegister Kill : Kills)
750 for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
751 RegUnits.erase(*Units);
752
753 // Second, live defs.
754 for (unsigned DefOp : LiveDefOps) {
755 for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg().asMCReg(),
756 TRI);
757 Units.isValid(); ++Units) {
758 LiveRegUnit &LRU = RegUnits[*Units];
759 LRU.MI = UseMI;
760 LRU.Op = DefOp;
761 }
762 }
763}
764
765/// The length of the critical path through a trace is the maximum of two path
766/// lengths:
767///
768/// 1. The maximum height+depth over all instructions in the trace center block.
769///
770/// 2. The longest cross-block dependency chain. For small blocks, it is
771/// possible that the critical path through the trace doesn't include any
772/// instructions in the block.
773///
774/// This function computes the second number from the live-in list of the
775/// center block.
776unsigned MachineTraceMetrics::Ensemble::
777computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
778 assert(TBI.HasValidInstrDepths && "Missing depth info");
779 assert(TBI.HasValidInstrHeights && "Missing height info");
780 unsigned MaxLen = 0;
781 for (const LiveInReg &LIR : TBI.LiveIns) {
782 if (!LIR.Reg.isVirtual())
783 continue;
784 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
785 // Ignore dependencies outside the current trace.
786 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
787 if (!DefTBI.isUsefulDominator(TBI))
788 continue;
789 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
790 MaxLen = std::max(MaxLen, Len);
791 }
792 return MaxLen;
793}
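// A small worked example (hypothetical numbers): if a live-in virtual register
// is defined 3 cycles into the trace above the center block
// (Cycles[DefMI].Depth == 3) and the live-in record says another 5 cycles of
// work below still depend on it (LIR.Height == 5), the cross-block chain
// contributes 3 + 5 = 8 cycles to the critical path even when no instruction
// in the center block itself lies on that chain.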
794
795void MachineTraceMetrics::Ensemble::
796updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
797            SparseSet<LiveRegUnit> &RegUnits) {
798  SmallVector<DataDep, 8> Deps;
799  // Collect all data dependencies.
800 if (UseMI.isPHI())
801 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
802 else if (getDataDeps(UseMI, Deps, MTM.MRI))
803 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
804
805 // Filter and process dependencies, computing the earliest issue cycle.
806 unsigned Cycle = 0;
807 for (const DataDep &Dep : Deps) {
808 const TraceBlockInfo&DepTBI =
809 BlockInfo[Dep.DefMI->getParent()->getNumber()];
810 // Ignore dependencies from outside the current trace.
811 if (!DepTBI.isUsefulDominator(TBI))
812 continue;
813 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
814 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
815 // Add latency if DefMI is a real instruction. Transients get latency 0.
816 if (!Dep.DefMI->isTransient())
817 DepCycle += MTM.SchedModel
818 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
819 Cycle = std::max(Cycle, DepCycle);
820 }
821 // Remember the instruction depth.
822 InstrCycles &MICycles = Cycles[&UseMI];
823 MICycles.Depth = Cycle;
824
825 if (TBI.HasValidInstrHeights) {
826 // Update critical path length.
827 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
828 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
829 } else {
830 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
831 }
832}
833
834void MachineTraceMetrics::Ensemble::
835updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
836            SparseSet<LiveRegUnit> &RegUnits) {
837 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
838}
839
840void MachineTraceMetrics::Ensemble::
841updateDepths(MachineBasicBlock::iterator Start,
842             MachineBasicBlock::iterator End,
843             SparseSet<LiveRegUnit> &RegUnits) {
844 for (; Start != End; Start++)
845 updateDepth(Start->getParent(), *Start, RegUnits);
846}
847
848/// Compute instruction depths for all instructions above or in MBB in its
849/// trace. This assumes that the trace through MBB has already been computed.
850void MachineTraceMetrics::Ensemble::
851computeInstrDepths(const MachineBasicBlock *MBB) {
852 // The top of the trace may already be computed, and HasValidInstrDepths
853 // implies Head->HasValidInstrDepths, so we only need to start from the first
854 // block in the trace that needs to be recomputed.
855  SmallVector<const MachineBasicBlock *, 8> Stack;
856  do {
857 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
858 assert(TBI.hasValidDepth() && "Incomplete trace");
859 if (TBI.HasValidInstrDepths)
860 break;
861 Stack.push_back(MBB);
862 MBB = TBI.Pred;
863 } while (MBB);
864
865 // FIXME: If MBB is non-null at this point, it is the last pre-computed block
866 // in the trace. We should track any live-out physregs that were defined in
867 // the trace. This is quite rare in SSA form, typically created by CSE
868 // hoisting a compare.
869 SparseSet<LiveRegUnit> RegUnits;
870 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
871
872 // Go through trace blocks in top-down order, stopping after the center block.
873 while (!Stack.empty()) {
874 MBB = Stack.pop_back_val();
875 LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
876 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
877 TBI.HasValidInstrDepths = true;
878 TBI.CriticalPath = 0;
879
880 // Print out resource depths here as well.
881 LLVM_DEBUG({
882 dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
883 ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
884 for (unsigned K = 0; K != PRDepths.size(); ++K)
885 if (PRDepths[K]) {
886 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
887 dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
888 << MTM.SchedModel.getProcResource(K)->Name << " ("
889 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
890 }
891 });
892
893 // Also compute the critical path length through MBB when possible.
894 if (TBI.HasValidInstrHeights)
895 TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
896
897 for (const auto &UseMI : *MBB) {
898 updateDepth(TBI, UseMI, RegUnits);
899 }
900 }
901}
902
903// Identify physreg dependencies for MI when scanning instructions upwards.
904// Return the issue height of MI after considering any live regunits.
905// Height is the issue height computed from virtual register dependencies alone.
906static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
907 SparseSet<LiveRegUnit> &RegUnits,
908 const TargetSchedModel &SchedModel,
909 const TargetInstrInfo *TII,
910 const TargetRegisterInfo *TRI) {
911  SmallVector<unsigned, 8> ReadOps;
912
913 for (const MachineOperand &MO : MI.operands()) {
914 if (!MO.isReg())
915 continue;
916 Register Reg = MO.getReg();
917 if (!Reg.isPhysical())
918 continue;
919 if (MO.readsReg())
920 ReadOps.push_back(MO.getOperandNo());
921 if (!MO.isDef())
922 continue;
923 // This is a def of Reg. Remove corresponding entries from RegUnits, and
924 // update MI Height to consider the physreg dependencies.
925 for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
926 ++Units) {
927 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
928 if (I == RegUnits.end())
929 continue;
930 unsigned DepHeight = I->Cycle;
931 if (!MI.isTransient()) {
932 // We may not know the UseMI of this dependency, if it came from the
933 // live-in list. SchedModel can handle a NULL UseMI.
934 DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
935 I->MI, I->Op);
936 }
937 Height = std::max(Height, DepHeight);
938 // This regunit is dead above MI.
939 RegUnits.erase(I);
940 }
941 }
942
943 // Now we know the height of MI. Update any regunits read.
944 for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
945 MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
946 for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
947 LiveRegUnit &LRU = RegUnits[*Units];
948 // Set the height to the highest reader of the unit.
949 if (LRU.Cycle <= Height && LRU.MI != &MI) {
950 LRU.Cycle = Height;
951 LRU.MI = &MI;
952 LRU.Op = ReadOps[I];
953 }
954 }
955 }
956
957 return Height;
958}
959
959
960using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
961
962// Push the height of DefMI upwards if required to match UseMI.
963// Return true if this is the first time DefMI was seen.
964static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
965 unsigned UseHeight, MIHeightMap &Heights,
966 const TargetSchedModel &SchedModel,
967 const TargetInstrInfo *TII) {
968 // Adjust height by Dep.DefMI latency.
969 if (!Dep.DefMI->isTransient())
970 UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
971 Dep.UseOp);
972
973 // Update Heights[DefMI] to be the maximum height seen.
974  MIHeightMap::iterator I;
975  bool New;
976 std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
977 if (New)
978 return true;
979
980 // DefMI has been pushed before. Give it the max height.
981 if (I->second < UseHeight)
982 I->second = UseHeight;
983 return false;
984}
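// For instance (hypothetical latencies): if UseMI sits at height 4 and the
// operand latency from Dep.DefMI to UseMI is 3 cycles, DefMI must issue at
// height at least 7. If an earlier use already pushed DefMI to height 9, the
// map keeps 9; recorded heights only ever grow as more uses are seen.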
985
986/// Assuming that the virtual register defined by DefMI:DefOp was used by
987/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
988/// when reaching the block that contains DefMI.
989void MachineTraceMetrics::Ensemble::
990addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
991           ArrayRef<const MachineBasicBlock *> Trace) {
992  assert(!Trace.empty() && "Trace should contain at least one block");
993 Register Reg = DefMI->getOperand(DefOp).getReg();
994 assert(Reg.isVirtual());
995 const MachineBasicBlock *DefMBB = DefMI->getParent();
996
997 // Reg is live-in to all blocks in Trace that follow DefMBB.
998 for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
999 if (MBB == DefMBB)
1000 return;
1001 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1002 // Just add the register. The height will be updated later.
1003 TBI.LiveIns.push_back(Reg);
1004 }
1005}
1006
1007/// Compute instruction heights in the trace through MBB. This updates MBB and
1008/// the blocks below it in the trace. It is assumed that the trace has already
1009/// been computed.
1010void MachineTraceMetrics::Ensemble::
1011computeInstrHeights(const MachineBasicBlock *MBB) {
1012 // The bottom of the trace may already be computed.
1013 // Find the blocks that need updating.
1014  SmallVector<const MachineBasicBlock *, 8> Stack;
1015  do {
1016 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1017 assert(TBI.hasValidHeight() && "Incomplete trace");
1018 if (TBI.HasValidInstrHeights)
1019 break;
1020 Stack.push_back(MBB);
1021 TBI.LiveIns.clear();
1022 MBB = TBI.Succ;
1023 } while (MBB);
1024
1025 // As we move upwards in the trace, keep track of instructions that are
1026 // required by deeper trace instructions. Map MI -> height required so far.
1027 MIHeightMap Heights;
1028
1029 // For physregs, the def isn't known when we see the use.
1030 // Instead, keep track of the highest use of each regunit.
1031 SparseSet<LiveRegUnit> RegUnits;
1032 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1033
1034 // If the bottom of the trace was already precomputed, initialize heights
1035 // from its live-in list.
1036 // MBB is the highest precomputed block in the trace.
1037 if (MBB) {
1038 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1039 for (LiveInReg &LI : TBI.LiveIns) {
1040 if (LI.Reg.isVirtual()) {
1041 // For virtual registers, the def latency is included.
1042 unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
1043 if (Height < LI.Height)
1044 Height = LI.Height;
1045 } else {
1046 // For register units, the def latency is not included because we don't
1047 // know the def yet.
1048 RegUnits[LI.Reg].Cycle = LI.Height;
1049 }
1050 }
1051 }
1052
1053 // Go through the trace blocks in bottom-up order.
1054  SmallVector<DataDep, 8> Deps;
1055  for (;!Stack.empty(); Stack.pop_back()) {
1056 MBB = Stack.back();
1057 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
1058 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1059 TBI.HasValidInstrHeights = true;
1060 TBI.CriticalPath = 0;
1061
1062 LLVM_DEBUG({
1063 dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1064 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1065 for (unsigned K = 0; K != PRHeights.size(); ++K)
1066 if (PRHeights[K]) {
1067 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1068 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1069 << MTM.SchedModel.getProcResource(K)->Name << " ("
1070 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1071 }
1072 });
1073
1074 // Get dependencies from PHIs in the trace successor.
1075 const MachineBasicBlock *Succ = TBI.Succ;
1076 // If MBB is the last block in the trace, and it has a back-edge to the
1077 // loop header, get loop-carried dependencies from PHIs in the header. For
1078 // that purpose, pretend that all the loop header PHIs have height 0.
1079 if (!Succ)
1080 if (const MachineLoop *Loop = getLoopFor(MBB))
1081 if (MBB->isSuccessor(Loop->getHeader()))
1082 Succ = Loop->getHeader();
1083
1084 if (Succ) {
1085 for (const auto &PHI : *Succ) {
1086 if (!PHI.isPHI())
1087 break;
1088 Deps.clear();
1089 getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1090 if (!Deps.empty()) {
1091 // Loop header PHI heights are all 0.
1092 unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1093 LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1094 if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1095 MTM.TII))
1096 addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1097 }
1098 }
1099 }
1100
1101 // Go through the block backwards.
1102 for (const MachineInstr &MI : reverse(*MBB)) {
1103 // Find the MI height as determined by virtual register uses in the
1104 // trace below.
1105 unsigned Cycle = 0;
1106 MIHeightMap::iterator HeightI = Heights.find(&MI);
1107 if (HeightI != Heights.end()) {
1108 Cycle = HeightI->second;
1109 // We won't be seeing any more MI uses.
1110 Heights.erase(HeightI);
1111 }
1112
1113 // Don't process PHI deps. They depend on the specific predecessor, and
1114 // we'll get them when visiting the predecessor.
1115 Deps.clear();
1116 bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1117
1118 // There may also be regunit dependencies to include in the height.
1119 if (HasPhysRegs)
1120 Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1121 MTM.TII, MTM.TRI);
1122
1123 // Update the required height of any virtual registers read by MI.
1124 for (const DataDep &Dep : Deps)
1125 if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1126 addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1127
1128 InstrCycles &MICycles = Cycles[&MI];
1129 MICycles.Height = Cycle;
1130 if (!TBI.HasValidInstrDepths) {
1131 LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
1132 continue;
1133 }
1134 // Update critical path length.
1135 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1136 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1137 }
1138
1139 // Update virtual live-in heights. They were added by addLiveIns() with a 0
1140 // height because the final height isn't known until now.
1141 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
1142 for (LiveInReg &LIR : TBI.LiveIns) {
1143 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
1144 LIR.Height = Heights.lookup(DefMI);
1145 LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
1146 }
1147
1148 // Transfer the live regunits to the live-in list.
1149 for (const LiveRegUnit &RU : RegUnits) {
1150 TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
1151 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
1152 << RU.Cycle);
1153 }
1154 LLVM_DEBUG(dbgs() << '\n');
1155
1156 if (!TBI.HasValidInstrDepths)
1157 continue;
1158 // Add live-ins to the critical path length.
1159 TBI.CriticalPath = std::max(TBI.CriticalPath,
1160 computeCrossBlockCriticalPath(TBI));
1161 LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1162 }
1163}
1164
1165MachineTraceMetrics::Trace
1166MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
1167  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1168
1169 if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1170 computeTrace(MBB);
1171 if (!TBI.HasValidInstrDepths)
1172 computeInstrDepths(MBB);
1173 if (!TBI.HasValidInstrHeights)
1174 computeInstrHeights(MBB);
1175
1176 return Trace(*this, TBI);
1177}
1178
1179unsigned
1180MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
1181  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1182 "MI must be in the trace center block");
1183 InstrCycles Cyc = getInstrCycles(MI);
1184 return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1185}
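// Example with hypothetical numbers: given a critical path of 12 cycles, an
// instruction with Depth == 4 and Height == 6 lies on a 10-cycle chain, so its
// slack is 12 - 10 = 2 cycles; it can be delayed by up to two cycles without
// lengthening the trace.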
1186
1187unsigned
1188MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
1189  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1190  SmallVector<DataDep, 1> Deps;
1191  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1192 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1193 DataDep &Dep = Deps.front();
1194 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1195 // Add latency if DefMI is a real instruction. Transients get latency 0.
1196 if (!Dep.DefMI->isTransient())
1197 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1198 &PHI, Dep.UseOp);
1199 return DepCycle;
1200}
1201
1202/// When bottom is set include instructions in current block in estimate.
1203unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
1204  // Find the limiting processor resource.
1205 // Numbers have been pre-scaled to be comparable.
1206 unsigned PRMax = 0;
1207 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1208 if (Bottom) {
1209 ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
1210 for (unsigned K = 0; K != PRDepths.size(); ++K)
1211 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1212 } else {
1213 for (unsigned PRD : PRDepths)
1214 PRMax = std::max(PRMax, PRD);
1215 }
1216 // Convert to cycle count.
1217 PRMax = TE.MTM.getCycles(PRMax);
1218
1219 /// All instructions before current block
1220 unsigned Instrs = TBI.InstrDepth;
1221 // plus instructions in current block
1222 if (Bottom)
1223 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1224 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1225 Instrs /= IW;
1226 // Assume issue width 1 without a schedule model.
1227 return std::max(Instrs, PRMax);
1228}
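// Worked illustration (hypothetical numbers): with InstrDepth == 12, an issue
// width of 4, and a scaled resource maximum that converts to 5 cycles, the
// estimate is max(12 / 4, 5) = 5 cycles. When no scheduling model provides an
// issue width, the division is skipped, which amounts to assuming a width of 1.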
1229
1230unsigned MachineTraceMetrics::Trace::getResourceLength(
1231    ArrayRef<const MachineBasicBlock *> Extrablocks,
1232    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
1233    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1234 // Add up resources above and below the center block.
1235 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1236 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1237 unsigned PRMax = 0;
1238
1239 // Capture computing cycles from extra instructions
1240 auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1241 unsigned ResourceIdx)
1242 ->unsigned {
1243 unsigned Cycles = 0;
1244 for (const MCSchedClassDesc *SC : Instrs) {
1245 if (!SC->isValid())
1246 continue;
1247      for (TargetSchedModel::ProcResIter
1248               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1249 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1250 PI != PE; ++PI) {
1251 if (PI->ProcResourceIdx != ResourceIdx)
1252 continue;
1253 Cycles +=
1254 (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1255 }
1256 }
1257 return Cycles;
1258 };
1259
1260 for (unsigned K = 0; K != PRDepths.size(); ++K) {
1261 unsigned PRCycles = PRDepths[K] + PRHeights[K];
1262 for (const MachineBasicBlock *MBB : Extrablocks)
1263 PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
1264 PRCycles += extraCycles(ExtraInstrs, K);
1265 PRCycles -= extraCycles(RemoveInstrs, K);
1266 PRMax = std::max(PRMax, PRCycles);
1267 }
1268 // Convert to cycle count.
1269 PRMax = TE.MTM.getCycles(PRMax);
1270
1271 // Instrs: #instructions in current trace outside current block.
1272 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1273 // Add instruction count from the extra blocks.
1274 for (const MachineBasicBlock *MBB : Extrablocks)
1275 Instrs += TE.MTM.getResources(MBB)->InstrCount;
1276 Instrs += ExtraInstrs.size();
1277 Instrs -= RemoveInstrs.size();
1278 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1279 Instrs /= IW;
1280 // Assume issue width 1 without a schedule model.
1281 return std::max(Instrs, PRMax);
1282}
1283
1284bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
1285                                              const MachineInstr &UseMI) const {
1286 if (DefMI.getParent() == UseMI.getParent())
1287 return true;
1288
1289 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1290 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1291
1292 return DepTBI.isUsefulDominator(TBI);
1293}
1294
1295void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
1296  OS << getName() << " ensemble:\n";
1297 for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1298 OS << " %bb." << i << '\t';
1299 BlockInfo[i].print(OS);
1300 OS << '\n';
1301 }
1302}
1303
1304void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
1305  if (hasValidDepth()) {
1306 OS << "depth=" << InstrDepth;
1307 if (Pred)
1308 OS << " pred=" << printMBBReference(*Pred);
1309 else
1310 OS << " pred=null";
1311 OS << " head=%bb." << Head;
1312 if (HasValidInstrDepths)
1313 OS << " +instrs";
1314 } else
1315 OS << "depth invalid";
1316 OS << ", ";
1317 if (hasValidHeight()) {
1318 OS << "height=" << InstrHeight;
1319 if (Succ)
1320 OS << " succ=" << printMBBReference(*Succ);
1321 else
1322 OS << " succ=null";
1323 OS << " tail=%bb." << Tail;
1324 if (HasValidInstrHeights)
1325 OS << " +instrs";
1326 } else
1327 OS << "height invalid";
1328 if (HasValidInstrDepths && HasValidInstrHeights)
1329 OS << ", crit=" << CriticalPath;
1330}
1331
1332void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
1333  unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1334
1335 OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1336 << " --> %bb." << TBI.Tail << ':';
1337 if (TBI.hasValidHeight() && TBI.hasValidDepth())
1338 OS << ' ' << getInstrCount() << " instrs.";
1339 if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1340 OS << ' ' << TBI.CriticalPath << " cycles.";
1341
1342  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
1343  OS << "\n%bb." << MBBNum;
1344 while (Block->hasValidDepth() && Block->Pred) {
1345 unsigned Num = Block->Pred->getNumber();
1346 OS << " <- " << printMBBReference(*Block->Pred);
1347 Block = &TE.BlockInfo[Num];
1348 }
1349
1350 Block = &TBI;
1351 OS << "\n ";
1352 while (Block->hasValidHeight() && Block->Succ) {
1353 unsigned Num = Block->Succ->getNumber();
1354 OS << " -> " << printMBBReference(*Block->Succ);
1355 Block = &TE.BlockInfo[Num];
1356 }
1357 OS << '\n';
1358}