MachineTraceMetrics.cpp
1//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/SparseSet.h"
26#include "llvm/Pass.h"
27#include "llvm/Support/Debug.h"
29#include "llvm/Support/Format.h"
31#include <algorithm>
32#include <cassert>
33#include <tuple>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "machine-trace-metrics"
38
39AnalysisKey MachineTraceMetricsAnalysis::Key;
40
46
53
55
57
59 "Machine Trace Metrics", false, true)
62 "Machine Trace Metrics", false, true)
63
66
72
73void MachineTraceMetrics::init(MachineFunction &Func,
74 const MachineLoopInfo &LI) {
75 MF = &Func;
76 const TargetSubtargetInfo &ST = MF->getSubtarget();
77 TII = ST.getInstrInfo();
78 TRI = ST.getRegisterInfo();
79 MRI = &MF->getRegInfo();
80 Loops = &LI;
81 SchedModel.init(&ST);
82 BlockInfo.resize(MF->getNumBlockIDs());
83 ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
84 SchedModel.getNumProcResourceKinds());
85}
86
91
93
95 MF = nullptr;
96 BlockInfo.clear();
97 for (auto &E : Ensembles)
98 E.reset();
99}
100
101//===----------------------------------------------------------------------===//
102// Fixed block information
103//===----------------------------------------------------------------------===//
104//
105// The number of instructions in a basic block and the CPU resources used by
106// those instructions don't depend on any given trace strategy.
107
108/// Compute the resource usage in basic block MBB.
109const MachineTraceMetrics::FixedBlockInfo*
110MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
111 assert(MBB && "No basic block");
112 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
113 if (FBI->hasResources())
114 return FBI;
115
116 // Compute resource usage in the block.
117 FBI->HasCalls = false;
118 unsigned InstrCount = 0;
119
120 // Add up per-processor resource cycles as well.
121 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
122 SmallVector<unsigned, 32> PRCycles(PRKinds);
123
124 for (const auto &MI : *MBB) {
125 if (MI.isTransient())
126 continue;
127 ++InstrCount;
128 if (MI.isCall())
129 FBI->HasCalls = true;
130
131 // Count processor resources used.
132 if (!SchedModel.hasInstrSchedModel())
133 continue;
134 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
135 if (!SC->isValid())
136 continue;
137
138 for (TargetSchedModel::ProcResIter
139 PI = SchedModel.getWriteProcResBegin(SC),
140 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
141 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
142 PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
143 }
144 }
145 FBI->InstrCount = InstrCount;
146
147 // Scale the resource cycles so they are comparable.
148 unsigned PROffset = MBB->getNumber() * PRKinds;
149 for (unsigned K = 0; K != PRKinds; ++K)
150 ProcReleaseAtCycles[PROffset + K] =
151 PRCycles[K] * SchedModel.getResourceFactor(K);
152
153 return FBI;
154}
155
156ArrayRef<unsigned>
157MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum) const {
158 assert(BlockInfo[MBBNum].hasResources() &&
159 "getResources() must be called before getProcReleaseAtCycles()");
160 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
161 assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
162 return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
163}
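// Reading the table back, as a minimal sketch using only names defined in
// this file (it mirrors the LLVM_DEBUG printing in computeInstrDepths below):
// ProcReleaseAtCycles is a flattened [getNumBlockIDs() x PRKinds] matrix,
// indexed as MBBNum * PRKinds + K, and every entry is pre-scaled by
// getResourceFactor(K) so different resource kinds are directly comparable.
//
//   ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
//   for (unsigned K = 0; K != PRCycles.size(); ++K)
//     if (PRCycles[K])
//       dbgs() << MTM.getCycles(PRCycles[K]) << "c @ resource kind " << K
//              << '\n';  // getCycles() converts scaled units back to cycles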
164
165//===----------------------------------------------------------------------===//
166// Ensemble utility functions
167//===----------------------------------------------------------------------===//
168
169MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
170 : MTM(*ct) {
171 BlockInfo.resize(MTM.BlockInfo.size());
172 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
173 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
174 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
175}
176
177// Virtual destructor serves as an anchor.
178MachineTraceMetrics::Ensemble::~Ensemble() = default;
179
180const MachineLoop*
181MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
182 return MTM.Loops->getLoopFor(MBB);
183}
184
185// Update resource-related information in the TraceBlockInfo for MBB.
186// Only update resources related to the trace above MBB.
187void MachineTraceMetrics::Ensemble::
188computeDepthResources(const MachineBasicBlock *MBB) {
189 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
190 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
191 unsigned PROffset = MBB->getNumber() * PRKinds;
192
193 // Compute resources from trace above. The top block is simple.
194 if (!TBI->Pred) {
195 TBI->InstrDepth = 0;
196 TBI->Head = MBB->getNumber();
197 std::fill(ProcResourceDepths.begin() + PROffset,
198 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
199 return;
200 }
201
202 // Compute from the block above. A post-order traversal ensures the
203 // predecessor is always computed first.
204 unsigned PredNum = TBI->Pred->getNumber();
205 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
206 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
207 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
208 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
209 TBI->Head = PredTBI->Head;
210
211 // Compute per-resource depths.
212 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
213 ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
214 for (unsigned K = 0; K != PRKinds; ++K)
215 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
216}
217
218// Update resource-related information in the TraceBlockInfo for MBB.
219// Only update resources related to the trace below MBB.
220void MachineTraceMetrics::Ensemble::
221computeHeightResources(const MachineBasicBlock *MBB) {
222 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
223 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
224 unsigned PROffset = MBB->getNumber() * PRKinds;
225
226 // Compute resources for the current block.
227 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
228 ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
229
230 // The trace tail is done.
231 if (!TBI->Succ) {
232 TBI->Tail = MBB->getNumber();
233 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
234 return;
235 }
236
237 // Compute from the block below. A post-order traversal ensures the
238 // successor is always computed first.
239 unsigned SuccNum = TBI->Succ->getNumber();
240 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
241 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
242 TBI->InstrHeight += SuccTBI->InstrHeight;
243 TBI->Tail = SuccTBI->Tail;
244
245 // Compute per-resource heights.
246 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
247 for (unsigned K = 0; K != PRKinds; ++K)
248 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
249}
250
251// Check if depth resources for MBB are valid and return the TBI.
252// Return NULL if the resources have been invalidated.
253const MachineTraceMetrics::TraceBlockInfo*
254MachineTraceMetrics::Ensemble::
255getDepthResources(const MachineBasicBlock *MBB) const {
256 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
257 return TBI->hasValidDepth() ? TBI : nullptr;
258}
259
260// Check if height resources for MBB are valid and return the TBI.
261// Return NULL if the resources have been invalidated.
262const MachineTraceMetrics::TraceBlockInfo*
263MachineTraceMetrics::Ensemble::
264getHeightResources(const MachineBasicBlock *MBB) const {
265 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
266 return TBI->hasValidHeight() ? TBI : nullptr;
267}
268
269/// Get an array of processor resource depths for MBB. Indexed by processor
270/// resource kind, this array contains the scaled processor resources consumed
271/// by all blocks preceding MBB in its trace. It does not include instructions
272/// in MBB.
273///
274/// Compare TraceBlockInfo::InstrDepth.
275ArrayRef<unsigned>
276MachineTraceMetrics::Ensemble::
277getProcResourceDepths(unsigned MBBNum) const {
278 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
279 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
280 return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
281}
282
283/// Get an array of processor resource heights for MBB. Indexed by processor
284/// resource kind, this array contains the scaled processor resources consumed
285/// by this block and all blocks following it in its trace.
286///
287/// Compare TraceBlockInfo::InstrHeight.
288ArrayRef<unsigned>
289MachineTraceMetrics::Ensemble::
290getProcResourceHeights(unsigned MBBNum) const {
291 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
292 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
293 return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
294}
295
296//===----------------------------------------------------------------------===//
297// Trace Selection Strategies
298//===----------------------------------------------------------------------===//
299//
300// A trace selection strategy is implemented as a sub-class of Ensemble. The
301// trace through a block B is computed by two DFS traversals of the CFG
302// starting from B. One upwards, and one downwards. During the upwards DFS,
303// pickTracePred() is called on the post-ordered blocks. During the downwards
304// DFS, pickTraceSucc() is likewise called on the post-ordered blocks.
305//
306
307// We never allow traces that leave loops, but we do allow traces to enter
308// nested loops. We also never allow traces to contain back-edges.
309//
310// This means that a loop header can never appear above the center block of a
311// trace, except as the trace head. Below the center block, loop exiting edges
312// are banned.
313//
314// Return true if an edge from the From loop to the To loop is leaving a loop.
315// Either of To and From can be null.
316static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
317 return From && !From->contains(To);
318}
319
320// MinInstrCountEnsemble - Pick the trace that executes the least number of
321// instructions.
322namespace {
323
324class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
325 const char *getName() const override { return "MinInstr"; }
326 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
327 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
328
329public:
330 MinInstrCountEnsemble(MachineTraceMetrics *mtm)
331 : MachineTraceMetrics::Ensemble(mtm) {}
332};
333
334/// Pick only the current basic block for the trace and do not choose any
335/// predecessors/successors.
336class LocalEnsemble : public MachineTraceMetrics::Ensemble {
337 const char *getName() const override { return "Local"; }
338 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
339 return nullptr;
340 };
341 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
342 return nullptr;
343 };
344
345public:
346 LocalEnsemble(MachineTraceMetrics *MTM)
347 : MachineTraceMetrics::Ensemble(MTM) {}
348};
349} // end anonymous namespace
350
351// Select the preferred predecessor for MBB.
352const MachineBasicBlock*
353MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
354 if (MBB->pred_empty())
355 return nullptr;
356 const MachineLoop *CurLoop = getLoopFor(MBB);
357 // Don't leave loops, and never follow back-edges.
358 if (CurLoop && MBB == CurLoop->getHeader())
359 return nullptr;
360 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
361 const MachineBasicBlock *Best = nullptr;
362 unsigned BestDepth = 0;
363 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
364 const MachineTraceMetrics::TraceBlockInfo *PredTBI =
365 getDepthResources(Pred);
366 // Ignore cycles that aren't natural loops.
367 if (!PredTBI)
368 continue;
369 // Pick the predecessor that would give this block the smallest InstrDepth.
370 unsigned Depth = PredTBI->InstrDepth + CurCount;
371 if (!Best || Depth < BestDepth) {
372 Best = Pred;
373 BestDepth = Depth;
374 }
375 }
376 return Best;
377}
378
379// Select the preferred successor for MBB.
380const MachineBasicBlock*
381MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
382 if (MBB->succ_empty())
383 return nullptr;
384 const MachineLoop *CurLoop = getLoopFor(MBB);
385 const MachineBasicBlock *Best = nullptr;
386 unsigned BestHeight = 0;
387 for (const MachineBasicBlock *Succ : MBB->successors()) {
388 // Don't consider back-edges.
389 if (CurLoop && Succ == CurLoop->getHeader())
390 continue;
391 // Don't consider successors exiting CurLoop.
392 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
393 continue;
394 const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
395 getHeightResources(Succ);
396 // Ignore cycles that aren't natural loops.
397 if (!SuccTBI)
398 continue;
399 // Pick the successor that would give this block the smallest InstrHeight.
400 unsigned Height = SuccTBI->InstrHeight;
401 if (!Best || Height < BestHeight) {
402 Best = Succ;
403 BestHeight = Height;
404 }
405 }
406 return Best;
407}
408
409// Get an Ensemble sub-class for the requested trace strategy.
413 "Invalid trace strategy enum");
414 std::unique_ptr<MachineTraceMetrics::Ensemble> &E =
415 Ensembles[static_cast<size_t>(strategy)];
416 if (E)
417 return E.get();
418
419 // Allocate new Ensemble on demand.
420 switch (strategy) {
421 case MachineTraceStrategy::TS_MinInstrCount:
422 E = std::make_unique<MinInstrCountEnsemble>(MinInstrCountEnsemble(this));
423 break;
424 case MachineTraceStrategy::TS_Local:
425 E = std::make_unique<LocalEnsemble>(LocalEnsemble(this));
426 break;
427 default: llvm_unreachable("Invalid trace strategy enum");
428 }
429 return E.get();
430}
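// A minimal client sketch (this is how passes such as MachineCombiner or
// EarlyIfConversion drive the analysis; how the MTM pointer is obtained from
// the pass manager is assumed here and not shown):
//
//   MachineTraceMetrics::Ensemble *Traces =
//       MTM->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
//   MachineTraceMetrics::Trace T = Traces->getTrace(MBB);
//   unsigned CritPath = T.getCriticalPath();
//   unsigned Depth = T.getInstrCycles(MI).Depth;  // MI must be inside the trace
//
// After rewriting instructions in MBB, call MTM->invalidate(MBB) so the
// cached per-instruction cycles are recomputed on the next query.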
431
432void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
433 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
434 << '\n');
435 BlockInfo[MBB->getNumber()].invalidate();
436 for (auto &E : Ensembles)
437 if (E)
438 E->invalidate(MBB);
439}
440
441bool MachineTraceMetrics::invalidate(
442 MachineFunction &, const PreservedAnalyses &PA,
443 MachineFunctionAnalysisManager::Invalidator &) {
444 // Check whether the analysis, all analyses on machine functions, or the
445 // machine function's CFG have been preserved.
446 auto PAC = PA.getChecker<MachineTraceMetricsAnalysis>();
447 return !PAC.preserved() &&
448 !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
449 !PAC.preservedSet<CFGAnalyses>();
450}
451
452void MachineTraceMetrics::verifyAnalysis() const {
453 if (!MF)
454 return;
455#ifndef NDEBUG
456 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
457 for (auto &E : Ensembles)
458 if (E)
459 E->verify();
460#endif
461}
462
463//===----------------------------------------------------------------------===//
464// Trace building
465//===----------------------------------------------------------------------===//
466//
467// Traces are built by two CFG traversals. To avoid recomputing too much, use a
468// set abstraction that confines the search to the current loop, and doesn't
469// revisit blocks.
470
471namespace {
472
473struct LoopBounds {
474 MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
475 SmallPtrSet<const MachineBasicBlock *, 8> Visited;
476 const MachineLoopInfo *Loops;
477 bool Downward = false;
478
479 LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
480 const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
481};
482
483} // end anonymous namespace
484
485// Specialize po_iterator_storage in order to prune the post-order traversal so
486// it is limited to the current loop and doesn't traverse the loop back edges.
487template <> class llvm::po_iterator_storage<LoopBounds, true> {
488 LoopBounds &LB;
489
490public:
491 po_iterator_storage(LoopBounds &lb) : LB(lb) {}
492
494
495 bool insertEdge(std::optional<const MachineBasicBlock *> From,
496 const MachineBasicBlock *To) {
497 // Skip already visited To blocks.
498 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
499 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
500 return false;
501 // From is null once when To is the trace center block.
502 if (From) {
503 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
504 // Don't follow backedges, don't leave FromLoop when going upwards.
505 if ((LB.Downward ? To : *From) == FromLoop->getHeader())
506 return false;
507 // Don't leave FromLoop.
508 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
509 return false;
510 }
511 }
512 // To is a new block. Mark the block as visited in case the CFG has cycles
513 // that MachineLoopInfo didn't recognize as a natural loop.
514 return LB.Visited.insert(To).second;
515 }
516};
517
518/// Compute the trace through MBB.
519void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
520 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
521 << printMBBReference(*MBB) << '\n');
522 // Set up loop bounds for the backwards post-order traversal.
523 LoopBounds Bounds(BlockInfo, MTM.Loops);
524
525 // Run an upwards post-order search for the trace start.
526 Bounds.Downward = false;
527 Bounds.Visited.clear();
528 for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
529 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
530 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
531 // All the predecessors have been visited, pick the preferred one.
532 TBI.Pred = pickTracePred(I);
533 LLVM_DEBUG({
534 if (TBI.Pred)
535 dbgs() << printMBBReference(*TBI.Pred) << '\n';
536 else
537 dbgs() << "null\n";
538 });
539 // The trace leading to I is now known, compute the depth resources.
540 computeDepthResources(I);
541 }
542
543 // Run a downwards post-order search for the trace end.
544 Bounds.Downward = true;
545 Bounds.Visited.clear();
546 for (const auto *I : post_order_ext(MBB, Bounds)) {
547 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
548 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
549 // All the successors have been visited, pick the preferred one.
550 TBI.Succ = pickTraceSucc(I);
551 LLVM_DEBUG({
552 if (TBI.Succ)
553 dbgs() << printMBBReference(*TBI.Succ) << '\n';
554 else
555 dbgs() << "null\n";
556 });
557 // The trace leaving I is now known, compute the height resources.
558 computeHeightResources(I);
559 }
560}
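// Worked example on a hypothetical diamond CFG:
//
//          Entry
//          /   \
//       Then   Else        (Then: 2 instructions, Else: 5 instructions)
//          \   /
//          Exit
//
// computeTrace(Exit) first runs the upwards search; under the MinInstr
// strategy, pickTracePred(Exit) prefers Then because it yields the smaller
// InstrDepth, so the trace through Exit becomes Entry -> Then -> Exit. The
// downwards search is trivial here since Exit has no successors.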
561
562/// Invalidate traces through BadMBB.
563void
564MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
565 SmallVector<const MachineBasicBlock *, 16> WorkList;
566 TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
567
568 // Invalidate height resources of blocks above MBB.
569 if (BadTBI.hasValidHeight()) {
570 BadTBI.invalidateHeight();
571 WorkList.push_back(BadMBB);
572 do {
573 const MachineBasicBlock *MBB = WorkList.pop_back_val();
574 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
575 << getName() << " height.\n");
576 // Find any MBB predecessors that have MBB as their preferred successor.
577 // They are the only ones that need to be invalidated.
578 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
579 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
580 if (!TBI.hasValidHeight())
581 continue;
582 if (TBI.Succ == MBB) {
583 TBI.invalidateHeight();
584 WorkList.push_back(Pred);
585 continue;
586 }
587 // Verify that TBI.Succ is actually a successor of Pred.
588 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
589 }
590 } while (!WorkList.empty());
591 }
592
593 // Invalidate depth resources of blocks below MBB.
594 if (BadTBI.hasValidDepth()) {
595 BadTBI.invalidateDepth();
596 WorkList.push_back(BadMBB);
597 do {
598 const MachineBasicBlock *MBB = WorkList.pop_back_val();
599 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
600 << getName() << " depth.\n");
601 // Find any MBB successors that have MBB as their preferred predecessor.
602 // They are the only ones that need to be invalidated.
603 for (const MachineBasicBlock *Succ : MBB->successors()) {
604 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
605 if (!TBI.hasValidDepth())
606 continue;
607 if (TBI.Pred == MBB) {
608 TBI.invalidateDepth();
609 WorkList.push_back(Succ);
610 continue;
611 }
612 // Verify that TBI.Pred is actually a predecessor of Succ.
613 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
614 }
615 } while (!WorkList.empty());
616 }
617
618 // Clear any per-instruction data. We only have to do this for BadMBB itself
619 // because the instructions in that block may change. Other blocks may be
620 // invalidated, but their instructions will stay the same, so there is no
621 // need to erase the Cycle entries. They will be overwritten when we
622 // recompute.
623 for (const auto &I : *BadMBB)
624 Cycles.erase(&I);
625}
626
627void MachineTraceMetrics::Ensemble::verify() const {
628#ifndef NDEBUG
629 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
630 "Outdated BlockInfo size");
631 for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
632 const TraceBlockInfo &TBI = BlockInfo[Num];
633 if (TBI.hasValidDepth() && TBI.Pred) {
634 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
635 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
636 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
637 "Trace is broken, depth should have been invalidated.");
638 const MachineLoop *Loop = getLoopFor(MBB);
639 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
640 }
641 if (TBI.hasValidHeight() && TBI.Succ) {
642 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
643 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
644 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
645 "Trace is broken, height should have been invalidated.");
646 const MachineLoop *Loop = getLoopFor(MBB);
647 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
648 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
649 "Trace contains backedge");
650 }
651 }
652#endif
653}
654
655//===----------------------------------------------------------------------===//
656// Data Dependencies
657//===----------------------------------------------------------------------===//
658//
659// Compute the depth and height of each instruction based on data dependencies
660// and instruction latencies. These cycle numbers assume that the CPU can issue
661// an infinite number of instructions per cycle as long as their dependencies
662// are ready.
663
664// A data dependency is represented as a defining MI and operand numbers on the
665// defining and using MI.
666namespace {
667
668struct DataDep {
669 const MachineInstr *DefMI;
670 unsigned DefOp;
671 unsigned UseOp;
672
673 DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
674 : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
675
676 /// Create a DataDep from an SSA form virtual register.
677 DataDep(const MachineRegisterInfo *MRI, Register VirtReg, unsigned UseOp)
678 : UseOp(UseOp) {
679 assert(VirtReg.isVirtual());
680 MachineOperand *DefMO = MRI->getOneDef(VirtReg);
681 assert(DefMO && "Register does not have unique def");
682 DefMI = DefMO->getParent();
683 DefOp = DefMO->getOperandNo();
684 }
685};
686
687} // end anonymous namespace
688
689// Get the input data dependencies that must be ready before UseMI can issue.
690// Return true if UseMI has any physreg operands.
691static bool getDataDeps(const MachineInstr &UseMI,
692 SmallVectorImpl<DataDep> &Deps,
693 const MachineRegisterInfo *MRI) {
694 // Debug values should not be included in any calculations.
695 if (UseMI.isDebugInstr())
696 return false;
697
698 bool HasPhysRegs = false;
699 for (const MachineOperand &MO : UseMI.operands()) {
700 if (!MO.isReg())
701 continue;
702 Register Reg = MO.getReg();
703 if (!Reg)
704 continue;
705 if (Reg.isPhysical()) {
706 HasPhysRegs = true;
707 continue;
708 }
709 // Collect virtual register reads.
710 if (MO.readsReg())
711 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
712 }
713 return HasPhysRegs;
714}
715
716// Get the input data dependencies of a PHI instruction, using Pred as the
717// preferred predecessor.
718// This will add at most one dependency to Deps.
719static void getPHIDeps(const MachineInstr &UseMI,
720 SmallVectorImpl<DataDep> &Deps,
721 const MachineBasicBlock *Pred,
722 const MachineRegisterInfo *MRI) {
723 // No predecessor at the beginning of a trace. Ignore dependencies.
724 if (!Pred)
725 return;
726 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
727 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
728 if (UseMI.getOperand(i + 1).getMBB() == Pred) {
729 Register Reg = UseMI.getOperand(i).getReg();
730 Deps.push_back(DataDep(MRI, Reg, i));
731 return;
732 }
733 }
734}
735
736// Identify physreg dependencies for UseMI, and update the live regunit
737// tracking set when scanning instructions downwards.
738static void updatePhysDepsDownwards(const MachineInstr *UseMI,
739 SmallVectorImpl<DataDep> &Deps,
740 LiveRegUnitSet &RegUnits,
741 const TargetRegisterInfo *TRI) {
742 SmallVector<MCRegister, 8> Kills;
743 SmallVector<unsigned, 8> LiveDefOps;
744
745 for (const MachineOperand &MO : UseMI->operands()) {
746 if (!MO.isReg() || !MO.getReg().isPhysical())
747 continue;
748 MCRegister Reg = MO.getReg().asMCReg();
749 // Track live defs and kills for updating RegUnits.
750 if (MO.isDef()) {
751 if (MO.isDead())
752 Kills.push_back(Reg);
753 else
754 LiveDefOps.push_back(MO.getOperandNo());
755 } else if (MO.isKill())
756 Kills.push_back(Reg);
757 // Identify dependencies.
758 if (!MO.readsReg())
759 continue;
760 for (MCRegUnit Unit : TRI->regunits(Reg)) {
761 LiveRegUnitSet::iterator I = RegUnits.find(Unit);
762 if (I == RegUnits.end())
763 continue;
764 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
765 break;
766 }
767 }
768
769 // Update RegUnits to reflect live registers after UseMI.
770 // First kills.
771 for (MCRegister Kill : Kills)
772 for (MCRegUnit Unit : TRI->regunits(Kill))
773 RegUnits.erase(Unit);
774
775 // Second, live defs.
776 for (unsigned DefOp : LiveDefOps) {
777 for (MCRegUnit Unit :
778 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
779 LiveRegUnit &LRU = RegUnits[Unit];
780 LRU.MI = UseMI;
781 LRU.Op = DefOp;
782 }
783 }
784}
785
786/// The length of the critical path through a trace is the maximum of two path
787/// lengths:
788///
789/// 1. The maximum height+depth over all instructions in the trace center block.
790///
791/// 2. The longest cross-block dependency chain. For small blocks, it is
792/// possible that the critical path through the trace doesn't include any
793/// instructions in the block.
794///
795/// This function computes the second number from the live-in list of the
796/// center block.
797unsigned MachineTraceMetrics::Ensemble::
798computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
799 assert(TBI.HasValidInstrDepths && "Missing depth info");
800 assert(TBI.HasValidInstrHeights && "Missing height info");
801 unsigned MaxLen = 0;
802 for (const LiveInReg &LIR : TBI.LiveIns) {
803 if (!LIR.VRegOrUnit.isVirtualReg())
804 continue;
805 const MachineInstr *DefMI =
806 MTM.MRI->getVRegDef(LIR.VRegOrUnit.asVirtualReg());
807 // Ignore dependencies outside the current trace.
808 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
809 if (!DefTBI.isUsefulDominator(TBI))
810 continue;
811 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
812 MaxLen = std::max(MaxLen, Len);
813 }
814 return MaxLen;
815}
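// Numeric sketch with made-up latencies: if the center block has a live-in
// virtual register whose def, further up the trace, sits at depth 3, and the
// uses below require a height of 7 (the height of a virtual live-in already
// includes the def latency), that live-in alone contributes a 3 + 7 = 10
// cycle path. The critical path is therefore at least 10 cycles even when no
// instruction inside the center block reaches that depth + height itself.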
816
817void MachineTraceMetrics::Ensemble::updateDepth(TraceBlockInfo &TBI,
818 const MachineInstr &UseMI,
819 LiveRegUnitSet &RegUnits) {
820 SmallVector<DataDep, 8> Deps;
821 // Collect all data dependencies.
822 if (UseMI.isPHI())
823 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
824 else if (getDataDeps(UseMI, Deps, MTM.MRI))
825 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
826
827 // Filter and process dependencies, computing the earliest issue cycle.
828 unsigned Cycle = 0;
829 for (const DataDep &Dep : Deps) {
830 const TraceBlockInfo&DepTBI =
831 BlockInfo[Dep.DefMI->getParent()->getNumber()];
832 // Ignore dependencies from outside the current trace.
833 if (!DepTBI.isUsefulDominator(TBI))
834 continue;
835 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
836 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
837 // Add latency if DefMI is a real instruction. Transients get latency 0.
838 if (!Dep.DefMI->isTransient())
839 DepCycle += MTM.SchedModel
840 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
841 Cycle = std::max(Cycle, DepCycle);
842 }
843 // Remember the instruction depth.
844 InstrCycles &MICycles = Cycles[&UseMI];
845 MICycles.Depth = Cycle;
846
847 if (TBI.HasValidInstrHeights) {
848 // Update critical path length.
849 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
850 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
851 } else {
852 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
853 }
854}
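// The recurrence implemented above, stated once: for every dependence
// DefMI -> UseMI that stays inside the trace,
//
//   Depth(UseMI) = max over its deps of (Depth(DefMI) + latency(DefMI, UseMI))
//
// where transient instructions (COPY, PHI, IMPLICIT_DEF, ...) contribute a
// latency of 0, and Depth defaults to 0 when an instruction has no in-trace
// dependencies at all.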
855
856void MachineTraceMetrics::Ensemble::updateDepth(const MachineBasicBlock *MBB,
857 const MachineInstr &UseMI,
858 LiveRegUnitSet &RegUnits) {
859 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
860}
861
862void MachineTraceMetrics::Ensemble::updateDepths(MachineBasicBlock::iterator Start,
863 MachineBasicBlock::iterator End,
864 LiveRegUnitSet &RegUnits) {
865 for (; Start != End; Start++)
866 updateDepth(Start->getParent(), *Start, RegUnits);
867}
868
869/// Compute instruction depths for all instructions above or in MBB in its
870/// trace. This assumes that the trace through MBB has already been computed.
871void MachineTraceMetrics::Ensemble::
872computeInstrDepths(const MachineBasicBlock *MBB) {
873 // The top of the trace may already be computed, and HasValidInstrDepths
874 // implies Head->HasValidInstrDepths, so we only need to start from the first
875 // block in the trace that needs to be recomputed.
876 SmallVector<const MachineBasicBlock *, 8> Stack;
877 do {
878 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
879 assert(TBI.hasValidDepth() && "Incomplete trace");
880 if (TBI.HasValidInstrDepths)
881 break;
882 Stack.push_back(MBB);
883 MBB = TBI.Pred;
884 } while (MBB);
885
886 // FIXME: If MBB is non-null at this point, it is the last pre-computed block
887 // in the trace. We should track any live-out physregs that were defined in
888 // the trace. This is quite rare in SSA form, typically created by CSE
889 // hoisting a compare.
890 LiveRegUnitSet RegUnits;
891 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
892
893 // Go through trace blocks in top-down order, stopping after the center block.
894 while (!Stack.empty()) {
895 MBB = Stack.pop_back_val();
896 LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
897 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
898 TBI.HasValidInstrDepths = true;
899 TBI.CriticalPath = 0;
900
901 // Print out resource depths here as well.
902 LLVM_DEBUG({
903 dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
904 ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
905 for (unsigned K = 0; K != PRDepths.size(); ++K)
906 if (PRDepths[K]) {
907 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
908 dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
909 << MTM.SchedModel.getProcResource(K)->Name << " ("
910 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
911 }
912 });
913
914 // Also compute the critical path length through MBB when possible.
915 if (TBI.HasValidInstrHeights)
916 TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
917
918 for (const auto &UseMI : *MBB) {
919 updateDepth(TBI, UseMI, RegUnits);
920 }
921 }
922}
923
924// Identify physreg dependencies for MI when scanning instructions upwards.
925// Return the issue height of MI after considering any live regunits.
926// Height is the issue height computed from virtual register dependencies alone.
927static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
928 LiveRegUnitSet &RegUnits,
929 const TargetSchedModel &SchedModel,
930 const TargetInstrInfo *TII,
931 const TargetRegisterInfo *TRI) {
932 SmallVector<unsigned, 8> ReadOps;
933
934 for (const MachineOperand &MO : MI.operands()) {
935 if (!MO.isReg())
936 continue;
937 Register Reg = MO.getReg();
938 if (!Reg.isPhysical())
939 continue;
940 if (MO.readsReg())
941 ReadOps.push_back(MO.getOperandNo());
942 if (!MO.isDef())
943 continue;
944 // This is a def of Reg. Remove corresponding entries from RegUnits, and
945 // update MI Height to consider the physreg dependencies.
946 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
947 LiveRegUnitSet::iterator I = RegUnits.find(Unit);
948 if (I == RegUnits.end())
949 continue;
950 unsigned DepHeight = I->Cycle;
951 if (!MI.isTransient()) {
952 // We may not know the UseMI of this dependency, if it came from the
953 // live-in list. SchedModel can handle a NULL UseMI.
954 DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
955 I->MI, I->Op);
956 }
957 Height = std::max(Height, DepHeight);
958 // This regunit is dead above MI.
959 RegUnits.erase(I);
960 }
961 }
962
963 // Now we know the height of MI. Update any regunits read.
964 for (unsigned Op : ReadOps) {
965 MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
966 for (MCRegUnit Unit : TRI->regunits(Reg)) {
967 LiveRegUnit &LRU = RegUnits[Unit];
968 // Set the height to the highest reader of the unit.
969 if (LRU.Cycle <= Height && LRU.MI != &MI) {
970 LRU.Cycle = Height;
971 LRU.MI = &MI;
972 LRU.Op = Op;
973 }
974 }
975 }
976
977 return Height;
978}
979
980typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
981
982// Push the height of DefMI upwards if required to match UseMI.
983// Return true if this is the first time DefMI was seen.
984static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
985 unsigned UseHeight, MIHeightMap &Heights,
986 const TargetSchedModel &SchedModel,
987 const TargetInstrInfo *TII) {
988 // Adjust height by Dep.DefMI latency.
989 if (!Dep.DefMI->isTransient())
990 UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
991 Dep.UseOp);
992
993 // Update Heights[DefMI] to be the maximum height seen.
994 MIHeightMap::iterator I;
995 bool New;
996 std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
997 if (New)
998 return true;
999
1000 // DefMI has been pushed before. Give it the max height.
1001 if (I->second < UseHeight)
1002 I->second = UseHeight;
1003 return false;
1004}
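// This is the upwards mirror of the depth recurrence: for every in-trace
// dependence DefMI -> UseMI,
//
//   Height(DefMI) = max over its users of (Height(UseMI) + latency(DefMI, UseMI))
//
// pushDepHeight() accumulates that maximum in Heights before DefMI itself is
// visited, and returns true exactly once per DefMI so the caller can add the
// register to the trace live-in lists the first time the def is discovered.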
1005
1006/// Assuming that the virtual register defined by DefMI:DefOp was used by
1007/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
1008/// when reaching the block that contains DefMI.
1009void MachineTraceMetrics::Ensemble::
1010addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
1011 ArrayRef<const MachineBasicBlock *> Trace) {
1012 assert(!Trace.empty() && "Trace should contain at least one block");
1013 Register Reg = DefMI->getOperand(DefOp).getReg();
1014 assert(Reg.isVirtual());
1015 const MachineBasicBlock *DefMBB = DefMI->getParent();
1016
1017 // Reg is live-in to all blocks in Trace that follow DefMBB.
1018 for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
1019 if (MBB == DefMBB)
1020 return;
1021 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1022 // Just add the register. The height will be updated later.
1023 TBI.LiveIns.emplace_back(VirtRegOrUnit(Reg));
1024 }
1025}
1026
1027/// Compute instruction heights in the trace through MBB. This updates MBB and
1028/// the blocks below it in the trace. It is assumed that the trace has already
1029/// been computed.
1030void MachineTraceMetrics::Ensemble::
1031computeInstrHeights(const MachineBasicBlock *MBB) {
1032 // The bottom of the trace may already be computed.
1033 // Find the blocks that need updating.
1034 SmallVector<const MachineBasicBlock *, 8> Stack;
1035 do {
1036 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1037 assert(TBI.hasValidHeight() && "Incomplete trace");
1038 if (TBI.HasValidInstrHeights)
1039 break;
1040 Stack.push_back(MBB);
1041 TBI.LiveIns.clear();
1042 MBB = TBI.Succ;
1043 } while (MBB);
1044
1045 // As we move upwards in the trace, keep track of instructions that are
1046 // required by deeper trace instructions. Map MI -> height required so far.
1047 MIHeightMap Heights;
1048
1049 // For physregs, the def isn't known when we see the use.
1050 // Instead, keep track of the highest use of each regunit.
1051 LiveRegUnitSet RegUnits;
1052 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1053
1054 // If the bottom of the trace was already precomputed, initialize heights
1055 // from its live-in list.
1056 // MBB is the highest precomputed block in the trace.
1057 if (MBB) {
1058 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1059 for (LiveInReg &LI : TBI.LiveIns) {
1060 if (LI.VRegOrUnit.isVirtualReg()) {
1061 // For virtual registers, the def latency is included.
1062 unsigned &Height =
1063 Heights[MTM.MRI->getVRegDef(LI.VRegOrUnit.asVirtualReg())];
1064 if (Height < LI.Height)
1065 Height = LI.Height;
1066 } else {
1067 // For register units, the def latency is not included because we don't
1068 // know the def yet.
1069 RegUnits[LI.VRegOrUnit.asMCRegUnit()].Cycle = LI.Height;
1070 }
1071 }
1072 }
1073
1074 // Go through the trace blocks in bottom-up order.
1075 SmallVector<DataDep, 8> Deps;
1076 for (;!Stack.empty(); Stack.pop_back()) {
1077 MBB = Stack.back();
1078 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
1079 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1080 TBI.HasValidInstrHeights = true;
1081 TBI.CriticalPath = 0;
1082
1083 LLVM_DEBUG({
1084 dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1085 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1086 for (unsigned K = 0; K != PRHeights.size(); ++K)
1087 if (PRHeights[K]) {
1088 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1089 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1090 << MTM.SchedModel.getProcResource(K)->Name << " ("
1091 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1092 }
1093 });
1094
1095 // Get dependencies from PHIs in the trace successor.
1096 const MachineBasicBlock *Succ = TBI.Succ;
1097 // If MBB is the last block in the trace, and it has a back-edge to the
1098 // loop header, get loop-carried dependencies from PHIs in the header. For
1099 // that purpose, pretend that all the loop header PHIs have height 0.
1100 if (!Succ)
1101 if (const MachineLoop *Loop = getLoopFor(MBB))
1102 if (MBB->isSuccessor(Loop->getHeader()))
1103 Succ = Loop->getHeader();
1104
1105 if (Succ) {
1106 for (const auto &PHI : *Succ) {
1107 if (!PHI.isPHI())
1108 break;
1109 Deps.clear();
1110 getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1111 if (!Deps.empty()) {
1112 // Loop header PHI heights are all 0.
1113 unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1114 LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1115 if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1116 MTM.TII))
1117 addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1118 }
1119 }
1120 }
1121
1122 // Go through the block backwards.
1123 for (const MachineInstr &MI : reverse(*MBB)) {
1124 // Find the MI height as determined by virtual register uses in the
1125 // trace below.
1126 unsigned Cycle = 0;
1127 MIHeightMap::iterator HeightI = Heights.find(&MI);
1128 if (HeightI != Heights.end()) {
1129 Cycle = HeightI->second;
1130 // We won't be seeing any more MI uses.
1131 Heights.erase(HeightI);
1132 }
1133
1134 // Don't process PHI deps. They depend on the specific predecessor, and
1135 // we'll get them when visiting the predecessor.
1136 Deps.clear();
1137 bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1138
1139 // There may also be regunit dependencies to include in the height.
1140 if (HasPhysRegs)
1141 Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1142 MTM.TII, MTM.TRI);
1143
1144 // Update the required height of any virtual registers read by MI.
1145 for (const DataDep &Dep : Deps)
1146 if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1147 addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1148
1149 InstrCycles &MICycles = Cycles[&MI];
1150 MICycles.Height = Cycle;
1151 if (!TBI.HasValidInstrDepths) {
1152 LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
1153 continue;
1154 }
1155 // Update critical path length.
1156 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1157 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1158 }
1159
1160 // Update virtual live-in heights. They were added by addLiveIns() with a 0
1161 // height because the final height isn't known until now.
1162 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
1163 for (LiveInReg &LIR : TBI.LiveIns) {
1164 Register Reg = LIR.VRegOrUnit.asVirtualReg();
1165 const MachineInstr *DefMI = MTM.MRI->getVRegDef(Reg);
1166 LIR.Height = Heights.lookup(DefMI);
1167 LLVM_DEBUG(dbgs() << ' ' << printReg(Reg) << '@' << LIR.Height);
1168 }
1169
1170 // Transfer the live regunits to the live-in list.
1171 for (const LiveRegUnit &RU : RegUnits) {
1172 TBI.LiveIns.emplace_back(VirtRegOrUnit(RU.RegUnit), RU.Cycle);
1173 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
1174 << RU.Cycle);
1175 }
1176 LLVM_DEBUG(dbgs() << '\n');
1177
1178 if (!TBI.HasValidInstrDepths)
1179 continue;
1180 // Add live-ins to the critical path length.
1181 TBI.CriticalPath = std::max(TBI.CriticalPath,
1182 computeCrossBlockCriticalPath(TBI));
1183 LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1184 }
1185}
1186
1187MachineTraceMetrics::Trace
1188MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
1189 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1190
1191 if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1192 computeTrace(MBB);
1193 if (!TBI.HasValidInstrDepths)
1194 computeInstrDepths(MBB);
1195 if (!TBI.HasValidInstrHeights)
1196 computeInstrHeights(MBB);
1197
1198 return Trace(*this, TBI);
1199}
1200
1201unsigned
1202MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
1203 assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1204 "MI must be in the trace center block");
1205 InstrCycles Cyc = getInstrCycles(MI);
1206 return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1207}
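// Example with made-up numbers: on a trace with a 12 cycle critical path, an
// instruction at depth 4 and height 8 has slack 12 - (4 + 8) = 0 and lies on
// the critical path; one at depth 2 and height 6 has 4 cycles of slack and
// can be delayed that much without lengthening the trace.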
1208
1209unsigned
1210MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
1211 const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1212 SmallVector<DataDep, 1> Deps;
1213 getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1214 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1215 DataDep &Dep = Deps.front();
1216 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1217 // Add latency if DefMI is a real instruction. Transients get latency 0.
1218 if (!Dep.DefMI->isTransient())
1219 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1220 &PHI, Dep.UseOp);
1221 return DepCycle;
1222}
1223
1224/// When Bottom is set, include instructions in the current block in the estimate.
1225unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
1226 // Find the limiting processor resource.
1227 // Numbers have been pre-scaled to be comparable.
1228 unsigned PRMax = 0;
1229 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1230 if (Bottom) {
1231 ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
1232 for (unsigned K = 0; K != PRDepths.size(); ++K)
1233 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1234 } else {
1235 for (unsigned PRD : PRDepths)
1236 PRMax = std::max(PRMax, PRD);
1237 }
1238 // Convert to cycle count.
1239 PRMax = TE.MTM.getCycles(PRMax);
1240
1241 // All instructions before the current block
1242 unsigned Instrs = TBI.InstrDepth;
1243 // plus instructions in current block
1244 if (Bottom)
1245 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1246 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1247 Instrs /= IW;
1248 // Assume issue width 1 without a schedule model.
1249 return std::max(Instrs, PRMax);
1250}
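// Example with made-up numbers: 12 instructions above the center block on a
// 4-wide machine need at least 12 / 4 = 3 issue cycles; if the busiest
// processor resource converts to 5 cycles, getResourceDepth() returns 5,
// i.e. the top of the center block cannot start before cycle 5.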
1251
1252unsigned MachineTraceMetrics::Trace::getResourceLength(
1253 ArrayRef<const MachineBasicBlock *> Extrablocks,
1254 ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
1255 ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1256 // Add up resources above and below the center block.
1257 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1258 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1259 unsigned PRMax = 0;
1260
1261 // Capture computing cycles from extra instructions
1262 auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1263 unsigned ResourceIdx)
1264 ->unsigned {
1265 unsigned Cycles = 0;
1266 for (const MCSchedClassDesc *SC : Instrs) {
1267 if (!SC->isValid())
1268 continue;
1269 for (TargetSchedModel::ProcResIter
1270 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1271 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1272 PI != PE; ++PI) {
1273 if (PI->ProcResourceIdx != ResourceIdx)
1274 continue;
1275 Cycles += (PI->ReleaseAtCycle *
1276 TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1277 }
1278 }
1279 return Cycles;
1280 };
1281
1282 for (unsigned K = 0; K != PRDepths.size(); ++K) {
1283 unsigned PRCycles = PRDepths[K] + PRHeights[K];
1284 for (const MachineBasicBlock *MBB : Extrablocks)
1285 PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
1286 PRCycles += extraCycles(ExtraInstrs, K);
1287 PRCycles -= extraCycles(RemoveInstrs, K);
1288 PRMax = std::max(PRMax, PRCycles);
1289 }
1290 // Convert to cycle count.
1291 PRMax = TE.MTM.getCycles(PRMax);
1292
1293 // Instrs: #instructions in current trace outside current block.
1294 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1295 // Add instruction count from the extra blocks.
1296 for (const MachineBasicBlock *MBB : Extrablocks)
1297 Instrs += TE.MTM.getResources(MBB)->InstrCount;
1298 Instrs += ExtraInstrs.size();
1299 Instrs -= RemoveInstrs.size();
1300 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1301 Instrs /= IW;
1302 // Assume issue width 1 without a schedule model.
1303 return std::max(Instrs, PRMax);
1304}
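// Typical use, as a sketch (the surrounding cost-model code and the
// SpeculationSlack threshold are assumptions, not part of this file): an
// if-conversion style client compares the trace's resource length with and
// without a side block it is considering speculating:
//
//   unsigned BranchyLen    = T.getResourceLength();
//   unsigned SpeculatedLen = T.getResourceLength({SideMBB});
//   bool Profitable = SpeculatedLen <= BranchyLen + SpeculationSlack;
//
// ExtraInstrs and RemoveInstrs allow the same what-if query at the
// granularity of individual scheduling classes instead of whole blocks.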
1305
1306bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
1307 const MachineInstr &UseMI) const {
1308 if (DefMI.getParent() == UseMI.getParent())
1309 return true;
1310
1311 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1312 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1313
1314 return DepTBI.isUsefulDominator(TBI);
1315}
1316
1317void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
1318 OS << getName() << " ensemble:\n";
1319 for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1320 OS << " %bb." << i << '\t';
1321 BlockInfo[i].print(OS);
1322 OS << '\n';
1323 }
1324}
1325
1326void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
1327 if (hasValidDepth()) {
1328 OS << "depth=" << InstrDepth;
1329 if (Pred)
1330 OS << " pred=" << printMBBReference(*Pred);
1331 else
1332 OS << " pred=null";
1333 OS << " head=%bb." << Head;
1335 OS << " +instrs";
1336 } else
1337 OS << "depth invalid";
1338 OS << ", ";
1339 if (hasValidHeight()) {
1340 OS << "height=" << InstrHeight;
1341 if (Succ)
1342 OS << " succ=" << printMBBReference(*Succ);
1343 else
1344 OS << " succ=null";
1345 OS << " tail=%bb." << Tail;
1347 OS << " +instrs";
1348 } else
1349 OS << "height invalid";
1351 OS << ", crit=" << CriticalPath;
1352}
1353
1354void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
1355 unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1356
1357 OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1358 << " --> %bb." << TBI.Tail << ':';
1359 if (TBI.hasValidHeight() && TBI.hasValidDepth())
1360 OS << ' ' << getInstrCount() << " instrs.";
1361 if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1362 OS << ' ' << TBI.CriticalPath << " cycles.";
1363
1365 OS << "\n%bb." << MBBNum;
1366 while (Block->hasValidDepth() && Block->Pred) {
1367 unsigned Num = Block->Pred->getNumber();
1368 OS << " <- " << printMBBReference(*Block->Pred);
1369 Block = &TE.BlockInfo[Num];
1370 }
1371
1372 Block = &TBI;
1373 OS << "\n ";
1374 while (Block->hasValidHeight() && Block->Succ) {
1375 unsigned Num = Block->Succ->getNumber();
1376 OS << " -> " << printMBBReference(*Block->Succ);
1377 Block = &TE.BlockInfo[Num];
1378 }
1379 OS << '\n';
1380}