// NOTE(review): the three lines below are doxygen page residue ("LLVM
// 23.0.0git", the filename, and a navigation link), not source code; the
// actual file begins at the license header.
LLVM 23.0.0git
MachineTraceMetrics.cpp
Go to the documentation of this file.
1//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/SparseSet.h"
26#include "llvm/Pass.h"
27#include "llvm/Support/Debug.h"
29#include "llvm/Support/Format.h"
31#include <algorithm>
32#include <cassert>
33#include <tuple>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "machine-trace-metrics"
38
39AnalysisKey MachineTraceMetricsAnalysis::Key;
40
46
53
55
57
59 "Machine Trace Metrics", false, true)
62 "Machine Trace Metrics", false, true)
63
66
72
74 const MachineLoopInfo &LI) {
75 MF = &Func;
76 const TargetSubtargetInfo &ST = MF->getSubtarget();
77 TII = ST.getInstrInfo();
78 TRI = ST.getRegisterInfo();
79 MRI = &MF->getRegInfo();
80 Loops = &LI;
81 SchedModel.init(&ST);
82 BlockInfo.resize(MF->getNumBlockIDs());
83 ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
84 SchedModel.getNumProcResourceKinds());
85}
86
91
93
95 MF = nullptr;
96 BlockInfo.clear();
97 for (auto &E : Ensembles)
98 E.reset();
99}
100
101//===----------------------------------------------------------------------===//
102// Fixed block information
103//===----------------------------------------------------------------------===//
104//
105// The number of instructions in a basic block and the CPU resources used by
106// those instructions don't depend on any given trace strategy.
107
108/// Compute the resource usage in basic block MBB.
111 assert(MBB && "No basic block");
112 FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
113 if (FBI->hasResources())
114 return FBI;
115
116 // Compute resource usage in the block.
117 FBI->HasCalls = false;
118 unsigned InstrCount = 0;
119
120 // Add up per-processor resource cycles as well.
121 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
122 SmallVector<unsigned, 32> PRCycles(PRKinds);
123
124 for (const auto &MI : *MBB) {
125 if (MI.isTransient())
126 continue;
127 ++InstrCount;
128 if (MI.isCall())
129 FBI->HasCalls = true;
130
131 // Count processor resources used.
132 if (!SchedModel.hasInstrSchedModel())
133 continue;
134 const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
135 if (!SC->isValid())
136 continue;
137
139 PI = SchedModel.getWriteProcResBegin(SC),
140 PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
141 assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
142 PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
143 }
144 }
145 FBI->InstrCount = InstrCount;
146
147 // Scale the resource cycles so they are comparable.
148 unsigned PROffset = MBB->getNumber() * PRKinds;
149 for (unsigned K = 0; K != PRKinds; ++K)
150 ProcReleaseAtCycles[PROffset + K] =
151 PRCycles[K] * SchedModel.getResourceFactor(K);
152
153 return FBI;
154}
155
158 assert(BlockInfo[MBBNum].hasResources() &&
159 "getResources() must be called before getProcReleaseAtCycles()");
160 unsigned PRKinds = SchedModel.getNumProcResourceKinds();
161 assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
162 return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
163}
164
165//===----------------------------------------------------------------------===//
166// Ensemble utility functions
167//===----------------------------------------------------------------------===//
168
170 : MTM(*ct) {
171 BlockInfo.resize(MTM.BlockInfo.size());
172 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
173 ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
174 ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
175}
176
177// Virtual destructor serves as an anchor.
179
180const MachineLoop*
182 return MTM.Loops->getLoopFor(MBB);
183}
184
185// Update resource-related information in the TraceBlockInfo for MBB.
186// Only update resources related to the trace above MBB.
187void MachineTraceMetrics::Ensemble::
188computeDepthResources(const MachineBasicBlock *MBB) {
189 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
190 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
191 unsigned PROffset = MBB->getNumber() * PRKinds;
192
193 // Compute resources from trace above. The top block is simple.
194 if (!TBI->Pred) {
195 TBI->InstrDepth = 0;
196 TBI->Head = MBB->getNumber();
197 std::fill(ProcResourceDepths.begin() + PROffset,
198 ProcResourceDepths.begin() + PROffset + PRKinds, 0);
199 return;
200 }
201
202 // Compute from the block above. A post-order traversal ensures the
203 // predecessor is always computed first.
204 unsigned PredNum = TBI->Pred->getNumber();
205 TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
206 assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
207 const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
208 TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
209 TBI->Head = PredTBI->Head;
210
211 // Compute per-resource depths.
212 ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
213 ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
214 for (unsigned K = 0; K != PRKinds; ++K)
215 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
216}
217
218// Update resource-related information in the TraceBlockInfo for MBB.
219// Only update resources related to the trace below MBB.
220void MachineTraceMetrics::Ensemble::
221computeHeightResources(const MachineBasicBlock *MBB) {
222 TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
223 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
224 unsigned PROffset = MBB->getNumber() * PRKinds;
225
226 // Compute resources for the current block.
227 TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
228 ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
229
230 // The trace tail is done.
231 if (!TBI->Succ) {
232 TBI->Tail = MBB->getNumber();
233 llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
234 return;
235 }
236
237 // Compute from the block below. A post-order traversal ensures the
238 // predecessor is always computed first.
239 unsigned SuccNum = TBI->Succ->getNumber();
240 TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
241 assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
242 TBI->InstrHeight += SuccTBI->InstrHeight;
243 TBI->Tail = SuccTBI->Tail;
244
245 // Compute per-resource heights.
246 ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
247 for (unsigned K = 0; K != PRKinds; ++K)
248 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
249}
250
251// Check if depth resources for MBB are valid and return the TBI.
252// Return NULL if the resources have been invalidated.
253const MachineTraceMetrics::TraceBlockInfo*
256 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
257 return TBI->hasValidDepth() ? TBI : nullptr;
258}
259
260// Check if height resources for MBB are valid and return the TBI.
261// Return NULL if the resources have been invalidated.
265 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
266 return TBI->hasValidHeight() ? TBI : nullptr;
267}
268
269/// Get an array of processor resource depths for MBB. Indexed by processor
270/// resource kind, this array contains the scaled processor resources consumed
271/// by all blocks preceding MBB in its trace. It does not include instructions
272/// in MBB.
273///
274/// Compare TraceBlockInfo::InstrDepth.
277getProcResourceDepths(unsigned MBBNum) const {
278 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
279 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
280 return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
281}
282
283/// Get an array of processor resource heights for MBB. Indexed by processor
284/// resource kind, this array contains the scaled processor resources consumed
285/// by this block and all blocks following it in its trace.
286///
287/// Compare TraceBlockInfo::InstrHeight.
290getProcResourceHeights(unsigned MBBNum) const {
291 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
292 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
293 return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
294}
295
296//===----------------------------------------------------------------------===//
297// Trace Selection Strategies
298//===----------------------------------------------------------------------===//
299//
300// A trace selection strategy is implemented as a sub-class of Ensemble. The
301// trace through a block B is computed by two DFS traversals of the CFG
302// starting from B. One upwards, and one downwards. During the upwards DFS,
303// pickTracePred() is called on the post-ordered blocks. During the downwards
304// DFS, pickTraceSucc() is called in a post-order.
305//
306
307// We never allow traces that leave loops, but we do allow traces to enter
308// nested loops. We also never allow traces to contain back-edges.
309//
310// This means that a loop header can never appear above the center block of a
311// trace, except as the trace head. Below the center block, loop exiting edges
312// are banned.
313//
314// Return true if an edge from the From loop to the To loop is leaving a loop.
315// Either of To and From can be null.
316static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
317 return From && !From->contains(To);
318}
319
320// MinInstrCountEnsemble - Pick the trace that executes the least number of
321// instructions.
322namespace {
323
324class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
325 const char *getName() const override { return "MinInstr"; }
326 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
327 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
328
329public:
330 MinInstrCountEnsemble(MachineTraceMetrics *mtm)
331 : MachineTraceMetrics::Ensemble(mtm) {}
332};
333
334/// Pick only the current basic block for the trace and do not choose any
335/// predecessors/successors.
336class LocalEnsemble : public MachineTraceMetrics::Ensemble {
337 const char *getName() const override { return "Local"; }
338 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
339 return nullptr;
340 };
341 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
342 return nullptr;
343 };
344
345public:
346 LocalEnsemble(MachineTraceMetrics *MTM)
347 : MachineTraceMetrics::Ensemble(MTM) {}
348};
349} // end anonymous namespace
350
351// Select the preferred predecessor for MBB.
352const MachineBasicBlock*
353MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
354 if (MBB->pred_empty())
355 return nullptr;
356 const MachineLoop *CurLoop = getLoopFor(MBB);
357 // Don't leave loops, and never follow back-edges.
358 if (CurLoop && MBB == CurLoop->getHeader())
359 return nullptr;
360 unsigned CurCount = MTM.getResources(MBB)->InstrCount;
361 const MachineBasicBlock *Best = nullptr;
362 unsigned BestDepth = 0;
363 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
364 const MachineTraceMetrics::TraceBlockInfo *PredTBI =
365 getDepthResources(Pred);
366 // Ignore cycles that aren't natural loops.
367 if (!PredTBI)
368 continue;
369 // Pick the predecessor that would give this block the smallest InstrDepth.
370 unsigned Depth = PredTBI->InstrDepth + CurCount;
371 if (!Best || Depth < BestDepth) {
372 Best = Pred;
373 BestDepth = Depth;
374 }
375 }
376 return Best;
377}
378
379// Select the preferred successor for MBB.
380const MachineBasicBlock*
381MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
382 if (MBB->succ_empty())
383 return nullptr;
384 const MachineLoop *CurLoop = getLoopFor(MBB);
385 const MachineBasicBlock *Best = nullptr;
386 unsigned BestHeight = 0;
387 for (const MachineBasicBlock *Succ : MBB->successors()) {
388 // Don't consider back-edges.
389 if (CurLoop && Succ == CurLoop->getHeader())
390 continue;
391 // Don't consider successors exiting CurLoop.
392 if (isExitingLoop(CurLoop, getLoopFor(Succ)))
393 continue;
394 const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
395 getHeightResources(Succ);
396 // Ignore cycles that aren't natural loops.
397 if (!SuccTBI)
398 continue;
399 // Pick the successor that would give this block the smallest InstrHeight.
400 unsigned Height = SuccTBI->InstrHeight;
401 if (!Best || Height < BestHeight) {
402 Best = Succ;
403 BestHeight = Height;
404 }
405 }
406 return Best;
407}
408
409// Get an Ensemble sub-class for the requested trace strategy.
413 "Invalid trace strategy enum");
414 std::unique_ptr<MachineTraceMetrics::Ensemble> &E =
415 Ensembles[static_cast<size_t>(strategy)];
416 if (E)
417 return E.get();
418
419 // Allocate new Ensemble on demand.
420 switch (strategy) {
422 E = std::make_unique<MinInstrCountEnsemble>(MinInstrCountEnsemble(this));
423 break;
425 E = std::make_unique<LocalEnsemble>(LocalEnsemble(this));
426 break;
427 default: llvm_unreachable("Invalid trace strategy enum");
428 }
429 return E.get();
430}
431
433 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
434 << '\n');
435 BlockInfo[MBB->getNumber()].invalidate();
436 for (auto &E : Ensembles)
437 if (E)
438 E->invalidate(MBB);
439}
440
443 MachineFunctionAnalysisManager::Invalidator &) {
444 // Check whether the analysis, all analyses on machine functions, or the
445 // machine function's CFG have been preserved.
447 return !PAC.preserved() &&
448 !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
449 !PAC.preservedSet<CFGAnalyses>();
450}
451
453 if (!MF)
454 return;
455#ifndef NDEBUG
456 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
457 for (auto &E : Ensembles)
458 if (E)
459 E->verify();
460#endif
461}
462
463//===----------------------------------------------------------------------===//
464// Trace building
465//===----------------------------------------------------------------------===//
466//
467// Traces are built by two CFG traversals. To avoid recomputing too much, use a
468// set abstraction that confines the search to the current loop, and doesn't
469// revisit blocks.
470
471namespace {
472
473struct LoopBounds {
476 const MachineLoopInfo *Loops;
477 bool Downward = false;
478
480 const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
481};
482
483} // end anonymous namespace
484
485// Restrict the post-order traversal to the current loop and don't traverse the
486// loop back edges.
487template <typename GraphT>
489 : public PostOrderTraversalBase<LoopBoundsPostOrderTraversal<GraphT>,
490 GraphTraits<GraphT>> {
491 LoopBounds &LB;
492
493public:
495 : LB(LB) {
496 this->init(Start);
497 }
498
499 bool insertEdge(std::optional<const MachineBasicBlock *> From,
500 const MachineBasicBlock *To) {
501 // Skip already visited To blocks.
502 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
503 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
504 return false;
505 // From is null once when To is the trace center block.
506 if (From) {
507 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
508 // Don't follow backedges, don't leave FromLoop when going upwards.
509 if ((LB.Downward ? To : *From) == FromLoop->getHeader())
510 return false;
511 // Don't leave FromLoop.
512 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
513 return false;
514 }
515 }
516 // To is a new block. Mark the block as visited in case the CFG has cycles
517 // that MachineLoopInfo didn't recognize as a natural loop.
518 return LB.Visited.insert(To).second;
519 }
520};
521
522/// Compute the trace through MBB.
523void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
524 LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
525 << printMBBReference(*MBB) << '\n');
526 // Set up loop bounds for the backwards post-order traversal.
527 LoopBounds Bounds(BlockInfo, MTM.Loops);
528
529 // Run an upwards post-order search for the trace start.
530 Bounds.Downward = false;
531 Bounds.Visited.clear();
532 for (const auto *I :
533 LoopBoundsPostOrderTraversal<Inverse<const MachineBasicBlock *>>(
534 MBB, Bounds)) {
535 LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
536 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
537 // All the predecessors have been visited, pick the preferred one.
538 TBI.Pred = pickTracePred(I);
539 LLVM_DEBUG({
540 if (TBI.Pred)
541 dbgs() << printMBBReference(*TBI.Pred) << '\n';
542 else
543 dbgs() << "null\n";
544 });
545 // The trace leading to I is now known, compute the depth resources.
546 computeDepthResources(I);
547 }
548
549 // Run a downwards post-order search for the trace end.
550 Bounds.Downward = true;
551 Bounds.Visited.clear();
552 for (const auto *I :
553 LoopBoundsPostOrderTraversal<const MachineBasicBlock *>(MBB, Bounds)) {
554 LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
555 TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
556 // All the successors have been visited, pick the preferred one.
557 TBI.Succ = pickTraceSucc(I);
558 LLVM_DEBUG({
559 if (TBI.Succ)
560 dbgs() << printMBBReference(*TBI.Succ) << '\n';
561 else
562 dbgs() << "null\n";
563 });
564 // The trace leaving I is now known, compute the height resources.
565 computeHeightResources(I);
566 }
567}
568
569/// Invalidate traces through BadMBB.
570void
573 TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
574
575 // Invalidate height resources of blocks above MBB.
576 if (BadTBI.hasValidHeight()) {
577 BadTBI.invalidateHeight();
578 WorkList.push_back(BadMBB);
579 do {
580 const MachineBasicBlock *MBB = WorkList.pop_back_val();
581 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
582 << getName() << " height.\n");
583 // Find any MBB predecessors that have MBB as their preferred successor.
584 // They are the only ones that need to be invalidated.
585 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
586 TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
587 if (!TBI.hasValidHeight())
588 continue;
589 if (TBI.Succ == MBB) {
590 TBI.invalidateHeight();
591 WorkList.push_back(Pred);
592 continue;
593 }
594 // Verify that TBI.Succ is actually a *I successor.
595 assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
596 }
597 } while (!WorkList.empty());
598 }
599
600 // Invalidate depth resources of blocks below MBB.
601 if (BadTBI.hasValidDepth()) {
602 BadTBI.invalidateDepth();
603 WorkList.push_back(BadMBB);
604 do {
605 const MachineBasicBlock *MBB = WorkList.pop_back_val();
606 LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
607 << getName() << " depth.\n");
608 // Find any MBB successors that have MBB as their preferred predecessor.
609 // They are the only ones that need to be invalidated.
610 for (const MachineBasicBlock *Succ : MBB->successors()) {
611 TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
612 if (!TBI.hasValidDepth())
613 continue;
614 if (TBI.Pred == MBB) {
615 TBI.invalidateDepth();
616 WorkList.push_back(Succ);
617 continue;
618 }
619 // Verify that TBI.Pred is actually a *I predecessor.
620 assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
621 }
622 } while (!WorkList.empty());
623 }
624
625 // Clear any per-instruction data. We only have to do this for BadMBB itself
626 // because the instructions in that block may change. Other blocks may be
627 // invalidated, but their instructions will stay the same, so there is no
628 // need to erase the Cycle entries. They will be overwritten when we
629 // recompute.
630 for (const auto &I : *BadMBB)
631 Cycles.erase(&I);
632}
633
635#ifndef NDEBUG
636 assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
637 "Outdated BlockInfo size");
638 for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
639 const TraceBlockInfo &TBI = BlockInfo[Num];
640 if (TBI.hasValidDepth() && TBI.Pred) {
641 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
642 assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
643 assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
644 "Trace is broken, depth should have been invalidated.");
645 const MachineLoop *Loop = getLoopFor(MBB);
646 assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
647 }
648 if (TBI.hasValidHeight() && TBI.Succ) {
649 const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
650 assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
651 assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
652 "Trace is broken, height should have been invalidated.");
653 const MachineLoop *Loop = getLoopFor(MBB);
654 const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
655 assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
656 "Trace contains backedge");
657 }
658 }
659#endif
660}
661
662//===----------------------------------------------------------------------===//
663// Data Dependencies
664//===----------------------------------------------------------------------===//
665//
666// Compute the depth and height of each instruction based on data dependencies
667// and instruction latencies. These cycle numbers assume that the CPU can issue
668// an infinite number of instructions per cycle as long as their dependencies
669// are ready.
670
671// A data dependency is represented as a defining MI and operand numbers on the
672// defining and using MI.
673namespace {
674
675struct DataDep {
676 const MachineInstr *DefMI;
677 unsigned DefOp;
678 unsigned UseOp;
679
680 DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
681 : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
682
683 /// Create a DataDep from an SSA form virtual register.
684 DataDep(const MachineRegisterInfo *MRI, Register VirtReg, unsigned UseOp)
685 : UseOp(UseOp) {
686 assert(VirtReg.isVirtual());
687 MachineOperand *DefMO = MRI->getOneDef(VirtReg);
688 assert(DefMO && "Register does not have unique def");
689 DefMI = DefMO->getParent();
690 DefOp = DefMO->getOperandNo();
691 }
692};
693
694} // end anonymous namespace
695
696// Get the input data dependencies that must be ready before UseMI can issue.
697// Return true if UseMI has any physreg operands.
698static bool getDataDeps(const MachineInstr &UseMI,
700 const MachineRegisterInfo *MRI) {
701 // Debug values should not be included in any calculations.
702 if (UseMI.isDebugInstr())
703 return false;
704
705 bool HasPhysRegs = false;
706 for (const MachineOperand &MO : UseMI.operands()) {
707 if (!MO.isReg())
708 continue;
709 Register Reg = MO.getReg();
710 if (!Reg)
711 continue;
712 if (Reg.isPhysical()) {
713 HasPhysRegs = true;
714 continue;
715 }
716 // Collect virtual register reads.
717 if (MO.readsReg())
718 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
719 }
720 return HasPhysRegs;
721}
722
723// Get the input data dependencies of a PHI instruction, using Pred as the
724// preferred predecessor.
725// This will add at most one dependency to Deps.
726static void getPHIDeps(const MachineInstr &UseMI,
728 const MachineBasicBlock *Pred,
729 const MachineRegisterInfo *MRI) {
730 // No predecessor at the beginning of a trace. Ignore dependencies.
731 if (!Pred)
732 return;
733 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
734 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
735 if (UseMI.getOperand(i + 1).getMBB() == Pred) {
736 Register Reg = UseMI.getOperand(i).getReg();
737 Deps.push_back(DataDep(MRI, Reg, i));
738 return;
739 }
740 }
741}
742
743// Identify physreg dependencies for UseMI, and update the live regunit
744// tracking set when scanning instructions downwards.
747 LiveRegUnitSet &RegUnits,
748 const TargetRegisterInfo *TRI) {
750 SmallVector<unsigned, 8> LiveDefOps;
751
752 for (const MachineOperand &MO : UseMI->operands()) {
753 if (!MO.isReg() || !MO.getReg().isPhysical())
754 continue;
755 MCRegister Reg = MO.getReg().asMCReg();
756 // Track live defs and kills for updating RegUnits.
757 if (MO.isDef()) {
758 if (MO.isDead())
759 Kills.push_back(Reg);
760 else
761 LiveDefOps.push_back(MO.getOperandNo());
762 } else if (MO.isKill())
763 Kills.push_back(Reg);
764 // Identify dependencies.
765 if (!MO.readsReg())
766 continue;
767 for (MCRegUnit Unit : TRI->regunits(Reg)) {
768 LiveRegUnitSet::iterator I = RegUnits.find(Unit);
769 if (I == RegUnits.end())
770 continue;
771 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
772 break;
773 }
774 }
775
776 // Update RegUnits to reflect live registers after UseMI.
777 // First kills.
778 for (MCRegister Kill : Kills)
779 for (MCRegUnit Unit : TRI->regunits(Kill))
780 RegUnits.erase(Unit);
781
782 // Second, live defs.
783 for (unsigned DefOp : LiveDefOps) {
784 for (MCRegUnit Unit :
785 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
786 LiveRegUnit &LRU = RegUnits[Unit];
787 LRU.MI = UseMI;
788 LRU.Op = DefOp;
789 }
790 }
791}
792
793/// The length of the critical path through a trace is the maximum of two path
794/// lengths:
795///
796/// 1. The maximum height+depth over all instructions in the trace center block.
797///
798/// 2. The longest cross-block dependency chain. For small blocks, it is
799/// possible that the critical path through the trace doesn't include any
800/// instructions in the block.
801///
802/// This function computes the second number from the live-in list of the
803/// center block.
804unsigned MachineTraceMetrics::Ensemble::
805computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
806 assert(TBI.HasValidInstrDepths && "Missing depth info");
807 assert(TBI.HasValidInstrHeights && "Missing height info");
808 unsigned MaxLen = 0;
809 for (const LiveInReg &LIR : TBI.LiveIns) {
810 if (!LIR.VRegOrUnit.isVirtualReg())
811 continue;
812 const MachineInstr *DefMI =
813 MTM.MRI->getVRegDef(LIR.VRegOrUnit.asVirtualReg());
814 // Ignore dependencies outside the current trace.
815 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
816 if (!DefTBI.isUsefulDominator(TBI))
817 continue;
818 unsigned Len = LIR.Height + Cycles[DefMI].Depth;
819 MaxLen = std::max(MaxLen, Len);
820 }
821 return MaxLen;
822}
823
825 const MachineInstr &UseMI,
826 LiveRegUnitSet &RegUnits) {
828 // Collect all data dependencies.
829 if (UseMI.isPHI())
830 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
831 else if (getDataDeps(UseMI, Deps, MTM.MRI))
832 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
833
834 // Filter and process dependencies, computing the earliest issue cycle.
835 unsigned Cycle = 0;
836 for (const DataDep &Dep : Deps) {
837 const TraceBlockInfo&DepTBI =
838 BlockInfo[Dep.DefMI->getParent()->getNumber()];
839 // Ignore dependencies from outside the current trace.
840 if (!DepTBI.isUsefulDominator(TBI))
841 continue;
842 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
843 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
844 // Add latency if DefMI is a real instruction. Transients get latency 0.
845 if (!Dep.DefMI->isTransient())
846 DepCycle += MTM.SchedModel
847 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
848 Cycle = std::max(Cycle, DepCycle);
849 }
850 // Remember the instruction depth.
851 InstrCycles &MICycles = Cycles[&UseMI];
852 MICycles.Depth = Cycle;
853
854 if (TBI.HasValidInstrHeights) {
855 // Update critical path length.
856 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
857 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
858 } else {
859 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
860 }
861}
862
864 const MachineInstr &UseMI,
865 LiveRegUnitSet &RegUnits) {
866 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
867}
868
871 LiveRegUnitSet &RegUnits) {
872 for (; Start != End; Start++)
873 updateDepth(Start->getParent(), *Start, RegUnits);
874}
875
876/// Compute instruction depths for all instructions above or in MBB in its
877/// trace. This assumes that the trace through MBB has already been computed.
878void MachineTraceMetrics::Ensemble::
879computeInstrDepths(const MachineBasicBlock *MBB) {
880 // The top of the trace may already be computed, and HasValidInstrDepths
881 // implies Head->HasValidInstrDepths, so we only need to start from the first
882 // block in the trace that needs to be recomputed.
884 do {
885 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
886 assert(TBI.hasValidDepth() && "Incomplete trace");
887 if (TBI.HasValidInstrDepths)
888 break;
889 Stack.push_back(MBB);
890 MBB = TBI.Pred;
891 } while (MBB);
892
893 // FIXME: If MBB is non-null at this point, it is the last pre-computed block
894 // in the trace. We should track any live-out physregs that were defined in
895 // the trace. This is quite rare in SSA form, typically created by CSE
896 // hoisting a compare.
897 LiveRegUnitSet RegUnits;
898 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
899
900 // Go through trace blocks in top-down order, stopping after the center block.
901 while (!Stack.empty()) {
902 MBB = Stack.pop_back_val();
903 LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
904 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
905 TBI.HasValidInstrDepths = true;
906 TBI.CriticalPath = 0;
907
908 // Print out resource depths here as well.
909 LLVM_DEBUG({
910 dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
911 ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
912 for (unsigned K = 0; K != PRDepths.size(); ++K)
913 if (PRDepths[K]) {
914 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
915 dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
916 << MTM.SchedModel.getProcResource(K)->Name << " ("
917 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
918 }
919 });
920
921 // Also compute the critical path length through MBB when possible.
922 if (TBI.HasValidInstrHeights)
923 TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
924
925 for (const auto &UseMI : *MBB) {
926 updateDepth(TBI, UseMI, RegUnits);
927 }
928 }
929}
930
931// Identify physreg dependencies for MI when scanning instructions upwards.
932// Return the issue height of MI after considering any live regunits.
933// Height is the issue height computed from virtual register dependencies alone.
934 static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
935 LiveRegUnitSet &RegUnits,
936 const TargetSchedModel &SchedModel,
937 const TargetInstrInfo *TII,
938 const TargetRegisterInfo *TRI) {
// NOTE(review): the embedded numbering jumps 938 -> 940, so the extraction
// dropped upstream line 939 — the declaration of the ReadOps vector used
// below. Confirm against the original source before editing this function.
940
941 for (const MachineOperand &MO : MI.operands()) {
// Only physical-register operands matter here; virtual-register deps are
// handled separately by the caller.
942 if (!MO.isReg())
943 continue;
944 Register Reg = MO.getReg();
945 if (!Reg.isPhysical())
946 continue;
// Remember reads; their height can only be recorded once MI's final
// height is known (after all def-induced dependencies are folded in).
947 if (MO.readsReg())
948 ReadOps.push_back(MO.getOperandNo());
949 if (!MO.isDef())
950 continue;
951 // This is a def of Reg. Remove corresponding entries from RegUnits, and
952 // update MI Height to consider the physreg dependencies.
953 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
954 LiveRegUnitSet::iterator I = RegUnits.find(Unit);
955 if (I == RegUnits.end())
956 continue;
957 unsigned DepHeight = I->Cycle;
// Transient instructions (copies, etc.) contribute no latency.
958 if (!MI.isTransient()) {
959 // We may not know the UseMI of this dependency, if it came from the
960 // live-in list. SchedModel can handle a NULL UseMI.
961 DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
962 I->MI, I->Op)
963 }
964 Height = std::max(Height, DepHeight);
965 // This regunit is dead above MI.
966 RegUnits.erase(I);
967 }
968 }
969
970 // Now we know the height of MI. Update any regunits read.
971 for (unsigned Op : ReadOps) {
972 MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
973 for (MCRegUnit Unit : TRI->regunits(Reg)) {
974 LiveRegUnit &LRU = RegUnits[Unit];
975 // Set the height to the highest reader of the unit.
976 if (LRU.Cycle <= Height && LRU.MI != &MI) {
977 LRU.Cycle = Height;
978 LRU.MI = &MI;
979 LRU.Op = Op;
980 }
981 }
982 }
983
// Final issue height of MI considering both virtual and physreg deps.
984 return Height;
985}
986
988
989// Push the height of DefMI upwards if required to match UseMI.
990// Return true if this is the first time DefMI was seen.
991 static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
992 unsigned UseHeight, MIHeightMap &Heights,
993 const TargetSchedModel &SchedModel,
994 const TargetInstrInfo *TII) {
995 // Adjust height by Dep.DefMI latency.
996 if (!Dep.DefMI->isTransient())
997 UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
998 Dep.UseOp);
999
1000 // Update Heights[DefMI] to be the maximum height seen.
// NOTE(review): the embedded numbering jumps 1000 -> 1002; upstream line
// 1001 (the declaration of the map iterator I used below) was dropped by
// the extraction — verify against the original source.
1002 bool New;
1003 std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
// First sighting of DefMI: the caller must add it to the live-in lists.
1004 if (New)
1005 return true;
1006
1007 // DefMI has been pushed before. Give it the max height.
1008 if (I->second < UseHeight)
1009 I->second = UseHeight;
1010 return false;
1011}
1012
1013/// Assuming that the virtual register defined by DefMI:DefOp was used by
1014/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
1015/// when reaching the block that contains DefMI.
1016 void MachineTraceMetrics::Ensemble::
1017 addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
// NOTE(review): the embedded numbering jumps 1017 -> 1019; upstream line
// 1018 (the final parameter of this signature — the Trace block list) was
// dropped by the extraction. Confirm against the original source.
1019 assert(!Trace.empty() && "Trace should contain at least one block");
1020 Register Reg = DefMI->getOperand(DefOp).getReg();
// Only virtual registers are tracked as live-ins here; physreg units are
// handled separately via RegUnits in the height computation.
1021 assert(Reg.isVirtual());
1022 const MachineBasicBlock *DefMBB = DefMI->getParent();
1023
1024 // Reg is live-in to all blocks in Trace that follow DefMBB.
1025 for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
// Stop once we reach the defining block; Reg is not live-in there.
1026 if (MBB == DefMBB)
1027 return;
1028 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1029 // Just add the register. The height will be updated later.
1030 TBI.LiveIns.emplace_back(VirtRegOrUnit(Reg));
1031 }
1032}
1033
1034/// Compute instruction heights in the trace through MBB. This updates MBB and
1035/// the blocks below it in the trace. It is assumed that the trace has already
1036/// been computed.
1037 void MachineTraceMetrics::Ensemble::
1038 computeInstrHeights(const MachineBasicBlock *MBB) {
1039 // The bottom of the trace may already be computed.
1040 // Find the blocks that need updating.
// NOTE(review): numbering jumps 1040 -> 1042; upstream line 1041 (the
// declaration of the Stack worklist used below) was dropped by the
// extraction — verify against the original source.
1042 do {
1043 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1044 assert(TBI.hasValidHeight() && "Incomplete trace");
// Stop at the first block whose instruction heights are already valid.
1045 if (TBI.HasValidInstrHeights)
1046 break;
1047 Stack.push_back(MBB);
// Live-ins will be recomputed from scratch for every block we revisit.
1048 TBI.LiveIns.clear();
1049 MBB = TBI.Succ;
1050 } while (MBB);
1051
1052 // As we move upwards in the trace, keep track of instructions that are
1053 // required by deeper trace instructions. Map MI -> height required so far.
1054 MIHeightMap Heights;
1055
1056 // For physregs, the def isn't known when we see the use.
1057 // Instead, keep track of the highest use of each regunit.
1058 LiveRegUnitSet RegUnits;
1059 RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
1060
1061 // If the bottom of the trace was already precomputed, initialize heights
1062 // from its live-in list.
1063 // MBB is the highest precomputed block in the trace.
1064 if (MBB) {
1065 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1066 for (LiveInReg &LI : TBI.LiveIns) {
1067 if (LI.VRegOrUnit.isVirtualReg()) {
1068 // For virtual registers, the def latency is included.
1069 unsigned &Height =
1070 Heights[MTM.MRI->getVRegDef(LI.VRegOrUnit.asVirtualReg())];
1071 if (Height < LI.Height)
1072 Height = LI.Height;
1073 } else {
1074 // For register units, the def latency is not included because we don't
1075 // know the def yet.
1076 RegUnits[LI.VRegOrUnit.asMCRegUnit()].Cycle = LI.Height;
1077 }
1078 }
1079 }
1080
1081 // Go through the trace blocks in bottom-up order.
// NOTE(review): numbering jumps 1081 -> 1083; upstream line 1082 (the
// declaration of the Deps vector reused below) was dropped by the
// extraction — verify against the original source.
1083 for (;!Stack.empty(); Stack.pop_back()) {
1084 MBB = Stack.back();
1085 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
1086 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
// Mark heights valid up front; they are filled in by the loops below.
1087 TBI.HasValidInstrHeights = true;
1088 TBI.CriticalPath = 0;
1089
1090 LLVM_DEBUG({
1091 dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
1092 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
1093 for (unsigned K = 0; K != PRHeights.size(); ++K)
1094 if (PRHeights[K]) {
1095 unsigned Factor = MTM.SchedModel.getResourceFactor(K);
1096 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
1097 << MTM.SchedModel.getProcResource(K)->Name << " ("
1098 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
1099 }
1100 });
1101
1102 // Get dependencies from PHIs in the trace successor.
1103 const MachineBasicBlock *Succ = TBI.Succ;
1104 // If MBB is the last block in the trace, and it has a back-edge to the
1105 // loop header, get loop-carried dependencies from PHIs in the header. For
1106 // that purpose, pretend that all the loop header PHIs have height 0.
1107 if (!Succ)
1108 if (const MachineLoop *Loop = getLoopFor(MBB))
1109 if (MBB->isSuccessor(Loop->getHeader()))
1110 Succ = Loop->getHeader();
1111
1112 if (Succ) {
1113 for (const auto &PHI : *Succ) {
// PHIs cluster at the top of the block; stop at the first non-PHI.
1114 if (!PHI.isPHI())
1115 break;
1116 Deps.clear();
1117 getPHIDeps(PHI, Deps, MBB, MTM.MRI);
1118 if (!Deps.empty()) {
1119 // Loop header PHI heights are all 0.
1120 unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
1121 LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
1122 if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
1123 MTM.TII))
1124 addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
1125 }
1126 }
1127 }
1128
1129 // Go through the block backwards.
1130 for (const MachineInstr &MI : reverse(*MBB)) {
1131 // Find the MI height as determined by virtual register uses in the
1132 // trace below.
1133 unsigned Cycle = 0;
1134 MIHeightMap::iterator HeightI = Heights.find(&MI);
1135 if (HeightI != Heights.end()) {
1136 Cycle = HeightI->second;
1137 // We won't be seeing any more MI uses.
1138 Heights.erase(HeightI);
1139 }
1140
1141 // Don't process PHI deps. They depend on the specific predecessor, and
1142 // we'll get them when visiting the predecessor.
1143 Deps.clear();
1144 bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
1145
1146 // There may also be regunit dependencies to include in the height.
1147 if (HasPhysRegs)
1148 Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
1149 MTM.TII, MTM.TRI);
1150
1151 // Update the required height of any virtual registers read by MI.
1152 for (const DataDep &Dep : Deps)
1153 if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
1154 addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
1155
// Record the computed height for MI in the per-instruction cycle map.
1156 InstrCycles &MICycles = Cycles[&MI];
1157 MICycles.Height = Cycle;
1158 if (!TBI.HasValidInstrDepths) {
1159 LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
1160 continue;
1161 }
1162 // Update critical path length.
1163 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
1164 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
1165 }
1166
1167 // Update virtual live-in heights. They were added by addLiveIns() with a 0
1168 // height because the final height isn't known until now.
1169 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
1170 for (LiveInReg &LIR : TBI.LiveIns) {
1171 Register Reg = LIR.VRegOrUnit.asVirtualReg();
1172 const MachineInstr *DefMI = MTM.MRI->getVRegDef(Reg);
1173 LIR.Height = Heights.lookup(DefMI);
1174 LLVM_DEBUG(dbgs() << ' ' << printReg(Reg) << '@' << LIR.Height);
1175 }
1176
1177 // Transfer the live regunits to the live-in list.
1178 for (const LiveRegUnit &RU : RegUnits) {
1179 TBI.LiveIns.emplace_back(VirtRegOrUnit(RU.RegUnit), RU.Cycle);
1180 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
1181 << RU.Cycle);
1182 }
1183 LLVM_DEBUG(dbgs() << '\n');
1184
1185 if (!TBI.HasValidInstrDepths)
1186 continue;
1187 // Add live-ins to the critical path length.
1188 TBI.CriticalPath = std::max(TBI.CriticalPath,
1189 computeCrossBlockCriticalPath(TBI));
1190 LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
1191 }
1192}
1193
// NOTE(review): the embedded numbering jumps 1193 -> 1196; the signature of
// this function (Ensemble::getTrace, per the index below) was dropped by the
// extraction — verify against the original source.
1196 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1197
// Lazily compute each layer of trace information that is still invalid.
1198 if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1199 computeTrace(MBB);
1200 if (!TBI.HasValidInstrDepths)
1201 computeInstrDepths(MBB);
1202 if (!TBI.HasValidInstrHeights)
1203 computeInstrHeights(MBB);
1204
1205 return Trace(*this, TBI);
1206}
1207
1208 unsigned
// NOTE(review): the embedded numbering jumps 1208 -> 1210 and 1211 -> 1213;
// the extraction dropped the rest of this signature (Trace::getInstrSlack,
// per the index below) and the local that fetches MI's InstrCycles (Cyc).
// Verify against the original source.
1210 assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1211 "MI must be in the trace center block");
// Slack = critical path length minus MI's own depth + height.
1213 return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1214}
1215
1216 unsigned
// NOTE(review): numbering jumps 1216 -> 1218 and 1218 -> 1220; the
// extraction dropped the rest of this signature (Trace::getPHIDepth, per the
// index below) and the declaration of the Deps vector. Verify against the
// original source.
1218 const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1220 getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
// A PHI has exactly one incoming value per predecessor; with MBB fixed,
// exactly one dependency is expected.
1221 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1222 DataDep &Dep = Deps.front();
1223 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1224 // Add latency if DefMI is a real instruction. Transients get latency 0.
1225 if (!Dep.DefMI->isTransient())
1226 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1227 &PHI, Dep.UseOp);
1228 return DepCycle;
1229}
1230
1231 /// When bottom is set include instructions in current block in estimate.
// NOTE(review): numbering jumps 1231 -> 1233; the extraction dropped this
// function's signature (Trace::getResourceDepth, per the index below).
// Verify against the original source.
1233 // Find the limiting processor resource.
1234 // Numbers have been pre-scaled to be comparable.
1235 unsigned PRMax = 0;
1236 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1237 if (Bottom) {
// Include this block's own resource usage in the estimate.
1238 ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
1239 for (unsigned K = 0; K != PRDepths.size(); ++K)
1240 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1241 } else {
1242 for (unsigned PRD : PRDepths)
1243 PRMax = std::max(PRMax, PRD);
1244 }
1245 // Convert to cycle count.
1246 PRMax = TE.MTM.getCycles(PRMax);
1247
1248 /// All instructions before current block
1249 unsigned Instrs = TBI.InstrDepth;
1250 // plus instructions in current block
1251 if (Bottom)
1252 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
// Divide by issue width to turn an instruction count into cycles.
1253 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1254 Instrs /= IW;
1255 // Assume issue width 1 without a schedule model.
// The depth is limited by whichever is larger: issue bandwidth or the
// most-contended processor resource.
1256 return std::max(Instrs, PRMax);
1257}
1258
// NOTE(review): the embedded numbering jumps 1258 -> 1262 and 1275 -> 1277;
// the extraction dropped the first lines of this signature
// (Trace::getResourceLength, per the index below) and the ProcResIter loop
// header inside the lambda. Verify against the original source.
1262 ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1263 // Add up resources above and below the center block.
1264 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1265 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1266 unsigned PRMax = 0;
1267
1268 // Capture computing cycles from extra instructions
1269 auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1270 unsigned ResourceIdx)
1271 ->unsigned {
1272 unsigned Cycles = 0;
1273 for (const MCSchedClassDesc *SC : Instrs) {
1274 if (!SC->isValid())
1275 continue;
1277 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1278 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1279 PI != PE; ++PI) {
// Only accumulate cycles charged against the requested resource.
1280 if (PI->ProcResourceIdx != ResourceIdx)
1281 continue;
1282 Cycles += (PI->ReleaseAtCycle *
1283 TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1284 }
1285 }
1286 return Cycles;
1287 };
1288
1289 for (unsigned K = 0; K != PRDepths.size(); ++K) {
1290 unsigned PRCycles = PRDepths[K] + PRHeights[K];
1291 for (const MachineBasicBlock *MBB : Extrablocks)
1292 PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
// Account for hypothetically added/removed instructions on resource K.
1293 PRCycles += extraCycles(ExtraInstrs, K);
1294 PRCycles -= extraCycles(RemoveInstrs, K);
1295 PRMax = std::max(PRMax, PRCycles);
1296 }
1297 // Convert to cycle count.
1298 PRMax = TE.MTM.getCycles(PRMax);
1299
1300 // Instrs: #instructions in current trace outside current block.
1301 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1302 // Add instruction count from the extra blocks.
1303 for (const MachineBasicBlock *MBB : Extrablocks)
1304 Instrs += TE.MTM.getResources(MBB)->InstrCount;
1305 Instrs += ExtraInstrs.size();
1306 Instrs -= RemoveInstrs.size();
1307 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1308 Instrs /= IW;
1309 // Assume issue width 1 without a schedule model.
// The length is limited by whichever is larger: issue bandwidth or the
// most-contended processor resource.
1310 return std::max(Instrs, PRMax);
1311}
1312
// NOTE(review): the embedded numbering jumps 1312 -> 1314; the extraction
// dropped the first line of this signature (Trace::isDepInTrace, per the
// index below). Verify against the original source.
1314 const MachineInstr &UseMI) const {
// Same-block dependencies are trivially inside the trace.
1315 if (DefMI.getParent() == UseMI.getParent())
1316 return true;
1317
1318 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1319 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1320
// Otherwise the def's block must be a useful dominator of the use's block.
1321 return DepTBI.isUsefulDominator(TBI);
1322}
1323
// NOTE(review): the embedded numbering jumps 1323 -> 1325; the extraction
// dropped this function's signature (Ensemble::print(raw_ostream&), per the
// index below). Verify against the original source.
1325 OS << getName() << " ensemble:\n";
// Dump the per-block trace info for every block in the function.
1326 for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1327 OS << " %bb." << i << '\t';
1328 BlockInfo[i].print(OS);
1329 OS << '\n';
1330 }
1331}
1332
// NOTE(review): the embedded numbering jumps 1332 -> 1334 and skips 1341,
// 1353 and 1357; the extraction dropped this function's signature
// (TraceBlockInfo::print(raw_ostream&), per the index below) and the
// conditions guarding the " +instrs" and ", crit=" fragments. Verify
// against the original source.
1334 if (hasValidDepth()) {
1335 OS << "depth=" << InstrDepth;
1336 if (Pred)
1337 OS << " pred=" << printMBBReference(*Pred);
1338 else
1339 OS << " pred=null";
1340 OS << " head=%bb." << Head;
1342 OS << " +instrs";
1343 } else
1344 OS << "depth invalid";
1345 OS << ", ";
1346 if (hasValidHeight()) {
1347 OS << "height=" << InstrHeight;
1348 if (Succ)
1349 OS << " succ=" << printMBBReference(*Succ);
1350 else
1351 OS << " succ=null";
1352 OS << " tail=%bb." << Tail;
1354 OS << " +instrs";
1355 } else
1356 OS << "height invalid";
1358 OS << ", crit=" << CriticalPath;
1359}
1360
// NOTE(review): the embedded numbering jumps 1360 -> 1362 and 1370 -> 1372;
// the extraction dropped this function's signature
// (Trace::print(raw_ostream&), per the index below) and the declaration of
// the Block cursor used in the walks below. Verify against the original
// source.
// Recover this block's number from its position in the BlockInfo array.
1362 unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1363
1364 OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1365 << " --> %bb." << TBI.Tail << ':';
1366 if (TBI.hasValidHeight() && TBI.hasValidDepth())
1367 OS << ' ' << getInstrCount() << " instrs.";
1368 if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1369 OS << ' ' << TBI.CriticalPath << " cycles.";
1370
1372 OS << "\n%bb." << MBBNum;
// Walk the predecessor chain up to the head of the trace.
1373 while (Block->hasValidDepth() && Block->Pred) {
1374 unsigned Num = Block->Pred->getNumber();
1375 OS << " <- " << printMBBReference(*Block->Pred);
1376 Block = &TE.BlockInfo[Num];
1377 }
1378
// Reset the cursor and walk the successor chain down to the tail.
1379 Block = &TBI;
1380 OS << "\n ";
1381 while (Block->hasValidHeight() && Block->Succ) {
1382 unsigned Num = Block->Succ->getNumber();
1383 OS << " -> " << printMBBReference(*Block->Succ);
1384 Block = &TE.BlockInfo[Num];
1385 }
1386 OS << '\n';
1387}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static unsigned InstrCount
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
loops
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI, unsigned UseHeight, MIHeightMap &Heights, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII)
static void getPHIDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineBasicBlock *Pred, const MachineRegisterInfo *MRI)
static bool getDataDeps(const MachineInstr &UseMI, SmallVectorImpl< DataDep > &Deps, const MachineRegisterInfo *MRI)
DenseMap< const MachineInstr *, unsigned > MIHeightMap
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To)
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, LiveRegUnitSet &RegUnits, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVectorImpl< DataDep > &Deps, LiveRegUnitSet &RegUnits, const TargetRegisterInfo *TRI)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
#define LLVM_DEBUG(...)
Definition Debug.h:114
LoopBoundsPostOrderTraversal(const MachineBasicBlock *Start, LoopBounds &LB)
bool insertEdge(std::optional< const MachineBasicBlock * > From, const MachineBasicBlock *To)
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI bool isPredecessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a predecessor of this block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
const MachineOperand & getOperand(unsigned i) const
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register,...
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, LiveRegUnitSet &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, LiveRegUnitSet &RegUnits)
Updates the depth of the instructions from Start to End.
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getInstrCount() const
Compute the total number of instructions in the trace.
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks={}, ArrayRef< const MCSchedClassDesc * > ExtraInstrs={}, ArrayRef< const MCSchedClassDesc * > RemoveInstrs={}) const
Return the resource length of the trace.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getCriticalPath() const
Return the length of the (data dependency) critical path through the trace.
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
ArrayRef< unsigned > getProcReleaseAtCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
void init(MachineFunction &Func, const MachineLoopInfo &LI)
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition SparseSet.h:287
const_iterator end() const
Definition SparseSet.h:171
iterator find(const KeyT &Key)
find - Find an element by its key.
Definition SparseSet.h:221
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition SparseSet.h:152
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
const MCWriteProcResEntry * ProcResIter
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
TargetSubtargetInfo - Generic base class for all target subtargets.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
@ Kill
The last use of a register.
LLVM_ABI Printable printRegUnit(MCRegUnit Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
SparseSet< LiveRegUnit, MCRegUnit, MCRegUnitToIndex > LiveRegUnitSet
LLVM_ABI char & MachineTraceMetricsID
MachineTraceMetrics - This pass computes critical path and CPU resource usage in an ensemble of trace...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1885
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs)
Adds registers contained in LiveRegs to the block live-in list of MBB.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
const MachineInstr * MI
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Per-basic block information that doesn't depend on the trace through the block.
bool hasResources() const
Returns true when resource information for this block has been computed.
unsigned InstrCount
The number of non-trivial instructions in the block.
bool HasCalls
True when the block contains calls.
InstrCycles represents the cycle height and depth of an instruction in a trace.
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
A virtual register or regunit required by a basic block or its trace successors.
Per-basic block information that relates to a specific trace through the block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
SmallVector< LiveInReg, 4 > LiveIns
Live-in registers.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block.
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().