LLVM 23.0.0git
AMDGPUNextUseAnalysis.cpp
Go to the documentation of this file.
1//===---------------------- AMDGPUNextUseAnalysis.cpp ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AMDGPUNextUseAnalysis pass, a machine-level analysis
10// that computes the distance from each instruction to the "nearest" next use of
11// every live virtual register. These distances guide register spilling
12// decisions by identifying which live values are furthest from their next use
13// and are therefore the best candidates to spill.
14//
15// The analysis is based on the Braun & Hack CC'09 paper "Register Spilling and
16// Live-Range Splitting for SSA-Form Programs."
17//
18// Key concepts:
19//
20// NextUseDistance A loop-depth-weighted instruction count representing
21// how far away a register's next use is. Distances
22// through deeper loops are scaled by fromLoopDepth() so
23// that uses inside hot loops appear closer.
24//
25// Inter-block Pre-computed shortest weighted distances between all
26// distances pairs of basic blocks, used to efficiently answer
27// cross-block next-use queries. Each intermediate block
28// is weighted by fromLoopDepth() applied once per loop
29// boundary crossing relative to the destination.
30//
31// Configuration flags (see Config struct in the header):
32//
33// CountPhis Count PHI instructions toward distance and block size.
34// ForwardOnly Restrict inter-block distances to forward-reachable
35// paths.
36// PreciseUseModeling Model PHI uses at their incoming edge block and filter
37// uses with intermediate redefinitions.
38// PromoteToPreheader Route loop-entry and inner-loop uses to the preheader.
39//
40// This file contains:
41//
42// - Command-line options for configuration and debug output
43// - LiveRegUse / JSON helpers
44// - AMDGPUNextUseAnalysisImpl (the main analysis implementation)
45// - Instruction ID assignment and block size computation
46// - CFG path pre-computation (reachability, loop depth, back-edges)
47// - Inter-block distance computation
48// - Per-register next-use distance queries and caching
49// - AMDGPUNextUseAnalysis (public facade, pimpl)
50// - Legacy and new pass manager wrappers
51//
52//===----------------------------------------------------------------------===//
53
55#include "AMDGPU.h"
56#include "GCNRegPressure.h"
57#include "GCNSubtarget.h"
58
59#include "llvm/ADT/DenseMap.h"
69#include "llvm/Support/JSON.h"
70#include "llvm/Support/Timer.h"
73
74#include <algorithm>
75#include <limits>
76#include <string>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "amdgpu-next-use-analysis"
81
82//==============================================================================
83// Options etc
84//==============================================================================
85namespace {
86
88 DistanceCacheEnabled("amdgpu-next-use-analysis-distance-cache",
89 cl::init(true), cl::Hidden,
90 cl::desc("Enable live-reg-use distance cache"));
91
93 DumpNextUseDistanceAsJson("amdgpu-next-use-analysis-dump-distance-as-json",
95
96cl::opt<bool> DumpNextUseDistanceDefToUse(
97 "amdgpu-next-use-analysis-dump-distance-def-to-use", cl::init(false),
99
101 DumpNextUseDistanceVerbose("amdgpu-next-use-analysis-dump-distance-verbose",
102 cl::init(false), cl::Hidden);
103
104// 'graphics' and 'compute' modes arose due to initial competing implementations
105// of next-use analysis that emphasized different types of workloads. This
106// implementation is a compromise that combines aspects of both. Over time, the
107// hope is we will be able to remove some of these differences and settle on a
108// more unified implementation.
110 ConfigPresetOpt("amdgpu-next-use-analysis-config", cl::Hidden,
111 cl::init("graphics"),
112 cl::desc("Config preset: 'graphics' or 'compute'"));
113
114cl::opt<bool> ConfigCountPhisOpt(
115 "amdgpu-next-use-analysis-count-phis", cl::Hidden,
116 cl::desc("Count PHI instructions toward distance and block size"));
117cl::opt<bool> ConfigForwardOnlyOpt(
118 "amdgpu-next-use-analysis-forward-only", cl::Hidden,
119 cl::desc("Restrict inter-block distances to forward-reachable paths"));
120cl::opt<bool> ConfigPreciseUseModelingOpt(
121 "amdgpu-next-use-analysis-precise-use-modeling", cl::Hidden,
122 cl::desc("Model PHI uses via incoming edge block with loop-aware "
123 "reachability filtering"));
124cl::opt<bool> ConfigPromoteToPreheaderOpt(
125 "amdgpu-next-use-analysis-use-preheader-model", cl::Hidden,
126 cl::desc("Promote loop-entry and inner-loop uses to the loop preheader"));
127} // namespace
128
129//==============================================================================
130// LiveRegUse - Represents a live register use with its distance. Used for
131// tracking and sorting register uses by distance.
132//==============================================================================
133namespace {
134using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
135struct LiveRegUse : public UseDistancePair {
136 // 'nullptr' indicates an unset/invalid state.
137 LiveRegUse() : UseDistancePair(nullptr, 0) {}
138 LiveRegUse(const MachineOperand *Use, NextUseDistance Dist)
139 : UseDistancePair(Use, Dist) {}
140 LiveRegUse(const UseDistancePair &P) : UseDistancePair(P) {}
141
142 bool isUnset() const { return Use == nullptr; }
143
144 Register getReg() const { return Use->getReg(); }
145 unsigned getSubReg() const { return Use->getSubReg(); }
146 LaneBitmask getLaneMask(const SIRegisterInfo *TRI) const {
147 return TRI->getSubRegIndexLaneMask(Use->getSubReg());
148 }
149
150 bool isCloserThan(const LiveRegUse &X) const {
151 if (Dist < X.Dist)
152 return true;
153
154 if (Dist > X.Dist)
155 return false;
156
157 if (Use == X.Use)
158 return false;
159
160 // Ugh. When !CountPhis, PHIs and the first non-PHI instruction have id
161 // 0. In this case, consider PHIs as less than the first non-PHI
162 // instruction.
163 const MachineInstr *ThisMI = Use->getParent();
164 const MachineInstr *XMI = X.Use->getParent();
165 const MachineBasicBlock *ThisMBB = ThisMI->getParent();
166 if (ThisMBB == XMI->getParent()) {
167 if (ThisMI->isPHI() && !XMI->isPHI() &&
168 XMI == &(*ThisMBB->getFirstNonPHI()))
169 return true;
170 }
171
172 // Ensure deterministic results
173 return X.getReg() < getReg();
174 }
175
176 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr,
177 const MachineRegisterInfo *MRI = nullptr) const {
178 if (isUnset()) {
179 OS << "<unset>";
180 return;
181 }
182 Dist.print(OS);
183 OS << " [" << printReg(getReg(), TRI, getSubReg(), MRI) << "]";
184 }
185
186 LLVM_DUMP_METHOD void dump() const {
187 print(dbgs());
188 dbgs() << '\n';
189 }
190};
191
192inline bool updateClosest(LiveRegUse &Closest, const LiveRegUse &X) {
193 if (!Closest.Use || X.isCloserThan(Closest)) {
194 Closest = X;
195 return true;
196 }
197 return false;
198}
199
200inline bool updateFurthest(LiveRegUse &Furthest, const LiveRegUse &X) {
201 if (!Furthest.Use || Furthest.isCloserThan(X)) {
202 Furthest = X;
203 return true;
204 }
205 return false;
206}
207} // namespace
208
209//==============================================================================
210// JSON helpers
211//==============================================================================
212namespace {
// Emit a JSON string attribute whose value is produced by streaming the
// lambda's output between raw double quotes.
//
// NOTE(review): the lambda output is written verbatim — it is not
// JSON-escaped, so embedded '"' or '\' characters would yield invalid JSON.
// Confirm callers only emit benign text (register names, block names, MIR).
template <typename Lambda>
void printStringAttr(json::OStream &J, const char *Name, Lambda L) {
  J.attributeBegin(Name);
  raw_ostream &OS = J.rawValueBegin();
  OS << '"';
  L(OS);
  OS << '"';
  J.rawValueEnd();
  J.attributeEnd();
}
// Overload: emit a Printable (e.g. the result of printReg) as a JSON string
// attribute.
void printStringAttr(json::OStream &J, const char *Name, Printable P) {
  printStringAttr(J, Name, [&](raw_ostream &OS) { OS << P; });
}

// Overload: emit a MachineInstr, printed on a single line without a
// trailing newline, as a JSON string attribute.
void printStringAttr(json::OStream &J, const char *Name, const MachineInstr &MI,
                     ModuleSlotTracker &MST) {
  printStringAttr(J, Name, [&](raw_ostream &OS) {
    MI.print(OS, MST,
             /* IsStandalone */ false,
             /* SkipOpers */ false,
             /* SkipDebugLoc */ false,
             /* AddNewLine ---> */ false,
             /* TargetInstrInfo */ nullptr);
  });
}
238
239void printMBBNameAttr(json::OStream &J, const char *Name,
241 printStringAttr(J, Name, [&](raw_ostream &OS) {
242 MBB.printName(OS, MachineBasicBlock::PrintNameIr, &MST);
243 });
244}
245
246template <typename NameLambda, typename ValueT>
247void printAttr(json::OStream &J, NameLambda NL, ValueT V) {
248 std::string Name;
249 raw_string_ostream NameOS(Name);
250 NL(NameOS);
251 J.attribute(NameOS.str(), V);
252}
253
254template <typename ValueT>
255void printAttr(json::OStream &J, const Printable &P, ValueT V) {
256 printAttr(J, [&](raw_ostream &OS) { OS << P; }, V);
257}
258
259} // namespace
260
261//==============================================================================
262// AMDGPUNextUseAnalysisImpl
263//==============================================================================
265public:
270 static constexpr bool InstrRelative = true;
271 static constexpr bool InstrInvariant = false;
272
273private:
274 const MachineFunction *MF = nullptr;
275 const SIRegisterInfo *TRI = nullptr;
276 const SIInstrInfo *TII = nullptr;
277 const MachineLoopInfo *MLI = nullptr;
278 const MachineRegisterInfo *MRI = nullptr;
279
280 using InstrIdTy = unsigned;
282 InstrToIdMap InstrToId;
284
  // Populate all per-function tables: per-block instruction ids, CFG path
  // info (edge kinds / reachability), and the inter-block distance map.
  void initializeTables() {
    for (const MachineBasicBlock &BB : *MF)
      calcInstrIds(&BB, InstrToId);
    initializeCfgPaths();
    initializeInterBlockDistances();
  }

  // Drop all cached state (instruction ids, register-use lists, path info,
  // and the distance cache) so the analysis can be recomputed from scratch.
  void clearTables() {
    InstrToId.clear();
    RegUseMap.clear();
    Paths.clear();

    resetDistanceCache();
  }
299
300 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
301 // Instruction Ids
302 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
303private:
304 unsigned sizeOf(const MachineInstr &MI) const {
305 // When !Cfg.CountPhis, PHIs do not contribute to distances/sizes since they
306 // generally don't result in the generation of a machine instruction.
307 // FIXME: Consider using MI.isPseudo() or maybe MI.isMetaInstruction().
308 return Cfg.CountPhis ? 1 : !MI.isPHI();
309 }
310
311 void calcInstrIds(const MachineBasicBlock *BB,
312 InstrToIdMap &MutableInstrToId) const {
313 InstrIdTy Id = 0;
314 for (auto &MI : BB->instrs()) {
315 MutableInstrToId[&MI] = Id;
316 Id += sizeOf(MI);
317 }
318 }
319
320 /// Returns MI's instruction Id. It renumbers (part of) the BB if MI is not
321 /// found in the map.
322 InstrIdTy getInstrId(const MachineInstr *MI) const {
323 auto It = InstrToId.find(MI);
324 if (It != InstrToId.end())
325 return It->second;
326
327 // Renumber the MBB.
328 // TODO: Renumber from MI onwards.
329 auto &MutableInstrToId = const_cast<InstrToIdMap &>(InstrToId);
330 calcInstrIds(MI->getParent(), MutableInstrToId);
331 return InstrToId.find(MI)->second;
332 }
333
334 // Length of the segment from MI (inclusive) to the first instruction of the
335 // basic block.
336 InstrIdTy getHeadLen(const MachineInstr *MI) const {
337 const MachineBasicBlock *MBB = MI->getParent();
338 return getInstrId(MI) + getInstrId(&MBB->instr_front()) + 1;
339 }
340
  // Length of the segment from MI (exclusive) to the last instruction of the
  // basic block.
  InstrIdTy getTailLen(const MachineInstr *MI) const {
    const MachineBasicBlock *MBB = MI->getParent();
    return getInstrId(&MBB->instr_back()) - getInstrId(MI);
  }

  // Length of the segment from 'From' to 'To' (exclusive). Both instructions
  // must be in the same basic block. Note InstrIdTy is unsigned, so a
  // reversed (To-before-From) pair would wrap around.
  InstrIdTy getDistance(const MachineInstr *From,
                        const MachineInstr *To) const {
    assert(From->getParent() == To->getParent());
    return getInstrId(To) - getInstrId(From);
  }
355
356 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
357 // RegUses - cache of uses by register
358 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
359private:
360 DenseMap<Register, SmallVector<const MachineOperand *>> RegUseMap;
361
363 getRegisterUses(Register Reg) const {
364 auto I = RegUseMap.find(Reg);
365 if (I != RegUseMap.end())
366 return I->second;
367
368 auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
369 SmallVector<const MachineOperand *> &Uses = NonConstThis->RegUseMap[Reg];
370 for (const MachineOperand &UseMO : MRI->use_nodbg_operands(Reg)) {
371 if (!UseMO.isUndef())
372 Uses.push_back(&UseMO);
373 }
374 return Uses;
375 }
376
  // True when Reg has at least one non-undef, non-debug use.
  bool hasAtLeastOneUse(Register Reg) const {
    return !getRegisterUses(Reg).empty();
  }
380
381 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
382 // Paths
383 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
384private:
  // An ordered (source, destination) pair of basic blocks used as the key of
  // the Paths cache. Wraps a std::pair so it can reuse the pair's
  // DenseMapInfo for hashing/equality.
  class Path {
    using StorageTy =
        std::pair<const MachineBasicBlock *, const MachineBasicBlock *>;
    StorageTy P;

  public:
    constexpr Path() : P(nullptr, nullptr) {}
    constexpr Path(const MachineBasicBlock *Src, const MachineBasicBlock *Dst)
        : P(Src, Dst) {}
    Path(const StorageTy &Pair) : P(Pair) {}

    // Implicit conversion lets DenseMap treat a Path as the underlying pair.
    constexpr operator const StorageTy &() const { return P; }
    using DenseMapInfo = llvm::DenseMapInfo<StorageTy>;

    const MachineBasicBlock *src() const { return P.first; }
    const MachineBasicBlock *dst() const { return P.second; }
  };
402
403 enum class EdgeKind { Back = -1, None = 0, Forward = 1 };
404 static constexpr StringRef toString(EdgeKind EK) {
405 if (EK == EdgeKind::Back)
406 return "back";
407 if (EK == EdgeKind::Forward)
408 return "fwd";
409 return "none";
410 }
411
412 struct PathInfo {
413 EdgeKind EK;
414 bool Reachable;
415 int ForwardReachable;
416 unsigned RelativeLoopDepth;
417 std::optional<NextUseDistance> ShortestDistance;
418 std::optional<NextUseDistance> ShortestUnweightedDistance;
419 InstrIdTy Size;
420
421 PathInfo()
422 : EK(EdgeKind::None), Reachable(false), ForwardReachable(-1),
423 RelativeLoopDepth(0), Size(0) {}
424
425 bool isBackedge() const { return EK == EdgeKind::Back; }
426
427 bool isForwardReachableSet() const { return 0 <= ForwardReachable; }
428 bool isForwardReachableUnset() const { return ForwardReachable < 0; }
429 bool isForwardReachable() const { return ForwardReachable == 1; }
430 bool isNotForwardReachable() const { return ForwardReachable == 0; }
431
432 void print(raw_ostream &OS) const {
433 OS << "{ek=" << toString(EK) << " reach=" << Reachable
434 << " fwd-reach=" << ForwardReachable
435 << " loop-depth=" << RelativeLoopDepth << " size=" << Size;
436 if (ShortestDistance) {
437 OS << " shortest-dist=";
438 ShortestDistance->print(OS);
439 }
440 if (ShortestUnweightedDistance) {
441 OS << " shortest-unweighted-dist=";
442 ShortestUnweightedDistance->print(OS);
443 }
444 OS << "}";
445 }
446
447 LLVM_DUMP_METHOD void dump() const {
448 print(dbgs());
449 dbgs() << '\n';
450 }
451 };
452
453 //----------------------------------------------------------------------------
454 // Path Storage - 'Paths' is lazily populated and some members are lazily
455 // computed. All mutations should go through one of the 'initializePathInfo*'
456 // flavors below.
457 //----------------------------------------------------------------------------
458 DenseMap<Path, PathInfo, Path::DenseMapInfo> Paths;
459
460 const PathInfo *maybePathInfoFor(const MachineBasicBlock *From,
461 const MachineBasicBlock *To) const {
462 auto I = Paths.find({From, To});
463 return I == Paths.end() ? nullptr : &I->second;
464 }
465
  // Return the PathInfo for (From, To), creating and initializing the entry
  // on first query. Logically const: the Paths cache is mutated through a
  // const_cast.
  PathInfo &getOrInitPathInfo(const MachineBasicBlock *From,
                              const MachineBasicBlock *To) const {
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;

    Path P(From, To);
    auto [I, Inserted] = MutablePaths.try_emplace(P);
    if (!Inserted)
      return I->second;

    // May itself insert entries into Paths (and thus invalidate 'I').
    bool Reachable = calcIsReachable(P.src(), P.dst());

    // Iterator may have been invalidated by calcIsReachable, so get a fresh
    // reference to the slot.
    return NonConstThis->initializePathInfo(MutablePaths.at(P), P,
                                            EdgeKind::None, Reachable);
  }

  // Read-only spelling of getOrInitPathInfo for call sites that only inspect
  // the result.
  const PathInfo &pathInfoFor(const MachineBasicBlock *From,
                              const MachineBasicBlock *To) const {
    return getOrInitPathInfo(From, To);
  }
488
489 //----------------------------------------------------------------------------
490 // initializePathInfo* - various flavors of PathInfo initialization. They
491 // (should) always funnel to the first flavor below.
492 //----------------------------------------------------------------------------
  // Core flavor: fully (re)initialize 'Slot' for path P. All other flavors
  // funnel here.
  PathInfo &initializePathInfo(PathInfo &Slot, Path P, EdgeKind EK,
                               bool Reachable) {
    Slot.EK = EK;
    Slot.Reachable = Reachable;
    // Tri-state stays "unknown" (-1) unless this entry is a concrete edge.
    Slot.ForwardReachable = EK == EdgeKind::None ? -1 : EK == EdgeKind::Forward;
    Slot.RelativeLoopDepth =
        Slot.Reachable ? calcRelativeLoopDepth(P.src(), P.dst()) : 0;
    // Only self-paths (src == dst) carry the block size (see getSize()).
    Slot.Size = P.src() == P.dst() ? calcSize(P.src()) : 0;
    if (EK != EdgeKind::None)
      Slot.ShortestUnweightedDistance = 0;
    return Slot;
  }

  // Const-context flavor: (re)initialize the slot keyed by P, creating it if
  // needed.
  PathInfo &initializePathInfo(Path P, EdgeKind EK, bool Reachable) const {
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;
    return NonConstThis->initializePathInfo(MutablePaths[P], P, EK, Reachable);
  }

  // Initialize the slot for P only if it does not exist yet; returns the
  // slot and whether it was newly inserted.
  std::pair<PathInfo *, bool> maybeInitializePathInfo(Path P, EdgeKind EK,
                                                      bool Reachable) const {
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;
    auto [I, Inserted] = MutablePaths.try_emplace(P);
    if (Inserted)
      NonConstThis->initializePathInfo(I->second, P, EK, Reachable);
    return {&I->second, Inserted};
  }

  // Record a lazily computed forward-reachability verdict for (From, To);
  // must only be called while the tri-state is still unset.
  bool initializePathInfoForwardReachable(const MachineBasicBlock *From,
                                          const MachineBasicBlock *To,
                                          bool Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(Slot.isForwardReachableUnset());
    Slot.ForwardReachable = Value;
    return Value;
  }

  // Record the lazily computed shortest weighted distance for (From, To);
  // asserts it is only set once.
  NextUseDistance
  initializePathInfoShortestDistance(const MachineBasicBlock *From,
                                     const MachineBasicBlock *To,
                                     NextUseDistance Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(!Slot.ShortestDistance.has_value());
    Slot.ShortestDistance = Value;
    return Value;
  }

  // Unweighted counterpart of initializePathInfoShortestDistance.
  NextUseDistance
  initializePathInfoShortestUnweightedDistance(const MachineBasicBlock *From,
                                               const MachineBasicBlock *To,
                                               NextUseDistance Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(!Slot.ShortestUnweightedDistance.has_value());
    Slot.ShortestUnweightedDistance = Value;
    return Value;
  }
550
551 //----------------------------------------------------------------------------
552 // initialize*Paths
553 //----------------------------------------------------------------------------
554private:
  // Record plain reachability results for batches of paths discovered during
  // a reachability DFS (see calcIsReachable).
  void initializePaths(const SmallVector<Path> &ReachablePaths,
                       const SmallVector<Path> &UnreachablePaths) const {
    for (const Path &P : ReachablePaths)
      initializePathInfo(P, EdgeKind::None, true);
    for (const Path &P : UnreachablePaths)
      initializePathInfo(P, EdgeKind::None, false);
  }

  // Record forward-reachability verdicts for batches of paths; only touches
  // each slot's ForwardReachable tri-state (reachable first, then not).
  void
  initializeForwardOnlyPaths(const SmallVector<Path> &ReachablePaths,
                             const SmallVector<Path> &UnreachablePaths) const {
    for (bool R : {true, false}) {
      const auto &ToInit = R ? ReachablePaths : UnreachablePaths;
      for (const Path &P : ToInit) {
        PathInfo &Slot = getOrInitPathInfo(P.src(), P.dst());
        assert(Slot.isForwardReachableUnset() || Slot.ForwardReachable == R);
        Slot.ForwardReachable = R;
      }
    }
  }
575
576 // Follow the control flow graph starting at the entry block until all blocks
577 // have been visited. Along the way, initialize the PathInfo for each edge
578 // traversed.
579 void initializeCfgPaths() {
580 Paths.clear();
581
582 enum VisitState { Undiscovered, Visiting, Finished };
583 DenseMap<const MachineBasicBlock *, VisitState> State;
584
586 State[&MF->front()] = Undiscovered;
587
588 while (!Work.empty()) {
589 const MachineBasicBlock *Src = Work.back();
590 VisitState &SrcState = State[Src];
591
592 // A block may already be 'Finished' if it is reachable from multiple
593 // predecessors causing it to be pushed more than once while still
594 // 'Undiscovered'.
595 if (SrcState == Visiting || SrcState == Finished) {
596 Work.pop_back();
597 SrcState = Finished;
598 continue;
599 }
600
601 SrcState = Visiting;
602 for (const MachineBasicBlock *Dst : Src->successors()) {
603 const VisitState DstState = State.lookup(Dst);
604
605 EdgeKind EK;
606 if (DstState == Undiscovered) {
607 EK = EdgeKind::Forward;
608 Work.push_back(Dst);
609 } else if (DstState == Visiting) {
610 EK = EdgeKind::Back;
611 } else {
612 EK = EdgeKind::Forward;
613 }
614
615 Path P(Src, Dst);
616 assert(!Paths.contains(P));
617 initializePathInfo(P, EK, /*Reachable*/ true);
618 }
619 }
620
621 LLVM_DEBUG(dumpPaths());
622 }
623
624 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
625 // Loop helpers
626 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
627private:
  // True when Loop has neither sub-loops nor an enclosing loop.
  static bool isStandAloneLoop(const MachineLoop *Loop) {
    return Loop->getSubLoops().empty() && Loop->isOutermost();
  }

  // Walk up from 'Descendant' and return the ancestor whose direct parent is
  // 'Parent' (i.e. Parent's immediate child on the path to Descendant).
  // NOTE(review): assumes Descendant is nested inside Parent (or Parent is
  // null); otherwise the walk runs off the top of the loop tree — confirm
  // callers guarantee this.
  static MachineLoop *findChildLoop(MachineLoop *const Parent,
                                    MachineLoop *Descendant) {
    for (MachineLoop *L = Descendant; L != Parent; L = L->getParentLoop()) {
      if (L->getParentLoop() == Parent)
        return L;
    }
    return nullptr;
  }

  // If loops 'A' and 'B' share a common parent loop, return that loop and the
  // depth of 'A' relative to it. Otherwise return nullptr and the loop depth of
  // 'A'.
  static std::pair<MachineLoop *, unsigned>
  findCommonParent(MachineLoop *A, const MachineLoop *B) {
    unsigned Depth = 0;
    for (; A != nullptr; A = A->getParentLoop(), ++Depth) {
      if (A->contains(B))
        break;
    }
    return {A, Depth};
  }

  // Preheader of the outermost loop enclosing 'Loop'; nullptr when Loop is
  // null or that loop has no preheader.
  static const MachineBasicBlock *
  getOutermostPreheader(const MachineLoop *Loop) {
    return Loop ? Loop->getOutermostLoop()->getLoopPreheader() : nullptr;
  }

  // Preheader of Parent's immediate child loop on the path to 'Descendant'.
  static MachineBasicBlock *findChildPreheader(MachineLoop *const Parent,
                                               MachineLoop *Descendant) {
    MachineLoop *ChildLoop = findChildLoop(Parent, Descendant);
    return ChildLoop ? ChildLoop->getLoopPreheader() : nullptr;
  }

  // For a PHI use operand, return its incoming basic block. PHI operands
  // come in (value, block) pairs, so the block operand immediately follows
  // the value operand. Returns nullptr for non-PHI instructions.
  static const MachineBasicBlock *
  getIncomingBlockIfPhiUse(const MachineInstr *MI, const MachineOperand *MO) {
    return MI->isPHI() ? MI->getOperand(MO->getOperandNo() + 1).getMBB()
                       : nullptr;
  }
670
671 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
672 // Calculate features
673 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
674private:
  // Number of distance-contributing instructions in BB (PHIs are excluded
  // unless Cfg.CountPhis; see sizeOf()).
  InstrIdTy calcSize(const MachineBasicBlock *BB) const {
    InstrIdTy Size = BB->size();
    if (!Cfg.CountPhis)
      Size -= std::distance(BB->begin(), BB->getFirstNonPHI());
    return Size;
  }

  // Size of 'From' scaled by its loop depth relative to 'To'.
  NextUseDistance calcWeightedSize(const MachineBasicBlock *From,
                                   const MachineBasicBlock *To) const {
    return NextUseDistance::fromSize(getSize(From),
                                     getRelativeLoopDepth(From, To));
  }

  // Return the loop depth of 'From' relative to 'To'.
  unsigned calcRelativeLoopDepth(const MachineBasicBlock *From,
                                 const MachineBasicBlock *To) const {
    MachineLoop *LoopFrom = MLI->getLoopFor(From);
    MachineLoop *LoopTo = MLI->getLoopFor(To);

    // 'From' not in any loop: relative depth is 0.
    if (!LoopFrom)
      return 0;

    // 'To' outside all loops: the full depth of 'From' counts.
    if (!LoopTo)
      return LoopFrom->getLoopDepth();

    if (LoopFrom->contains(LoopTo)) // covers LoopFrom == LoopTo
      return 0;

    if (LoopTo->contains(LoopFrom))
      return LoopFrom->getLoopDepth() - LoopTo->getLoopDepth();

    // Loops are siblings of some sort.
    return findCommonParent(LoopFrom, LoopTo).second;
  }
709
  // Attempt to find a path from 'From' to 'To' using a depth first search. If
  // 'ForwardOnly' is true, do not follow backedges. As a performance
  // improvement, this may initialize reachable intermediate paths or paths we
  // determine are unreachable.
  bool calcIsReachable(const MachineBasicBlock *From,
                       const MachineBasicBlock *To,
                       bool ForwardOnly = false) const {
    // A block can only "reach" itself via a loop carrying control back.
    if (From == To && !MLI->getLoopFor(From))
      return false;

    // Fast path: an existing inter-block distance proves reachability.
    if (!ForwardOnly && interBlockDistanceExists(From, To))
      return true;

    // Explicit DFS stack: VisitOp expands a block, PopOp backtracks off it.
    enum { VisitOp, PopOp };
    using MBBOpPair = std::pair<const MachineBasicBlock *, int>;
    SmallVector<MBBOpPair> Work{{From, VisitOp}};
    DenseSet<const MachineBasicBlock *> Visited{From};

    // Blocks on the current DFS path, paired with 'To' (they can reach 'To'
    // if we succeed); blocks proven unable to reach 'To'.
    SmallVector<Path> IntermediatePath;
    SmallVector<Path> Unreachable;

    // Should be run at every function exit point.
    auto Finally = [&](bool Reachable) {
      // This is an optimization. For intermediate paths we found while
      // calculating reachability for 'From' --> 'To', remember their
      // reachability.
      if (!Reachable) {
        IntermediatePath.clear();
        for (const MachineBasicBlock *MBB : Visited) {
          if (MBB != From)
            Unreachable.emplace_back(MBB, To);
        }
      }

      if (ForwardOnly)
        initializeForwardOnlyPaths(IntermediatePath, Unreachable);
      else
        initializePaths(IntermediatePath, Unreachable);

      return Reachable;
    };

    while (!Work.empty()) {
      auto [Current, Op] = Work.pop_back_val();

      // Backtracking: 'Current' is fully explored without reaching 'To'.
      if (Op == PopOp) {
        IntermediatePath.pop_back();
        if (ForwardOnly)
          Unreachable.emplace_back(Current, To);
        continue;
      }

      if (Current->succ_empty())
        continue;

      if (Current != From) {
        IntermediatePath.emplace_back(Current, To);
        Work.emplace_back(Current, PopOp);
      }

      for (const MachineBasicBlock *Succ : Current->successors()) {
        if (ForwardOnly && isBackedge(Current, Succ))
          continue;

        if (Succ == To)
          return Finally(true);

        // Reuse any previously memoized verdict for Succ --> To.
        if (auto CachedReachable = isMaybeReachable(Succ, To, ForwardOnly)) {
          if (CachedReachable.value())
            return Finally(true);
          Visited.insert(Succ);
          continue;
        }

        if (Visited.insert(Succ).second)
          Work.emplace_back(Succ, VisitOp);
      }
    }

    return Finally(false);
  }
792
793 //----------------------------------------------------------------------------
794 // Inter-block distance - the weighted and unweighted cost (i.e. "distance")
795 // to travel from one MachineBasicBlock to another.
796 //
797 // Values are pre-computed and stored in 'InterBlockDistances' using a
798 // backwards data-flow algorithm similar to the one described in 4.1 of a
799 // "Register Spilling and Live-Range Splitting for SSA-Form Programs" by
800 // Matthias Braun and Sebastian Hack, CC'09. This replaced a prior
801 // implementation based on Dijkstra's shortest path algorithm.
802 //----------------------------------------------------------------------------
803private:
  // A (weighted, unweighted) distance pair between two basic blocks; the
  // weighted component scales intermediate blocks by relative loop depth.
  struct InterBlockDistance {
    NextUseDistance Weighted;
    NextUseDistance Unweighted;
    // Default state uses the sentinel distance -1 for both components.
    InterBlockDistance() : Weighted(-1), Unweighted(-1) {}
    InterBlockDistance(NextUseDistance W, NextUseDistance UW)
        : Weighted(W), Unweighted(UW) {}
    bool operator==(const InterBlockDistance &Other) const {
      return Weighted == Other.Weighted && Unweighted == Other.Unweighted;
    }
    bool operator!=(const InterBlockDistance &Other) const {
      return !(*this == Other);
    }

    void print(raw_ostream &OS) const {
      OS << "{W=";
      Weighted.print(OS);
      OS << " U=";
      Unweighted.print(OS);
      OS << "}";
    }

    LLVM_DUMP_METHOD void dump() const {
      print(dbgs());
      dbgs() << '\n';
    }
  };
  // Keyed by MBB number: source block -> (destination block -> distance).
  using InterBlockDistanceMap =
      DenseMap<unsigned, DenseMap<unsigned, InterBlockDistance>>;
  InterBlockDistanceMap InterBlockDistances;
833
  // Pre-compute shortest weighted/unweighted distances between all pairs of
  // blocks via a fixed-point iteration over the CFG in post-order — the
  // backwards data-flow scheme described in 4.1 of Braun & Hack CC'09 (see
  // the section comment above).
  void initializeInterBlockDistances() {
    InterBlockDistanceMap Distances;

    bool Changed;
    do {
      Changed = false;
      for (const MachineBasicBlock *MBB : post_order(MF)) {
        unsigned MBBNum = MBB->getNumber();

        // Save previous state for convergence check
        InterBlockDistanceMap::mapped_type Prev = std::move(Distances[MBBNum]);
        InterBlockDistanceMap::mapped_type Curr;
        Curr.reserve(Prev.size());

        // Direct successors are distance 0 by definition: no instructions are
        // executed between exiting MBB and entering Succ.
        for (const MachineBasicBlock *Succ : MBB->successors())
          Curr[Succ->getNumber()] = InterBlockDistance(0, 0);

        // Propagate further destinations through each successor.
        for (const MachineBasicBlock *Succ : MBB->successors()) {
          unsigned SuccNum = Succ->getNumber();
          const unsigned UnweightedSize{getSize(Succ)};

          for (const auto &[DestBlockNum, DestDist] : Distances[SuccNum]) {
            // MBB -> MBB is considered unreachable (getInterBlockDistance
            // asserts From != To).
            if (DestBlockNum == MBBNum)
              continue;

            const MachineBasicBlock *DestMBB =
                MF->getBlockNumbered(DestBlockNum);

            // Unweighted: just add Succ's instruction count.
            const NextUseDistance UnweightedDist{UnweightedSize +
                                                 DestDist.Unweighted};

            unsigned SuccToDestLoopDepth = calcRelativeLoopDepth(Succ, DestMBB);

            // Weighted: scale Succ's size by its loop depth relative to the
            // destination so uses inside hot loops appear closer.
            const NextUseDistance WeightedDist =
                DestDist.Weighted +
                NextUseDistance::fromSize(UnweightedSize, SuccToDestLoopDepth);

            // Insert or update distances (take minimum)
            auto [I, First] =
                Curr.try_emplace(DestBlockNum, WeightedDist, UnweightedDist);
            if (!First) {
              InterBlockDistance &Slot = I->second;
              Slot.Weighted = min(Slot.Weighted, WeightedDist);
              Slot.Unweighted = min(Slot.Unweighted, UnweightedDist);
            }
          }
        }
        Changed |= (Prev != Curr);
        Distances[MBBNum] = std::move(Curr);
      }
    } while (Changed);

    InterBlockDistances = std::move(Distances);
    LLVM_DEBUG(dumpInterBlockDistances());
  }
894
  // Look up the pre-computed distance entry for From -> To, or nullptr when
  // 'To' was not reached from 'From' during initializeInterBlockDistances().
  const InterBlockDistance *
  getInterBlockDistanceMapValue(const MachineBasicBlock *From,
                                const MachineBasicBlock *To) const {
    auto I = InterBlockDistances.find(From->getNumber());
    if (I == InterBlockDistances.end())
      return nullptr;
    const InterBlockDistanceMap::mapped_type &FromSlot = I->second;
    auto J = FromSlot.find(To->getNumber());
    return J == FromSlot.end() ? nullptr : &J->second;
  }

  // True when a pre-computed From -> To distance exists.
  bool interBlockDistanceExists(const MachineBasicBlock *From,
                                const MachineBasicBlock *To) const {
    return getInterBlockDistanceMapValue(From, To);
  }

  // NOTE(review): the three early-out 'if's below lost their return
  // statements in this listing (lines elided by extraction); presumably each
  // returns the analysis' "unreachable"/infinite NextUseDistance sentinel —
  // confirm against the original source before relying on this listing.
  NextUseDistance getInterBlockDistance(const MachineBasicBlock *From,
                                        const MachineBasicBlock *To,
                                        bool Unweighted) const {

    assert(From != To && "The basic blocks should be different.");
    if (!From || !To)

    if (Cfg.ForwardOnly && !isForwardReachable(From, To))

    const InterBlockDistance *BD = getInterBlockDistanceMapValue(From, To);
    if (!BD)

    return Unweighted ? BD->Unweighted : BD->Weighted;
  }

  // Weighted (loop-depth-scaled) From -> To distance.
  NextUseDistance
  getWeightedInterBlockDistance(const MachineBasicBlock *From,
                                const MachineBasicBlock *To) const {
    return getInterBlockDistance(From, To, false);
  }

  // Raw instruction-count From -> To distance.
  NextUseDistance
  getUnweightedInterBlockDistance(const MachineBasicBlock *From,
                                  const MachineBasicBlock *To) const {
    return getInterBlockDistance(From, To, true);
  }
940
941 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
942 // Feature getters. Use cached results if available. If not calculate.
943 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
944private:
945 InstrIdTy getSize(const MachineBasicBlock *BB) const {
946 return pathInfoFor(BB, BB).Size;
947 }
948
949 bool isReachable(const MachineBasicBlock *From,
950 const MachineBasicBlock *To) const {
951 return pathInfoFor(From, To).Reachable;
952 }
953
954 bool isReachableOrSame(const MachineBasicBlock *From,
955 const MachineBasicBlock *To) const {
956 return From == To || pathInfoFor(From, To).Reachable;
957 }
958
  // Return true iff 'To' is reachable from 'From' using only forward edges.
  // The result is computed lazily and memoized in the PathInfo entry.
  bool isForwardReachable(const MachineBasicBlock *From,
                          const MachineBasicBlock *To) const {
    const PathInfo &PI = pathInfoFor(From, To);
    if (PI.isForwardReachableSet())
      return PI.isForwardReachable();

    // Not cached yet. Forward reachability implies plain reachability, so the
    // (presumably more expensive) forward-only walk only runs when Reachable
    // already holds; the computed answer is then stored in the PathInfo.
    return initializePathInfoForwardReachable(
        From, To,
        PI.Reachable && calcIsReachable(From, To, /*ForwardOnly*/ true));
  }
969
970 // Return true/false if we know that 'To' is reachable or not from
971 // 'From'. Otherwise return 'std::nullopt'.
972 std::optional<bool> isMaybeReachable(const MachineBasicBlock *From,
973 const MachineBasicBlock *To,
974 bool ForwardOnly) const {
975 const PathInfo *PI = maybePathInfoFor(From, To);
976 if (!PI)
977 return std::nullopt;
978
979 if (ForwardOnly) {
980 if (PI->isForwardReachable())
981 return true;
982
983 if (PI->isNotForwardReachable())
984 return false;
985 return std::nullopt;
986 }
987 return PI->Reachable;
988 }
989
990 bool isBackedge(const MachineBasicBlock *From,
991 const MachineBasicBlock *To) const {
992 return pathInfoFor(From, To).isBackedge();
993 }
994
  // Can be used as a substitute for DT->dominates(A, B) if A and B are in the
  // same basic block.
  //
  // Returns true iff A is at or before B in program order. Instruction ids
  // answer the common case; PHIs need special handling — when both A and B
  // are PHIs their relative order is recovered by scanning the block's PHI
  // list (presumably because ids do not order PHIs reliably — TODO confirm).
  bool instrsAreInOrder(const MachineInstr *A, const MachineInstr *B) const {
    assert(A->getParent() == B->getParent() &&
           "instructions must be in the same basic block!");
    if (A == B || getInstrId(A) < getInstrId(B))
      return true;
    // A PHI always precedes any non-PHI in the block.
    if (!A->isPHI())
      return false;
    if (!B->isPHI())
      return true;
    // Both are PHIs: whichever appears first in the PHI list comes first.
    for (auto &PHI : A->getParent()->phis()) {
      if (&PHI == A)
        return true;
      if (&PHI == B)
        return false;
    }
    return false;
  }
1014
1015 unsigned getRelativeLoopDepth(const MachineBasicBlock *From,
1016 const MachineBasicBlock *To) const {
1017 return pathInfoFor(From, To).RelativeLoopDepth;
1018 }
1019
1020 NextUseDistance getShortestPath(const MachineBasicBlock *From,
1021 const MachineBasicBlock *To) const {
1022 std::optional<NextUseDistance> MaybeD =
1023 pathInfoFor(From, To).ShortestDistance;
1024 if (MaybeD.has_value())
1025 return MaybeD.value();
1026
1027 NextUseDistance Dist = getWeightedInterBlockDistance(From, To);
1028 return initializePathInfoShortestDistance(From, To, Dist);
1029 }
1030
1031 NextUseDistance getShortestUnweightedPath(const MachineBasicBlock *From,
1032 const MachineBasicBlock *To) const {
1033 std::optional<NextUseDistance> MaybeD =
1034 pathInfoFor(From, To).ShortestUnweightedDistance;
1035 if (MaybeD.has_value())
1036 return MaybeD.value();
1037
1038 return initializePathInfoShortestUnweightedDistance(
1039 From, To, getUnweightedInterBlockDistance(From, To));
1040 }
1041
1042 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1043 /// MBBDistPair - Represents the distance to a machine basic block.
1044 /// Used for returning both the distance and the target block together.
1045 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1046private:
1047 struct MBBDistPair {
1048 NextUseDistance Distance;
1049 const MachineBasicBlock *MBB;
1050 MBBDistPair() : Distance(NextUseDistance::unreachable()), MBB(nullptr) {}
1051 MBBDistPair(NextUseDistance D, const MachineBasicBlock *B)
1052 : Distance(D), MBB(B) {}
1053
1054 MBBDistPair operator+(NextUseDistance D) { return {Distance + D, MBB}; }
1055 MBBDistPair &operator+=(NextUseDistance D) {
1056 Distance += D;
1057 return *this;
1058 }
1059
1060 void print(raw_ostream &OS) const {
1061 OS << "{";
1062 Distance.print(OS);
1063 if (MBB)
1064 OS << " " << printMBBReference(*MBB);
1065 else
1066 OS << " <null>";
1067 OS << "}";
1068 }
1069
1070 LLVM_DUMP_METHOD void dump() const {
1071 print(dbgs());
1072 dbgs() << '\n';
1073 }
1074 };
1075
1076 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1077 // CFG Helpers
1078 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1079private:
1080 // Return the shortest distance to a latch
1081 MBBDistPair calcShortestDistanceToLatch(const MachineBasicBlock *CurMBB,
1082 const MachineLoop *CurLoop) const {
1084 CurLoop->getLoopLatches(Latches);
1085 MBBDistPair LD;
1086
1087 for (MachineBasicBlock *LMBB : Latches) {
1088 if (LMBB == CurMBB)
1089 return {0, CurMBB};
1090
1091 NextUseDistance Dst = getShortestPath(CurMBB, LMBB);
1092 if (Dst < LD.Distance) {
1093 LD.Distance = Dst;
1094 LD.MBB = LMBB;
1095 }
1096 }
1097 return LD;
1098 }
1099
1100 // Return the shortest unweighted distance to a latch
1101 MBBDistPair
1102 calcShortestUnweightedDistanceToLatch(const MachineBasicBlock *CurMBB,
1103 const MachineLoop *CurLoop) const {
1105 CurLoop->getLoopLatches(Latches);
1106 MBBDistPair LD;
1107
1108 for (MachineBasicBlock *LMBB : Latches) {
1109 if (LMBB == CurMBB)
1110 return {0, CurMBB};
1111
1112 NextUseDistance Dst = getShortestUnweightedPath(CurMBB, LMBB);
1113 if (Dst < LD.Distance) {
1114 LD.Distance = Dst;
1115 LD.MBB = LMBB;
1116 }
1117 }
1118 return LD;
1119 }
1120
1121 // Return the shortest distance to an exit
1122 MBBDistPair calcShortestDistanceToExit(const MachineBasicBlock *CurMBB,
1123 const MachineLoop *CurLoop) const {
1125 CurLoop->getExitEdges(ExitEdges);
1126 MBBDistPair LD;
1127
1128 for (auto [Exit, Dest] : ExitEdges) {
1129 if (Exit == CurMBB)
1130 return {0, CurMBB};
1131
1132 NextUseDistance Dst = getShortestPath(CurMBB, Exit);
1133 if (Dst < LD.Distance) {
1134 LD.Distance = Dst;
1135 LD.MBB = Exit;
1136 }
1137 }
1138 return LD;
1139 }
1140
  // Return the shortest distance through a loop (header to latch) that goes
  // through CurMBB.
  MBBDistPair
  calcShortestDistanceThroughInnermostLoop(const MachineBasicBlock *CurMBB,
                                           MachineLoop *CurLoop) const {
    assert(MLI->getLoopFor(CurMBB) == CurLoop);

    // This is a hot spot. Check it before doing anything else.
    // Single-block loop: one trip is exactly the block itself.
    if (CurLoop->getNumBlocks() == 1)
      return {getSize(CurMBB), CurMBB};

    MachineBasicBlock *LoopHeader = CurLoop->getHeader();
    MBBDistPair LD{0, nullptr};

    // Every trip executes the header.
    LD += getSize(LoopHeader);

    // Path from the header down to CurMBB (nothing to add if CurMBB is it).
    if (CurMBB != LoopHeader)
      LD += getShortestPath(LoopHeader, CurMBB);

    // Choose the exiting block: CurMBB itself if it exits, otherwise the
    // nearest exiting block reachable from CurMBB.
    if (CurLoop->isLoopExiting(CurMBB))
      LD.MBB = CurMBB;
    else
      LD = calcShortestDistanceToExit(CurMBB, CurLoop) + LD.Distance;

    // Add CurMBB's own size unless it was already counted as the header,
    // or will be counted below as the chosen exit block.
    if (CurMBB != LoopHeader && CurMBB != LD.MBB)
      LD += getSize(CurMBB);

    // Add the exit block's size unless it is the (already counted) header.
    if (LD.MBB != LoopHeader)
      LD += getSize(LD.MBB);

    return LD;
  }
1173
  // Return the shortest distance through a loop (header to latch) that goes
  // through CurMBB.
  //
  // Walks outward from CurMBB's innermost loop one nesting level at a time,
  // embedding each finished (loop-weight scaled) inner trip into its parent's
  // trip, until OuterLoop itself has been traversed.
  MBBDistPair calcShortestDistanceThroughLoop(const MachineBasicBlock *CurMBB,
                                              MachineLoop *OuterLoop) const {
    MachineLoop *CurLoop = MLI->getLoopFor(CurMBB);
    MBBDistPair CurLD =
        calcShortestDistanceThroughInnermostLoop(CurMBB, CurLoop);

    MachineBasicBlock *CurHdr = CurLoop->getHeader();
    for (;;) {
      if (OuterLoop == CurLoop)
        return CurLD;

      MachineLoop *ParentLoop = CurLoop->getParentLoop();
      MachineBasicBlock *ParentHdr = ParentLoop->getHeader();

      // Parent trip = parent header + path down to the inner loop's header
      // + the weighted inner trip + path out to the parent's nearest exit
      // (from where the inner trip ended) + that exit block's size.
      MBBDistPair LD{0, nullptr};
      LD += getSize(ParentHdr);
      LD += getShortestPath(ParentHdr, CurHdr);
      LD += CurLD.Distance.applyLoopWeight();
      LD = calcShortestDistanceToExit(CurLD.MBB, ParentLoop) + LD.Distance;
      LD += getSize(LD.MBB);
      CurLD = LD;
      CurLoop = ParentLoop;
      CurHdr = ParentHdr;
    }
    llvm_unreachable("CurMBB not contained in OuterLoop");
  }
1202
1203 // Similar to calcShortestDistanceThroughLoop with LoopWeight applied to the
1204 // returned distance.
1205 MBBDistPair
1206 calcWeightedDistanceThroughLoopViaMBB(const MachineBasicBlock *CurMBB,
1207 MachineLoop *CurLoop) const {
1208 MBBDistPair LD = calcShortestDistanceThroughLoop(CurMBB, CurLoop);
1209 LD.Distance = LD.Distance.applyLoopWeight();
1210 return LD;
1211 }
1212
  // Return the weighted, shortest distance through a loop (header to latch).
  // If ParentLoop is provided, use it to adjust the loop depth.
  MBBDistPair calcWeightedDistanceThroughLoop(
      const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
      const MachineLoop *ParentLoop = nullptr) const {
    // Multi-block loops take the general path-based route.
    if (CurLoop->getNumBlocks() != 1)
      return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop);

    // Single-block loop: the trip is just the block, weighted by its loop
    // depth (made relative to ParentLoop's depth when one is given).
    unsigned LoopDepth = MLI->getLoopDepth(CurMBB);
    if (ParentLoop)
      LoopDepth -= ParentLoop->getLoopDepth();

    return {NextUseDistance::fromSize(getSize(CurMBB), LoopDepth),
            CurLoop->getLoopLatch()};
  }
1228
1229 // Calculate total distance from exit point to use instruction
1230 NextUseDistance appendDistanceToUse(const MBBDistPair &Exit,
1231 const MachineInstr *UseMI,
1232 const MachineBasicBlock *UseMBB) const {
1233 return Exit.Distance + getShortestPath(Exit.MBB, UseMBB) +
1234 getHeadLen(UseMI);
1235 }
1236
1237 // Return the weighted, shortest distance through the CurLoop which is a
1238 // sub-loop of UseLoop.
1239 MBBDistPair calcDistanceThroughSubLoopUse(const MachineBasicBlock *CurMBB,
1240 MachineLoop *CurLoop,
1241 MachineLoop *UseLoop) const {
1242 // All the sub-loops of the UseLoop will be executed before the use.
1243 // Hence, we should take this into consideration in distance calculation.
1244 MachineLoop *UseLoopSubLoop = findChildLoop(UseLoop, CurLoop);
1245 assert(UseLoopSubLoop && "CurLoop should be nested in UseLoop");
1246 return calcWeightedDistanceThroughLoop(CurMBB, UseLoopSubLoop, UseLoop);
1247 }
1248
1249 // Similar to calcDistanceThroughSubLoopUse, adding the distance to 'UseMI'.
1250 NextUseDistance calcDistanceThroughSubLoopToUseMI(
1251 const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
1252 const MachineInstr *UseMI, const MachineBasicBlock *UseMBB,
1253 MachineLoop *UseLoop) const {
1254 return appendDistanceToUse(
1255 calcDistanceThroughSubLoopUse(CurMBB, CurLoop, UseLoop), UseMI, UseMBB);
1256 }
1257
  // Return the weighted distance through a loop to an outside use loop.
  // Differentiates between uses inside or outside of the current loop nest.
  MBBDistPair calcDistanceThroughLoopToOutsideLoopUse(
      const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
      const MachineBasicBlock *UseMBB, MachineLoop *UseLoop) const {
    assert(!CurLoop->contains(UseLoop));

    // A stand-alone loop: just its own weighted trip via CurMBB.
    if (isStandAloneLoop(CurLoop))
      return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop);

    MachineLoop *OutermostLoop = CurLoop->getOutermostLoop();
    if (!OutermostLoop->contains(UseLoop)) {
      // We should take into consideration the whole loop nest in the
      // calculation of the distance because we will reach the use after
      // executing the whole loop nest.

      // ... But make sure that we pick a route that goes through CurMBB
      return calcWeightedDistanceThroughLoopViaMBB(CurMBB, OutermostLoop);
    }

    // At this point we know that CurLoop and UseLoop are independent and they
    // are in the same loop nest.

    // Use is at the same or deeper depth: only CurLoop's own trip matters.
    if (MLI->getLoopDepth(CurMBB) <= MLI->getLoopDepth(UseMBB))
      return calcWeightedDistanceThroughLoop(CurMBB, CurLoop);

    // Use is shallower: walk up CurLoop's parents until the depths match
    // (or we hit the outermost loop) and measure through that ancestor.
    assert(CurLoop != OutermostLoop && "The loop cannot be the outermost.");
    const unsigned UseLoopDepth = MLI->getLoopDepth(UseMBB);
    for (;;) {
      if (CurLoop->getLoopDepth() == UseLoopDepth)
        break;
      CurLoop = CurLoop->getParentLoop();
      if (CurLoop == OutermostLoop)
        break;
    }
    return calcWeightedDistanceThroughLoop(CurMBB, CurLoop);
  }
1295
1296 // Similar to calcDistanceThroughLoopToOutsideLoopUse but adds the distance to
1297 // an instruction in the loop.
1298 NextUseDistance calcDistanceThroughLoopToOutsideLoopUseMI(
1299 const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
1300 const MachineInstr *UseMI, const MachineBasicBlock *UseMBB,
1301 MachineLoop *UseLoop) const {
1302 return appendDistanceToUse(calcDistanceThroughLoopToOutsideLoopUse(
1303 CurMBB, CurLoop, UseMBB, UseLoop),
1304 UseMI, UseMBB);
1305 }
1306
1307 // Return true if 'MO' is covered by 'LaneMask'
1308 bool machineOperandCoveredBy(const MachineOperand &MO,
1309 LaneBitmask LaneMask) const {
1310 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
1311 return (Mask & LaneMask) == Mask;
1312 }
1313
1314 // Returns true iff uses of LiveReg/LiveLaneMask in PHI UseMI are coming from
1315 // a backedge when starting at CurMI.
1316 bool isIncomingValFromBackedge(Register LiveReg, LaneBitmask LiveLaneMask,
1317 const MachineInstr *CurMI,
1318 const MachineInstr *UseMI) const {
1319 if (!UseMI->isPHI())
1320 return false;
1321
1322 MachineLoop *CurLoop = MLI->getLoopFor(CurMI->getParent());
1323 MachineLoop *UseLoop = MLI->getLoopFor(UseMI->getParent());
1324
1325 // Not a backedge if ...
1326 // A: not in a loop at all
1327 // B: or CurMI is in a loop outside of UseLoop
1328 // C: or UseMI is not in the UseLoop header
1329 if (/*A:*/ !UseLoop ||
1330 /*B:*/ (CurLoop && !UseLoop->contains(CurLoop)) ||
1331 /*C:*/ UseMI->getParent() != UseLoop->getHeader())
1332 return false;
1333
1335 UseLoop->getLoopLatches(Latches);
1336
1337 const unsigned NumOps = UseMI->getNumOperands();
1338 for (unsigned I = 1; I < NumOps; I += 2) {
1339 const MachineOperand &RegMO = UseMI->getOperand(I - 1);
1340 const MachineOperand &MBBMO = UseMI->getOperand(I);
1341 assert(RegMO.isReg() && "Expected register operand of PHI");
1342 assert(MBBMO.isMBB() && "Expected MBB operand of PHI");
1343 if (RegMO.getReg() == LiveReg &&
1344 machineOperandCoveredBy(RegMO, LiveLaneMask)) {
1345 MachineBasicBlock *IncomingBB = MBBMO.getMBB();
1346 if (llvm::is_contained(Latches, IncomingBB))
1347 return true;
1348 }
1349 }
1350 return false;
1351 }
1352
  // Return the distance from 'CurMI' through a parent loop backedge PHI Use
  // ('UseMI').
  //
  // The path modeled is: finish the inner loop (CurLoop) trip, continue to
  // the nearest latch of the enclosing UseLoop, execute that latch, then the
  // leading instructions of UseMI's block up to the use. The result is
  // instruction-invariant (independent of CurMI's position in its block).
  CacheableNextUseDistance calcDistanceViaEnclosingBackedge(
      const MachineInstr *CurMI, const MachineBasicBlock *CurMBB,
      MachineLoop *CurLoop, const MachineInstr *UseMI,
      const MachineBasicBlock *UseMBB, MachineLoop *UseLoop) const {
    assert(UseLoop && "There is no backedge.");
    assert(CurLoop && (UseLoop != CurLoop) && UseLoop->contains(CurLoop) &&
           "Unexpected loop configuration");

    InstrIdTy UseHeadLen = getHeadLen(UseMI);
    MBBDistPair InnerLoopLD =
        calcDistanceThroughSubLoopUse(CurMBB, CurLoop, UseLoop);
    MBBDistPair LD = calcShortestDistanceToLatch(InnerLoopLD.MBB, UseLoop);
    return {InstrInvariant,
            InnerLoopLD.Distance + LD.Distance + getSize(LD.MBB) + UseHeadLen};
  }
1370
  // Distance from CurMI to UseMI when both are in the same basic block but
  // the use is only reached on the next loop iteration (via the backedge):
  // tail of CurMI's block + path to the latch + latch + header + path back
  // down to the block + head of the block up to UseMI. Unweighted paths are
  // used since no loop boundary is crossed.
  // NOTE(review): the original header comment called this an "optimized
  // version of calcBackedgeDistance" — presumably of a more general helper
  // that is not visible here; confirm against the full file.
  NextUseDistance calcBackedgeDistance(const MachineInstr *CurMI,
                                       const MachineBasicBlock *CurMBB,
                                       MachineLoop *CurLoop,
                                       const MachineInstr *UseMI) const {
    // use is in the next loop iteration
    InstrIdTy CurTailLen = getTailLen(CurMI);
    InstrIdTy UseHeadLen = getHeadLen(UseMI);
    MBBDistPair LD = calcShortestUnweightedDistanceToLatch(CurMBB, CurLoop);
    // The header (and the path from it) is already accounted for when CurMBB
    // is the header itself.
    const MachineBasicBlock *HdrMBB = CurLoop->getHeader();
    NextUseDistance Hdr = CurMBB == HdrMBB ? 0 : getSize(HdrMBB);
    NextUseDistance Dst =
        CurMBB == HdrMBB ? 0 : getShortestUnweightedPath(HdrMBB, CurMBB);

    return CurTailLen + LD.Distance + getSize(LD.MBB) + Hdr + Dst + UseHeadLen;
  }
1388
1389 //----------------------------------------------------------------------------
1390 // Calculate inter-instruction distances
1391 //----------------------------------------------------------------------------
1392private:
  // Calculate the shortest weighted path from MachineInstruction 'FromMI' to
  // 'ToMI'. It is weighted distance in that paths that exit loops are made to
  // look much further away.
  NextUseDistance calcShortestDistance(const MachineInstr *FromMI,
                                       const MachineInstr *ToMI) const {
    const MachineBasicBlock *FromMBB = FromMI->getParent();
    const MachineBasicBlock *ToMBB = ToMI->getParent();

    // Same block: a direct intra-block instruction-count distance suffices.
    if (FromMBB == ToMBB) {
      NextUseDistance RV = getDistance(FromMI, ToMI);
      assert(RV >= 0 && "unexpected negative distance from getDistance");
      return RV;
    }

    // Cross-block: tail of FromMI's block + inter-block path + head of
    // ToMI's block.
    InstrIdTy FromTailLen = getTailLen(FromMI);
    InstrIdTy ToHeadLen = getHeadLen(ToMI);
    NextUseDistance Dst = getShortestPath(FromMBB, ToMBB);
    assert(Dst.isReachable() &&
           "calcShortestDistance called for instructions in non-reachable"
           " basic blocks!");
    NextUseDistance RV = FromTailLen + Dst + ToHeadLen;
    assert(RV >= 0 && "unexpected negative distance");
    return RV;
  }
1417
  // Calculate the shortest unweighted path from MachineInstruction 'FromMI' to
  // 'ToMI'. In contrast with 'calcShortestDistance', distances are based solely
  // on basic block instruction counts and traversing a loop exit does not
  // affect the value.
  NextUseDistance
  calcShortestUnweightedDistance(const MachineInstr *FromMI,
                                 const MachineInstr *ToMI) const {
    const MachineBasicBlock *FromMBB = FromMI->getParent();
    const MachineBasicBlock *ToMBB = ToMI->getParent();

    // Same block: a direct intra-block instruction-count distance suffices.
    if (FromMBB == ToMBB)
      return getDistance(FromMI, ToMI);

    // Cross-block: tail of FromMI's block + unweighted inter-block path +
    // head of ToMI's block.
    InstrIdTy FromTailLen = getTailLen(FromMI);
    InstrIdTy ToHeadLen = getHeadLen(ToMI);
    NextUseDistance Dst = getShortestUnweightedPath(FromMBB, ToMBB);
    assert(Dst.isReachable() &&
           "calcShortestUnweightedDistance called for instructions in"
           " non-reachable basic blocks!");
    return FromTailLen + Dst + ToHeadLen;
  }
1439
1440 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1441 // calcDistanceToUse* - various flavors of calculating the distance from an
1442 // instruction 'CurMI' to the use of a live [sub]register.
1443 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1444private:
1445 // Return the distance from 'CurMI' to a live [sub]register use ('UseMI').
1446 //
1447 // Cfg flags controlling behavior:
1448 // PreciseUseModeling — rewrite PHI uses to their incoming edge block;
1449 // also selects unweighted cross-block distance
1450 // PromoteToPreheader — route loop-entry / inner-loop uses to the preheader
1452 calcDistanceToUse(Register LiveReg, LaneBitmask LiveLaneMask,
1453 const MachineInstr &CurMI,
1454 const MachineOperand *UseMO) const {
1455 const MachineInstr *UseMI = UseMO->getParent();
1456 const MachineBasicBlock *CurMBB = CurMI.getParent();
1457 const MachineBasicBlock *UseMBB = UseMI->getParent();
1458 MachineLoop *CurLoop = MLI->getLoopFor(CurMBB);
1459 MachineLoop *UseLoop = MLI->getLoopFor(UseMBB);
1460
1461 if (Cfg.PreciseUseModeling) {
1462 // Map PHI use to the end of its incoming edge block.
1463 if (auto *PhiUseEdge = getIncomingBlockIfPhiUse(UseMI, UseMO)) {
1464 UseMI = &PhiUseEdge->back();
1465 UseMBB = PhiUseEdge;
1466 UseLoop = MLI->getLoopFor(PhiUseEdge);
1467 }
1468 }
1469
1470 enum class LoopConfig {
1471 NoCur,
1472 Same,
1473 CurContainsUse,
1474 UseContainsCur,
1475 Siblings,
1476 Unrelated
1477 };
1478 auto [LpCfg, PreHdr, CommonParent] = [&]()
1479 -> std::tuple<LoopConfig, const MachineBasicBlock *, MachineLoop *> {
1480 if (!CurLoop) {
1481 return {LoopConfig::NoCur, getOutermostPreheader(UseLoop), nullptr};
1482 }
1483 if (CurLoop->contains(UseLoop)) {
1484 return {CurMBB == UseMBB ? LoopConfig::Same
1485 : LoopConfig::CurContainsUse,
1486 findChildPreheader(CurLoop, UseLoop), nullptr};
1487 }
1488
1489 if (MachineLoop *P = findCommonParent(UseLoop, CurLoop).first) {
1490 if (P != UseLoop)
1491 return {LoopConfig::Siblings, findChildPreheader(P, UseLoop), P};
1492 return {LoopConfig::UseContainsCur, nullptr, nullptr};
1493 }
1494 return {LoopConfig::Unrelated, getOutermostPreheader(UseLoop), nullptr};
1495 }();
1496
1497 //--------------------------------------------------------------------------
1498 // Don't PromoteToPreheader
1499 //--------------------------------------------------------------------------
1500 if (!Cfg.PromoteToPreheader) {
1501 switch (LpCfg) {
1502 case LoopConfig::NoCur:
1503 case LoopConfig::Same:
1504 case LoopConfig::CurContainsUse:
1505 return {InstrRelative, calcShortestDistance(&CurMI, UseMI)};
1506
1507 case LoopConfig::UseContainsCur: {
1508 if (isIncomingValFromBackedge(LiveReg, LiveLaneMask, &CurMI, UseMI)) {
1509 return calcDistanceViaEnclosingBackedge(&CurMI, CurMBB, CurLoop,
1510 UseMI, UseMBB, UseLoop);
1511 }
1512
1513 return {InstrInvariant, calcDistanceThroughSubLoopToUseMI(
1514 CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
1515 }
1516 case LoopConfig::Siblings:
1517 case LoopConfig::Unrelated:
1518 return {InstrInvariant, calcDistanceThroughLoopToOutsideLoopUseMI(
1519 CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
1520 }
1521 llvm_unreachable("unexpected loop configuration!");
1522 }
1523
1524 //--------------------------------------------------------------------------
1525 // PromoteToPreheader
1526 //--------------------------------------------------------------------------
1527 if (PreHdr) {
1528 UseMI = &PreHdr->back();
1529 UseMBB = PreHdr;
1530 UseLoop = CommonParent;
1531 }
1532
1533 switch (LpCfg) {
1534 case LoopConfig::NoCur:
1535 return {InstrRelative, calcShortestUnweightedDistance(&CurMI, UseMI) -
1536 (sizeOf(*UseMI) ? 0 : 1)};
1537
1538 case LoopConfig::Same:
1539 case LoopConfig::CurContainsUse:
1540 if (CurMBB == UseMBB && !instrsAreInOrder(&CurMI, UseMI))
1541 return {InstrRelative,
1542 calcBackedgeDistance(&CurMI, CurMBB, CurLoop, UseMI)};
1543
1544 return {InstrRelative, calcShortestUnweightedDistance(&CurMI, UseMI)};
1545
1546 case LoopConfig::UseContainsCur:
1547 case LoopConfig::Siblings:
1548 return {InstrInvariant, calcDistanceThroughSubLoopToUseMI(
1549 CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
1550
1551 case LoopConfig::Unrelated:
1552 return {InstrInvariant, calcDistanceThroughLoopToOutsideLoopUseMI(
1553 CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
1554 }
1555 llvm_unreachable("unexpected loop configuration!");
1556 }
1557
1558 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1559 // getUses helpers (compute mode)
1560 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1561private:
1562 // Returns true if Use is reachable from MI. Handles backedges and intervening
1563 // defs.
  // Returns true if Use is reachable from MI. Handles backedges and intervening
  // defs.
  bool isUseReachablePrecise(const MachineInstr &MI,
                             const MachineBasicBlock *MBB,
                             const MachineOperand *UseMO,
                             const MachineInstr *UseMI,
                             const MachineBasicBlock *UseMBB) const {

    // Filter out uses that are clearly unreachable
    if (MBB != UseMBB && !isReachable(MBB, UseMBB))
      return false;

    // PHI uses are considered part of the incoming BB. Check for reachability
    // at the edge.
    if (auto *PhiUseEdge = getIncomingBlockIfPhiUse(UseMI, UseMO)) {
      if (!isReachableOrSame(MBB, PhiUseEdge))
        return false;
    }

    // Filter out uses with an intermediate def.
    const MachineInstr *DefMI = MRI->getUniqueVRegDef(UseMO->getReg());
    const MachineBasicBlock *DefMBB = DefMI->getParent();
    if (MBB == UseMBB) {
      // A PHI use defined in the same block is reachable around the backedge.
      if (UseMI->isPHI() && MBB == DefMBB)
        return true;

      // The use is ahead of MI in straight-line order.
      if (instrsAreInOrder(&MI, UseMI))
        return true;

      // A Def in the loop means that the value at MI will not survive through
      // to this use.
      MachineLoop *UseLoop = MLI->getLoopFor(UseMBB);
      return UseLoop && !UseLoop->contains(DefMBB);
    }

    // MI sits in the def's block: reachable only if the def precedes MI.
    if (MBB == DefMBB)
      return instrsAreInOrder(DefMI, &MI);

    // Outside any loop there is no way back through the def.
    MachineLoop *Loop = MLI->getLoopFor(MBB);
    if (!Loop)
      return true;

    // Inside a loop nest: unreachable only when the def lies in the nest on
    // a path from MI and the use can loop back around to MI.
    MachineLoop *TopLoop = Loop->getOutermostLoop();
    return !TopLoop->contains(DefMBB) || !isReachable(MBB, DefMBB) ||
           !isForwardReachable(UseMBB, MBB);
  }
1608
1609 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1610 // Debug/Developer Helpers
1611 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1612private:
1613 /// Goes over all MBB pairs in \p MF, calculates the shortest path between
1614 /// them.
1615 void populatePathTable() {
1616 for (const MachineBasicBlock &MBB1 : *MF) {
1617 for (const MachineBasicBlock &MBB2 : *MF) {
1618 if (&MBB1 == &MBB2)
1619 continue;
1620 getShortestPath(&MBB1, &MBB2);
1621 }
1622 }
1623 }
1624
1625 void printPaths(raw_ostream &OS) const {
1626 OS << "\n---------------- Paths --------------- {\n";
1627 for (const auto &[P, PI] : Paths) {
1628 OS << " " << printMBBReference(*P.src()) << " -> "
1629 << printMBBReference(*P.dst()) << ": ";
1630 PI.print(OS);
1631 OS << '\n';
1632 }
1633 OS << "}\n";
1634 }
1635
  // Dump all cached path entries to the debug stream.
  LLVM_DUMP_METHOD void dumpPaths() const { printPaths(dbgs()); }
1637
  // Legacy alias kept for existing call sites.
  // Unlike dumpPaths, prints only the memoized ShortestDistance per pair,
  // showing -1 for entries that have not been computed.
  void dumpShortestPaths() const {
    for (const auto &P : Paths) {
      const MachineBasicBlock *From = P.first.src();
      const MachineBasicBlock *To = P.first.dst();
      std::optional<NextUseDistance> Dist = P.second.ShortestDistance;
      dbgs() << "From: " << printMBBReference(*From)
             << "-> To:" << printMBBReference(*To) << " = "
             << Dist.value_or(-1).fmt() << "\n";
    }
  }
1649
1650 void printInterBlockDistances(raw_ostream &OS) const {
1651 using MBBPair = std::pair<unsigned, unsigned>;
1652 using Elem = std::pair<NextUseDistance, MBBPair>;
1653 std::vector<Elem> SortedDistances;
1654
1655 for (const auto &[FromNum, Dsts] : InterBlockDistances) {
1656 for (const auto &[ToNum, Dist] : Dsts) {
1657 SortedDistances.emplace_back(Dist.Weighted, MBBPair(FromNum, ToNum));
1658 }
1659 }
1660 llvm::sort(SortedDistances, [](const auto &A, const auto &B) {
1661 if (A.first != B.first)
1662 return A.first < B.first;
1663
1664 if (A.second.first != B.second.first)
1665 return A.second.first < B.second.first;
1666
1667 return A.second.second < B.second.second;
1668 });
1669
1670 OS << "\n--------- InterBlockDistances -------- {\n";
1671 for (const Elem &E : SortedDistances) {
1672
1673 OS << " bb." << E.second.first << " -> bb." << E.second.second << ": ";
1674 E.first.print(OS);
1675 OS << '\n';
1676 }
1677 OS << "}\n";
1678 }
1679
  // Dump the sorted inter-block distance table to the debug stream.
  LLVM_DUMP_METHOD void dumpInterBlockDistances() const {
    printInterBlockDistances(dbgs());
  }
1683
1684 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1685 // LiveRegUse Caching - A cache of the distances for the last
1686 // MachineInstruction. When getting the distances for a MachineInstruction, if
1687 // it is the same basic block as the cached instruction, we can generally use
1688 // an offset from the cached values to compute the distances. There are some
1689 // exceptions - see 'cacheLiveRegUse'.
1690 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1691private:
1692 struct LiveRegToUseMapElem {
1693 LiveRegUse Use;
1694 bool MIDependent;
1695 LiveRegToUseMapElem() : Use(), MIDependent(false) {}
1696 LiveRegToUseMapElem(LiveRegUse U, bool MIDep)
1697 : Use(U), MIDependent(MIDep) {}
1698
1699 void print(raw_ostream &OS) const {
1700 Use.print(OS);
1701 OS << (MIDependent ? " [mi-dep]" : " [mi-indep]");
1702 }
1703
1704 LLVM_DUMP_METHOD void dump() const {
1705 print(dbgs());
1706 dbgs() << '\n';
1707 }
1708 };
1709
  // Using std::map because LaneBitmask does not work out-of-the-box as a
  // DenseMap key and I did not see a performance benefit over std::map.
  using LaneBitmaskToUseMap = std::map<LaneBitmask, LiveRegToUseMapElem>;
  using LiveRegToUseMap = DenseMap<Register, LaneBitmaskToUseMap>;

  // Instruction at which the entries in CachedDistances were computed.
  const MachineInstr *CachedDistancesMI = nullptr;
  // Distances valid at CachedDistancesMI.
  LiveRegToUseMap CachedDistances;
  // Entries staged for the next instruction; promoted by
  // updateCachedLiveRegUses().
  LiveRegToUseMap PendingCachedDistances;
  // Hit/miss counters, used for debug output only.
  unsigned DistanceCacheHits = 0;
  unsigned DistanceCacheMisses = 0;
1720
  // Drop all cached distances and reset the hit/miss statistics.
  void resetDistanceCache() {
    CachedDistancesMI = nullptr;
    CachedDistances.clear();
    DistanceCacheHits = 0;
    DistanceCacheMisses = 0;
  }
1727
1728 void maybeClearCachedLiveRegUses(const MachineInstr &MI) {
1729 if (CachedDistancesMI &&
1730 (CachedDistancesMI->getParent() != MI.getParent() ||
1731 !instrsAreInOrder(CachedDistancesMI, &MI))) {
1732 CachedDistancesMI = nullptr;
1733 CachedDistances.clear();
1734 }
1735 }
1736
  // Decide whether a cached entry may be offset by 'LastDelta' (the distance
  // advanced since the cache was built) and reused at 'MI'.
  bool okToUseCacheElem(const LiveRegToUseMapElem &CacheElem,
                        const MachineInstr &MI, const InstrIdTy LastDelta) {
    // Instruction-independent distances are always valid.
    if (!CacheElem.MIDependent)
      return true;

    const LiveRegUse &U = CacheElem.Use;

    // Never okay to produce a negative distance
    if (U.Dist < LastDelta)
      return false;

    const MachineInstr *UseMI = U.Use->getParent();

    // Always okay if use is in another basic block or UseMI is MI
    if (UseMI->getParent() != MI.getParent() || UseMI == &MI)
      return true;

    // If CachedDistancesMI <= Use < MI we could have a problem since we don't
    // know if Use is still reachable.
    return !instrsAreInOrder(CachedDistancesMI, UseMI) ||
           !instrsAreInOrder(UseMI, &MI);
  }
1759
  // Look up a cached nearest-use entry for (Reg, LaneMask) that is still
  // valid at 'MI'. Returns {reg's mask map, entry} on a hit and
  // {nullptr, nullptr} otherwise. Updates the hit/miss counters.
  std::pair<const LaneBitmaskToUseMap *, const LiveRegToUseMapElem *>
  findCachedLiveRegUse(Register Reg, LaneBitmask LaneMask,
                       const MachineInstr &MI, const InstrIdTy LastDelta) {
    if (!DistanceCacheEnabled)
      return {nullptr, nullptr};

    ++DistanceCacheMisses; // Assume miss
    auto I = CachedDistances.find(Reg);
    if (I == CachedDistances.end())
      return {nullptr, nullptr};
    const LaneBitmaskToUseMap &RegSlot = I->second;
    if (RegSlot.empty())
      return {nullptr, nullptr};

    auto J = RegSlot.find(LaneMask);
    if (J == RegSlot.end())
      return {nullptr, nullptr};

    // Entry found, but it may have been invalidated by code between the
    // cached instruction and MI.
    const LiveRegToUseMapElem &MaskSlot = J->second;
    if (!okToUseCacheElem(MaskSlot, MI, LastDelta))
      return {nullptr, nullptr};

    // Confirmed hit: undo the pessimistic miss count.
    --DistanceCacheMisses;
    ++DistanceCacheHits;
    return {&RegSlot, &MaskSlot};
  }
1786
1787 void cacheLiveRegUse(const MachineInstr &MI, Register Reg, LaneBitmask Mask,
1788 LiveRegUse U, bool MIDependent) {
1789 if (!DistanceCacheEnabled)
1790 return;
1791
1792 auto I = PendingCachedDistances.try_emplace(Reg).first;
1793 LaneBitmaskToUseMap &RegSlot = I->second;
1794 RegSlot.try_emplace(Mask, U, MIDependent);
1795 }
1796
  // Promote the pending entries so they describe the distances at 'MI'.
  void updateCachedLiveRegUses(const MachineInstr &MI) {
    if (!DistanceCacheEnabled)
      return;

    CachedDistancesMI = &MI;
    CachedDistances = std::move(PendingCachedDistances);
    // Leave the moved-from map in a known-empty state for the next round.
    PendingCachedDistances.clear();
    LLVM_DEBUG(dumpDistanceCache());
  }
1806
  // Print the cache anchor instruction and every cached (reg, lane-mask)
  // entry, one per line, followed by hit/miss statistics.
  void printDistanceCache(raw_ostream &OS) const {
    OS << "\n----------- Distance Cache ----------- {\n";
    OS << "  CachedAt: ";
    if (CachedDistancesMI)
      OS << *CachedDistancesMI;
    else
      OS << "<none>\n";

    constexpr size_t RegNameWidth = 20;
    for (const auto &[Reg, ByMask] : CachedDistances) {
      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
      LaneBitmask AllLanes = MRI->getMaxLaneMaskForVReg(Reg);

      for (const auto &[Mask, Elem] : ByMask) {
        // Render the register label: whole register, a single sub-register
        // index when the mask maps to exactly one, or a raw mask otherwise.
        std::string RegName;
        raw_string_ostream KOS(RegName);
        if (Mask == AllLanes) {
          KOS << printReg(Reg);
        } else {
          SmallVector<unsigned> Indexes;
          TRI->getCoveringSubRegIndexes(RC, Mask, Indexes);
          if (Indexes.size() == 1)
            KOS << printReg(Reg, TRI, Indexes.front(), MRI);
          else
            KOS << printReg(Reg) << " mask=" << Mask.getAsInteger();
        }
        OS << "  " << left_justify(RegName, RegNameWidth) << " : ";
        Elem.print(OS);
        OS << '\n';
      }
    }
    OS << " (hits=" << DistanceCacheHits << " misses=" << DistanceCacheMisses
       << ")\n";
    OS << "}\n";
  }
1842
  // Dump the distance cache contents to the debug stream.
  LLVM_DUMP_METHOD void dumpDistanceCache() const {
    printDistanceCache(dbgs());
  }
1846
1847 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1848 // Processing Live Reg Uses
1849 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1850private:
1851 // Decompose each use in 'Uses' by sub-reg and store the nearest one in
1852 // 'UseByMask'. Ignores subregs matching 'LiveRegLaneMask' - these are handled
1853 // as registers, not sub-regs.
1854 DenseMap<const TargetRegisterClass *, SmallVector<unsigned>>
1855 SubRegIndexesForRegClass;
1856 void collectSubRegUsesByMask(
1857 const SmallVectorImpl<const MachineOperand *> &Uses,
1858 const SmallVectorImpl<CacheableNextUseDistance> &Distances,
1859 LaneBitmask LiveRegLaneMask, LaneBitmaskToUseMap &UseByMask) {
1860
1861 assert(Uses.size());
1862 assert(Uses.size() == Distances.size());
1863
1864 const TargetRegisterClass *RC = MRI->getRegClass(Uses.front()->getReg());
1865 auto [SRI, Inserted] = SubRegIndexesForRegClass.try_emplace(RC);
1866 if (Inserted)
1867 TRI->getCoveringSubRegIndexes(RC, LaneBitmask::getAll(), SRI->second);
1868 const SmallVector<unsigned> &RCSubRegIndexes = SRI->second;
1869
1870 unsigned OneIndex; // Backing store for 'Indexes' below when 1 index
1871 for (size_t I = 0; I < Uses.size(); ++I) {
1872 const MachineOperand *MO = Uses[I];
1873 auto [SubRegMIDep, Dist] = Distances[I];
1874 const LiveRegUse LRU{MO, Dist};
1875
1876 ArrayRef<unsigned> Indexes;
1877 if (MO->getSubReg()) {
1878 OneIndex = MO->getSubReg();
1879 Indexes = ArrayRef(OneIndex);
1880 } else {
1881 Indexes = RCSubRegIndexes;
1882 }
1883
1884 for (unsigned Idx : Indexes) {
1885 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(Idx);
1886 if (Mask.all() || Mask == LiveRegLaneMask)
1887 continue;
1888
1889 auto &[SlotU, SlotMIDep] = UseByMask[Mask];
1890 if (updateClosest(SlotU, LRU))
1891 SlotMIDep = SubRegMIDep;
1892 }
1893 }
1894 }
1895
1896 // Similar to 'collectSubRegUsesByMask' above, but uses cached distances.
1897 void collectSubRegUsesByMaskFromCache(const LaneBitmaskToUseMap &CachedMap,
1898 LaneBitmask LiveRegLaneMask,
1899 const MachineInstr *MI,
1900 InstrIdTy LastDelta,
1901 LaneBitmaskToUseMap &UseByMask) {
1902
1903 for (const auto &KV : CachedMap) {
1904 LaneBitmask SubregLaneMask = KV.first;
1905 if (SubregLaneMask.all() || SubregLaneMask == LiveRegLaneMask)
1906 continue;
1907
1908 const LiveRegToUseMapElem &SubregE = KV.second;
1909 if (!okToUseCacheElem(SubregE, *MI, LastDelta))
1910 continue;
1911
1912 const bool MIDep = SubregE.MIDependent;
1913 LiveRegUse U = SubregE.Use;
1914 if (MIDep)
1915 U.Dist -= LastDelta;
1916
1917 auto &[SlotU, SlotMIDep] = UseByMask[SubregLaneMask];
1918 if (updateClosest(SlotU, U))
1919 SlotMIDep = MIDep;
1920 }
1921 }
1922
1923 // Loops through 'UseByMask' finding the furthest sub-register and updating
1924 // 'FurthestSubreg' accordingly.
1925 void updateFurthestSubReg(
1926 const MachineInstr &MI, const LiveRegUse &U,
1927 const LaneBitmaskToUseMap &UseByMask,
1928 DenseMap<const MachineOperand *, UseDistancePair> *RelevantUses,
1929 LiveRegUse &FurthestSubreg) {
1930
1931 if (UseByMask.empty()) {
1932 updateFurthest(FurthestSubreg, U);
1933 return;
1934 }
1935
1936 for (const auto &KV : UseByMask) {
1937 const LiveRegUse &SubregU = KV.second.Use;
1938 const bool SubregMIDep = KV.second.MIDependent;
1939
1940 if (RelevantUses)
1941 RelevantUses->try_emplace(SubregU.Use, SubregU);
1942 cacheLiveRegUse(MI, SubregU.Use->getReg(), KV.first, SubregU,
1943 SubregMIDep);
1944 updateFurthest(FurthestSubreg, SubregU);
1945 }
1946 }
1947
1948 // Used to populate 'MIDefs' to be passed to 'getNextUseDistances'.
1949 SmallSet<Register, 4> collectDefinedRegisters(const MachineInstr &MI) const {
1950 SmallSet<Register, 4> MIDefs;
1951
1952 for (const MachineOperand &MO : MI.all_defs()) {
1953 if (MO.isReg() && MO.getReg().isValid() && hasAtLeastOneUse(MO.getReg()))
1954 MIDefs.insert(MO.getReg());
1955 }
1956 return MIDefs;
1957 }
1958
1959 // Computes the distance from 'MI' to each register in 'LiveRegs'. Returns
1960 // the furthest register and (optionally) sub-register in 'Furthest' and
1961 // 'FurthestSubreg' respectively.
1962public:
1964 const MachineInstr &MI, LiveRegUse &Furthest,
1965 LiveRegUse *FurthestSubreg = nullptr,
1967 *RelevantUses = nullptr) {
1968 const SmallSet<Register, 4> MIDefs(collectDefinedRegisters(MI));
1969
1972 LaneBitmaskToUseMap UseByMask;
1973
1974 maybeClearCachedLiveRegUses(MI);
1975 const InstrIdTy LastDelta =
1976 CachedDistancesMI ? getDistance(CachedDistancesMI, &MI) : 0;
1977
1978 for (auto &KV : LiveRegs) {
1979 const Register Reg = KV.first;
1980 const LaneBitmask LaneMask = KV.second;
1981
1982 if (MIDefs.contains(Reg))
1983 continue;
1984
1985 Uses.clear();
1986 UseByMask.clear();
1987
1988 LiveRegUse U;
1989 bool MIDependent = false;
1990 auto [CacheMap, CacheElem] =
1991 findCachedLiveRegUse(Reg, LaneMask, MI, LastDelta);
1992 if (CacheMap && CacheElem) {
1993 MIDependent = CacheElem->MIDependent;
1994 U = CacheElem->Use;
1995 if (MIDependent)
1996 U.Dist -= LastDelta;
1997 } else {
1998 getReachableUses(Reg, LaneMask, MI, Uses);
1999 if (Uses.empty())
2000 continue;
2001
2002 const MachineOperand *NextUse = nullptr;
2004 Reg, LaneMask, MI, Uses, &NextUse, &MIDependent, &Distances);
2005 U = LiveRegUse{NextUse, Dist};
2006 }
2007
2008 if (RelevantUses)
2009 RelevantUses->try_emplace(U.Use, U);
2010 cacheLiveRegUse(MI, Reg, LaneMask, U, MIDependent);
2011
2012 updateFurthest(Furthest, U);
2013
2014 if (!FurthestSubreg)
2015 continue;
2016
2017 if (CacheMap) {
2018 collectSubRegUsesByMaskFromCache(*CacheMap, LaneMask, &MI, LastDelta,
2019 UseByMask);
2020 } else {
2021 collectSubRegUsesByMask(Uses, Distances, LaneMask, UseByMask);
2022 }
2023 updateFurthestSubReg(MI, U, UseByMask, RelevantUses, *FurthestSubreg);
2024 }
2025 updateCachedLiveRegUses(MI);
2026 }
2027
2028 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2029 // Helper methods for printAsJson
2030 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2031private:
// Render an unsigned id with printf-style "%u" for the JSON print helpers.
2032 static format_object<unsigned> Fmt(unsigned Id) { return format("%u", Id); }
2033
2034public:
2036 J.attribute("id", getInstrId(&MI));
2037 J.attribute("head-len", getHeadLen(&MI));
2038 J.attribute("tail-len", getTailLen(&MI));
2039 }
2040
2042 J.attributeBegin("paths");
2043 J.arrayBegin();
2044 for (const auto &KV : Paths) {
2045 const Path &P = KV.first;
2046 const PathInfo &PI = KV.second;
2047
2048 J.objectBegin();
2049
2050 printMBBNameAttr(J, "src", *P.src(), MST);
2051 printMBBNameAttr(J, "dst", *P.dst(), MST);
2052
2053 if (PI.ShortestDistance.has_value()) {
2054 J.attribute("shortest-distance",
2055 PI.ShortestDistance.value().toJsonValue());
2056 } else {
2057 J.attribute("shortest-distance", nullptr);
2058 }
2059
2060 if (PI.ShortestUnweightedDistance.has_value()) {
2061 J.attribute("shortest-unweighted-distance",
2062 PI.ShortestUnweightedDistance.value().toJsonValue());
2063 } else {
2064 J.attribute("shortest-unweighted-distance", nullptr);
2065 }
2066
2067 J.attribute("edge-kind", static_cast<int>(PI.EK));
2068 J.attribute("reachable", PI.Reachable);
2069 J.attribute("forward-reachable", PI.ForwardReachable);
2070
2071 J.objectEnd();
2072 }
2073 J.arrayEnd();
2074 J.attributeEnd();
2075 }
2076
2077public:
// Tear down the analysis tables (clearTables) when the impl is destroyed.
2079 ~AMDGPUNextUseAnalysisImpl() { clearTables(); }
2080
2083 Cfg = NewCfg;
2084 clearTables();
2085 initializeTables();
2086 }
2087
// Distance-cache telemetry counters, surfaced in printDistanceCache()
// output and in the JSON "metrics" section.
2088 unsigned getDistanceCacheHits() const { return DistanceCacheHits; }
2089 unsigned getDistanceCacheMisses() const { return DistanceCacheMisses; }
2090
2091 void getReachableUses(Register LiveReg, LaneBitmask LaneMask,
2092 const MachineInstr &MI,
2094
2095 /// \Returns the shortest next-use distance for \p LiveReg.
2097 getShortestDistance(Register LiveReg, LaneBitmask LaneMask,
2098 const MachineInstr &FromMI,
2100 const MachineOperand **ShortestUseOut, bool *MIDependent,
2101 SmallVector<CacheableNextUseDistance> *Distances) const;
2102
2106 return getShortestDistance(LiveReg, LaneBitmask::getAll(), FromMI, Uses,
2107 nullptr, nullptr, nullptr);
2108 }
2109};
2110
2112 const MachineFunction *MF, const MachineLoopInfo *ML) {
2113
2114 this->MF = MF;
2115 this->MLI = ML;
2116
2117 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2118 TII = ST.getInstrInfo();
2119 TRI = &TII->getRegisterInfo();
2120 MRI = &MF->getRegInfo();
2121
2122 // FIXME: Hopefully we will soon converge on a single way of calculating
2123 // next-use distance and remove these presets.
2124 if (ConfigPresetOpt == "compute")
2126 else
2128
2129 if (ConfigCountPhisOpt.getNumOccurrences())
2130 Cfg.CountPhis = ConfigCountPhisOpt;
2131 if (ConfigForwardOnlyOpt.getNumOccurrences())
2132 Cfg.ForwardOnly = ConfigForwardOnlyOpt;
2133 if (ConfigPreciseUseModelingOpt.getNumOccurrences())
2134 Cfg.PreciseUseModeling = ConfigPreciseUseModelingOpt;
2135 if (ConfigPromoteToPreheaderOpt.getNumOccurrences())
2136 Cfg.PromoteToPreheader = ConfigPromoteToPreheaderOpt;
2137
2138 initializeTables();
2139}
2140
2142 Register LiveReg, LaneBitmask LaneMask, const MachineInstr &CurMI,
2144 const MachineOperand **ShortestUseOut, bool *CurMIDependentOut,
2145 SmallVector<CacheableNextUseDistance> *Distances) const {
2146
2147 assert(!LiveReg.isPhysical() && !TRI->isAGPR(*MRI, LiveReg) &&
2148 "Next-use distance is calculated for SGPRs and VGPRs");
2149 const MachineOperand *NextUse = nullptr;
2150 auto NextUseDist = NextUseDistance::unreachable();
2151 bool CurMIDependent = false;
2152
2153 if (Distances) {
2154 Distances->clear();
2155 Distances->reserve(Uses.size());
2156 }
2157 for (auto *UseMO : Uses) {
2158 auto [Dep, D] = calcDistanceToUse(LiveReg, LaneMask, CurMI, UseMO);
2159
2160 if (D < NextUseDist) {
2161 NextUseDist = D;
2162 NextUse = UseMO;
2163 CurMIDependent = Dep;
2164 }
2165
2166 if (Distances)
2167 Distances->push_back({Dep, D});
2168 }
2169 if (ShortestUseOut)
2170 *ShortestUseOut = NextUse;
2171 if (CurMIDependentOut)
2172 *CurMIDependentOut = CurMIDependent;
2173
2174 assert(NextUseDist.isReachable() &&
2175 "getShortestDistance called with no reachable uses");
2176 return NextUseDist;
2177}
2178
2180 Register Reg, LaneBitmask LaneMask, const MachineInstr &MI,
2182 const bool CheckMask = LaneMask != LaneBitmask::getAll() &&
2183 LaneMask != MRI->getMaxLaneMaskForVReg(Reg);
2184 const MachineBasicBlock *MBB = MI.getParent();
2185
2186 for (const MachineOperand *UseMO : getRegisterUses(Reg)) {
2187 const MachineInstr *UseMI = UseMO->getParent();
2188 const MachineBasicBlock *UseMBB = UseMI->getParent();
2189
2190 if (CheckMask && !machineOperandCoveredBy(*UseMO, LaneMask))
2191 continue;
2192
2193 bool Reachable;
2194 if (Cfg.PreciseUseModeling)
2195 Reachable = isUseReachablePrecise(MI, MBB, UseMO, UseMI, UseMBB);
2196 else if (MBB == UseMBB)
2197 Reachable = instrsAreInOrder(&MI, UseMI);
2198 else
2199 Reachable = isForwardReachable(MBB, UseMBB);
2200
2201 if (Reachable)
2202 Uses.push_back(UseMO);
2203 }
2204}
2205
2206//==============================================================================
2207// AMDGPUNextUseAnalysis
2208//==============================================================================
2209AMDGPUNextUseAnalysis::AMDGPUNextUseAnalysis(const MachineFunction *MF,
2210 const MachineLoopInfo *MLI) {
2211 Impl = std::make_unique<AMDGPUNextUseAnalysisImpl>(MF, MLI);
2212}
// Move construction transfers ownership of the pimpl; the moved-from
// analysis is left with a null Impl.
2213AMDGPUNextUseAnalysis::AMDGPUNextUseAnalysis(AMDGPUNextUseAnalysis &&Other)
2214 : Impl(std::move(Other.Impl)) {}
2216
2218AMDGPUNextUseAnalysis::operator=(AMDGPUNextUseAnalysis &&Other) {
2219 if (this != &Other)
2220 Impl = std::move(Other.Impl);
2221 return *this;
2222}
2223
2225 return Impl->getConfig();
2226}
2227
// Forward the new configuration to the private implementation.
2228void AMDGPUNextUseAnalysis::setConfig(Config Cfg) { Impl->setConfig(Cfg); }
2229
2230/// \Returns the next-use distance for \p LiveReg.
2232 Register LiveReg, const MachineInstr &FromMI,
2234 const MachineOperand **ShortestUseOut,
2235 SmallVector<NextUseDistance> *DistancesOut) const {
2236
2238 auto Dist = Impl->getShortestDistance(LiveReg, LaneBitmask::getAll(), FromMI,
2239 Uses, ShortestUseOut, nullptr,
2240 DistancesOut ? &Distances : nullptr);
2241 if (DistancesOut) {
2242 for (auto [MIDep, D] : Distances)
2243 DistancesOut->push_back(D);
2244 }
2245 return Dist;
2246}
2247
2250 UseDistancePair &FurthestOut, UseDistancePair *FurthestSubregOut,
2252
2253 LiveRegUse Furthest;
2254 LiveRegUse FurthestSubreg;
2255 Impl->getNextUseDistances(LiveRegs, MI, Furthest,
2256 FurthestSubregOut ? &FurthestSubreg : nullptr,
2257 RelevantUses);
2258 FurthestOut = Furthest;
2259 if (FurthestSubregOut)
2260 *FurthestSubregOut = FurthestSubreg;
2261}
2263 Register LiveReg, LaneBitmask LaneMask, const MachineInstr &MI,
2265 return Impl->getReachableUses(LiveReg, LaneMask, MI, Uses);
2266}
2267
2268//==============================================================================
2269// AMDGPUNextUseAnalysisLegacyPass
2270//==============================================================================
2271
2272//------------------------------------------------------------------------------
2273// Legacy Analysis Pass
2274//------------------------------------------------------------------------------
2278 return "Next Use Analysis";
2279}
2280
2282 MachineFunction &MF) {
2283 const MachineLoopInfo *MLI =
2285 NUA.reset(new AMDGPUNextUseAnalysis(&MF, MLI));
2286 return false;
2287}
2288
2295
2298
2300 "Next Use Analysis", false, true)
2303 "Next Use Analysis", false, true)
2304
2308
2309//------------------------------------------------------------------------------
2310// New Pass Manager Analysis Pass
2311//------------------------------------------------------------------------------
2312AnalysisKey AMDGPUNextUseAnalysisPass::Key;
2313
2320
2321//==============================================================================
2322// AMDGPUNextUseAnalysisPrinterLegacyPass
2323//==============================================================================
2324namespace {
2325void printInstrMember(json::OStream &J, ModuleSlotTracker &MST,
2326 const MachineInstr &MI,
2327 const AMDGPUNextUseAnalysisImpl &NUA) {
2328 printStringAttr(J, "instr", MI, MST);
2329 if (DumpNextUseDistanceVerbose)
2331}
2332
2333void printDistances(
2334 json::OStream &J, const MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
2335 ModuleSlotTracker &MST,
2337 if (!DumpNextUseDistanceVerbose)
2338 return;
2339
2340 // Sorting isn't necessary for the purposes of JSON, but it reduces
2341 // FileCheck differences.
2343 for (const MachineOperand *K : Uses.keys())
2344 Keys.push_back(K);
2345 llvm::sort(Keys, [](const auto &A, const auto &B) {
2346 return A->getReg() < B->getReg() ||
2347 (A->getReg() == B->getReg() && A->getSubReg() < B->getSubReg());
2348 });
2349
2350 J.attributeBegin("distances");
2351 J.objectBegin();
2352
2353 for (const MachineOperand *K : Keys) {
2354 const LiveRegUse U = Uses.at(K);
2355 printAttr(J, printReg(U.getReg(), &TRI, U.getSubReg(), &MRI),
2356 U.Dist.toJsonValue());
2357 }
2358
2359 J.objectEnd();
2360 J.attributeEnd();
2361}
2362
2363void printFurthestUse(json::OStream &J, const MachineRegisterInfo &MRI,
2365 const LiveRegUse F, bool Subreg = false) {
2366 J.attributeBegin(Subreg ? "furthest-subreg" : "furthest");
2367 J.objectBegin();
2368
2369 if (F.Use) {
2370 printStringAttr(
2371 J, "register",
2372 printReg(F.getReg(), &TRI, Subreg ? F.getSubReg() : 0, &MRI));
2373
2374 if (DumpNextUseDistanceVerbose) {
2375 printStringAttr(J, "use", [&](raw_ostream &OS) { OS << (*F.Use); });
2376 printStringAttr(J, "use-mi", *F.Use->getParent(), MST);
2377 }
2378 J.attribute("distance", F.Dist.toJsonValue());
2379 }
2380
2381 J.objectEnd();
2382 J.attributeEnd();
2383}
2384
2385void printDistanceFromDefToUse(json::OStream &J, const MachineFunction &MF,
2386 const AMDGPUNextUseAnalysis &NUA,
2387 const SIRegisterInfo &TRI,
2388 const MachineRegisterInfo &MRI) {
2389 auto getRegNextUseDistance = [&](Register DefReg) {
2390 const MachineInstr &DefMI = *MRI.def_instr_begin(DefReg);
2391
2394 if (Uses.empty())
2396 return NUA.getShortestDistance(DefReg, DefMI, Uses);
2397 };
2398
2399 J.attributeBegin("distance-from-def-to-closest-use");
2400 J.objectBegin();
2401
2402 for (const MachineBasicBlock &MBB : MF) {
2403 for (const MachineInstr &MI : MBB) {
2404 for (const MachineOperand &MO : MI.all_defs()) {
2405 Register Reg = MO.getReg();
2406 if (Reg.isPhysical())
2407 continue;
2408 NextUseDistance D = getRegNextUseDistance(Reg);
2409 printAttr(J, printReg(Reg, &TRI, 0, &MRI), D.toJsonValue());
2410 }
2411 }
2412 }
2413
2414 J.objectEnd();
2415 J.attributeEnd();
2416}
2417
2418void printNextUseDistancesAsJson(json::OStream &J, const MachineFunction &MF,
2419 const AMDGPUNextUseAnalysis &NUA,
2420 const AMDGPUNextUseAnalysisImpl &NUAImpl,
2421 const LiveIntervals &LIS) {
2422 using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
2423 const Function &F = MF.getFunction();
2424 const Module *M = F.getParent();
2425
2427 const SIInstrInfo *TII = ST.getInstrInfo();
2429 const MachineRegisterInfo &MRI = MF.getRegInfo();
2430
2431 // We don't actually care about register pressure here - just using
2432 // GCNDownwardRPTracker as a convenient way of getting the set of live
2433 // registers at a given instruction.
2434 GCNDownwardRPTracker RPTracker(LIS);
2435 ModuleSlotTracker MST(M);
2437
2439
2440 J.attributeBegin("furthest-distances");
2441 J.objectBegin();
2442
2443 for (const MachineBasicBlock &MBB : MF) {
2444 std::string BBName;
2445 raw_string_ostream BBOS(BBName);
2447
2448 J.attributeBegin(BBOS.str());
2449 J.arrayBegin();
2450
2451 const MachineInstr *PrevMI = nullptr;
2452 for (const MachineInstr &MI : MBB) {
2453 // Update register pressure tracker
2454 if (!PrevMI || PrevMI->getOpcode() == AMDGPU::PHI)
2455 RPTracker.reset(MI);
2456 RPTracker.advance();
2457
2458 UseDistancePair Furthest;
2459 UseDistancePair FurthestSubreg;
2460 RelevantUses.clear();
2461 NUA.getNextUseDistances(RPTracker.getLiveRegs(), MI, Furthest,
2462 &FurthestSubreg, &RelevantUses);
2463
2464 J.objectBegin();
2465 printInstrMember(J, MST, MI, NUAImpl);
2466 printDistances(J, MRI, TRI, MST, RelevantUses);
2467 printFurthestUse(J, MRI, TRI, MST, Furthest);
2468 printFurthestUse(J, MRI, TRI, MST, FurthestSubreg, /*Subreg*/ true);
2469 J.objectEnd();
2470
2471 PrevMI = &MI;
2472 }
2473
2474 J.arrayEnd();
2475 J.attributeEnd();
2476 }
2477
2478 J.objectEnd();
2479 J.attributeEnd();
2480
2481 if (DumpNextUseDistanceVerbose || DumpNextUseDistanceDefToUse)
2482 printDistanceFromDefToUse(J, MF, NUA, TRI, MRI);
2483
2484 if (DumpNextUseDistanceVerbose)
2485 NUAImpl.printPaths(J, MST);
2486
2487 if (DistanceCacheEnabled) {
2488 J.attributeBegin("metrics");
2489 J.objectBegin();
2490 {
2491 J.attributeBegin("distance-cache");
2492 J.objectBegin();
2493 {
2494 J.attribute("hits", NUAImpl.getDistanceCacheHits());
2495 J.attribute("misses", NUAImpl.getDistanceCacheMisses());
2496 }
2497 J.objectEnd();
2498 J.attributeEnd(); // distance-cache
2499 }
2500 J.objectEnd();
2501 J.attributeEnd(); // metrics
2502 }
2503}
2504
2505void printAsJson(raw_ostream &FallbackOS, TimerGroup &JsonTimerGroup,
2506 Timer &JsonTimer, const MachineFunction &MF,
2507 const AMDGPUNextUseAnalysis &NUA,
2508 const AMDGPUNextUseAnalysisImpl &NUAImpl,
2509 const LiveIntervals &LIS) {
2510 std::string FN = DumpNextUseDistanceAsJson;
2511
2512 auto dump = [&](raw_ostream &OS) {
2513 json::OStream J(OS, 2);
2514 J.objectBegin();
2515
2516 J.attributeBegin("next-use-analysis");
2517 J.objectBegin();
2518 printNextUseDistancesAsJson(J, MF, NUA, NUAImpl, LIS);
2519 J.objectEnd();
2520 J.attributeEnd();
2521
2522 JsonTimer.stopTimer();
2523 JsonTimerGroup.printJSONValues(OS, ",\n");
2524
2525 J.objectEnd();
2526 };
2527
2528 if (!DumpNextUseDistanceAsJson.getNumOccurrences()) {
2529 dump(FallbackOS);
2530 } else if (FN.empty() || FN == "-") {
2531 dump(outs());
2532 } else {
2533 std::error_code EC;
2534 ToolOutputFile OutF(FN, EC, sys::fs::OF_None);
2535 dump(OutF.os());
2536 OutF.keep();
2537 }
2538}
2539} // namespace
2540
2541//------------------------------------------------------------------------------
2542// Legacy Printer Pass
2543//------------------------------------------------------------------------------
2546
2548 return "AMDGPU Next Use Analysis Printer";
2549}
2550
2552 MachineFunction &MF) {
2553 TimerGroup JsonTimerGroup("amdgpu-next-use-analysis-json",
2554 "AMDGPU Next Use Analysis JSON Printer", false);
2555 Timer JsonTimer("json", "Total time spent generating json", JsonTimerGroup);
2556 JsonTimer.startTimer();
2557
2559 const AMDGPUNextUseAnalysis &NUA =
2560 getAnalysis<AMDGPUNextUseAnalysisLegacyPass>().getNextUseAnalysis();
2561
2562 printAsJson(errs(), JsonTimerGroup, JsonTimer, MF, NUA, *NUA.Impl, LIS);
2563
2564 return false;
2565}
2566
2575
2579
2581 "amdgpu-next-use-printer",
2582 "AMDGPU Next Use Analysis Printer", false, false)
2583
2586
2588 "amdgpu-next-use-printer",
2589 "AMDGPU Next Use Analysis Printer", false, false)
2590
2594
2595//------------------------------------------------------------------------------
2596// New Pass Manager Printer Pass
2597//------------------------------------------------------------------------------
2601
2602 TimerGroup JsonTimerGroup("amdgpu-next-use-analysis-json",
2603 "AMDGPU Next Use Analysis JSON Printer", false);
2604 Timer JsonTimer("json", "Total time spent generating json", JsonTimerGroup);
2605 JsonTimer.startTimer();
2606
2607 const LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
2608 const AMDGPUNextUseAnalysis &NUA =
2610
2611 printAsJson(OS, JsonTimerGroup, JsonTimer, MF, NUA, *NUA.Impl, LIS);
2612
2613 return PreservedAnalyses::all();
2614}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
Function Alias Analysis false
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file defines the DenseMap class.
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
This file supports working with JSON data.
#define RegName(no)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Remove Loads Into Fake Uses
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
void setConfig(AMDGPUNextUseAnalysis::Config NewCfg)
AMDGPUNextUseAnalysis::Config getConfig() const
void getNextUseDistances(const GCNRPTracker::LiveRegSet &LiveRegs, const MachineInstr &MI, LiveRegUse &Furthest, LiveRegUse *FurthestSubreg=nullptr, DenseMap< const MachineOperand *, UseDistancePair > *RelevantUses=nullptr)
void getReachableUses(Register LiveReg, LaneBitmask LaneMask, const MachineInstr &MI, SmallVector< const MachineOperand * > &Uses) const
void printVerboseInstrFields(json::OStream &J, const MachineInstr &MI) const
AMDGPUNextUseAnalysisImpl(const MachineFunction *, const MachineLoopInfo *)
void printPaths(json::OStream &J, ModuleSlotTracker &MST) const
NextUseDistance getShortestDistance(Register LiveReg, const MachineInstr &FromMI, const SmallVector< const MachineOperand * > &Uses) const
NextUseDistance getShortestDistance(Register LiveReg, LaneBitmask LaneMask, const MachineInstr &FromMI, const SmallVector< const MachineOperand * > &Uses, const MachineOperand **ShortestUseOut, bool *MIDependent, SmallVector< CacheableNextUseDistance > *Distances) const
\Returns the shortest next-use distance for LiveReg.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void getReachableUses(Register LiveReg, LaneBitmask LaneMask, const MachineInstr &MI, SmallVector< const MachineOperand * > &Uses) const
void getNextUseDistances(const DenseMap< unsigned, LaneBitmask > &LiveRegs, const MachineInstr &MI, UseDistancePair &Furthest, UseDistancePair *FurthestSubreg=nullptr, DenseMap< const MachineOperand *, UseDistancePair > *RelevantUses=nullptr) const
NextUseDistance getShortestDistance(Register LiveReg, const MachineInstr &CurMI, const SmallVector< const MachineOperand * > &Uses, const MachineOperand **ShortestUseOut=nullptr, SmallVector< NextUseDistance > *Distances=nullptr) const
\Returns the shortest next-use distance from CurMI for LiveReg.
AMDGPUNextUseAnalysis & operator=(AMDGPUNextUseAnalysis &&Other)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
DenseMap< unsigned, LaneBitmask > LiveRegSet
const HexagonRegisterInfo & getRegisterInfo() const
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
const LoopT * getOutermostLoop() const
Get the outermost loop in which this loop is contained.
void getLoopLatches(SmallVectorImpl< BlockT * > &LoopLatches) const
Return all loop latch blocks of this loop.
unsigned getLoopDepth() const
Return the nesting level of this loop.
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
@ PrintNameIr
Add IR name where available.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
iterator_range< succ_iterator > successors()
LLVM_ABI void printName(raw_ostream &os, unsigned printNameFlags=PrintNameIr, ModuleSlotTracker *moduleSlotTracker=nullptr) const
Print the basic block's name as:
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
const MachineOperand & getOperand(unsigned i) const
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
def_instr_iterator def_instr_begin(Register RegNo) const
Manage lifetime of a slot tracker for printing IR.
void incorporateFunction(const Function &F)
Incorporate the given function.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static constexpr NextUseDistance fromSize(unsigned Size, unsigned Depth)
static constexpr NextUseDistance unreachable()
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition SmallSet.h:229
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
The TimerGroup class is used to group together related timers into a single report that is printed wh...
Definition Timer.h:191
LLVM_ABI const char * printJSONValues(raw_ostream &OS, const char *delim)
Definition Timer.cpp:464
This class is used to track the amount of time spent between invocations of its startTimer()/stopTime...
Definition Timer.h:87
LLVM_ABI void stopTimer()
Stop the timer.
Definition Timer.cpp:159
LLVM_ABI void startTimer()
Start the timer running.
Definition Timer.cpp:150
This class contains a raw_fd_ostream and adds a few extra features commonly needed for compiler-like ...
json::OStream allows writing well-formed JSON without materializing all structures as json::Value ahe...
Definition JSON.h:995
LLVM_ABI void attributeBegin(llvm::StringRef Key)
Definition JSON.cpp:883
void attribute(llvm::StringRef Key, const Value &Contents)
Emit an attribute whose value is self-contained (number, vector<int> etc).
Definition JSON.h:1050
LLVM_ABI void arrayBegin()
Definition JSON.cpp:845
LLVM_ABI void objectBegin()
Definition JSON.cpp:864
LLVM_ABI raw_ostream & rawValueBegin()
Definition JSON.cpp:911
LLVM_ABI void arrayEnd()
Definition JSON.cpp:853
LLVM_ABI void attributeEnd()
Definition JSON.cpp:903
LLVM_ABI void rawValueEnd()
Definition JSON.cpp:918
LLVM_ABI void objectEnd()
Definition JSON.cpp:872
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
initializer< Ty > init(const Ty &Val)
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
constexpr NextUseDistance min(NextUseDistance A, NextUseDistance B)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAMDGPUNextUseAnalysisLegacyPass()
FunctionPass * createAMDGPUNextUseAnalysisPrinterLegacyPass()
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto post_order(const T &G)
Post-order traversal of a graph.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
char & AMDGPUNextUseAnalysisLegacyID
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition Format.h:150
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
char & AMDGPUNextUseAnalysisPrinterLegacyID
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
static Config Graphics()
Named presets.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
static constexpr LaneBitmask getAll()
Definition LaneBitmask.h:82
constexpr bool all() const
Definition LaneBitmask.h:54