//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

14#include "RegAllocGreedy.h"
15#include "AllocationOrder.h"
16#include "InterferenceCache.h"
17#include "RegAllocBase.h"
18#include "SplitKit.h"
19#include "llvm/ADT/ArrayRef.h"
20#include "llvm/ADT/BitVector.h"
21#include "llvm/ADT/IndexedMap.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/Analysis.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/LLVMContext.h"
64#include "llvm/Pass.h"
68#include "llvm/Support/Debug.h"
70#include "llvm/Support/Timer.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interference at a time"),
    cl::init(8));

static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search", cl::NotHidden,
    cl::desc("Exhaustive Search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"),
    cl::Hidden);

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
                 cl::desc("Cost for first time use of callee-saved register."),
                 cl::init(0), cl::Hidden);

static cl::opt<unsigned long> GrowRegionComplexityBudget(
    "grow-region-complexity-budget",
    cl::desc("growRegion() does not scale with the number of BB edges, so "
             "limit its budget and bail out once we reach the limit."),
    cl::init(10000), cl::Hidden);

static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
    "greedy-regclass-priority-trumps-globalness",
    cl::desc("Change the greedy register allocator's live range priority "
             "calculation to make the AllocationPriority of the register class "
             "more important than whether the range is global"),
    cl::Hidden);

static cl::opt<bool> GreedyReverseLocalAssignment(
    "greedy-reverse-local-assignment",
    cl::desc("Reverse allocation order of local live ranges, such that "
             "shorter local live ranges will tend to be allocated first"),
    cl::Hidden);

static cl::opt<unsigned> SplitThresholdForRegWithHint(
    "split-threshold-for-reg-with-hint",
    cl::desc("The threshold for splitting a virtual register with a hint, in "
             "percentage"),
    cl::init(75), cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

namespace {
class RAGreedyLegacy : public MachineFunctionPass {
  RegAllocFilterFunc F;

public:
  RAGreedyLegacy(const RegAllocFilterFunc F = nullptr);

  static char ID;
  /// Return the pass name.
  StringRef getPassName() const override { return "Greedy Register Allocator"; }

  /// RAGreedy analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  /// Perform register allocation.
  bool runOnMachineFunction(MachineFunction &mf) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setNoPHIs();
  }

  MachineFunctionProperties getClearedProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }
};

} // end anonymous namespace

RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F)
    : MachineFunctionPass(ID), F(std::move(F)) {}

RAGreedy::RAGreedy(const RequiredAnalyses &Analyses,
                   const RegAllocFilterFunc F)
    : RegAllocBase(F) {
  VRM = Analyses.VRM;
  LIS = Analyses.LIS;
  Matrix = Analyses.LRM;
  Indexes = Analyses.Indexes;
  MBFI = Analyses.MBFI;
  DomTree = Analyses.DomTree;
  Loops = Analyses.Loops;
  ORE = Analyses.ORE;
  Bundles = Analyses.Bundles;
  SpillPlacer = Analyses.SpillPlacer;
  DebugVars = Analyses.DebugVars;
  LSS = Analyses.LSS;
  EvictProvider = Analyses.EvictProvider;
  PriorityProvider = Analyses.PriorityProvider;
}

void RAGreedyPass::printPipeline(
    raw_ostream &OS,
    function_ref<StringRef(StringRef)> MapClassName2PassName) const {
  StringRef FilterName = Opts.FilterName.empty() ? "all" : Opts.FilterName;
  OS << "greedy<" << FilterName << '>';
}


PreservedAnalyses RAGreedyPass::run(MachineFunction &MF,
                                    MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);

  RAGreedy::RequiredAnalyses Analyses(MF, MFAM);
  RAGreedy Impl(Analyses, Opts.Filter);

  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MachineBlockFrequencyAnalysis>();
  PA.preserve<LiveIntervalsAnalysis>();
  PA.preserve<SlotIndexesAnalysis>();
  PA.preserve<LiveDebugVariablesAnalysis>();
  PA.preserve<LiveStacksAnalysis>();
  PA.preserve<VirtRegMapAnalysis>();
  PA.preserve<LiveRegMatrixAnalysis>();
  return PA;
}

RAGreedy::RequiredAnalyses::RequiredAnalyses(Pass &P) {
  VRM = &P.getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
  LIS = &P.getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  LSS = &P.getAnalysis<LiveStacksWrapperLegacy>().getLS();
  LRM = &P.getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  Indexes = &P.getAnalysis<SlotIndexesWrapperPass>().getSI();
  MBFI = &P.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
  DomTree = &P.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  ORE = &P.getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  Loops = &P.getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  Bundles = &P.getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
  SpillPlacer = &P.getAnalysis<SpillPlacementWrapperLegacy>().getResult();
  DebugVars = &P.getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();
  EvictProvider =
      &P.getAnalysis<RegAllocEvictionAdvisorAnalysisLegacy>().getProvider();
  PriorityProvider =
      &P.getAnalysis<RegAllocPriorityAdvisorAnalysisLegacy>().getProvider();
}

bool RAGreedyLegacy::runOnMachineFunction(MachineFunction &MF) {
  RAGreedy::RequiredAnalyses Analyses(*this);
  RAGreedy Impl(Analyses, F);
  return Impl.run(MF);
}

char RAGreedyLegacy::ID = 0;
char &llvm::RAGreedyLegacyID = RAGreedyLegacy::ID;

INITIALIZE_PASS_BEGIN(RAGreedyLegacy, "greedy", "Greedy Register Allocator",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescerLegacy)
INITIALIZE_PASS_DEPENDENCY(MachineSchedulerLegacy)
INITIALIZE_PASS_END(RAGreedyLegacy, "greedy", "Greedy Register Allocator",
                    false, false)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
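// Callers typically scale one side of a floating point comparison by
// Hysteresis (about 0.98), so a new candidate must be roughly 2% better
// before it replaces a previously chosen one.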

FunctionPass *llvm::createGreedyRegisterAllocator() {
  return new RAGreedyLegacy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) {
  return new RAGreedyLegacy(Ftor);
}

void RAGreedyLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it to
  // be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned, at
  // the same stage as the parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Ret = PriorityAdvisor->getPriority(*LI);

  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
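  // (PQueue is a max-heap over (priority, ~vreg) pairs, so among entries with
  // equal priority the larger complement, i.e. the lower vreg number, is
  // dequeued first.)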
  CurQueue.push(std::make_pair(Ret, ~Reg.id()));
}

unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  const unsigned Size = LI.getSize();
  const Register Reg = LI.reg();
  unsigned Prio;
  LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = RC.GlobalPriority ||
                       (!ReverseLocalAssignment &&
                        (Size / SlotIndex::InstrDist) >
                            (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
        LIS->intervalIsInOneMBB(LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }

    // Priority bit layout:
    // 31    RS_Assign priority
    // 30    Preference priority
    // if (RegClassPriorityTrumpsGlobalness)
    //   29-25 AllocPriority
    //   24    GlobalBit
    // else
    //   29    Global bit
    //   28-24 AllocPriority
    // 0-23  Size/Instr distance
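    //
    // Illustrative example (made-up values): with
    // RegClassPriorityTrumpsGlobalness unset, a global range (GlobalBit = 1)
    // whose clamped Size is 100, in a class with AllocationPriority 2 and a
    // known register preference, ends up with
    //   Prio = (1u << 31) | (1u << 30) | (1u << 29) | (2u << 24) | 100.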

    // Clamp the size to fit with the priority masking scheme
    Prio = std::min(Prio, (unsigned)maxUIntN(24));
    assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");

    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  return Prio;
}

unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  // Prioritize by virtual register number, lowest first.
  Register Reg = LI.reg();
  return ~Reg.virtRegIndex();
}

const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}
524//===----------------------------------------------------------------------===//
525// Direct Assignment
526//===----------------------------------------------------------------------===//
527
528/// tryAssign - Try to assign VirtReg to an available register.
529MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
530 AllocationOrder &Order,
532 const SmallVirtRegSet &FixedRegisters) {
533 MCRegister PhysReg;
534 for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
535 assert(*I);
536 if (!Matrix->checkInterference(VirtReg, *I)) {
537 if (I.isHint())
538 return *I;
539 else
540 PhysReg = *I;
541 }
542 }
543 if (!PhysReg.isValid())
544 return PhysReg;
545
546 // PhysReg is available, but there may be a better choice.
547
548 // If we missed a simple hint, try to cheaply evict interference from the
549 // preferred register.
550 if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
551 if (Order.isHint(Hint)) {
552 MCRegister PhysHint = Hint.asMCReg();
553 LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
554
555 if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
556 FixedRegisters)) {
557 evictInterference(VirtReg, PhysHint, NewVRegs);
558 return PhysHint;
559 }
560
561 // We can also split the virtual register in cold blocks.
562 if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
563 return MCRegister();
564
565 // Record the missed hint, we may be able to recover
566 // at the end if the surrounding allocation changed.
567 SetOfBrokenHints.insert(&VirtReg);
568 }
569
570 // Try to evict interference from a cheaper alternative.
571 uint8_t Cost = RegCosts[PhysReg.id()];
572
573 // Most registers have 0 additional cost.
574 if (!Cost)
575 return PhysReg;
576
577 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
578 << (unsigned)Cost << '\n');
579 MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
580 return CheapReg ? CheapReg : PhysReg;
581}

//===----------------------------------------------------------------------===//
// Interference eviction
//===----------------------------------------------------------------------===//

bool RAGreedy::canReassign(const LiveInterval &VirtReg,
                           MCRegister FromReg) const {
  auto HasRegUnitInterference = [&](MCRegUnit Unit) {
    // Instantiate a "subquery", not to be confused with the Queries array.
    LiveIntervalUnion::Query SubQ(
        VirtReg, Matrix->getLiveUnions()[static_cast<unsigned>(Unit)]);
    return SubQ.checkInterference();
  };

  for (MCRegister Reg :
       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
    if (Reg == FromReg)
      continue;
    // If no units have interference, reassignment is possible.
    if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
      LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                        << printReg(FromReg, TRI) << " to "
                        << printReg(Reg, TRI) << '\n');
      return true;
    }
  }
  return false;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // We usually have the interfering VRegs cached, so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, because different SubRanges were then
    // queried against it.
    ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

std::optional<unsigned>
RAGreedy::getOrderLimit(const LiveInterval &VirtReg,
                        const AllocationOrder &Order,
                        unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return std::nullopt;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}

bool RAGreedy::canAllocatePhysReg(unsigned CostPerUseLimit,
                                  MCRegister PhysReg) const {
  if (RegCosts[PhysReg.id()] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
/// @return Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
// Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = BlockFrequency(0);
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start.
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;
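  // Constraints (BCS) and links (TBS) are buffered in groups of GroupSize and
  // flushed to SpillPlacer in batches; B and T count the pending entries.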

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start.
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(ArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(ArrayRef(BCS, B));
  SpillPlacer->addLinks(ArrayRef(TBS, T));
  return true;
}

bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else {
      // Provided that the variable being spilled does not look like a loop
      // induction variable, which is expensive to spill around and better
      // pushed into a condition inside the loop if possible, provide a strong
      // negative bias on through blocks to prevent unwanted liveness on loop
      // backedges.
      bool PrefSpill = true;
      if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) {
        // Check that the current bundle is adding a Header + start+end of
        // loop-internal blocks. If the block is indeed a header, don't mark
        // the NewBlocks as PrefSpill to allow the variable to be live in
        // Header<->Latch.
        MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0]));
        if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] &&
            all_of(NewBlocks.drop_front(), [&](unsigned Block) {
              return L == Loops->getLoopFor(MF->getBlockNumbered(Block));
            }))
          PrefSpill = false;
      }
      if (PrefSpill)
        SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    }
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = BlockFrequency(0);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = BlockFrequency(0);
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
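    // IntvIn/IntvOut are SplitEditor interval indices for the candidate
    // regions covering the block entry/exit, or 0 when no candidate is live
    // there (interval 0 is the stack-bound remainder).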
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(LIS, Indexes, "After splitting live range around region",
               &errs());
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister();
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::max();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "
                      << printBlockFreq(*MBFI, BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister();

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned RAGreedy::calculateRegionSplitCostAroundReg(MCRegister PhysReg,
                                                     AllocationOrder &Order,
                                                     BlockFrequency &BestCost,
                                                     unsigned &NumCands,
                                                     unsigned &BestCand) {
  // Discard bad candidates before we run out of interference cache cursors.
  // This will only affect register classes with a lot of registers (>32).
  if (NumCands == IntfCache.getMaxCursors()) {
    unsigned WorstCount = ~0u;
    unsigned Worst = 0;
    for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
      if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
        continue;
      unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
      if (Count < WorstCount) {
        Worst = CandIndex;
        WorstCount = Count;
      }
    }
    --NumCands;
    GlobalCand[Worst] = GlobalCand[NumCands];
    if (BestCand == NumCands)
      BestCand = Worst;
  }

  if (GlobalCand.size() <= NumCands)
    GlobalCand.resize(NumCands + 1);
  GlobalSplitCandidate &Cand = GlobalCand[NumCands];
  Cand.reset(IntfCache, PhysReg);

  SpillPlacer->prepare(Cand.LiveBundles);
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
    return BestCand;
  }
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
                    << "\tstatic = " << printBlockFreq(*MBFI, Cost));
  if (Cost >= BestCost) {
    LLVM_DEBUG({
      if (BestCand == NoCand)
        dbgs() << " worse than no bundles\n";
      else
        dbgs() << " worse than "
               << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
    });
    return BestCand;
  }
  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return BestCand;
  }

  SpillPlacer->finish();

  // No live bundles, defer to splitSingleBlocks().
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << " no bundles.\n");
    return BestCand;
  }

  Cost += calcGlobalSplitCost(Cand, Order);
  LLVM_DEBUG({
    dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles";
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  if (Cost < BestCost) {
    BestCand = NumCands;
    BestCost = Cost;
  }
  ++NumCands;

  return BestCand;
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCRegister PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
                                      BestCand);
  }

  return BestCand;
}

MCRegister RAGreedy::doRegionSplit(const LiveInterval &VirtReg,
                                   unsigned BestCand, bool HasCompact,
                                   SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return MCRegister();
}

// VirtReg has a physical Hint, this function tries to split VirtReg around
// Hint if we can place new COPY instructions in cold blocks.
bool RAGreedy::trySplitAroundHintReg(MCRegister Hint,
                                     const LiveInterval &VirtReg,
                                     SmallVectorImpl<Register> &NewVRegs,
                                     AllocationOrder &Order) {
  // Splitting VirtReg may generate COPY instructions in multiple cold basic
  // blocks, and increase code size. So we avoid it when the function is
  // optimized for size.
  if (MF->getFunction().hasOptSize())
    return false;

  // Don't allow repeated splitting as a safeguard against looping.
  if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
    return false;

  BlockFrequency Cost = BlockFrequency(0);
  Register Reg = VirtReg.reg();

  // Compute the cost of assigning a non-Hint physical register to VirtReg.
  // We define it as the total frequency of broken COPY instructions to/from
  // the Hint register; after splitting, they can be deleted.

  // FIXME: This is miscounting the costs with subregisters. In particular, this
  // should support recognizing SplitKit formed copy bundles instead of direct
  // copy instructions, which will appear in the same block.
  for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(Reg)) {
    const MachineInstr &Instr = *Opnd.getParent();
    if (!Instr.isCopy() || Opnd.isImplicit())
      continue;

    // Look for the other end of the copy.
    const bool IsDef = Opnd.isDef();
    const MachineOperand &OtherOpnd = Instr.getOperand(IsDef);
    Register OtherReg = OtherOpnd.getReg();
    assert(Reg == Opnd.getReg());
    if (OtherReg == Reg)
      continue;

    unsigned SubReg = Opnd.getSubReg();
    unsigned OtherSubReg = OtherOpnd.getSubReg();
    if (SubReg && OtherSubReg && SubReg != OtherSubReg)
      continue;

    // Check if VirtReg interferes with OtherReg after this COPY instruction.
    if (Opnd.readsReg()) {
      SlotIndex Index = LIS->getInstructionIndex(Instr).getRegSlot();

      if (SubReg) {
        LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
        if (IsDef)
          Mask = ~Mask;

        if (any_of(VirtReg.subranges(), [=](const LiveInterval::SubRange &S) {
              return (S.LaneMask & Mask).any() && S.liveAt(Index);
            })) {
          continue;
        }
      } else {
        if (VirtReg.liveAt(Index))
          continue;
      }
    }

    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    MCRegister ThisHint = SubReg ? TRI->getSubReg(Hint, SubReg) : Hint;
    if (OtherPhysReg == ThisHint)
      Cost += MBFI->getBlockFreq(Instr.getParent());
  }

  // Decrease the cost so it will be split in colder blocks.
  BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
  Cost *= Threshold;
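  // With the default threshold of 75, only 75% of the broken-copy frequency
  // is kept as the budget, so the split is only taken when the new copies it
  // inserts are meaningfully cheaper than the copies it removes.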
  if (Cost == BlockFrequency(0))
    return false;

  unsigned NumCands = 0;
  unsigned BestCand = NoCand;
  SA->analyze(&VirtReg);
  calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
  if (BestCand == NoCand)
    return false;

  doRegionSplit(VirtReg, BestCand, false /*HasCompact*/, NewVRegs);
  return true;
}

//===----------------------------------------------------------------------===//
// Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges, that will be split by tryLocalSplit if
/// they don't allocate.
MCRegister RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
                                   AllocationOrder &Order,
                                   SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return MCRegister();

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(LIS, Indexes, "After splitting live range around basic blocks",
               &errs());
  return MCRegister();
}

//===----------------------------------------------------------------------===//
// Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
                                       const TargetRegisterInfo &TRI,
                                       const MachineInstr &FirstMI,
                                       Register Reg) {
  LaneBitmask Mask;
  SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
  (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);

  for (auto [MI, OpIdx] : Ops) {
    const MachineOperand &MO = MI->getOperand(OpIdx);
    assert(MO.isReg() && MO.getReg() == Reg);
    unsigned SubReg = MO.getSubReg();
    if (SubReg == 0 && MO.isUse()) {
      if (MO.isUndef())
        continue;
      return MRI.getMaxLaneMaskForVReg(Reg);
    }

    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
    if (MO.isDef()) {
      if (!MO.isUndef())
        Mask |= ~SubRegMask;
    } else
      Mask |= SubRegMask;
  }

  return Mask;
}

/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
                            const MachineInstr *MI, const LiveInterval &VirtReg,
                            const TargetRegisterInfo *TRI, SlotIndex Use,
                            const TargetInstrInfo *TII) {
  // Early check the common case. Beware of the semi-formed bundles SplitKit
  // creates by setting the bundle flag on copies without a matching BUNDLE.

  auto DestSrc = TII->isCopyInstr(*MI);
  if (DestSrc && !MI->isBundled() &&
      DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
    return false;

  // FIXME: We're only considering uses, but should we consider defs too?
  LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());

  LaneBitmask LiveAtMask;
  for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
    if (S.liveAt(Use))
      LiveAtMask |= S.LaneMask;
  }

  // If the live lanes aren't different from the lanes used by the instruction,
  // this doesn't help.
  return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
MCRegister RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                         AllocationOrder &Order,
                                         SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.

  bool SplitSubClass = true;
  if (!RegClassInfo.isProperSubClass(CurRC)) {
    if (!VirtReg.hasSubRanges())
      return MCRegister();
    SplitSubClass = false;
  }

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return MCRegister();

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
      if (TII->isFullCopyInstr(*MI) ||
          (SplitSubClass &&
           SuperRCNumAllocatableRegs ==
               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                   TII, TRI, RegClassInfo)) ||
          // TODO: Handle split for subranges with subclass constraints?
          (!SplitSubClass && VirtReg.hasSubRanges() &&
           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
        LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
        continue;
      }
    }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return MCRegister();
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return MCRegister();
}

//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size() - 1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[static_cast<unsigned>(Unit)].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap + 1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap + 1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    const LiveRange &LR = LIS->getRegUnit(Unit);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap + 1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap + 1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}
1728
1729/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1730/// basic block.
1731///
1732MCRegister RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
1733 AllocationOrder &Order,
1734 SmallVectorImpl<Register> &NewVRegs) {
1735 // TODO: the function currently only handles a single UseBlock; it should be
1736 // possible to generalize.
1737 if (SA->getUseBlocks().size() != 1)
1738 return MCRegister();
1739
1740 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1741
1742 // Note that it is possible to have an interval that is live-in or live-out
1743 // while only covering a single block - A phi-def can use undef values from
1744 // predecessors, and the block could be a single-block loop.
1745 // We don't bother doing anything clever about such a case, we simply assume
1746 // that the interval is continuous from FirstInstr to LastInstr. We should
1747 // make sure that we don't do anything illegal to such an interval, though.
1748
1749 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1750 if (Uses.size() <= 2)
1751 return MCRegister();
1752 const unsigned NumGaps = Uses.size()-1;
1753
1754 LLVM_DEBUG({
1755 dbgs() << "tryLocalSplit: ";
1756 for (const auto &Use : Uses)
1757 dbgs() << ' ' << Use;
1758 dbgs() << '\n';
1759 });
1760
1761 // If VirtReg is live across any register mask operands, compute a list of
1762 // gaps with register masks.
1763 SmallVector<unsigned, 8> RegMaskGaps;
1764 if (Matrix->checkRegMaskInterference(VirtReg)) {
1765 // Get regmask slots for the whole block.
1766 ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1767 LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1768 // Constrain to VirtReg's live range.
1769 unsigned RI =
1770 llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
1771 unsigned RE = RMS.size();
1772 for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
1773 // Look for Uses[I] <= RMS <= Uses[I + 1].
1774      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
1775      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
1776 continue;
1777 // Skip a regmask on the same instruction as the last use. It doesn't
1778 // overlap the live range.
1779 if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
1780 break;
1781 LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
1782 << Uses[I + 1]);
1783 RegMaskGaps.push_back(I);
1784      // Advance RI to the next gap. A regmask on one of the uses counts in
1785 // both gaps.
1786 while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
1787 ++RI;
1788 }
1789 LLVM_DEBUG(dbgs() << '\n');
1790 }
1791
1792 // Since we allow local split results to be split again, there is a risk of
1793 // creating infinite loops. It is tempting to require that the new live
1794 // ranges have fewer instructions than the original. That would guarantee
1795 // convergence, but it is too strict. A live range with 3 instructions can be
1796 // split 2+3 (including the COPY), and we want to allow that.
1797 //
1798 // Instead we use these rules:
1799 //
1800 // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
1801 // noop split, of course).
1802 // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1803 // the new ranges must have fewer instructions than before the split.
1804 // 3. New ranges with the same number of instructions are marked RS_Split2,
1805 // smaller ranges are marked RS_New.
1806 //
1807 // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1808 // excessive splitting and infinite loops.
1809 //
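  // Worked example (illustrative): a 3-instruction range below RS_Split2 may
  // be split 2+3. The piece that still has 3 instructions is tagged
  // RS_Split2 by rule 3, so any further split of it must, by rule 2, produce
  // strictly smaller ranges, which bounds the total amount of splitting.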
1810 bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
1811
1812 // Best split candidate.
1813 unsigned BestBefore = NumGaps;
1814 unsigned BestAfter = 0;
1815 float BestDiff = 0;
1816
1817 const float blockFreq =
1818 SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1819 (1.0f / MBFI->getEntryFreq().getFrequency());
1820 SmallVector<float, 8> GapWeight;
1821
1822 for (MCRegister PhysReg : Order) {
1823 assert(PhysReg);
1824 // Keep track of the largest spill weight that would need to be evicted in
1825 // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
1826 calcGapWeights(PhysReg, GapWeight);
1827
1828 // Remove any gaps with regmask clobbers.
1829 if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1830 for (unsigned Gap : RegMaskGaps)
1831 GapWeight[Gap] = huge_valf;
1832
1833 // Try to find the best sequence of gaps to close.
1834 // The new spill weight must be larger than any gap interference.
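    // The loop below slides a window [SplitBefore, SplitAfter] over the gaps:
    // it extends the window while the estimated weight still beats the
    // interference, and shrinks it from the left once it does not. Both
    // endpoints only move forward, so this is a single linear sweep.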
1835
1836 // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1837 unsigned SplitBefore = 0, SplitAfter = 1;
1838
1839 // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1840 // It is the spill weight that needs to be evicted.
1841 float MaxGap = GapWeight[0];
1842
1843 while (true) {
1844 // Live before/after split?
1845 const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
1846 const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
1847
1848 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
1849 << '-' << Uses[SplitAfter] << " I=" << MaxGap);
1850
1851 // Stop before the interval gets so big we wouldn't be making progress.
1852 if (!LiveBefore && !LiveAfter) {
1853 LLVM_DEBUG(dbgs() << " all\n");
1854 break;
1855 }
1856 // Should the interval be extended or shrunk?
1857 bool Shrink = true;
1858
1859 // How many gaps would the new range have?
1860 unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
1861
1862 // Legally, without causing looping?
1863 bool Legal = !ProgressRequired || NewGaps < NumGaps;
1864
1865 if (Legal && MaxGap < huge_valf) {
1866 // Estimate the new spill weight. Each instruction reads or writes the
1867 // register. Conservatively assume there are no read-modify-write
1868 // instructions.
1869 //
1870 // Try to guess the size of the new interval.
1871 const float EstWeight = normalizeSpillWeight(
1872 blockFreq * (NewGaps + 1),
1873 Uses[SplitBefore].distance(Uses[SplitAfter]) +
1874 (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
1875 1);
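            // Illustratively: the estimate charges one access at blockFreq
            // for each of the NewGaps + 1 instructions in the new range, over
            // a size equal to the distance between the split points plus one
            // InstrDist for each end that stays live. normalizeSpillWeight
            // (see CalcSpillWeights.h) turns that into a use density.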
1876 // Would this split be possible to allocate?
1877            // Never allocate all gaps; we wouldn't be making progress.
1878 LLVM_DEBUG(dbgs() << " w=" << EstWeight);
1879 if (EstWeight * Hysteresis >= MaxGap) {
1880 Shrink = false;
1881 float Diff = EstWeight - MaxGap;
1882 if (Diff > BestDiff) {
1883 LLVM_DEBUG(dbgs() << " (best)");
1884 BestDiff = Hysteresis * Diff;
1885 BestBefore = SplitBefore;
1886 BestAfter = SplitAfter;
1887 }
1888 }
1889 }
1890
1891 // Try to shrink.
1892 if (Shrink) {
1893 if (++SplitBefore < SplitAfter) {
1894 LLVM_DEBUG(dbgs() << " shrink\n");
1895 // Recompute the max when necessary.
1896 if (GapWeight[SplitBefore - 1] >= MaxGap) {
1897 MaxGap = GapWeight[SplitBefore];
1898 for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
1899 MaxGap = std::max(MaxGap, GapWeight[I]);
1900 }
1901 continue;
1902 }
1903 MaxGap = 0;
1904 }
1905
1906 // Try to extend the interval.
1907 if (SplitAfter >= NumGaps) {
1908 LLVM_DEBUG(dbgs() << " end\n");
1909 break;
1910 }
1911
1912 LLVM_DEBUG(dbgs() << " extend\n");
1913 MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1914 }
1915 }
1916
1917 // Didn't find any candidates?
1918 if (BestBefore == NumGaps)
1919 return MCRegister();
1920
1921 LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1922 << Uses[BestAfter] << ", " << BestDiff << ", "
1923 << (BestAfter - BestBefore + 1) << " instrs\n");
1924
1925 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1926 SE->reset(LREdit);
1927
1928 SE->openIntv();
1929 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1930 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
1931 SE->useIntv(SegStart, SegStop);
1932 SmallVector<unsigned, 8> IntvMap;
1933 SE->finish(&IntvMap);
1934 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1935 // If the new range has the same number of instructions as before, mark it as
1936 // RS_Split2 so the next split will be forced to make progress. Otherwise,
1937 // leave the new intervals as RS_New so they can compete.
1938 bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1939 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1940 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1941 if (NewGaps >= NumGaps) {
1942 LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1943 assert(!ProgressRequired && "Didn't make progress when it was required.");
1944 for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1945 if (IntvMap[I] == 1) {
1946 ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1947 LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1948 }
1949 LLVM_DEBUG(dbgs() << '\n');
1950 }
1951 ++NumLocalSplits;
1952
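  // Returning an invalid register is deliberate: the split pieces are in
  // NewVRegs and will be allocated in later rounds.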
1953 return MCRegister();
1954}
1955
1956//===----------------------------------------------------------------------===//
1957// Live Range Splitting
1958//===----------------------------------------------------------------------===//
1959
1960/// trySplit - Try to split VirtReg or one of its interferences, making it
1961/// assignable.
1962/// @return PhysReg when VirtReg may be assigned, and/or new ranges in NewVRegs.
1963MCRegister RAGreedy::trySplit(const LiveInterval &VirtReg,
1964 AllocationOrder &Order,
1965 SmallVectorImpl<Register> &NewVRegs,
1966 const SmallVirtRegSet &FixedRegisters) {
1967 // Ranges must be Split2 or less.
1968 if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1969 return MCRegister();
1970
1971 // Local intervals are handled separately.
1972 if (LIS->intervalIsInOneMBB(VirtReg)) {
1973 NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1974                       TimerGroupDescription, TimePassesIsEnabled);
1975    SA->analyze(&VirtReg);
1976 MCRegister PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1977 if (PhysReg || !NewVRegs.empty())
1978 return PhysReg;
1979 return tryInstructionSplit(VirtReg, Order, NewVRegs);
1980 }
1981
1982 NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1983                     TimerGroupDescription, TimePassesIsEnabled);
1984
1985 SA->analyze(&VirtReg);
1986
1987 // First try to split around a region spanning multiple blocks. RS_Split2
1988 // ranges already made dubious progress with region splitting, so they go
1989 // straight to single block splitting.
1990 if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1991 MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1992 if (PhysReg || !NewVRegs.empty())
1993 return PhysReg;
1994 }
1995
1996 // Then isolate blocks.
1997 return tryBlockSplit(VirtReg, Order, NewVRegs);
1998}
1999
2000//===----------------------------------------------------------------------===//
2001// Last Chance Recoloring
2002//===----------------------------------------------------------------------===//
2003
2004/// Return true if \p reg has any tied def operand.
2005static bool hasTiedDef(MachineRegisterInfo *MRI, Register reg) {
2006  for (const MachineOperand &MO : MRI->def_operands(reg))
2007 if (MO.isTied())
2008 return true;
2009
2010 return false;
2011}
2012
2013/// Return true if the existing assignment of \p Intf overlaps, but is not the
2014/// same, as \p PhysReg.
2015static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
2016                                         const VirtRegMap &VRM,
2017 MCRegister PhysReg,
2018 const LiveInterval &Intf) {
2019 MCRegister AssignedReg = VRM.getPhys(Intf.reg());
2020 if (PhysReg == AssignedReg)
2021 return false;
2022 return TRI.regsOverlap(PhysReg, AssignedReg);
2023}
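// For example (illustrative): register tuples such as Q0_Q1 and Q1_Q2 on an
// AArch64-like target overlap in Q1 without being identical, so Q0_Q1
// "partially overlaps" an assignment of Q1_Q2.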
2024
2025/// mayRecolorAllInterferences - Check if the virtual registers that
2026/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
2027/// recolored to free \p PhysReg.
2028/// When true is returned, \p RecoloringCandidates has been augmented with all
2029/// the live intervals that need to be recolored in order to free \p PhysReg
2030/// for \p VirtReg.
2031/// \p FixedRegisters contains all the virtual registers that cannot be
2032/// recolored.
2033bool RAGreedy::mayRecolorAllInterferences(
2034 MCRegister PhysReg, const LiveInterval &VirtReg,
2035 SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
2036 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
2037
2038 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
2039 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
2040 // If there is LastChanceRecoloringMaxInterference or more interferences,
2041 // chances are one would not be recolorable.
2042    if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
2043            LastChanceRecoloringMaxInterference &&
2044        !ExhaustiveSearch) {
2045      LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
2046 CutOffInfo |= CO_Interf;
2047 return false;
2048 }
2049 for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
2050 // If Intf is done and sits on the same register class as VirtReg, it
2051 // would not be recolorable as it is in the same state as
2052 // VirtReg. However there are at least two exceptions.
2053 //
2054 // If VirtReg has tied defs and Intf doesn't, then
2055 // there is still a point in examining if it can be recolorable.
2056 //
2057 // Additionally, if the register class has overlapping tuple members, it
2058 // may still be recolorable using a different tuple. This is more likely
2059 // if the existing assignment aliases with the candidate.
2060 //
2061 if (((ExtraInfo->getStage(*Intf) == RS_Done &&
2062 MRI->getRegClass(Intf->reg()) == CurRC &&
2063 !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
2064 !(hasTiedDef(MRI, VirtReg.reg()) &&
2065 !hasTiedDef(MRI, Intf->reg()))) ||
2066 FixedRegisters.count(Intf->reg())) {
2067 LLVM_DEBUG(
2068 dbgs() << "Early abort: the interference is not recolorable.\n");
2069 return false;
2070 }
2071 RecoloringCandidates.insert(Intf);
2072 }
2073 }
2074 return true;
2075}
2076
2077/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
2078/// its interferences.
2079/// Last chance recoloring chooses a color for \p VirtReg and recolors every
2080/// virtual register that was using it. The recoloring process may recursively
2081/// use the last chance recoloring. Therefore, when a virtual register has been
2082/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
2083/// be last-chance-recolored again during this recoloring "session".
2084/// E.g.,
2085/// Let
2086/// vA can use {R1, R2 }
2087/// vB can use { R2, R3}
2088/// vC can use {R1 }
2089/// Where vA, vB, and vC cannot be split anymore (they are reloads for
2090/// instance) and they all interfere.
2091///
2092/// vA is assigned R1
2093/// vB is assigned R2
2094/// vC tries to evict vA but vA is already done.
2095/// Regular register allocation fails.
2096///
2097/// Last chance recoloring kicks in:
2098/// vC does as if vA was evicted => vC uses R1.
2099/// vC is marked as fixed.
2100/// vA needs to find a color.
2101/// None are available.
2102/// vA cannot evict vC: vC is a fixed virtual register now.
2103/// vA does as if vB was evicted => vA uses R2.
2104/// vB needs to find a color.
2105/// R3 is available.
2106/// Recoloring => vC = R1, vA = R2, vB = R3
2107///
2108/// \p Order defines the preferred allocation order for \p VirtReg.
2109/// \p NewRegs will contain any new virtual register that have been created
2110/// (split, spill) during the process and that must be assigned.
2111/// \p FixedRegisters contains all the virtual registers that cannot be
2112/// recolored.
2113///
2114/// \p RecolorStack tracks the original assignments of successfully recolored
2115/// registers.
2116///
2117/// \p Depth gives the current depth of the last chance recoloring.
2118/// \return a physical register that can be used for VirtReg or ~0u if none
2119/// exists.
2120MCRegister RAGreedy::tryLastChanceRecoloring(
2121 const LiveInterval &VirtReg, AllocationOrder &Order,
2122 SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters,
2123 RecoloringStack &RecolorStack, unsigned Depth) {
2124 if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
2125 return ~0u;
2126
2127 LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
2128
2129 const ssize_t EntryStackSize = RecolorStack.size();
2130
2131 // Ranges must be Done.
2132 assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
2133 "Last chance recoloring should really be last chance");
2134 // Set the max depth to LastChanceRecoloringMaxDepth.
2135 // We may want to reconsider that if we end up with a too large search space
2136 // for target with hundreds of registers.
2137 // Indeed, in that case we may want to cut the search space earlier.
2138  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
2139    LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
2140 CutOffInfo |= CO_Depth;
2141 return ~0u;
2142 }
2143
2144 // Set of Live intervals that will need to be recolored.
2145 SmallLISet RecoloringCandidates;
2146
2147  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
2148 // this recoloring "session".
2149 assert(!FixedRegisters.count(VirtReg.reg()));
2150 FixedRegisters.insert(VirtReg.reg());
2151 SmallVector<Register, 4> CurrentNewVRegs;
2152
2153 for (MCRegister PhysReg : Order) {
2154 assert(PhysReg.isValid());
2155 LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
2156 << printReg(PhysReg, TRI) << '\n');
2157 RecoloringCandidates.clear();
2158 CurrentNewVRegs.clear();
2159
2160 // It is only possible to recolor virtual register interference.
2161 if (Matrix->checkInterference(VirtReg, PhysReg) >
2162            LiveRegMatrix::IK_VirtReg) {
2163      LLVM_DEBUG(
2164 dbgs() << "Some interferences are not with virtual registers.\n");
2165
2166 continue;
2167 }
2168
2169 // Early give up on this PhysReg if it is obvious we cannot recolor all
2170 // the interferences.
2171 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2172 FixedRegisters)) {
2173 LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
2174 continue;
2175 }
2176
2177 // RecoloringCandidates contains all the virtual registers that interfere
2178 // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
2179 // recoloring and perform the actual recoloring.
2180 PQueue RecoloringQueue;
2181 for (const LiveInterval *RC : RecoloringCandidates) {
2182 Register ItVirtReg = RC->reg();
2183 enqueue(RecoloringQueue, RC);
2184 assert(VRM->hasPhys(ItVirtReg) &&
2185 "Interferences are supposed to be with allocated variables");
2186
2187 // Record the current allocation.
2188 RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
2189
2190      // Unassign the interval so that its physical register becomes free.
2191 Matrix->unassign(*RC);
2192 }
2193
2194 // Do as if VirtReg was assigned to PhysReg so that the underlying
2195    // recoloring has the right information about the interferences and
2196 // available colors.
2197 Matrix->assign(VirtReg, PhysReg);
2198
2199 // VirtReg may be deleted during tryRecoloringCandidates, save a copy.
2200 Register ThisVirtReg = VirtReg.reg();
2201
2202 // Save the current recoloring state.
2203 // If we cannot recolor all the interferences, we will have to start again
2204 // at this point for the next physical register.
2205 SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2206 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
2207 FixedRegisters, RecolorStack, Depth)) {
2208 // Push the queued vregs into the main queue.
2209 llvm::append_range(NewVRegs, CurrentNewVRegs);
2210      // Do not mess with the global assignment process.
2211 // I.e., VirtReg must be unassigned.
2212 if (VRM->hasPhys(ThisVirtReg)) {
2213 Matrix->unassign(VirtReg);
2214 return PhysReg;
2215 }
2216
2217 // It is possible VirtReg will be deleted during tryRecoloringCandidates.
2218 LLVM_DEBUG(dbgs() << "tryRecoloringCandidates deleted a fixed register "
2219 << printReg(ThisVirtReg) << '\n');
2220 FixedRegisters.erase(ThisVirtReg);
2221 return MCRegister();
2222 }
2223
2224 LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2225 << printReg(PhysReg, TRI) << '\n');
2226
2227 // The recoloring attempt failed, undo the changes.
2228 FixedRegisters = SaveFixedRegisters;
2229 Matrix->unassign(VirtReg);
2230
2231 // For a newly created vreg which is also in RecoloringCandidates,
2232 // don't add it to NewVRegs because its physical register will be restored
2233 // below. Other vregs in CurrentNewVRegs are created by calling
2234 // selectOrSplit and should be added into NewVRegs.
2235 for (Register R : CurrentNewVRegs) {
2236 if (RecoloringCandidates.count(&LIS->getInterval(R)))
2237 continue;
2238 NewVRegs.push_back(R);
2239 }
2240
2241 // Roll back our unsuccessful recoloring. Also roll back any successful
2242 // recolorings in any recursive recoloring attempts, since it's possible
2243 // they would have introduced conflicts with assignments we will be
2244 // restoring further up the stack. Perform all unassignments prior to
2245 // reassigning, since sub-recolorings may have conflicted with the registers
2246 // we are going to restore to their original assignments.
2247 for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
2248 const LiveInterval *LI;
2249 MCRegister PhysReg;
2250 std::tie(LI, PhysReg) = RecolorStack[I];
2251
2252 if (VRM->hasPhys(LI->reg()))
2253 Matrix->unassign(*LI);
2254 }
2255
2256 for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
2257 const LiveInterval *LI;
2258 MCRegister PhysReg;
2259 std::tie(LI, PhysReg) = RecolorStack[I];
2260 if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
2261 Matrix->assign(*LI, PhysReg);
2262 }
2263
2264 // Pop the stack of recoloring attempts.
2265 RecolorStack.resize(EntryStackSize);
2266 }
2267
2268  // Last chance recoloring did not work either; give up.
2269 return ~0u;
2270}
2271
2272/// tryRecoloringCandidates - Try to assign a new color to every register
2273/// in \RecoloringQueue.
2274/// \p NewRegs will contain any new virtual register created during the
2275/// recoloring process.
2276/// \p FixedRegisters[in/out] contains all the registers that have been
2277/// recolored.
2278/// \return true if all virtual registers in RecoloringQueue were successfully
2279/// recolored, false otherwise.
2280bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2281 SmallVectorImpl<Register> &NewVRegs,
2282 SmallVirtRegSet &FixedRegisters,
2283 RecoloringStack &RecolorStack,
2284 unsigned Depth) {
2285 while (!RecoloringQueue.empty()) {
2286 const LiveInterval *LI = dequeue(RecoloringQueue);
2287 LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2288 MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2289 RecolorStack, Depth + 1);
2290 // When splitting happens, the live-range may actually be empty.
2291    // In that case, it is okay to continue the recoloring even
2292 // if we did not find an alternative color for it. Indeed,
2293 // there will not be anything to color for LI in the end.
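    // Note the tri-state result: ~0u is a hard failure that aborts the whole
    // queue, an invalid register is acceptable only for an empty live range,
    // and any other value is the color that was successfully assigned.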
2294 if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2295 return false;
2296
2297 if (!PhysReg) {
2298      assert(LI->empty() && "Only an empty live-range does not require a register");
2299 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2300 << " succeeded. Empty LI.\n");
2301 continue;
2302 }
2303 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2304 << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2305
2306 Matrix->assign(*LI, PhysReg);
2307 FixedRegisters.insert(LI->reg());
2308 }
2309 return true;
2310}
2311
2312//===----------------------------------------------------------------------===//
2313// Main Entry Point
2314//===----------------------------------------------------------------------===//
2315
2316MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
2317                                   SmallVectorImpl<Register> &NewVRegs) {
2318 CutOffInfo = CO_None;
2319 LLVMContext &Ctx = MF->getFunction().getContext();
2320 SmallVirtRegSet FixedRegisters;
2321 RecoloringStack RecolorStack;
2322 MCRegister Reg =
2323 selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2324 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2325 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2326 if (CutOffEncountered == CO_Depth)
2327 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2328 "reached. Use -fexhaustive-register-search to skip "
2329 "cutoffs");
2330 else if (CutOffEncountered == CO_Interf)
2331 Ctx.emitError("register allocation failed: maximum interference for "
2332 "recoloring reached. Use -fexhaustive-register-search "
2333 "to skip cutoffs");
2334 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2335 Ctx.emitError("register allocation failed: maximum interference and "
2336 "depth for recoloring reached. Use "
2337 "-fexhaustive-register-search to skip cutoffs");
2338 }
2339 return Reg;
2340}
2341
2342/// Using a CSR for the first time has a cost because it causes push|pop
2343/// to be added to prologue|epilogue. Splitting a cold section of the live
2344/// range can have lower cost than using the CSR for the first time;
2345/// likewise, spilling a live range in the cold path can have lower cost than
2346/// using the CSR for the first time. Returns the physical register if we
2347/// decide to use the CSR; otherwise returns MCRegister().
2348MCRegister RAGreedy::tryAssignCSRFirstTime(
2349 const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2350 uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2351 if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2352 // We choose spill over using the CSR for the first time if the spill cost
2353 // is lower than CSRCost.
2354 SA->analyze(&VirtReg);
2355 if (calcSpillCost() >= CSRCost)
2356 return PhysReg;
2357
2358 // We are going to spill, set CostPerUseLimit to 1 to make sure that
2359 // we will not use a callee-saved register in tryEvict.
2360 CostPerUseLimit = 1;
2361 return MCRegister();
2362 }
2363 if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2364 // We choose pre-splitting over using the CSR for the first time if
2365 // the cost of splitting is lower than CSRCost.
2366 SA->analyze(&VirtReg);
2367 unsigned NumCands = 0;
2368 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2369 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2370 NumCands, true /*IgnoreCSR*/);
2371 if (BestCand == NoCand)
2372 // Use the CSR if we can't find a region split below CSRCost.
2373 return PhysReg;
2374
2375 // Perform the actual pre-splitting.
2376 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2377 return MCRegister();
2378 }
2379 return PhysReg;
2380}
2381
2382void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
2383  // Do not keep invalid information around.
2384 SetOfBrokenHints.remove(&LI);
2385}
2386
2387void RAGreedy::initializeCSRCost() {
2388 // We use the command-line option if it is explicitly set, otherwise use the
2389 // larger one out of the command-line option and the value reported by TRI.
2390 CSRCost = BlockFrequency(
2391 CSRFirstTimeCost.getNumOccurrences()
2392          ? CSRFirstTimeCost
2393          : std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2394 if (!CSRCost.getFrequency())
2395 return;
2396
2397 // Raw cost is relative to Entry == 2^14; scale it appropriately.
2398 uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency();
2399 if (!ActualEntry) {
2400 CSRCost = BlockFrequency(0);
2401 return;
2402 }
2403 uint64_t FixedEntry = 1 << 14;
2404 if (ActualEntry < FixedEntry)
2405 CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2406 else if (ActualEntry <= UINT32_MAX)
2407 // Invert the fraction and divide.
2408 CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2409 else
2410 // Can't use BranchProbability in general, since it takes 32-bit numbers.
2411 CSRCost =
2412 BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry));
2413}
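// Illustrative scaling (assumed numbers): with a raw CSRCost of 4 and an
// actual entry frequency of 2^13, the final cost is 4 * (2^13 / 2^14) = 2.
// The raw cost is defined relative to Entry == 2^14, so this keeps it
// comparable to the function's real block frequencies.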
2414
2415/// Collect the hint info for \p Reg.
2416/// The results are stored into \p Out.
2417/// \p Out is not cleared before being populated.
2418void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2419 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2420
2421 for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(Reg)) {
2422 const MachineInstr &Instr = *Opnd.getParent();
2423 if (!Instr.isCopy() || Opnd.isImplicit())
2424 continue;
2425
2426 // Look for the other end of the copy.
2427 const MachineOperand &OtherOpnd = Instr.getOperand(Opnd.isDef());
2428 Register OtherReg = OtherOpnd.getReg();
2429 if (OtherReg == Reg)
2430 continue;
2431 unsigned OtherSubReg = OtherOpnd.getSubReg();
2432 unsigned SubReg = Opnd.getSubReg();
2433
2434 // Get the current assignment.
2435 MCRegister OtherPhysReg;
2436 if (OtherReg.isPhysical()) {
2437 if (OtherSubReg)
2438 OtherPhysReg = TRI->getMatchingSuperReg(OtherReg, OtherSubReg, RC);
2439 else if (SubReg)
2440 OtherPhysReg = TRI->getMatchingSuperReg(OtherReg, SubReg, RC);
2441 else
2442 OtherPhysReg = OtherReg;
2443 } else {
2444 OtherPhysReg = VRM->getPhys(OtherReg);
2445 // TODO: Should find matching superregister, but applying this in the
2446 // non-hint case currently causes regressions
2447
2448 if (SubReg && OtherSubReg && SubReg != OtherSubReg)
2449 continue;
2450 }
2451
2452 // Push the collected information.
2453 if (OtherPhysReg) {
2454 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2455 OtherPhysReg));
2456 }
2457 }
2458}
2459
2460/// Using the given \p List, compute the cost of the broken hints if
2461/// \p PhysReg was used.
2462/// \return The cost of \p List for \p PhysReg.
2463BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2464 MCRegister PhysReg) {
2465 BlockFrequency Cost = BlockFrequency(0);
2466 for (const HintInfo &Info : List) {
2467 if (Info.PhysReg != PhysReg)
2468 Cost += Info.Freq;
2469 }
2470 return Cost;
2471}
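// For example (illustrative): if List holds two hints to R1 with frequencies
// 10 and 4, getBrokenHintFreq(List, R2) returns 14 while
// getBrokenHintFreq(List, R1) returns 0; only copies whose hinted register
// differs from the candidate count as broken.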
2472
2473/// Using the register assigned to \p VirtReg, try to recolor
2474/// all the live ranges that are copy-related with \p VirtReg.
2475/// The recoloring is then propagated to all the live-ranges that have
2476/// been recolored and so on, until no more copies can be coalesced or
2477/// it is not profitable.
2478/// For a given live range, profitability is determined by the sum of the
2479/// frequencies of the non-identity copies it would introduce with the old
2480/// and new register.
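/// Termination is guaranteed by the Visited set below: each copy-related
/// virtual register is enqueued as a recoloring candidate at most once.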
2481void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2482 // We have a broken hint, check if it is possible to fix it by
2483 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2484 // some register and PhysReg may be available for the other live-ranges.
2485 HintsInfo Info;
2486 Register Reg = VirtReg.reg();
2487 MCRegister PhysReg = VRM->getPhys(Reg);
2488 // Start the recoloring algorithm from the input live-interval, then
2489 // it will propagate to the ones that are copy-related with it.
2490 SmallSet<Register, 4> Visited = {Reg};
2491 SmallVector<Register, 2> RecoloringCandidates = {Reg};
2492
2493 LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2494 << '(' << printReg(PhysReg, TRI) << ")\n");
2495
2496 do {
2497 Reg = RecoloringCandidates.pop_back_val();
2498
2499 MCRegister CurrPhys = VRM->getPhys(Reg);
2500
2501 // This may be a skipped register.
2502 if (!CurrPhys) {
2503      assert(!shouldAllocateRegister(Reg) &&
2504             "We have an unallocated variable which should have been handled");
2505 continue;
2506 }
2507
2508 // Get the live interval mapped with this virtual register to be able
2509 // to check for the interference with the new color.
2510 LiveInterval &LI = LIS->getInterval(Reg);
2511 // Check that the new color matches the register class constraints and
2512 // that it is free for this live range.
2513 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2514 Matrix->checkInterference(LI, PhysReg)))
2515 continue;
2516
2517 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2518 << ") is recolorable.\n");
2519
2520 // Gather the hint info.
2521 Info.clear();
2522 collectHintInfo(Reg, Info);
2523 // Check if recoloring the live-range will increase the cost of the
2524 // non-identity copies.
2525 if (CurrPhys != PhysReg) {
2526 LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2527 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2528 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2529 LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost)
2530 << "\nNew Cost: "
2531 << printBlockFreq(*MBFI, NewCopiesCost) << '\n');
2532 if (OldCopiesCost < NewCopiesCost) {
2533 LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2534 continue;
2535 }
2536 // At this point, the cost is either cheaper or equal. If it is
2537 // equal, we consider this is profitable because it may expose
2538 // more recoloring opportunities.
2539 LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2540 // Recolor the live-range.
2541 Matrix->unassign(LI);
2542 Matrix->assign(LI, PhysReg);
2543 }
2544 // Push all copy-related live-ranges to keep reconciling the broken
2545 // hints.
2546 for (const HintInfo &HI : Info) {
2547 // We cannot recolor physical register.
2548 if (HI.Reg.isVirtual() && Visited.insert(HI.Reg).second)
2549 RecoloringCandidates.push_back(HI.Reg);
2550 }
2551 } while (!RecoloringCandidates.empty());
2552}
2553
2554/// Try to recolor broken hints.
2555/// Broken hints may be repaired by recoloring when an evicted variable
2556/// freed up a register for a larger live-range.
2557/// Consider the following example:
2558/// BB1:
2559/// a =
2560/// b =
2561/// BB2:
2562/// ...
2563/// = b
2564/// = a
2565/// Let us assume b gets split:
2566/// BB1:
2567/// a =
2568/// b =
2569/// BB2:
2570/// c = b
2571/// ...
2572/// d = c
2573/// = d
2574/// = a
2575/// Because of how the allocation works, b, c, and d may be assigned different
2576/// colors. Now, if a gets evicted later:
2577/// BB1:
2578/// a =
2579/// st a, SpillSlot
2580/// b =
2581/// BB2:
2582/// c = b
2583/// ...
2584/// d = c
2585/// = d
2586/// e = ld SpillSlot
2587/// = e
2588/// It is likely that we can assign the same register to b, c, and d,
2589/// getting rid of 2 copies.
2590void RAGreedy::tryHintsRecoloring() {
2591 for (const LiveInterval *LI : SetOfBrokenHints) {
2592 assert(LI->reg().isVirtual() &&
2593 "Recoloring is possible only for virtual registers");
2594 // Some dead defs may be around (e.g., because of debug uses).
2595 // Ignore those.
2596 if (!VRM->hasPhys(LI->reg()))
2597 continue;
2598 tryHintRecoloring(*LI);
2599 }
2600}
2601
2602MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2603 SmallVectorImpl<Register> &NewVRegs,
2604 SmallVirtRegSet &FixedRegisters,
2605 RecoloringStack &RecolorStack,
2606 unsigned Depth) {
2607 uint8_t CostPerUseLimit = uint8_t(~0u);
2608 // First try assigning a free register.
2609 auto Order =
2610      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
2611  if (MCRegister PhysReg =
2612 tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2613 // When NewVRegs is not empty, we may have made decisions such as evicting
2614 // a virtual register, go with the earlier decisions and use the physical
2615 // register.
2616 if (CSRCost.getFrequency() &&
2617 EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2618 MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2619 CostPerUseLimit, NewVRegs);
2620 if (CSRReg || !NewVRegs.empty())
2621 // Return now if we decide to use a CSR or create new vregs due to
2622 // pre-splitting.
2623 return CSRReg;
2624 } else
2625 return PhysReg;
2626 }
2627  // Non-empty NewVRegs means VirtReg has been split.
2628 if (!NewVRegs.empty())
2629 return MCRegister();
2630
2631 LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2632 LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2633 << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2634
2635 // Try to evict a less worthy live range, but only for ranges from the primary
2636 // queue. The RS_Split ranges already failed to do this, and they should not
2637 // get a second chance until they have been split.
2638 if (Stage != RS_Split) {
2639 if (MCRegister PhysReg =
2640 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2641 FixedRegisters)) {
2642 Register Hint = MRI->getSimpleHint(VirtReg.reg());
2643 // If VirtReg has a hint and that hint is broken record this
2644 // virtual register as a recoloring candidate for broken hint.
2645 // Indeed, since we evicted a variable in its neighborhood it is
2646 // likely we can at least partially recolor some of the
2647 // copy-related live-ranges.
2648 if (Hint && Hint != PhysReg)
2649 SetOfBrokenHints.insert(&VirtReg);
2650 return PhysReg;
2651 }
2652 }
2653
2654 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2655
2656 // The first time we see a live range, don't try to split or spill.
2657 // Wait until the second time, when all smaller ranges have been allocated.
2658 // This gives a better picture of the interference to split around.
2659 if (Stage < RS_Split) {
2660 ExtraInfo->setStage(VirtReg, RS_Split);
2661 LLVM_DEBUG(dbgs() << "wait for second round\n");
2662 NewVRegs.push_back(VirtReg.reg());
2663 return MCRegister();
2664 }
2665
2666 if (Stage < RS_Spill && !VirtReg.empty()) {
2667 // Try splitting VirtReg or interferences.
2668 unsigned NewVRegSizeBefore = NewVRegs.size();
2669 MCRegister PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2670 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2671 return PhysReg;
2672 }
2673
2674 // If we couldn't allocate a register from spilling, there is probably some
2675 // invalid inline assembly. The base class will report it.
2676 if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2677 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2678 RecolorStack, Depth);
2679 }
2680
2681 // Finally spill VirtReg itself.
2682 NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2683                     TimerGroupDescription, TimePassesIsEnabled);
2684  LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2685 spiller().spill(LRE, &Order);
2686 ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2687
2688 // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
2689 // the new regs are kept in LDV (still mapping to the old register), until
2690 // we rewrite spilled locations in LDV at a later stage.
2691 for (Register r : spiller().getSpilledRegs())
2692 DebugVars->splitRegister(r, LRE.regs(), *LIS);
2693 for (Register r : spiller().getReplacedRegs())
2694 DebugVars->splitRegister(r, LRE.regs(), *LIS);
2695
2696 if (VerifyEnabled)
2697 MF->verify(LIS, Indexes, "After spilling", &errs());
2698
2699 // The live virtual register requesting allocation was spilled, so tell
2700 // the caller not to allocate anything during this round.
2701 return MCRegister();
2702}
2703
2704void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2705 using namespace ore;
2706 if (Spills) {
2707 R << NV("NumSpills", Spills) << " spills ";
2708 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2709 }
2710 if (FoldedSpills) {
2711 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2712 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2713 << " total folded spills cost ";
2714 }
2715 if (Reloads) {
2716 R << NV("NumReloads", Reloads) << " reloads ";
2717 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2718 }
2719 if (FoldedReloads) {
2720 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2721 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2722 << " total folded reloads cost ";
2723 }
2724 if (ZeroCostFoldedReloads)
2725 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2726 << " zero cost folded reloads ";
2727 if (Copies) {
2728 R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2729 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2730 }
2731}
2732
2733RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2734 RAGreedyStats Stats;
2735 const MachineFrameInfo &MFI = MF->getFrameInfo();
2736 int FI;
2737
2738 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2739    return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2740        A->getPseudoValue())->getFrameIndex());
2741 };
2742 auto isPatchpointInstr = [](const MachineInstr &MI) {
2743 return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2744 MI.getOpcode() == TargetOpcode::STACKMAP ||
2745 MI.getOpcode() == TargetOpcode::STATEPOINT;
2746 };
2747 for (MachineInstr &MI : MBB) {
2748 auto DestSrc = TII->isCopyInstr(MI);
2749 if (DestSrc) {
2750 const MachineOperand &Dest = *DestSrc->Destination;
2751 const MachineOperand &Src = *DestSrc->Source;
2752 Register SrcReg = Src.getReg();
2753 Register DestReg = Dest.getReg();
2754 // Only count `COPY`s with a virtual register as source or destination.
2755 if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2756 if (SrcReg.isVirtual()) {
2757 SrcReg = VRM->getPhys(SrcReg);
2758 if (SrcReg && Src.getSubReg())
2759 SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2760 }
2761 if (DestReg.isVirtual()) {
2762 DestReg = VRM->getPhys(DestReg);
2763 if (DestReg && Dest.getSubReg())
2764 DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2765 }
2766 if (SrcReg != DestReg)
2767 ++Stats.Copies;
2768 }
2769 continue;
2770 }
2771
2772 SmallVector<const MachineMemOperand *, 2> Accesses;
2773 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2774 ++Stats.Reloads;
2775 continue;
2776 }
2777 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2778 ++Stats.Spills;
2779 continue;
2780 }
2781 if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2782 llvm::any_of(Accesses, isSpillSlotAccess)) {
2783 if (!isPatchpointInstr(MI)) {
2784 Stats.FoldedReloads += Accesses.size();
2785 continue;
2786 }
2787 // For statepoint there may be folded and zero cost folded stack reloads.
2788 std::pair<unsigned, unsigned> NonZeroCostRange =
2789 TII->getPatchpointUnfoldableRange(MI);
2790 SmallSet<unsigned, 16> FoldedReloads;
2791 SmallSet<unsigned, 16> ZeroCostFoldedReloads;
2792 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2793 MachineOperand &MO = MI.getOperand(Idx);
2794 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2795 continue;
2796 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2797 FoldedReloads.insert(MO.getIndex());
2798 else
2799 ZeroCostFoldedReloads.insert(MO.getIndex());
2800 }
2801    // A stack slot also used by a folded reload is not zero cost after all.
2802 for (unsigned Slot : FoldedReloads)
2803 ZeroCostFoldedReloads.erase(Slot);
2804 Stats.FoldedReloads += FoldedReloads.size();
2805 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2806 continue;
2807 }
2808 Accesses.clear();
2809 if (TII->hasStoreToStackSlot(MI, Accesses) &&
2810 llvm::any_of(Accesses, isSpillSlotAccess)) {
2811 Stats.FoldedSpills += Accesses.size();
2812 }
2813 }
2814  // Weight each collected statistic by the relative frequency of this basic
2815  // block.
2816 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2817 Stats.ReloadsCost = RelFreq * Stats.Reloads;
2818 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2819 Stats.SpillsCost = RelFreq * Stats.Spills;
2820 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2821 Stats.CopiesCost = RelFreq * Stats.Copies;
2822 return Stats;
2823}
2824
2825RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2826 RAGreedyStats Stats;
2827
2828 // Sum up the spill and reloads in subloops.
2829 for (MachineLoop *SubLoop : *L)
2830 Stats.add(reportStats(SubLoop));
2831
2832 for (MachineBasicBlock *MBB : L->getBlocks())
2833 // Handle blocks that were not included in subloops.
2834 if (Loops->getLoopFor(MBB) == L)
2835 Stats.add(computeStats(*MBB));
2836
2837 if (!Stats.isEmpty()) {
2838 using namespace ore;
2839
2840 ORE->emit([&]() {
2841 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2842 L->getStartLoc(), L->getHeader());
2843 Stats.report(R);
2844 R << "generated in loop";
2845 return R;
2846 });
2847 }
2848 return Stats;
2849}
2850
2851void RAGreedy::reportStats() {
2852 if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2853 return;
2854 RAGreedyStats Stats;
2855 for (MachineLoop *L : *Loops)
2856 Stats.add(reportStats(L));
2857 // Process non-loop blocks.
2858 for (MachineBasicBlock &MBB : *MF)
2859 if (!Loops->getLoopFor(&MBB))
2860 Stats.add(computeStats(MBB));
2861 if (!Stats.isEmpty()) {
2862 using namespace ore;
2863
2864 ORE->emit([&]() {
2865 DebugLoc Loc;
2866 if (auto *SP = MF->getFunction().getSubprogram())
2867 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2868 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2869 &MF->front());
2870 Stats.report(R);
2871 R << "generated in function";
2872 return R;
2873 });
2874 }
2875}
2876
2877bool RAGreedy::hasVirtRegAlloc() {
2878 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2878    Register Reg = Register::index2VirtReg(I);
2880    if (MRI->reg_nodbg_empty(Reg))
2881 continue;
2882    if (shouldAllocateRegister(Reg))
2883      return true;
2884 }
2885
2886 return false;
2887}
2888
2889bool RAGreedy::run(MachineFunction &mf) {
2890  LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2891 << "********** Function: " << mf.getName() << '\n');
2892
2893 MF = &mf;
2894 TII = MF->getSubtarget().getInstrInfo();
2895
2896 if (VerifyEnabled)
2897 MF->verify(LIS, Indexes, "Before greedy register allocator", &errs());
2898
2899 RegAllocBase::init(*this->VRM, *this->LIS, *this->Matrix);
2900
2901 // Early return if there is no virtual register to be allocated to a
2902 // physical register.
2903 if (!hasVirtRegAlloc())
2904 return false;
2905
2906 // Renumber to get accurate and consistent results from
2907 // SlotIndexes::getApproxInstrDistance.
2908 Indexes->packIndexes();
2909
2910 initializeCSRCost();
2911
2912 RegCosts = TRI->getRegisterCosts(*MF);
2913 RegClassPriorityTrumpsGlobalness =
2914 GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2915          ? GreedyRegClassPriorityTrumpsGlobalness
2916          : TRI->regClassPriorityTrumpsGlobalness(*MF);
2917
2918 ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2919                               ? GreedyReverseLocalAssignment
2920                               : TRI->reverseLocalAssignment();
2921
2922 ExtraInfo.emplace();
2923
2924 EvictAdvisor = EvictProvider->getAdvisor(*MF, *this, MBFI, Loops);
2925 PriorityAdvisor = PriorityProvider->getAdvisor(*MF, *this, *Indexes);
2926
2927 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2928 SpillerInstance.reset(createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF,
2929 *VRM, *VRAI, Matrix));
2930
2931 VRAI->calculateSpillWeightsAndHints();
2932
2933 LLVM_DEBUG(LIS->dump());
2934
2935 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2936 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2937
2938 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2939 GlobalCand.resize(32); // This will grow as needed.
2940 SetOfBrokenHints.clear();
2941
2942  allocatePhysRegs();
2943  tryHintsRecoloring();
2944
2945 if (VerifyEnabled)
2946 MF->verify(LIS, Indexes, "Before post optimization", &errs());
2947  postOptimization();
2948  reportStats();
2949
2950 releaseMemory();
2951 return true;
2952}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
DXIL Forward Handle Accesses
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file implements an indexed map.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Live Register Matrix
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
block placement Basic Block Placement Stats
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static bool hasTiedDef(MachineRegisterInfo *MRI, Register reg)
Return true if reg has any tied def operand.
static cl::opt< bool > GreedyRegClassPriorityTrumpsGlobalness("greedy-regclass-priority-trumps-globalness", cl::desc("Change the greedy register allocator's live range priority " "calculation to make the AllocationPriority of the register class " "more important then whether the range is global"), cl::Hidden)
static cl::opt< bool > ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring"), cl::Hidden)
const float Hysteresis
static cl::opt< unsigned > LastChanceRecoloringMaxInterference("lcr-max-interf", cl::Hidden, cl::desc("Last chance recoloring maximum number of considered" " interference at a time"), cl::init(8))
static bool readsLaneSubset(const MachineRegisterInfo &MRI, const MachineInstr *MI, const LiveInterval &VirtReg, const TargetRegisterInfo *TRI, SlotIndex Use, const TargetInstrInfo *TII)
Return true if MI at \P Use reads a subset of the lanes live in VirtReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, const VirtRegMap &VRM, MCRegister PhysReg, const LiveInterval &Intf)
Return true if the existing assignment of Intf overlaps, but is not the same, as PhysReg.
static cl::opt< unsigned > CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, cl::desc("Last chance recoloring max depth"), cl::init(5))
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator)
static cl::opt< unsigned long > GrowRegionComplexityBudget("grow-region-complexity-budget", cl::desc("growRegion() does not scale with the number of BB edges, so " "limit its budget and bail out once we reach the limit."), cl::init(10000), cl::Hidden)
static cl::opt< unsigned > SplitThresholdForRegWithHint("split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " "percentage"), cl::init(75), cl::Hidden)
static cl::opt< SplitEditor::ComplementSpillMode > SplitSpillMode("split-spill-mode", cl::Hidden, cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed))
static unsigned getNumAllocatableRegsForConstraints(const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI)
Get the number of allocatable registers that match the constraints of Reg on MI and that are also in ...
static cl::opt< bool > GreedyReverseLocalAssignment("greedy-reverse-local-assignment", cl::desc("Reverse allocation order of local live ranges, such that " "shorter local live ranges will tend to be allocated first"), cl::Hidden)
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineInstr &FirstMI, Register Reg)
Remove Loads Into Fake Uses
SI Lower i1 Copies
SI optimize exec mask operations pre RA
SI Optimize VGPR LiveRange
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM)
bool isHint(Register Reg) const
Return true if Reg is a preferred physical register.
ArrayRef< MCPhysReg > getOrder() const
Get the allocation order without reordered hints.
Iterator end() const
static AllocationOrder create(Register VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo, const LiveRegMatrix *Matrix)
Create a new AllocationOrder for VirtReg.
Iterator begin() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Cursor - The primary query interface for the block interference cache.
SlotIndex first()
first - Return the starting index of the first interfering range in the current block.
SlotIndex last()
last - Return the ending index of the last interfering range in the current block.
bool hasInterference()
hasInterference - Return true if the current block has any interference.
void moveToBlock(unsigned MBBNum)
moveTo - Move cursor to basic block MBBNum.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Query interferences between a single live virtual register and a live interval union.
const SmallVectorImpl< const LiveInterval * > & interferingVRegs(unsigned MaxInterferingRegs=std::numeric_limits< unsigned >::max())
LiveSegments::iterator SegmentIter
A live range for subregisters.
LiveInterval - This class represents the liveness of a register, or stack slot.
Register reg() const
bool isSpillable() const
isSpillable - Can this interval be spilled?
bool hasSubRanges() const
Returns true if subregister liveness information is available.
LLVM_ABI unsigned getSize() const
getSize - Returns the sum of sizes of all the LiveRange's.
iterator_range< subrange_iterator > subranges()
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
LiveInterval & getInterval(Register Reg)
unsigned size() const
Register get(unsigned idx) const
ArrayRef< Register > regs() const
iterator end() const
iterator begin() const
Segments::const_iterator const_iterator
bool liveAt(SlotIndex index) const
bool empty() const
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
@ IK_VirtReg
Virtual register interference.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
static constexpr unsigned NoRegister
Definition MCRegister.h:60
constexpr unsigned id() const
Definition MCRegister.h:82
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1571
An RAII based helper class to modify MachineFunctionProperties when running pass.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Representation of each machine instruction.
bool isImplicitDef() const
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void LRE_DidCloneVirtReg(Register New, Register Old)
bool run(MachineFunction &mf)
Perform register allocation.
Spiller & spiller() override
MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl< Register > &) override
RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F=nullptr)
const LiveInterval * dequeue() override
dequeue - Return the next unassigned register, or NULL.
void enqueueImpl(const LiveInterval *LI) override
enqueueImpl - Add VirtReg to the priority queue of unassigned registers.
void aboutToRemoveInterval(const LiveInterval &) override
Method called when the allocator is about to remove a LiveInterval.
RegAllocBase(const RegAllocFilterFunc F=nullptr)
void enqueue(const LiveInterval *LI)
enqueue - Add VirtReg to the priority queue of unassigned registers.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat)
SmallPtrSet< MachineInstr *, 32 > DeadRemats
Inst which is a def of an original reg and whose defs are already all dead after remat is saved in DeadRemats.
const TargetRegisterInfo * TRI
LiveIntervals * LIS
static const char TimerGroupName[]
static const char TimerGroupDescription[]
LiveRegMatrix * Matrix
virtual void postOptimization()
VirtRegMap * VRM
RegisterClassInfo RegClassInfo
MachineRegisterInfo * MRI
bool shouldAllocateRegister(Register Reg)
Get whether a given register should be allocated.
static bool VerifyEnabled
VerifyEnabled - True when -verify-regalloc is given.
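Together these members describe the driver contract: dequeue pulls the next unassigned interval, selectOrSplit either yields a physical register or emits new virtual registers, and enqueue requeues the products. A hedged outline of that loop, not compilable as-is since these members live inside the pass:

// Outline only. RA stands for a RegAllocBase-derived allocator such as
// RAGreedy; Matrix and LIS are the members listed above.
while (const llvm::LiveInterval *LI = RA.dequeue()) {
  llvm::SmallVector<llvm::Register, 4> SplitVRegs;
  llvm::MCRegister PhysReg = RA.selectOrSplit(*LI, SplitVRegs);
  if (PhysReg.isValid())
    Matrix->assign(*LI, PhysReg);       // Commit the assignment.
  for (llvm::Register R : SplitVRegs)   // Requeue split/spill products.
    RA.enqueue(&LIS->getInterval(R));
}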
ImmutableAnalysis abstraction for fetching the Eviction Advisor.
A MachineFunction analysis for fetching the Eviction Advisor.
Common provider for legacy and new pass managers.
const TargetRegisterInfo *const TRI
std::optional< unsigned > getOrderLimit(const LiveInterval &VirtReg, const AllocationOrder &Order, unsigned CostPerUseLimit) const
const RegisterClassInfo & RegClassInfo
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const
Returns true if the given PhysReg is a callee-saved register and has not been used for allocation yet.
bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const
bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const
Common provider for getting the priority advisor and logging rewards.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
unsigned virtRegIndex() const
Convert a virtual register number to a 0-based index.
Definition Register.h:87
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
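The two namespaces are disjoint, so the index round-trip below only holds for virtual registers. A minimal sketch (isVirtualRoundTrip is a hypothetical helper):

#include "llvm/CodeGen/Register.h"

// Hypothetical helper: virtual registers survive the 0-based index
// round-trip; anything else should report as physical (or be invalid).
bool isVirtualRoundTrip(llvm::Register Reg) {
  if (!Reg.isVirtual())
    return Reg.isPhysical(); // asMCReg() would be valid in this branch.
  return llvm::Register::index2VirtReg(Reg.virtRegIndex()) == Reg;
}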
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
@ InstrDist
The default distance between instructions as returned by distance().
bool isValid() const
Returns true if this is a valid index.
SlotIndex getBoundaryIndex() const
Returns the boundary index associated with this index.
SlotIndex getBaseIndex() const
Returns the base index associated with this index.
int getApproxInstrDistance(SlotIndex other) const
Return the scaled distance from this index to the given one, where all slots on the same instruction have zero distance.
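A minimal sketch of the instruction-level comparisons (strictlyBefore is a hypothetical helper):

#include "llvm/CodeGen/SlotIndexes.h"

// Hypothetical helper: A is before B at instruction granularity, i.e.
// slots within one instruction compare as equal.
bool strictlyBefore(llvm::SlotIndex A, llvm::SlotIndex B) {
  if (!A.isValid() || !B.isValid())
    return false;
  return !llvm::SlotIndex::isSameInstr(A, B) &&
         llvm::SlotIndex::isEarlierInstr(A, B);
}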
SlotIndexes pass.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
size_type size() const
Definition SmallSet.h:170
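A minimal sketch of the SmallSet membership protocol (insertOnce is a hypothetical helper):

#include "llvm/ADT/SmallSet.h"

// Hypothetical helper: insert() reports whether the element was new,
// and count() behaves as a 0/1 membership test.
bool insertOnce(llvm::SmallSet<unsigned, 16> &Seen, unsigned V) {
  bool WasNew = Seen.insert(V).second; // false if V was already present
  return WasNew && Seen.count(V) == 1;
}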
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void assign(size_type NumElts, ValueParamT Elt)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
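A minimal sketch of the growth operations, written against SmallVectorImpl so the inline size stays out of the interface (refill is a hypothetical helper):

#include "llvm/ADT/SmallVector.h"

// Hypothetical helper: rebuild V from a fixed prefix plus a range.
void refill(llvm::SmallVectorImpl<int> &V, const int *B, const int *E) {
  V.assign(4, 0);  // Four copies of 0.
  V.append(B, E);  // Append the range [B, E).
  V.push_back(7);  // One more element at the end.
}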
@ MustSpill
A register is impossible, variable must be spilled.
@ DontCare
Block doesn't care / variable not live.
@ PrefReg
Block entry/exit prefers a register.
@ PrefSpill
Block entry/exit prefers a stack slot.
virtual void spill(LiveRangeEdit &LRE, AllocationOrder *Order=nullptr)=0
spill - Spill the LRE.getParent() live interval.
SplitAnalysis - Analyze a LiveInterval, looking for live range splitting opportunities.
Definition SplitKit.h:96
SplitEditor - Edit machine code and LiveIntervals for live range splitting.
Definition SplitKit.h:263
@ SM_Partition
SM_Partition (Default) - Try to create the complement interval so it doesn't overlap any other intervals.
Definition SplitKit.h:286
@ SM_Speed
SM_Speed - Overlap intervals to minimize the expected execution frequency of the inserted copies.
Definition SplitKit.h:298
@ SM_Size
SM_Size - Overlap intervals to minimize the number of inserted COPY instructions.
Definition SplitKit.h:293
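A minimal sketch of dispatching on the three modes, assuming the enum is SplitEditor::ComplementSpillMode as used by the -split-spill-mode option (describeMode is a hypothetical helper):

#include "SplitKit.h" // Internal header, visible inside lib/CodeGen.

// Hypothetical helper: the mode only selects how much the complement
// interval may overlap its siblings.
const char *describeMode(llvm::SplitEditor::ComplementSpillMode M) {
  switch (M) {
  case llvm::SplitEditor::SM_Partition: return "no overlap (default)";
  case llvm::SplitEditor::SM_Size:      return "fewest COPY instructions";
  case llvm::SplitEditor::SM_Speed:     return "cheapest copies at run time";
  }
  return "unknown";
}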
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
const uint8_t AllocationPriority
Classes with a higher priority value are assigned first by register allocators using a greedy heuristic.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const TargetInstrInfo * getInstrInfo() const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
MCRegister getPhys(Register virtReg) const
Returns the physical register mapped to the specified virtual register.
Definition VirtRegMap.h:91
bool hasPhys(Register virtReg) const
Returns true if the specified virtual register is mapped to a physical register.
Definition VirtRegMap.h:87
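A minimal sketch of querying the mapping, with hasPhys() guarding getPhys() (currentPhys is a hypothetical helper):

#include "llvm/CodeGen/VirtRegMap.h"

// Hypothetical helper: return the assigned physical register, or an
// invalid MCRegister if V is not mapped yet.
llvm::MCRegister currentPhys(const llvm::VirtRegMap &VRM, llvm::Register V) {
  return VRM.hasPhys(V) ? VRM.getPhys(V) : llvm::MCRegister();
}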
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
Pass manager infrastructure for declaring and invalidating analyses.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
std::function< bool(const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, const Register Reg)> RegAllocFilterFunc
Filter function for register classes during regalloc.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
Definition MathExtras.h:207
InstructionCost Cost
SmallSet< Register, 16 > SmallVirtRegSet
LLVM_ABI FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized builds.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
LLVM_ABI bool TimePassesIsEnabled
If the user specifies the -time-passes argument on an LLVM tool command line then the value of this boolean will be true, otherwise false.
Definition IRReader.cpp:27
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
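A minimal sketch of the range-based forms, which replace explicit begin()/end() pairs (allPositive is a hypothetical helper):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// Hypothetical helper: combine the predicate wrappers.
bool allPositive(const llvm::SmallVectorImpl<int> &V) {
  if (llvm::any_of(V, [](int X) { return X == 0; }))
    return false; // A zero disqualifies.
  return llvm::none_of(V, [](int X) { return X < 0; }) &&
         llvm::all_of(V, [](int X) { return X > 0; });
}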
@ RS_Split2
Attempt more aggressive live range splitting that is guaranteed to make progress.
@ RS_Spill
Live range will be spilled. No more splitting will be attempted.
@ RS_Split
Attempt live range splitting if assignment is impossible.
@ RS_New
Newly created live range that has never been queued.
@ RS_Done
There is nothing more we can do to this live range.
@ RS_Assign
Only attempt assignment and eviction. Then requeue as RS_Split.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
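The two helpers agree at the boundary: a value fits in N bits exactly when it is <= maxUIntN(N). A minimal sketch (demoBitWidth is a hypothetical helper):

#include "llvm/Support/MathExtras.h"
#include <cassert>

// Hypothetical helper: exercise both boundary checks.
void demoBitWidth() {
  static_assert(llvm::maxUIntN(8) == 255u, "2^8 - 1");
  assert(llvm::isUInt<8>(255) && !llvm::isUInt<8>(256));
}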
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI const float huge_valf
Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:2042
Spiller * createInlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix=nullptr)
Create and return a spiller that will insert spill code directly instead of deferring through VirtRegMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1915
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current function's entry frequency.
LLVM_ABI char & RAGreedyLegacyID
Greedy register allocator.
static float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr)
Normalize the spill weight of a live interval.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
MachineBlockFrequencyInfo * MBFI
RegAllocEvictionAdvisorProvider * EvictProvider
MachineOptimizationRemarkEmitter * ORE
RegAllocPriorityAdvisorProvider * PriorityProvider
constexpr bool any() const
Definition LaneBitmask.h:53
This class is basically a combination of TimeRegion and Timer.
Definition Timer.h:175
BlockConstraint - Entry and exit constraints for a basic block.
BorderConstraint Exit
Constraint on block exit.
bool ChangesValue
True when this block changes the value of the live range.
BorderConstraint Entry
Constraint on block entry.
unsigned Number
Basic block number (from MBB::getNumber()).
Additional information about basic blocks where the current variable is live.
Definition SplitKit.h:121
SlotIndex FirstDef
First non-phi valno->def, or SlotIndex().
Definition SplitKit.h:125
bool LiveOut
Current reg is live out.
Definition SplitKit.h:127
bool LiveIn
Current reg is live in.
Definition SplitKit.h:126
MachineBasicBlock * MBB
Definition SplitKit.h:122
SlotIndex LastInstr
Last instr accessing current reg.
Definition SplitKit.h:124
SlotIndex FirstInstr
First instr accessing current reg.
Definition SplitKit.h:123
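A hedged sketch of consuming one of these records, assuming BI comes from SplitAnalysis::getUseBlocks() (isLiveThrough is a hypothetical helper):

#include "SplitKit.h" // Internal header, visible inside lib/CodeGen.

// Hypothetical helper: a block the variable crosses completely has both
// LiveIn and LiveOut set; FirstInstr/LastInstr then bracket any uses,
// and FirstDef is valid only if the block contains a non-phi def.
bool isLiveThrough(const llvm::SplitAnalysis::BlockInfo &BI) {
  return BI.LiveIn && BI.LiveOut;
}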