//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocPriorityAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned> LastChanceRecoloringMaxDepth(
    "lcr-max-depth", cl::Hidden,
    cl::desc("Last chance recoloring max depth"), cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interferences at a time"),
    cl::init(8));

static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search", cl::NotHidden,
    cl::desc("Exhaustive Search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"),
    cl::Hidden);

static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
    CSRFirstTimeCost("regalloc-csr-first-time-cost",
                     cl::desc("Cost for first time use of callee-saved register."),
                     cl::init(0), cl::Hidden);

static cl::opt<unsigned long> GrowRegionComplexityBudget(
    "grow-region-complexity-budget",
    cl::desc("growRegion() does not scale with the number of BB edges, so "
             "limit its budget and bail out once we reach the limit."),
    cl::init(10000), cl::Hidden);

static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
    "greedy-regclass-priority-trumps-globalness",
    cl::desc("Change the greedy register allocator's live range priority "
             "calculation to make the AllocationPriority of the register class "
             "more important than whether the range is global"),
    cl::Hidden);

static cl::opt<bool> GreedyReverseLocalAssignment(
    "greedy-reverse-local-assignment",
    cl::desc("Reverse allocation order of local live ranges, such that "
             "shorter local live ranges will tend to be allocated first"),
    cl::Hidden);

static cl::opt<unsigned> SplitThresholdForRegWithHint(
    "split-threshold-for-reg-with-hint",
    cl::desc("The threshold for splitting a virtual register with a hint, in "
             "percentage"),
    cl::init(75), cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                      "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_END(RAGreedy, "greedy",
                    "Greedy Register Allocator", false, false)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
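
// Illustrative use (not from the original source): a challenger must beat the
// incumbent by roughly 2% before a decision flips, so values that differ only
// by floating-point noise keep the previous choice:
//
//   if (NewWeight * Hysteresis >= OldWeight) { /* prefer the new choice */ }
//
// tryLocalSplit() below applies this pattern when comparing EstWeight against
// MaxGap.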

FunctionPass *llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) {
  return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegAllocFilterFunc F)
    : MachineFunctionPass(ID), RegAllocBase(F) {
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  RegAllocBase::enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned.
  // Reset the parent to RS_Assign; the clone starts in the same stage as the
  // parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Ret = PriorityAdvisor->getPriority(*LI);

  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Ret, ~Reg));
}
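
// Worked example (not from the original source): PQueue is a max-heap keyed on
// (priority, ~reg). For equal priorities the larger second element wins; vreg
// 100 stores ~100 = 0xFFFFFF9B and vreg 200 stores ~200 = 0xFFFFFF37, so the
// numerically smaller vreg 100 is dequeued first. dequeue() recovers the
// register with ~CurQueue.top().second.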

unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  const unsigned Size = LI.getSize();
  const Register Reg = LI.reg();
  unsigned Prio;
  LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = RC.GlobalPriority ||
                       (!ReverseLocalAssignment &&
                        (Size / SlotIndex::InstrDist) >
                            (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
        LIS->intervalIsInOneMBB(LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges
      // that don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }

    // Priority bit layout:
    // 31 RS_Assign priority
    // 30 Preference priority
    // if (RegClassPriorityTrumpsGlobalness)
    //   29-25 AllocPriority
    //   24 GlobalBit
    // else
    //   29 Global bit
    //   28-24 AllocPriority
    // 0-23 Size/Instr distance

    // Clamp the size to fit with the priority masking scheme.
    Prio = std::min(Prio, (unsigned)maxUIntN(24));
    assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");

    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  return Prio;
}
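
// Worked example (not from the original source), using the default layout
// (RegClassPriorityTrumpsGlobalness == false): a global range with Size = 100,
// AllocationPriority = 2, and a known register preference gets
//   Prio = 100              // bits 0-23: size
//        | (1u << 29)       // GlobalBit
//        | (2u << 24)       // AllocationPriority
//        | (1u << 31)       // above RS_Split ranges
//        | (1u << 30)       // has a hint
//        = 0xE2000064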

const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
// Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }

      // We can also split the virtual register in cold blocks.
      if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
        return 0;

      // Record the missed hint, we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}

//===----------------------------------------------------------------------===//
// Interference eviction
//===----------------------------------------------------------------------===//

bool RAGreedy::canReassign(const LiveInterval &VirtReg,
                           MCRegister FromReg) const {
  auto HasRegUnitInterference = [&](MCRegUnit Unit) {
    // Instantiate a "subquery", not to be confused with the Queries array.
    LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
    return SubQ.checkInterference();
  };

  for (MCRegister Reg :
       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
    if (Reg == FromReg)
      continue;
    // If no units have interference, reassignment is possible.
    if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
      LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                        << printReg(FromReg, TRI) << " to "
                        << printReg(Reg, TRI) << '\n');
      return true;
    }
  }
  return false;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // We usually have the interfering VRegs cached, so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, because then different SubRanges were
    // queried against it.
    const auto &IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}
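
// Illustrative sketch (not from the original source) of how cascade numbers
// prevent eviction cycles: if %a carries cascade 3 and evicts %b from $r0,
// %b is re-queued carrying cascade 3 as well. %b may now evict only live
// ranges whose cascade is still below 3, so it can never push %a back out of
// $r0, ruling out an %a <-> %b eviction ping-pong.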

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

std::optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return std::nullopt;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}

bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
                                                 MCRegister PhysReg) const {
  if (RegCosts[PhysReg] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
// Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = BlockFrequency(0);
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start.
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}
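
// Illustrative example (not from the original source): in a block where the
// range is live-in and interference begins before the first use, the
// constraint becomes Entry == PrefSpill and one spill instruction is charged
// at the block's frequency, so SpillPlacement is biased against keeping the
// value in PhysReg across hot blocks.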

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start.
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(ArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(ArrayRef(BCS, B));
  SpillPlacer->addLinks(ArrayRef(TBS, T));
  return true;
}

bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else {
      // Provided that the variable being spilled does not look like a loop
      // induction variable, which is expensive to spill around and better
      // pushed into a condition inside the loop if possible, provide a strong
      // negative bias on through blocks to prevent unwanted liveness on loop
      // backedges.
      bool PrefSpill = true;
      if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) {
        // Check that the current bundle is adding a Header + start+end of
        // loop-internal blocks. If the block is indeed a header, don't mark
        // the NewBlocks as PrefSpill, to allow the variable to be live in
        // Header<->Latch.
        MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0]));
        if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] &&
            all_of(NewBlocks.drop_front(), [&](unsigned Block) {
              return L == Loops->getLoopFor(MF->getBlockNumbered(Block));
            }))
          PrefSpill = false;
      }
      if (PrefSpill)
        SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    }
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = BlockFrequency(0);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}
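
// Worked example (not from the original source): for use blocks B1 (freq 10),
// B2 (freq 100, live-in, live-out, and redefined), and B3 (freq 1), the
// estimate is 10 + 2 * 100 + 1 = 211 units, because B2 pays for both a reload
// and a store while B1 and B3 each need a single spill instruction.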

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = BlockFrequency(0);
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}
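
// Illustrative example (not from the original source): if the bundle solution
// brings the value into block 7 in a register (RegIn) but the block's own
// constraint prefers the stack at entry, one copy is charged at block 7's
// frequency. A live-through block that keeps the register on both sides
// despite interference is charged twice, matching the spill and reload it
// will need.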

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::max();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "
                      << printBlockFreq(*MBFI, BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned
RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            unsigned &BestCand) {
  // Discard bad candidates before we run out of interference cache cursors.
  // This will only affect register classes with a lot of registers (>32).
  if (NumCands == IntfCache.getMaxCursors()) {
    unsigned WorstCount = ~0u;
    unsigned Worst = 0;
    for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
      if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
        continue;
      unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
      if (Count < WorstCount) {
        Worst = CandIndex;
        WorstCount = Count;
      }
    }
    --NumCands;
    GlobalCand[Worst] = GlobalCand[NumCands];
    if (BestCand == NumCands)
      BestCand = Worst;
  }

  if (GlobalCand.size() <= NumCands)
    GlobalCand.resize(NumCands + 1);
  GlobalSplitCandidate &Cand = GlobalCand[NumCands];
  Cand.reset(IntfCache, PhysReg);

  SpillPlacer->prepare(Cand.LiveBundles);
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
    return BestCand;
  }
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
                    << "\tstatic = " << printBlockFreq(*MBFI, Cost));
  if (Cost >= BestCost) {
    LLVM_DEBUG({
      if (BestCand == NoCand)
        dbgs() << " worse than no bundles\n";
      else
        dbgs() << " worse than "
               << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
    });
    return BestCand;
  }
  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return BestCand;
  }

  SpillPlacer->finish();

  // No live bundles, defer to splitSingleBlocks().
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << " no bundles.\n");
    return BestCand;
  }

  Cost += calcGlobalSplitCost(Cand, Order);
  LLVM_DEBUG({
    dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles";
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  if (Cost < BestCost) {
    BestCand = NumCands;
    BestCost = Cost;
  }
  ++NumCands;

  return BestCand;
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
                                      BestCand);
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

// VirtReg has a physical Hint, this function tries to split VirtReg around
// Hint if we can place new COPY instructions in cold blocks.
bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                     const LiveInterval &VirtReg,
                                     SmallVectorImpl<Register> &NewVRegs,
                                     AllocationOrder &Order) {
  // Splitting VirtReg may generate COPY instructions in multiple cold basic
  // blocks and increase code size, so we avoid it when the function is
  // optimized for size.
  if (MF->getFunction().hasOptSize())
    return false;

  // Don't allow repeated splitting as a safeguard against looping.
  if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
    return false;

  BlockFrequency Cost = BlockFrequency(0);
  Register Reg = VirtReg.reg();

  // Compute the cost of assigning a non Hint physical register to VirtReg.
  // We define it as the total frequency of broken COPY instructions to/from
  // the Hint register; after splitting, those copies can be deleted.
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!TII->isFullCopyInstr(Instr))
      continue;
    Register OtherReg = Instr.getOperand(1).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(0).getReg();
      if (OtherReg == Reg)
        continue;
      // Check if VirtReg interferes with OtherReg after this COPY instruction.
      if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
        continue;
    }
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    if (OtherPhysReg == Hint)
      Cost += MBFI->getBlockFreq(Instr.getParent());
  }

  // Decrease the cost so it will be split in colder blocks.
  BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
  Cost *= Threshold;
  if (Cost == BlockFrequency(0))
    return false;

  unsigned NumCands = 0;
  unsigned BestCand = NoCand;
  SA->analyze(&VirtReg);
  calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
  if (BestCand == NoCand)
    return false;

  doRegionSplit(VirtReg, BestCand, false /*HasCompact*/, NewVRegs);
  return true;
}
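
// Worked example (not from the original source): with the default
// split-threshold-for-reg-with-hint of 75, broken copies to/from Hint with a
// total frequency of 1000 yield a budget of 750. The region split around Hint
// is performed only if its cost comes in under that budget, i.e. only when the
// new COPY instructions land in clearly colder blocks.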

//===----------------------------------------------------------------------===//
// Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges that will be split by tryLocalSplit if
/// they don't allocate.
unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
// Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
                                       const TargetRegisterInfo &TRI,
                                       const MachineInstr &FirstMI,
                                       Register Reg) {
  LaneBitmask Mask;
  SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
  (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);

  for (auto [MI, OpIdx] : Ops) {
    const MachineOperand &MO = MI->getOperand(OpIdx);
    assert(MO.isReg() && MO.getReg() == Reg);
    unsigned SubReg = MO.getSubReg();
    if (SubReg == 0 && MO.isUse()) {
      if (MO.isUndef())
        continue;
      return MRI.getMaxLaneMaskForVReg(Reg);
    }

    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
    if (MO.isDef()) {
      if (!MO.isUndef())
        Mask |= ~SubRegMask;
    } else
      Mask |= SubRegMask;
  }

  return Mask;
}
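
// Illustrative example (not from the original source): assume a virtual
// register whose full lane mask is 0xF and a hypothetical sub-register index
// sub_lo covering lanes 0x3. An instruction using only Reg.sub_lo contributes
// ReadMask = 0x3, while any full-register (no sub-register) use widens the
// result to getMaxLaneMaskForVReg(Reg) = 0xF.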

/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
                            const MachineInstr *MI, const LiveInterval &VirtReg,
                            const TargetRegisterInfo *TRI, SlotIndex Use,
                            const TargetInstrInfo *TII) {
  // Early check the common case. Beware of the semi-formed bundles SplitKit
  // creates by setting the bundle flag on copies without a matching BUNDLE.

  auto DestSrc = TII->isCopyInstr(*MI);
  if (DestSrc && !MI->isBundled() &&
      DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
    return false;

  // FIXME: We're only considering uses, but should we consider defs too?
  LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());

  LaneBitmask LiveAtMask;
  for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
    if (S.liveAt(Use))
      LiveAtMask |= S.LaneMask;
  }

  // If the live lanes aren't different from the lanes used by the instruction,
  // this doesn't help.
  return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                       AllocationOrder &Order,
                                       SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger super-classes to move
  // parts of the range into.

  bool SplitSubClass = true;
  if (!RegClassInfo.isProperSubClass(CurRC)) {
    if (!VirtReg.hasSubRanges())
      return 0;
    SplitSubClass = false;
  }

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
      if (TII->isFullCopyInstr(*MI) ||
          (SplitSubClass &&
           SuperRCNumAllocatableRegs ==
               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                   TII, TRI, RegClassInfo)) ||
          // TODO: Handle split for subranges with subclass constraints?
          (!SplitSubClass && VirtReg.hasSubRanges() &&
           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
        LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
        continue;
      }
    }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}
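
// Illustrative sketch (not from the original source), using x86 register
// classes: if %v needs the constrained GR32_ABCD class at only one
// instruction, splitting around that instruction yields
//   %v1 = COPY %v        ; tiny interval, constrained to GR32_ABCD
//   <constrained use of %v1>
//   %v = COPY %v1
// so the rest of %v's live range can be inflated to plain GR32 and has many
// more registers to choose from.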

//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size() - 1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[Unit].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap + 1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap + 1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    const LiveRange &LR = LIS->getRegUnit(Unit);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap + 1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap + 1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}
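
// Worked example (not from the original source): with uses U0 < U1 < U2 there
// are two gaps. A virtual register with spill weight 3.0 overlapping U1 raises
// both GapWeight[0] and GapWeight[1] to 3.0, while fixed (reserved regunit)
// interference pins the affected gap at huge_valf, so tryLocalSplit() will
// never place that gap inside the new interval.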

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return 0;

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size() - 1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS =
        LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
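
  // Illustrative example (not from the original source): an RS_Assign range
  // with 3 instructions may split into a 3-instruction piece (re-marked
  // RS_Split2) and a 2-instruction piece (RS_New). If the RS_Split2 piece
  // comes back, ProgressRequired forces every new range to have fewer
  // instructions, so the same 3 -> 2+3 split cannot repeat forever.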

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq().getFrequency());
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
        GapWeight[RegMaskGaps[I]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }
1753
1754 // Didn't find any candidates?
1755 if (BestBefore == NumGaps)
1756 return 0;
1757
1758 LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1759 << Uses[BestAfter] << ", " << BestDiff << ", "
1760 << (BestAfter - BestBefore + 1) << " instrs\n");
1761
1762 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1763 SE->reset(LREdit);
1764
1765 SE->openIntv();
1766 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1767 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
1768 SE->useIntv(SegStart, SegStop);
1769 SmallVector<unsigned, 8> IntvMap;
1770 SE->finish(&IntvMap);
1771 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1772 // If the new range has the same number of instructions as before, mark it as
1773 // RS_Split2 so the next split will be forced to make progress. Otherwise,
1774 // leave the new intervals as RS_New so they can compete.
1775 bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1776 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1777 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1778 if (NewGaps >= NumGaps) {
1779 LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1780 assert(!ProgressRequired && "Didn't make progress when it was required.");
1781 for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1782 if (IntvMap[I] == 1) {
1783 ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1784 LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1785 }
1786 LLVM_DEBUG(dbgs() << '\n');
1787 }
1788 ++NumLocalSplits;
1789
1790 return 0;
1791}
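// A minimal standalone sketch (editor's illustration, not part of this file)
// of the shrink/extend window search in tryLocalSplit() above. GapWeight and
// the Estimate callback stand in for the frequency-based values the allocator
// computes; hysteresis, the progress (legality) check, liveness-based
// termination, and regmask clobbers are omitted.
#include <algorithm>
#include <functional>
#include <utility>
#include <vector>

static std::pair<unsigned, unsigned>
bestGapWindow(const std::vector<float> &GapWeight,
              const std::function<float(unsigned, unsigned)> &Estimate) {
  const unsigned NumGaps = static_cast<unsigned>(GapWeight.size());
  if (NumGaps == 0)
    return {0, 0};
  unsigned BestBefore = NumGaps, BestAfter = 0; // BestBefore==NumGaps: none.
  float BestDiff = 0;
  unsigned Before = 0, After = 1; // Window covers gaps [Before, After).
  float MaxGap = GapWeight[0];    // Worst interference inside the window.
  while (true) {
    bool Shrink = true;
    // An allocatable window must out-weigh the worst interference inside it.
    float Est = Estimate(Before, After);
    if (Est >= MaxGap) {
      Shrink = false;
      if (Est - MaxGap > BestDiff) {
        BestDiff = Est - MaxGap;
        BestBefore = Before;
        BestAfter = After;
      }
    }
    if (Shrink) {
      // Too expensive: drop the leading gap and recompute the window max.
      if (++Before < After) {
        MaxGap = *std::max_element(GapWeight.begin() + Before,
                                   GapWeight.begin() + After);
        continue;
      }
      MaxGap = 0;
    }
    if (After >= NumGaps) // Nothing left to extend into.
      break;
    MaxGap = std::max(MaxGap, GapWeight[After++]); // Grow the window.
  }
  return {BestBefore, BestAfter};
}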
1792
1793//===----------------------------------------------------------------------===//
1794// Live Range Splitting
1795//===----------------------------------------------------------------------===//
1796
1797/// trySplit - Try to split VirtReg or one of its interferences, making it
1798/// assignable.
1799 /// @return a physical register when VirtReg may be assigned, and/or new live ranges in NewVRegs.
1800unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
1801 SmallVectorImpl<Register> &NewVRegs,
1802 const SmallVirtRegSet &FixedRegisters) {
1803 // Ranges must be Split2 or less.
1804 if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1805 return 0;
1806
1807 // Local intervals are handled separately.
1808 if (LIS->intervalIsInOneMBB(VirtReg)) {
1809 NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1810 TimerGroupDescription, TimerPassesIsEnabled);
1811 SA->analyze(&VirtReg);
1812 Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1813 if (PhysReg || !NewVRegs.empty())
1814 return PhysReg;
1815 return tryInstructionSplit(VirtReg, Order, NewVRegs);
1816 }
1817
1818 NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1819 TimerGroupDescription, TimerPassesIsEnabled);
1820
1821 SA->analyze(&VirtReg);
1822
1823 // First try to split around a region spanning multiple blocks. RS_Split2
1824 // ranges already made dubious progress with region splitting, so they go
1825 // straight to single block splitting.
1826 if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1827 MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1828 if (PhysReg || !NewVRegs.empty())
1829 return PhysReg;
1830 }
1831
1832 // Then isolate blocks.
1833 return tryBlockSplit(VirtReg, Order, NewVRegs);
1834}
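// In short: single-block ranges get local splitting with per-instruction
// splitting as the fallback, while multi-block ranges try region splitting
// first (unless they are already RS_Split2) and then fall back to isolating
// individual basic blocks.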
1835
1836//===----------------------------------------------------------------------===//
1837// Last Chance Recoloring
1838//===----------------------------------------------------------------------===//
1839
1840/// Return true if \p reg has any tied def operand.
1841static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
1842 for (const MachineOperand &MO : MRI->def_operands(reg))
1843 if (MO.isTied())
1844 return true;
1845
1846 return false;
1847}
1848
1849 /// Return true if the existing assignment of \p Intf overlaps with, but is
1850 /// not the same as, \p PhysReg.
1851 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1852 const VirtRegMap &VRM,
1853 MCRegister PhysReg,
1854 const LiveInterval &Intf) {
1855 MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1856 if (PhysReg == AssignedReg)
1857 return false;
1858 return TRI.regsOverlap(PhysReg, AssignedReg);
1859}
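// Illustrative example with hypothetical register pairs: if Intf currently
// lives in the pair D0_D1 and the candidate PhysReg is D1_D2, the two
// assignments overlap on D1 without being identical, so recoloring Intf into
// a disjoint pair (say D3_D4) could still free D1_D2 for VirtReg.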
1860
1861/// mayRecolorAllInterferences - Check if the virtual registers that
1862/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
1863/// recolored to free \p PhysReg.
1864/// When true is returned, \p RecoloringCandidates has been augmented with all
1865/// the live intervals that need to be recolored in order to free \p PhysReg
1866/// for \p VirtReg.
1867/// \p FixedRegisters contains all the virtual registers that cannot be
1868/// recolored.
1869bool RAGreedy::mayRecolorAllInterferences(
1870 MCRegister PhysReg, const LiveInterval &VirtReg,
1871 SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
1872 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1873
1874 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1875 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
1876 // If there are LastChanceRecoloringMaxInterference or more interferences,
1877 // chances are one would not be recolorable.
1878 if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
1879 LastChanceRecoloringMaxInterference &&
1880 !ExhaustiveSearch) {
1881 LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
1882 CutOffInfo |= CO_Interf;
1883 return false;
1884 }
1885 for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
1886 // If Intf is done and sits on the same register class as VirtReg, it
1887 // would not be recolorable as it is in the same state as
1888 // VirtReg. However there are at least two exceptions.
1889 //
1890 // If VirtReg has tied defs and Intf doesn't, then
1891 // there is still a point in examining if it can be recolorable.
1892 //
1893 // Additionally, if the register class has overlapping tuple members, it
1894 // may still be recolorable using a different tuple. This is more likely
1895 // if the existing assignment aliases with the candidate.
1896 //
1897 if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1898 MRI->getRegClass(Intf->reg()) == CurRC &&
1899 !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
1900 !(hasTiedDef(MRI, VirtReg.reg()) &&
1901 !hasTiedDef(MRI, Intf->reg()))) ||
1902 FixedRegisters.count(Intf->reg())) {
1903 LLVM_DEBUG(
1904 dbgs() << "Early abort: the interference is not recolorable.\n");
1905 return false;
1906 }
1907 RecoloringCandidates.insert(Intf);
1908 }
1909 }
1910 return true;
1911}
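// Worked example of the early aborts above (hypothetical registers): suppose
// VirtReg wants R1 and the only interference on R1's units is vX. If vX is
// already RS_Done, is in the same register class, does not merely partially
// overlap R1, and the tied-def exception does not apply, then vX has already
// exhausted the same options as VirtReg, so the function fails early without
// touching any state.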
1912
1913/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
1914/// its interferences.
1915/// Last chance recoloring chooses a color for \p VirtReg and recolors every
1916/// virtual register that was using it. The recoloring process may recursively
1917/// use the last chance recoloring. Therefore, when a virtual register has been
1918/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
1919/// be last-chance-recolored again during this recoloring "session".
1920/// E.g.,
1921/// Let
1922/// vA can use {R1, R2 }
1923/// vB can use { R2, R3}
1924/// vC can use {R1 }
1925/// Where vA, vB, and vC cannot be split anymore (they are reloads for
1926/// instance) and they all interfere.
1927///
1928/// vA is assigned R1
1929/// vB is assigned R2
1930/// vC tries to evict vA but vA is already done.
1931/// Regular register allocation fails.
1932///
1933/// Last chance recoloring kicks in:
1934 /// vC acts as if vA was evicted => vC uses R1.
1935/// vC is marked as fixed.
1936/// vA needs to find a color.
1937/// None are available.
1938/// vA cannot evict vC: vC is a fixed virtual register now.
1939 /// vA acts as if vB was evicted => vA uses R2.
1940/// vB needs to find a color.
1941/// R3 is available.
1942/// Recoloring => vC = R1, vA = R2, vB = R3
1943///
1944/// \p Order defines the preferred allocation order for \p VirtReg.
1945 /// \p NewRegs will contain any new virtual registers that have been created
1946/// (split, spill) during the process and that must be assigned.
1947/// \p FixedRegisters contains all the virtual registers that cannot be
1948/// recolored.
1949///
1950/// \p RecolorStack tracks the original assignments of successfully recolored
1951/// registers.
1952///
1953/// \p Depth gives the current depth of the last chance recoloring.
1954/// \return a physical register that can be used for VirtReg or ~0u if none
1955/// exists.
1956unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
1957 AllocationOrder &Order,
1958 SmallVectorImpl<Register> &NewVRegs,
1959 SmallVirtRegSet &FixedRegisters,
1960 RecoloringStack &RecolorStack,
1961 unsigned Depth) {
1962 if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
1963 return ~0u;
1964
1965 LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
1966
1967 const ssize_t EntryStackSize = RecolorStack.size();
1968
1969 // Ranges must be Done.
1970 assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
1971 "Last chance recoloring should really be last chance");
1972 // Set the max depth to LastChanceRecoloringMaxDepth.
1973 // We may want to reconsider that if we end up with a search space that is
1974 // too large for targets with hundreds of registers.
1975 // Indeed, in that case we may want to cut the search space earlier.
1976 if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
1977 LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
1978 CutOffInfo |= CO_Depth;
1979 return ~0u;
1980 }
1981
1982 // Set of live intervals that will need to be recolored.
1983 SmallLISet RecoloringCandidates;
1984
1985 // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
1986 // this recoloring "session".
1987 assert(!FixedRegisters.count(VirtReg.reg()));
1988 FixedRegisters.insert(VirtReg.reg());
1989 SmallVector<Register, 4> CurrentNewVRegs;
1990
1991 for (MCRegister PhysReg : Order) {
1992 assert(PhysReg.isValid());
1993 LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
1994 << printReg(PhysReg, TRI) << '\n');
1995 RecoloringCandidates.clear();
1996 CurrentNewVRegs.clear();
1997
1998 // It is only possible to recolor virtual register interference.
1999 if (Matrix->checkInterference(VirtReg, PhysReg) >
2000 LiveRegMatrix::IK_VirtReg) {
2001 LLVM_DEBUG(
2002 dbgs() << "Some interferences are not with virtual registers.\n");
2003
2004 continue;
2005 }
2006
2007 // Early give up on this PhysReg if it is obvious we cannot recolor all
2008 // the interferences.
2009 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2010 FixedRegisters)) {
2011 LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
2012 continue;
2013 }
2014
2015 // RecoloringCandidates contains all the virtual registers that interfere
2016 // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
2017 // recoloring and perform the actual recoloring.
2018 PQueue RecoloringQueue;
2019 for (const LiveInterval *RC : RecoloringCandidates) {
2020 Register ItVirtReg = RC->reg();
2021 enqueue(RecoloringQueue, RC);
2022 assert(VRM->hasPhys(ItVirtReg) &&
2023 "Interferences are supposed to be with allocated variables");
2024
2025 // Record the current allocation.
2026 RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
2027
2028 // Unassign the interval so its physical register is free during recoloring.
2029 Matrix->unassign(*RC);
2030 }
2031
2032 // Act as if VirtReg was assigned to PhysReg so that the underlying
2033 // recoloring has the right information about the interferences and
2034 // available colors.
2035 Matrix->assign(VirtReg, PhysReg);
2036
2037 // Save the current recoloring state.
2038 // If we cannot recolor all the interferences, we will have to start again
2039 // at this point for the next physical register.
2040 SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2041 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
2042 FixedRegisters, RecolorStack, Depth)) {
2043 // Push the queued vregs into the main queue.
2044 for (Register NewVReg : CurrentNewVRegs)
2045 NewVRegs.push_back(NewVReg);
2046 // Do not mess up with the global assignment process.
2047 // I.e., VirtReg must be unassigned.
2048 Matrix->unassign(VirtReg);
2049 return PhysReg;
2050 }
2051
2052 LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2053 << printReg(PhysReg, TRI) << '\n');
2054
2055 // The recoloring attempt failed, undo the changes.
2056 FixedRegisters = SaveFixedRegisters;
2057 Matrix->unassign(VirtReg);
2058
2059 // For a newly created vreg which is also in RecoloringCandidates,
2060 // don't add it to NewVRegs because its physical register will be restored
2061 // below. Other vregs in CurrentNewVRegs are created by calling
2062 // selectOrSplit and should be added into NewVRegs.
2063 for (Register R : CurrentNewVRegs) {
2064 if (RecoloringCandidates.count(&LIS->getInterval(R)))
2065 continue;
2066 NewVRegs.push_back(R);
2067 }
2068
2069 // Roll back our unsuccessful recoloring. Also roll back any successful
2070 // recolorings in any recursive recoloring attempts, since it's possible
2071 // they would have introduced conflicts with assignments we will be
2072 // restoring further up the stack. Perform all unassignments prior to
2073 // reassigning, since sub-recolorings may have conflicted with the registers
2074 // we are going to restore to their original assignments.
2075 for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
2076 const LiveInterval *LI;
2077 MCRegister PhysReg;
2078 std::tie(LI, PhysReg) = RecolorStack[I];
2079
2080 if (VRM->hasPhys(LI->reg()))
2081 Matrix->unassign(*LI);
2082 }
2083
2084 for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
2085 const LiveInterval *LI;
2086 MCRegister PhysReg;
2087 std::tie(LI, PhysReg) = RecolorStack[I];
2088 if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
2089 Matrix->assign(*LI, PhysReg);
2090 }
2091
2092 // Pop the stack of recoloring attempts.
2093 RecolorStack.resize(EntryStackSize);
2094 }
2095
2096 // Last chance recoloring did not work either, give up.
2097 return ~0u;
2098}
2099
2100/// tryRecoloringCandidates - Try to assign a new color to every register
2101 /// in \p RecoloringQueue.
2102 /// \p NewRegs will contain any new virtual registers created during the
2103/// recoloring process.
2104/// \p FixedRegisters[in/out] contains all the registers that have been
2105/// recolored.
2106/// \return true if all virtual registers in RecoloringQueue were successfully
2107/// recolored, false otherwise.
2108bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2109 SmallVectorImpl<Register> &NewVRegs,
2110 SmallVirtRegSet &FixedRegisters,
2111 RecoloringStack &RecolorStack,
2112 unsigned Depth) {
2113 while (!RecoloringQueue.empty()) {
2114 const LiveInterval *LI = dequeue(RecoloringQueue);
2115 LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2116 MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2117 RecolorStack, Depth + 1);
2118 // When splitting happens, the live-range may actually be empty.
2119 // In that case, this is okay to continue the recoloring even
2120 // if we did not find an alternative color for it. Indeed,
2121 // there will not be anything to color for LI in the end.
2122 if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2123 return false;
2124
2125 if (!PhysReg) {
2126 assert(LI->empty() && "Only empty live-ranges do not require a register");
2127 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2128 << " succeeded. Empty LI.\n");
2129 continue;
2130 }
2131 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2132 << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2133
2134 Matrix->assign(*LI, PhysReg);
2135 FixedRegisters.insert(LI->reg());
2136 }
2137 return true;
2138}
2139
2140//===----------------------------------------------------------------------===//
2141// Main Entry Point
2142//===----------------------------------------------------------------------===//
2143
2144 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
2145 SmallVectorImpl<Register> &NewVRegs) {
2146 CutOffInfo = CO_None;
2147 LLVMContext &Ctx = MF->getFunction().getContext();
2148 SmallVirtRegSet FixedRegisters;
2149 RecoloringStack RecolorStack;
2150 MCRegister Reg =
2151 selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2152 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2153 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2154 if (CutOffEncountered == CO_Depth)
2155 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2156 "reached. Use -fexhaustive-register-search to skip "
2157 "cutoffs");
2158 else if (CutOffEncountered == CO_Interf)
2159 Ctx.emitError("register allocation failed: maximum interference for "
2160 "recoloring reached. Use -fexhaustive-register-search "
2161 "to skip cutoffs");
2162 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2163 Ctx.emitError("register allocation failed: maximum interference and "
2164 "depth for recoloring reached. Use "
2165 "-fexhaustive-register-search to skip cutoffs");
2166 }
2167 return Reg;
2168}
2169
2170/// Using a CSR for the first time has a cost because it causes push|pop
2171/// to be added to prologue|epilogue. Splitting a cold section of the live
2172/// range can have lower cost than using the CSR for the first time;
2173/// Spilling a live range in the cold path can have lower cost than using
2174/// the CSR for the first time. Returns the physical register if we decide
2175/// to use the CSR; otherwise return 0.
2176MCRegister RAGreedy::tryAssignCSRFirstTime(
2177 const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2178 uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2179 if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2180 // We choose spill over using the CSR for the first time if the spill cost
2181 // is lower than CSRCost.
2182 SA->analyze(&VirtReg);
2183 if (calcSpillCost() >= CSRCost)
2184 return PhysReg;
2185
2186 // We are going to spill, set CostPerUseLimit to 1 to make sure that
2187 // we will not use a callee-saved register in tryEvict.
2188 CostPerUseLimit = 1;
2189 return 0;
2190 }
2191 if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2192 // We choose pre-splitting over using the CSR for the first time if
2193 // the cost of splitting is lower than CSRCost.
2194 SA->analyze(&VirtReg);
2195 unsigned NumCands = 0;
2196 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2197 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2198 NumCands, true /*IgnoreCSR*/);
2199 if (BestCand == NoCand)
2200 // Use the CSR if we can't find a region split below CSRCost.
2201 return PhysReg;
2202
2203 // Perform the actual pre-splitting.
2204 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2205 return 0;
2206 }
2207 return PhysReg;
2208}
2209
2210 void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
2211 // Do not keep invalid information around.
2212 SetOfBrokenHints.remove(&LI);
2213}
2214
2215void RAGreedy::initializeCSRCost() {
2216 // We use the larger one out of the command-line option and the value reported
2217 // by TRI.
2218 CSRCost = BlockFrequency(
2219 std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2220 if (!CSRCost.getFrequency())
2221 return;
2222
2223 // Raw cost is relative to Entry == 2^14; scale it appropriately.
2224 uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency();
2225 if (!ActualEntry) {
2226 CSRCost = BlockFrequency(0);
2227 return;
2228 }
2229 uint64_t FixedEntry = 1 << 14;
2230 if (ActualEntry < FixedEntry)
2231 CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2232 else if (ActualEntry <= UINT32_MAX)
2233 // Invert the fraction and divide.
2234 CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2235 else
2236 // Can't use BranchProbability in general, since it takes 32-bit numbers.
2237 CSRCost =
2238 BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry));
2239}
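// A standalone sketch (editor's illustration, not part of this file) of the
// rescaling above, using plain 64-bit integers in place of BlockFrequency and
// BranchProbability. The raw cost is relative to a fixed entry frequency of
// 1 << 14, so it is scaled by ActualEntry / FixedEntry; the last branch works
// around BranchProbability's 32-bit limit. Saturating arithmetic is omitted,
// so very large inputs could overflow here where the real code would not.
#include <cstdint>

static uint64_t scaleCSRCost(uint64_t RawCost, uint64_t ActualEntry) {
  const uint64_t FixedEntry = 1 << 14;
  if (ActualEntry == 0)
    return 0; // No usable entry frequency; disable the CSR cost.
  if (ActualEntry <= UINT32_MAX)
    return RawCost * ActualEntry / FixedEntry; // Exact scaling, up or down.
  return RawCost * (ActualEntry / FixedEntry); // Keep operands in range.
}
// E.g. scaleCSRCost(5, 1 << 13) == 2: at half the fixed entry frequency the
// effective first-use cost of a CSR roughly halves (integer truncation).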
2240
2241/// Collect the hint info for \p Reg.
2242/// The results are stored into \p Out.
2243/// \p Out is not cleared before being populated.
2244void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2245 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2246 if (!TII->isFullCopyInstr(Instr))
2247 continue;
2248 // Look for the other end of the copy.
2249 Register OtherReg = Instr.getOperand(0).getReg();
2250 if (OtherReg == Reg) {
2251 OtherReg = Instr.getOperand(1).getReg();
2252 if (OtherReg == Reg)
2253 continue;
2254 }
2255 // Get the current assignment.
2256 MCRegister OtherPhysReg =
2257 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2258 // Push the collected information.
2259 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2260 OtherPhysReg));
2261 }
2262}
2263
2264/// Using the given \p List, compute the cost of the broken hints if
2265/// \p PhysReg was used.
2266/// \return The cost of \p List for \p PhysReg.
2267BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2268 MCRegister PhysReg) {
2269 BlockFrequency Cost = BlockFrequency(0);
2270 for (const HintInfo &Info : List) {
2271 if (Info.PhysReg != PhysReg)
2272 Cost += Info.Freq;
2273 }
2274 return Cost;
2275}
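// For example (hypothetical numbers): with List == {(freq 10, R1),
// (freq 5, R2)}, getBrokenHintFreq(List, R1) returns 5 because only the R2
// hint breaks, while getBrokenHintFreq(List, R3) returns 15 because both
// break. tryHintRecoloring() below recolors only when the candidate color
// does not increase this broken-hint frequency.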
2276
2277/// Using the register assigned to \p VirtReg, try to recolor
2278/// all the live ranges that are copy-related with \p VirtReg.
2279/// The recoloring is then propagated to all the live-ranges that have
2280/// been recolored and so on, until no more copies can be coalesced or
2281 /// it is no longer profitable.
2282/// For a given live range, profitability is determined by the sum of the
2283/// frequencies of the non-identity copies it would introduce with the old
2284/// and new register.
2285void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2286 // We have a broken hint, check if it is possible to fix it by
2287 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2288 // some register and PhysReg may be available for the other live-ranges.
2289 SmallSet<Register, 4> Visited;
2290 SmallVector<unsigned, 2> RecoloringCandidates;
2291 HintsInfo Info;
2292 Register Reg = VirtReg.reg();
2293 MCRegister PhysReg = VRM->getPhys(Reg);
2294 // Start the recoloring algorithm from the input live-interval, then
2295 // it will propagate to the ones that are copy-related with it.
2296 Visited.insert(Reg);
2297 RecoloringCandidates.push_back(Reg);
2298
2299 LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2300 << '(' << printReg(PhysReg, TRI) << ")\n");
2301
2302 do {
2303 Reg = RecoloringCandidates.pop_back_val();
2304
2305 // We cannot recolor physical registers.
2306 if (Reg.isPhysical())
2307 continue;
2308
2309 // This may be a skipped class
2310 if (!VRM->hasPhys(Reg)) {
2311 assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
2312 "We have an unallocated variable which should have been handled");
2313 continue;
2314 }
2315
2316 // Get the live interval mapped with this virtual register to be able
2317 // to check for the interference with the new color.
2318 LiveInterval &LI = LIS->getInterval(Reg);
2319 MCRegister CurrPhys = VRM->getPhys(Reg);
2320 // Check that the new color matches the register class constraints and
2321 // that it is free for this live range.
2322 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2323 Matrix->checkInterference(LI, PhysReg)))
2324 continue;
2325
2326 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2327 << ") is recolorable.\n");
2328
2329 // Gather the hint info.
2330 Info.clear();
2331 collectHintInfo(Reg, Info);
2332 // Check if recoloring the live-range will increase the cost of the
2333 // non-identity copies.
2334 if (CurrPhys != PhysReg) {
2335 LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2336 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2337 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2338 LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost)
2339 << "\nNew Cost: "
2340 << printBlockFreq(*MBFI, NewCopiesCost) << '\n');
2341 if (OldCopiesCost < NewCopiesCost) {
2342 LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2343 continue;
2344 }
2345 // At this point, the cost is either cheaper or equal. If it is
2346 // equal, we consider this is profitable because it may expose
2347 // more recoloring opportunities.
2348 LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2349 // Recolor the live-range.
2350 Matrix->unassign(LI);
2351 Matrix->assign(LI, PhysReg);
2352 }
2353 // Push all copy-related live-ranges to keep reconciling the broken
2354 // hints.
2355 for (const HintInfo &HI : Info) {
2356 if (Visited.insert(HI.Reg).second)
2357 RecoloringCandidates.push_back(HI.Reg);
2358 }
2359 } while (!RecoloringCandidates.empty());
2360}
2361
2362/// Try to recolor broken hints.
2363/// Broken hints may be repaired by recoloring when an evicted variable
2364/// freed up a register for a larger live-range.
2365/// Consider the following example:
2366/// BB1:
2367/// a =
2368/// b =
2369/// BB2:
2370/// ...
2371/// = b
2372/// = a
2373/// Let us assume b gets split:
2374/// BB1:
2375/// a =
2376/// b =
2377/// BB2:
2378/// c = b
2379/// ...
2380/// d = c
2381/// = d
2382/// = a
2383 /// Because of how the allocation works, b, c, and d may be assigned different
2384/// colors. Now, if a gets evicted later:
2385/// BB1:
2386/// a =
2387/// st a, SpillSlot
2388/// b =
2389/// BB2:
2390/// c = b
2391/// ...
2392/// d = c
2393/// = d
2394/// e = ld SpillSlot
2395/// = e
2396 /// It is likely that we can assign the same register to b, c, and d,
2397/// getting rid of 2 copies.
2398void RAGreedy::tryHintsRecoloring() {
2399 for (const LiveInterval *LI : SetOfBrokenHints) {
2400 assert(LI->reg().isVirtual() &&
2401 "Recoloring is possible only for virtual registers");
2402 // Some dead defs may be around (e.g., because of debug uses).
2403 // Ignore those.
2404 if (!VRM->hasPhys(LI->reg()))
2405 continue;
2406 tryHintRecoloring(*LI);
2407 }
2408}
2409
2410MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2411 SmallVectorImpl<Register> &NewVRegs,
2412 SmallVirtRegSet &FixedRegisters,
2413 RecoloringStack &RecolorStack,
2414 unsigned Depth) {
2415 uint8_t CostPerUseLimit = uint8_t(~0u);
2416 // First try assigning a free register.
2417 auto Order =
2418 AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
2419 if (MCRegister PhysReg =
2420 tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2421 // When NewVRegs is not empty, we may have made decisions such as evicting
2422 // a virtual register, go with the earlier decisions and use the physical
2423 // register.
2424 if (CSRCost.getFrequency() &&
2425 EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2426 MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2427 CostPerUseLimit, NewVRegs);
2428 if (CSRReg || !NewVRegs.empty())
2429 // Return now if we decide to use a CSR or create new vregs due to
2430 // pre-splitting.
2431 return CSRReg;
2432 } else
2433 return PhysReg;
2434 }
2435 // Non-empty NewVRegs means VirtReg has been split.
2436 if (!NewVRegs.empty())
2437 return 0;
2438
2439 LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2440 LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2441 << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2442
2443 // Try to evict a less worthy live range, but only for ranges from the primary
2444 // queue. The RS_Split ranges already failed to do this, and they should not
2445 // get a second chance until they have been split.
2446 if (Stage != RS_Split)
2447 if (Register PhysReg =
2448 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2449 FixedRegisters)) {
2450 Register Hint = MRI->getSimpleHint(VirtReg.reg());
2451 // If VirtReg has a hint and that hint is broken record this
2452 // virtual register as a recoloring candidate for broken hint.
2453 // Indeed, since we evicted a variable in its neighborhood it is
2454 // likely we can at least partially recolor some of the
2455 // copy-related live-ranges.
2456 if (Hint && Hint != PhysReg)
2457 SetOfBrokenHints.insert(&VirtReg);
2458 return PhysReg;
2459 }
2460
2461 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2462
2463 // The first time we see a live range, don't try to split or spill.
2464 // Wait until the second time, when all smaller ranges have been allocated.
2465 // This gives a better picture of the interference to split around.
2466 if (Stage < RS_Split) {
2467 ExtraInfo->setStage(VirtReg, RS_Split);
2468 LLVM_DEBUG(dbgs() << "wait for second round\n");
2469 NewVRegs.push_back(VirtReg.reg());
2470 return 0;
2471 }
2472
2473 if (Stage < RS_Spill) {
2474 // Try splitting VirtReg or interferences.
2475 unsigned NewVRegSizeBefore = NewVRegs.size();
2476 Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2477 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2478 return PhysReg;
2479 }
2480
2481 // If we couldn't allocate a register from spilling, there is probably some
2482 // invalid inline assembly. The base class will report it.
2483 if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2484 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2485 RecolorStack, Depth);
2486 }
2487
2488 // Finally spill VirtReg itself.
2489 if ((EnableDeferredSpilling ||
2490 TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
2491 ExtraInfo->getStage(VirtReg) < RS_Memory) {
2492 // TODO: This is experimental and in particular, we do not model
2493 // the live range splitting done by spilling correctly.
2494 // We would need a deep integration with the spiller to do the
2495 // right thing here. Anyway, that is still good for early testing.
2496 ExtraInfo->setStage(VirtReg, RS_Memory);
2497 LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
2498 NewVRegs.push_back(VirtReg.reg());
2499 } else {
2500 NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2501 TimerGroupDescription, TimerPassesIsEnabled);
2502 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2503 spiller().spill(LRE);
2504 ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2505
2506 // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
2507 // the new regs are kept in LDV (still mapping to the old register), until
2508 // we rewrite spilled locations in LDV at a later stage.
2509 DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
2510
2511 if (VerifyEnabled)
2512 MF->verify(this, "After spilling");
2513 }
2514
2515 // The live virtual register requesting allocation was spilled, so tell
2516 // the caller not to allocate anything during this round.
2517 return 0;
2518}
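// Summary of the decision ladder above, in order of preference:
//   1. tryAssign            - take a free register, possibly deferring to
//                             the CSR first-use heuristic.
//   2. tryEvict             - evict a less worthy live range (skipped for
//                             RS_Split ranges, which already failed this).
//   3. defer                - on first sight, requeue the range as RS_Split
//                             so smaller ranges are allocated first.
//   4. trySplit             - region, block, local, or instruction splitting.
//   5. last chance recolor  - for RS_Done or unspillable ranges.
//   6. spill                - deferred (RS_Memory) or via the inline spiller.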
2519
2520void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2521 using namespace ore;
2522 if (Spills) {
2523 R << NV("NumSpills", Spills) << " spills ";
2524 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2525 }
2526 if (FoldedSpills) {
2527 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2528 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2529 << " total folded spills cost ";
2530 }
2531 if (Reloads) {
2532 R << NV("NumReloads", Reloads) << " reloads ";
2533 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2534 }
2535 if (FoldedReloads) {
2536 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2537 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2538 << " total folded reloads cost ";
2539 }
2540 if (ZeroCostFoldedReloads)
2541 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2542 << " zero cost folded reloads ";
2543 if (Copies) {
2544 R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2545 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2546 }
2547}
2548
2549RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2550 RAGreedyStats Stats;
2551 const MachineFrameInfo &MFI = MF->getFrameInfo();
2552 int FI;
2553
2554 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2555 return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2556 A->getPseudoValue())->getFrameIndex());
2557 };
2558 auto isPatchpointInstr = [](const MachineInstr &MI) {
2559 return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2560 MI.getOpcode() == TargetOpcode::STACKMAP ||
2561 MI.getOpcode() == TargetOpcode::STATEPOINT;
2562 };
2563 for (MachineInstr &MI : MBB) {
2564 auto DestSrc = TII->isCopyInstr(MI);
2565 if (DestSrc) {
2566 const MachineOperand &Dest = *DestSrc->Destination;
2567 const MachineOperand &Src = *DestSrc->Source;
2568 Register SrcReg = Src.getReg();
2569 Register DestReg = Dest.getReg();
2570 // Only count `COPY`s with a virtual register as source or destination.
2571 if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2572 if (SrcReg.isVirtual()) {
2573 SrcReg = VRM->getPhys(SrcReg);
2574 if (SrcReg && Src.getSubReg())
2575 SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2576 }
2577 if (DestReg.isVirtual()) {
2578 DestReg = VRM->getPhys(DestReg);
2579 if (DestReg && Dest.getSubReg())
2580 DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2581 }
2582 if (SrcReg != DestReg)
2583 ++Stats.Copies;
2584 }
2585 continue;
2586 }
2587
2588 SmallVector<const MachineMemOperand *, 2> Accesses;
2589 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2590 ++Stats.Reloads;
2591 continue;
2592 }
2593 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2594 ++Stats.Spills;
2595 continue;
2596 }
2597 if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2598 llvm::any_of(Accesses, isSpillSlotAccess)) {
2599 if (!isPatchpointInstr(MI)) {
2600 Stats.FoldedReloads += Accesses.size();
2601 continue;
2602 }
2603 // For statepoint there may be folded and zero cost folded stack reloads.
2604 std::pair<unsigned, unsigned> NonZeroCostRange =
2605 TII->getPatchpointUnfoldableRange(MI);
2606 SmallSet<unsigned, 16> FoldedReloads;
2607 SmallSet<unsigned, 16> ZeroCostFoldedReloads;
2608 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2609 MachineOperand &MO = MI.getOperand(Idx);
2610 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2611 continue;
2612 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2613 FoldedReloads.insert(MO.getIndex());
2614 else
2615 ZeroCostFoldedReloads.insert(MO.getIndex());
2616 }
2617 // If a stack slot is used in a folded reload, it is not zero cost.
2618 for (unsigned Slot : FoldedReloads)
2619 ZeroCostFoldedReloads.erase(Slot);
2620 Stats.FoldedReloads += FoldedReloads.size();
2621 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2622 continue;
2623 }
2624 Accesses.clear();
2625 if (TII->hasStoreToStackSlot(MI, Accesses) &&
2626 llvm::any_of(Accesses, isSpillSlotAccess)) {
2627 Stats.FoldedSpills += Accesses.size();
2628 }
2629 }
2630 // Compute the cost of each collected statistic by scaling it by the
2631 // relative frequency of this basic block.
2632 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2633 Stats.ReloadsCost = RelFreq * Stats.Reloads;
2634 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2635 Stats.SpillsCost = RelFreq * Stats.Spills;
2636 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2637 Stats.CopiesCost = RelFreq * Stats.Copies;
2638 return Stats;
2639}
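// For example, a block executed eight times per function entry
// (RelFreq == 8.0) containing one reload and two spills contributes 8.0 to
// ReloadsCost and 16.0 to SpillsCost, so the remarks below weight spill code
// in hot loops more heavily than spill code on cold paths.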
2640
2641RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2642 RAGreedyStats Stats;
2643
2644 // Sum up the spill and reloads in subloops.
2645 for (MachineLoop *SubLoop : *L)
2646 Stats.add(reportStats(SubLoop));
2647
2648 for (MachineBasicBlock *MBB : L->getBlocks())
2649 // Handle blocks that were not included in subloops.
2650 if (Loops->getLoopFor(MBB) == L)
2651 Stats.add(computeStats(*MBB));
2652
2653 if (!Stats.isEmpty()) {
2654 using namespace ore;
2655
2656 ORE->emit([&]() {
2657 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2658 L->getStartLoc(), L->getHeader());
2659 Stats.report(R);
2660 R << "generated in loop";
2661 return R;
2662 });
2663 }
2664 return Stats;
2665}
2666
2667void RAGreedy::reportStats() {
2668 if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2669 return;
2670 RAGreedyStats Stats;
2671 for (MachineLoop *L : *Loops)
2672 Stats.add(reportStats(L));
2673 // Process non-loop blocks.
2674 for (MachineBasicBlock &MBB : *MF)
2675 if (!Loops->getLoopFor(&MBB))
2676 Stats.add(computeStats(MBB));
2677 if (!Stats.isEmpty()) {
2678 using namespace ore;
2679
2680 ORE->emit([&]() {
2681 DebugLoc Loc;
2682 if (auto *SP = MF->getFunction().getSubprogram())
2683 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2684 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2685 &MF->front());
2686 Stats.report(R);
2687 R << "generated in function";
2688 return R;
2689 });
2690 }
2691}
2692
2693bool RAGreedy::hasVirtRegAlloc() {
2694 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2695 Register Reg = Register::index2VirtReg(I);
2696 if (MRI->reg_nodbg_empty(Reg))
2697 continue;
2698 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2699 if (!RC)
2700 continue;
2701 if (ShouldAllocateClass(*TRI, *RC))
2702 return true;
2703 }
2704
2705 return false;
2706}
2707
2708 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2709 LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2710 << "********** Function: " << mf.getName() << '\n');
2711
2712 MF = &mf;
2713 TII = MF->getSubtarget().getInstrInfo();
2714
2715 if (VerifyEnabled)
2716 MF->verify(this, "Before greedy register allocator");
2717
2718 RegAllocBase::init(getAnalysis<VirtRegMap>(),
2719 getAnalysis<LiveIntervals>(),
2720 getAnalysis<LiveRegMatrix>());
2721
2722 // Early return if there is no virtual register to be allocated to a
2723 // physical register.
2724 if (!hasVirtRegAlloc())
2725 return false;
2726
2727 Indexes = &getAnalysis<SlotIndexes>();
2728 // Renumber to get accurate and consistent results from
2729 // SlotIndexes::getApproxInstrDistance.
2730 Indexes->packIndexes();
2731 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2732 DomTree = &getAnalysis<MachineDominatorTree>();
2733 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
2734 Loops = &getAnalysis<MachineLoopInfo>();
2735 Bundles = &getAnalysis<EdgeBundles>();
2736 SpillPlacer = &getAnalysis<SpillPlacement>();
2737 DebugVars = &getAnalysis<LiveDebugVariables>();
2738
2739 initializeCSRCost();
2740
2741 RegCosts = TRI->getRegisterCosts(*MF);
2742 RegClassPriorityTrumpsGlobalness =
2743 GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2744 ? GreedyRegClassPriorityTrumpsGlobalness
2745 : TRI->regClassPriorityTrumpsGlobalness(*MF);
2746
2747 ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2748 ? GreedyReverseLocalAssignment
2749 : TRI->reverseLocalAssignment();
2750
2751 ExtraInfo.emplace();
2752 EvictAdvisor =
2753 getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
2754 PriorityAdvisor =
2755 getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
2756
2757 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2758 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
2759
2760 VRAI->calculateSpillWeightsAndHints();
2761
2762 LLVM_DEBUG(LIS->dump());
2763
2764 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2765 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2766
2767 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2768 GlobalCand.resize(32); // This will grow as needed.
2769 SetOfBrokenHints.clear();
2770
2771 allocatePhysRegs();
2772 tryHintsRecoloring();
2773
2774 if (VerifyEnabled)
2775 MF->verify(this, "Before post optimization");
2776 postOptimization();
2777 reportStats();
2778
2779 releaseMemory();
2780 return true;
2781}
unsigned SubReg
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Rewrite Partial Register Uses
#define DEBUG_TYPE
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
This file implements an indexed map.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static cl::opt< bool > GreedyRegClassPriorityTrumpsGlobalness("greedy-regclass-priority-trumps-globalness", cl::desc("Change the greedy register allocator's live range priority " "calculation to make the AllocationPriority of the register class " "more important then whether the range is global"), cl::Hidden)
static cl::opt< bool > ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring"), cl::Hidden)
const float Hysteresis
static cl::opt< unsigned > LastChanceRecoloringMaxInterference("lcr-max-interf", cl::Hidden, cl::desc("Last chance recoloring maximum number of considered" " interference at a time"), cl::init(8))
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg)
Return true if reg has any tied def operand.
static bool readsLaneSubset(const MachineRegisterInfo &MRI, const MachineInstr *MI, const LiveInterval &VirtReg, const TargetRegisterInfo *TRI, SlotIndex Use, const TargetInstrInfo *TII)
Return true if MI at \P Use reads a subset of the lanes live in VirtReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, const VirtRegMap &VRM, MCRegister PhysReg, const LiveInterval &Intf)
Return true if the existing assignment of Intf overlaps, but is not the same, as PhysReg.
static cl::opt< unsigned > CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, cl::desc("Last chance recoloring max depth"), cl::init(5))
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator)
static cl::opt< unsigned long > GrowRegionComplexityBudget("grow-region-complexity-budget", cl::desc("growRegion() does not scale with the number of BB edges, so " "limit its budget and bail out once we reach the limit."), cl::init(10000), cl::Hidden)
Greedy Register Allocator
static cl::opt< SplitEditor::ComplementSpillMode > SplitSpillMode("split-spill-mode", cl::Hidden, cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed))
static unsigned getNumAllocatableRegsForConstraints(const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI)
Get the number of allocatable registers that match the constraints of Reg on MI and that are also in ...
static cl::opt< bool > EnableDeferredSpilling("enable-deferred-spilling", cl::Hidden, cl::desc("Instead of spilling a variable right away, defer the actual " "code insertion to the end of the allocation. That way the " "allocator might still find a suitable coloring for this " "variable because of other evicted variables."), cl::init(false))
static cl::opt< unsigned > SplitThresholdForRegWithHint("split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " "percentate"), cl::init(75), cl::Hidden)
static cl::opt< bool > GreedyReverseLocalAssignment("greedy-reverse-local-assignment", cl::desc("Reverse allocation order of local live ranges, such that " "shorter local live ranges will tend to be allocated first"), cl::Hidden)
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineInstr &FirstMI, Register Reg)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
SI optimize exec mask operations pre RA
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
bool isHint(Register Reg) const
Return true if Reg is a preferred physical register.
ArrayRef< MCPhysReg > getOrder() const
Get the allocation order without reordered hints.
static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo, const LiveRegMatrix *Matrix)
Create a new AllocationOrder for VirtReg.
Iterator end() const
Iterator begin() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & reset()
Definition: BitVector.h:392
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
A debug info location.
Definition: DebugLoc.h:33
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition: EdgeBundles.h:47
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition: EdgeBundles.h:41
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition: EdgeBundles.h:44
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:681
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Cursor - The primary query interface for the block interference cache.
SlotIndex first()
first - Return the starting index of the first interfering range in the current block.
SlotIndex last()
last - Return the ending index of the last interfering range in the current block.
bool hasInterference()
hasInterference - Return true if the current block has any interference.
void moveToBlock(unsigned MBBNum)
moveTo - Move cursor to basic block MBBNum.
void init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, LiveIntervals *lis, const TargetRegisterInfo *tri)
init - Prepare cache for a new function.
unsigned getMaxCursors() const
getMaxCursors - Return the maximum number of concurrent cursors that can be supported.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void splitRegister(Register OldReg, ArrayRef< Register > NewRegs, LiveIntervals &LIS)
splitRegister - Move any user variables in OldReg to the live ranges in NewRegs where they are live.
Query interferences between a single live virtual register and a live interval union.
const SmallVectorImpl< const LiveInterval * > & interferingVRegs(unsigned MaxInterferingRegs=std::numeric_limits< unsigned >::max())
SegmentIter find(SlotIndex x)
LiveSegments::iterator SegmentIter
A live range for subregisters.
Definition: LiveInterval.h:694
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
Register reg() const
Definition: LiveInterval.h:718
bool isSpillable() const
isSpillable - Can this interval be spilled?
Definition: LiveInterval.h:826
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:810
unsigned getSize() const
getSize - Returns the sum of sizes of all the LiveRange's.
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:782
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
ArrayRef< SlotIndex > getRegMaskSlotsInBlock(unsigned MBBNum) const
Returns a sorted array of slot indices of all instructions with register mask operands in the basic b...
LiveInterval & getInterval(Register Reg)
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
bool empty() const
unsigned size() const
Register get(unsigned idx) const
ArrayRef< Register > regs() const
iterator end() const
iterator begin() const
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
bool empty() const
Definition: LiveInterval.h:382
iterator end()
Definition: LiveInterval.h:216
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool checkRegMaskInterference(const LiveInterval &VirtReg, MCRegister PhysReg=MCRegister::NoRegister)
Check for regmask interference only.
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
LiveIntervalUnion::Query & query(const LiveRange &LR, MCRegister RegUnit)
Query a line of the assigned virtual register matrix directly.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
@ IK_VirtReg
Virtual register interference.
Definition: LiveRegMatrix.h:90
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
LiveIntervalUnion * getLiveUnions()
Directly access the live interval unions per regunit.
iterator_range< MCRegUnitIterator > regunits(MCRegister Reg) const
Returns an iterator range over all regunits for Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
double getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
Function & getFunction()
Return the LLVM function that this machine code represents.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isImplicitDef() const
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to be more informative.
Diagnostic information for missed-optimization remarks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getSimpleHint(Register VReg) const
getSimpleHint - same as getRegAllocationHint except it will only return a target independent hint.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool reg_nodbg_empty(Register RegNo) const
reg_nodbg_empty - Return true if the only instructions using or defining Reg are Debug instructions.
iterator_range< def_iterator > def_operands(Register Reg) const
LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
iterator_range< reg_instr_nodbg_iterator > reg_nodbg_instructions(Register Reg) const
void dump() const
Definition: Pass.cpp:136
void LRE_DidCloneVirtReg(Register New, Register Old)
Spiller & spiller() override
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl< Register > &) override
bool runOnMachineFunction(MachineFunction &mf) override
Perform register allocation.
const LiveInterval * dequeue() override
dequeue - Return the next unassigned register, or NULL.
void enqueueImpl(const LiveInterval *LI) override
enqueue - Add VirtReg to the priority queue of unassigned registers.
RAGreedy(const RegClassFilterFunc F=allocateAllRegClasses)
static char ID
void getAnalysisUsage(AnalysisUsage &AU) const override
RAGreedy analysis usage.
void aboutToRemoveInterval(const LiveInterval &) override
Method called when the allocator is about to remove a LiveInterval.
RegAllocBase provides the register allocation driver and interface that can be extended to add intere...
Definition: RegAllocBase.h:61
void enqueue(const LiveInterval *LI)
enqueue - Add VirtReg to the priority queue of unassigned registers.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat)
SmallPtrSet< MachineInstr *, 32 > DeadRemats
Inst which is a def of an original reg and whose defs are already all dead after remat is saved in De...
Definition: RegAllocBase.h:77
const TargetRegisterInfo * TRI
Definition: RegAllocBase.h:65
LiveIntervals * LIS
Definition: RegAllocBase.h:68
static const char TimerGroupName[]
Definition: RegAllocBase.h:115
static const char TimerGroupDescription[]
Definition: RegAllocBase.h:116
LiveRegMatrix * Matrix
Definition: RegAllocBase.h:69
virtual void postOptimization()
VirtRegMap * VRM
Definition: RegAllocBase.h:67
RegisterClassInfo RegClassInfo
Definition: RegAllocBase.h:70
MachineRegisterInfo * MRI
Definition: RegAllocBase.h:66
static bool VerifyEnabled
VerifyEnabled - True when -verify-regalloc is given.
Definition: RegAllocBase.h:123
const RegClassFilterFunc ShouldAllocateClass
Definition: RegAllocBase.h:71
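A simplified sketch (an interpretation of these entries, not the verbatim LLVM driver) of the contract that enqueue/dequeue/selectOrSplit plug into:
// seedLiveRegs();                        // enqueue() every virtual register
// while (const LiveInterval *LI = dequeue()) {
//   SmallVector<Register, 4> NewVRegs;
//   MCRegister Phys = selectOrSplit(*LI, NewVRegs);
//   if (Phys)
//     Matrix->assign(*LI, Phys);         // commit the chosen assignment
//   for (Register R : NewVRegs)          // split/spill products go back
//     enqueue(&LIS->getInterval(R));     // into the priority queue
// }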
ImmutableAnalysis abstraction for fetching the Eviction Advisor.
std::optional< unsigned > getOrderLimit(const LiveInterval &VirtReg, const AllocationOrder &Order, unsigned CostPerUseLimit) const
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const
Returns true if the given PhysReg is a callee saved register and has not been used for allocation yet...
bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const
bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const
unsigned getLastCostChange(const TargetRegisterClass *RC) const
Get the position of the last cost change in getOrder(RC).
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
uint8_t getMinCost(const TargetRegisterClass *RC) const
Get the minimum register cost in RC's allocation order.
MCRegister getLastCalleeSavedAlias(MCRegister PhysReg) const
getLastCalleeSavedAlias - Returns the last callee saved register that overlaps PhysReg,...
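A small hedged sketch of the RegisterClassInfo queries; classIsScarce and its threshold are illustrative assumptions:
#include "llvm/CodeGen/RegisterClassInfo.h"
static bool classIsScarce(const llvm::RegisterClassInfo &RCI,
                          const llvm::TargetRegisterClass *RC) {
  unsigned NumRegs = RCI.getNumAllocatableRegs(RC); // reserved regs excluded
  bool HasWiderSuper = RCI.isProperSubClass(RC);    // a larger legal class exists
  return NumRegs < 8 && !HasWiderSuper;             // arbitrary demo threshold
}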
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
MCRegister asMCReg() const
Utility to check-convert this value to an MCRegister.

Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
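A minimal sketch of the virtual/physical split in the Register namespace (registerKinds is a hypothetical helper):
#include "llvm/CodeGen/Register.h"
static void registerKinds(llvm::Register Reg) {
  if (Reg.isVirtual()) {
    // Virtual registers are dense 0-based indexes under the hood;
    // Register::index2VirtReg() is the inverse of that mapping.
  } else if (Reg.isPhysical()) {
    llvm::MCRegister Phys = Reg.asMCReg(); // safe: Reg is physical
    (void)Phys;
  }
}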
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:179
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
Definition: SlotIndexes.h:185
@ InstrDist
The default distance between instructions as returned by distance().
Definition: SlotIndexes.h:115
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:133
SlotIndex getBoundaryIndex() const
Returns the boundary index associated with this index.
Definition: SlotIndexes.h:234
SlotIndex getBaseIndex() const
Returns the base index associated with this index.
Definition: SlotIndexes.h:227
int getApproxInstrDistance(SlotIndex other) const
Return the scaled distance from this index to the given one, where all slots on the same instruction ...
Definition: SlotIndexes.h:206
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:240
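A hedged sketch combining the SlotIndex ordering queries; closeAndOrdered and its distance cutoff are illustrative:
#include "llvm/CodeGen/SlotIndexes.h"
static bool closeAndOrdered(llvm::SlotIndex A, llvm::SlotIndex B) {
  if (llvm::SlotIndex::isSameInstr(A, B))
    return true; // different slots of the same instruction
  return llvm::SlotIndex::isEarlierInstr(A, B) &&
         A.getApproxInstrDistance(B) < 100; // within ~100 instructions
}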
SlotIndexes pass.
Definition: SlotIndexes.h:300
SlotIndex getLastIndex()
Returns the base index of the last slot in this analysis.
Definition: SlotIndexes.h:360
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:452
void packIndexes()
Renumber all indexes using the default instruction distance.
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:354
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction for the given index, or null if the given index has no instruction associated...
Definition: SlotIndexes.h:389
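A small sketch of resolving an index back to its instruction via the SlotIndexes analysis; block boundary indexes have no instruction and yield null (instrAt is a hypothetical helper):
#include "llvm/CodeGen/SlotIndexes.h"
static llvm::MachineInstr *instrAt(const llvm::SlotIndexes &SI,
                                   llvm::SlotIndex Idx) {
  if (!Idx.isValid())
    return nullptr;
  return SI.getInstructionFromIndex(Idx.getBaseIndex());
}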
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool erase(const T &V)
Definition: SmallSet.h:207
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_type size() const
Definition: SmallSet.h:161
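A minimal usage sketch: SmallSet stores up to N elements inline and only spills to a heap-backed set beyond that.
#include "llvm/ADT/SmallSet.h"
static unsigned countDistinct(const int *Vals, unsigned N) {
  llvm::SmallSet<int, 8> Seen; // inline capacity of 8
  for (unsigned I = 0; I != N; ++I)
    Seen.insert(Vals[I]); // returns pair<iterator, bool("was inserted")>
  return Seen.size();
}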
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
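A minimal usage sketch: SmallVector keeps small element counts inline, avoiding heap allocation until the inline capacity is exceeded.
#include "llvm/ADT/SmallVector.h"
static llvm::SmallVector<unsigned, 4> evensBelow(unsigned N) {
  llvm::SmallVector<unsigned, 4> Out; // first 4 elements live inline
  for (unsigned I = 0; I < N; I += 2)
    Out.push_back(I);
  return Out;
}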
void addConstraints(ArrayRef< BlockConstraint > LiveBlocks)
addConstraints - Add constraints and biases.
bool finish()
finish - Compute the optimal spill code placement given the constraints.
void addPrefSpill(ArrayRef< unsigned > Blocks, bool Strong)
addPrefSpill - Add PrefSpill constraints to all blocks listed.
void prepare(BitVector &RegBundles)
prepare - Reset state and prepare for a new spill placement computation.
bool scanActiveBundles()
scanActiveBundles - Perform an initial scan of all bundles activated by addConstraints and addLinks,...
void addLinks(ArrayRef< unsigned > Links)
addLinks - Add transparent blocks with the given numbers.
void iterate()
iterate - Update the network iteratively until convergence, or new bundles are found.
@ MustSpill
A register is impossible; the variable must be spilled.
@ DontCare
Block doesn't care / variable not live.
@ PrefReg
Block entry/exit prefers a register.
@ PrefSpill
Block entry/exit prefers a stack slot.
ArrayRef< unsigned > getRecentPositive()
getRecentPositive - Return an array of bundles that became positive during the previous call to scanA...
BlockFrequency getBlockFrequency(unsigned Number) const
getBlockFrequency - Return the estimated block execution frequency per function invocation.
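A hedged sketch of the call protocol these SpillPlacement entries describe (SpillPlacement.h is a private CodeGen header; the sequence below is an interpretation, not code from this file):
// SpillPlace.prepare(Cand.LiveBundles);    // reset, bind output bit vector
// SpillPlace.addConstraints(Constraints);  // per-block entry/exit biases
// SpillPlace.addLinks(TransparentBlocks);  // blocks the value passes through
// if (SpillPlace.scanActiveBundles())      // initial scan of the network
//   SpillPlace.iterate();                  // relax until convergence
// bool Done = SpillPlace.finish();         // compute the final placement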
virtual void spill(LiveRangeEdit &LRE)=0
spill - Spill the LRE.getParent() live interval.
SplitAnalysis - Analyze a LiveInterval, looking for live range splitting opportunities.
Definition: SplitKit.h:96
SplitEditor - Edit machine code and LiveIntervals for live range splitting.
Definition: SplitKit.h:263
@ SM_Partition
SM_Partition(Default) - Try to create the complement interval so it doesn't overlap any other interva...
Definition: SplitKit.h:286
@ SM_Speed
SM_Speed - Overlap intervals to minimize the expected execution frequency of the inserted copies.
Definition: SplitKit.h:298
@ SM_Size
SM_Size - Overlap intervals to minimize the number of inserted COPY instructions.
Definition: SplitKit.h:293
TargetInstrInfo - Interface to description of machine instruction set.
virtual std::pair< unsigned, unsigned > getPatchpointUnfoldableRange(const MachineInstr &MI) const
For a patchpoint, stackmap, or statepoint intrinsic, return the range of operands which can't be fold...
bool isFullCopyInstr(const MachineInstr &MI) const
virtual bool hasStoreToStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a store to a stack slot, return true along with the FrameInd...
virtual Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physic...
virtual Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct store to a stack slot, return the virtual or physica...
std::optional< DestSourcePair > isCopyInstr(const MachineInstr &MI) const
If the specified machine instruction is an instruction that moves/copies a value from one register to ano...
virtual bool hasLoadFromStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a load from a stack slot, return true along with the FrameIn...
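A minimal sketch using the stack-slot hooks above to recognize simple spills and reloads (isSpillOrReload is a hypothetical helper):
#include "llvm/CodeGen/TargetInstrInfo.h"
static bool isSpillOrReload(const llvm::TargetInstrInfo &TII,
                            const llvm::MachineInstr &MI, int &FrameIndex) {
  if (TII.isLoadFromStackSlot(MI, FrameIndex))
    return true; // direct reload; FrameIndex holds the slot
  if (TII.isStoreToStackSlot(MI, FrameIndex))
    return true; // direct spill; FrameIndex holds the slot
  return false;
}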
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
const uint8_t AllocationPriority
Classes with a higher priority value are assigned first by register allocators using a greedy heurist...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool shouldUseDeferredSpillingForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Deferred spilling delays the spill insertion of a virtual register until after every other allocation.
virtual bool shouldRegionSplitForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Region split has a high compile-time cost, especially for large live ranges.
virtual bool shouldUseLastChanceRecoloringForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Last chance recoloring has a high compile-time cost, especially for targets with a lot of registers.
virtual unsigned getCSRFirstUseCost() const
Allow the target to override the cost of using a callee-saved register for the first time.
LaneBitmask getCoveringLanes() const
The lane masks returned by getSubRegIndexLaneMask() above can only be used to determine if sub-regist...
ArrayRef< uint8_t > getRegisterCosts(const MachineFunction &MF) const
Get a list of cost values for all registers that correspond to the index returned by RegisterCostTabl...
virtual bool reverseLocalAssignment() const
Allow the target to reverse allocation order of local live ranges.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same...
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register Idx for physical register Reg.
bool regsOverlap(Register RegA, Register RegB) const
Returns true if the two registers are equal or alias each other.
const char * getRegClassName(const TargetRegisterClass *Class) const
Returns the name of the register class.
virtual bool regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const
When prioritizing live ranges in register allocation, if this hook returns true then the AllocationPr...
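A hedged sketch of the lane-mask queries: checking whether a sub-register index covers every lane of interest (subRegCoversLanes is a hypothetical helper):
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
static bool subRegCoversLanes(const llvm::TargetRegisterInfo &TRI,
                              unsigned SubIdx,
                              llvm::LaneBitmask TrackedLanes) {
  llvm::LaneBitmask Covered = TRI.getSubRegIndexLaneMask(SubIdx);
  return (TrackedLanes & ~Covered).none(); // no tracked lane left uncovered
}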
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasKnownPreference(Register VirtReg) const
Returns true if VirtReg has a known preferred register.
Definition: VirtRegMap.cpp:118
MCRegister getPhys(Register virtReg) const
Returns the physical register mapped to the specified virtual register.
Definition: VirtRegMap.h:105
bool hasPhys(Register virtReg) const
Returns true if the specified virtual register is mapped to a physical register.
Definition: VirtRegMap.h:99
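A minimal sketch of querying the current virtual-to-physical mapping; getPhys is only meaningful once hasPhys is true (currentAssignment is a hypothetical helper):
#include "llvm/CodeGen/VirtRegMap.h"
static llvm::MCRegister currentAssignment(const llvm::VirtRegMap &VRM,
                                          llvm::Register VirtReg) {
  if (VRM.hasPhys(VirtReg))
    return VRM.getPhys(VirtReg);
  return llvm::MCRegister(); // no assignment yet
}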
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
bool TimePassesIsEnabled
If the user specifies the -time-passes argument on an LLVM tool command line then the value of this b...
char & RAGreedyID
Greedy register allocator.
Spiller * createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
Create and return a spiller that will insert spill code directly instead of deferring through VirtRegM...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
@ RS_Split2
Attempt more aggressive live range splitting that is guaranteed to make progress.
@ RS_Spill
Live range will be spilled. No more splitting will be attempted.
@ RS_Split
Attempt live range splitting if assignment is impossible.
@ RS_New
Newly created live range that has never been queued.
@ RS_Done
There is nothing more we can do to this live range.
@ RS_Assign
Only attempt assignment and eviction. Then requeue as RS_Split.
@ RS_Memory
Live range is in memory.
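A hedged reading of the live range stages above as a progression (an interpretation of the comments, not code from this file): each failed attempt moves a range to a more drastic stage, which guarantees termination.
// RS_New    -> RS_Assign   first time through the priority queue
// RS_Assign -> RS_Split    assignment and eviction both failed
// RS_Split  -> RS_Split2   first round of splitting made no progress
// RS_Split2 -> RS_Spill    aggressive splitting also failed
// RS_Spill  -> RS_Done     spill code inserted; nothing more to do
// RS_Memory                deferred spilling parks the range in memory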
VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
const float huge_valf
Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
Definition: MathExtras.cpp:28
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1954
InstructionCost Cost
Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current function's entry frequency.
static float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr)
Normalize the spill weight of a live interval.
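A hedged sketch of the normalization itself (based on CalcSpillWeights.h; verify against your LLVM revision): the weight is a use/def frequency divided by a padded interval size, so short intervals rank mostly by use count while long intervals rank by use density.
// weight = UseDefFreq / (Size + 25 * SlotIndex::InstrDist);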
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
std::function< bool(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC)> RegClassFilterFunc
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
Definition: MathExtras.h:201
constexpr bool any() const
Definition: LaneBitmask.h:53
This class is basically a combination of TimeRegion and Timer.
Definition: Timer.h:163
BlockConstraint - Entry and exit constraints for a basic block.
BorderConstraint Exit
Constraint on block exit.
bool ChangesValue
True when this block changes the value of the live range.
BorderConstraint Entry
Constraint on block entry.
unsigned Number
Basic block number (from MBB::getNumber()).
Additional information about basic blocks where the current variable is live.
Definition: SplitKit.h:121
SlotIndex FirstDef
First non-phi valno->def, or SlotIndex().
Definition: SplitKit.h:125
bool LiveOut
Current reg is live out.
Definition: SplitKit.h:127
bool LiveIn
Current reg is live in.
Definition: SplitKit.h:126
MachineBasicBlock * MBB
Definition: SplitKit.h:122
SlotIndex LastInstr
Last instr accessing current reg.
Definition: SplitKit.h:124
SlotIndex FirstInstr
First instr accessing current reg.
Definition: SplitKit.h:123