//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "RegAllocBase.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

83 "split-spill-mode", cl::Hidden,
84 cl::desc("Spill mode for splitting live ranges"),
85 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
86 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
87 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
89
static cl::opt<unsigned> LastChanceRecoloringMaxDepth(
    "lcr-max-depth", cl::Hidden,
    cl::desc("Last chance recoloring max depth"),
    cl::init(5));

96 "lcr-max-interf", cl::Hidden,
97 cl::desc("Last chance recoloring maximum number of considered"
98 " interference at a time"),
99 cl::init(8));
100
102 "exhaustive-register-search", cl::NotHidden,
103 cl::desc("Exhaustive Search for registers bypassing the depth "
104 "and interference cutoffs of last chance recoloring"),
105 cl::Hidden);
106
108 "enable-deferred-spilling", cl::Hidden,
109 cl::desc("Instead of spilling a variable right away, defer the actual "
110 "code insertion to the end of the allocation. That way the "
111 "allocator might still find a suitable coloring for this "
112 "variable because of other evicted variables."),
113 cl::init(false));
114
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
    CSRFirstTimeCost("regalloc-csr-first-time-cost",
                     cl::desc("Cost for first time use of callee-saved register."),
                     cl::init(0), cl::Hidden);

122 "grow-region-complexity-budget",
123 cl::desc("growRegion() does not scale with the number of BB edges, so "
124 "limit its budget and bail out once we reach the limit."),
125 cl::init(10000), cl::Hidden);
126
128 "greedy-regclass-priority-trumps-globalness",
129 cl::desc("Change the greedy register allocator's live range priority "
130 "calculation to make the AllocationPriority of the register class "
131 "more important then whether the range is global"),
132 cl::Hidden);
133
135 "greedy-reverse-local-assignment",
136 cl::desc("Reverse allocation order of local live ranges, such that "
137 "shorter local live ranges will tend to be allocated first"),
138 cl::Hidden);
139
141 "split-threshold-for-reg-with-hint",
142 cl::desc("The threshold for splitting a virtual register with a hint, in "
143 "percentate"),
144 cl::init(75), cl::Hidden);
145
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                      "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
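// A challenger must beat the incumbent by about 2% before a float-based
// decision flips (see the uses in tryLocalSplit), preventing oscillation
// between nearly equal alternatives.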
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875

FunctionPass *llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) {
  return new RAGreedy(Ftor);
}


void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
}

//===----------------------------------------------------------------------===//
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  RegAllocBase::enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned:
  // reset the parent to RS_Assign and give the clone the same stage as the
  // parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Ret = PriorityAdvisor->getPriority(*LI);

  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
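  // Note the bitwise NOT: ~Reg turns a smaller vreg number into a larger key
  // in this max-heap; dequeue() recovers the register with a second ~.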
  CurQueue.push(std::make_pair(Ret, ~Reg));
}

unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  const unsigned Size = LI.getSize();
  const Register Reg = LI.reg();
  unsigned Prio;
  LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = RC.GlobalPriority ||
                       (!ReverseLocalAssignment &&
                        (Size / SlotIndex::InstrDist) >
                            (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
        LIS->intervalIsInOneMBB(LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }

    // Priority bit layout:
    // 31 RS_Assign priority
    // 30 Preference priority
    // if (RegClassPriorityTrumpsGlobalness)
    //   29-25 AllocPriority
    //   24 GlobalBit
    // else
    //   29 Global bit
    //   28-24 AllocPriority
    // 0-23 Size/Instr distance
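    //
    // For example, with RegClassPriorityTrumpsGlobalness unset, a global range
    // (GlobalBit = 1) of clamped size 100 in a class with AllocationPriority 3
    // that also has a register hint gets
    //   Prio = (1u << 31) | (1u << 30) | (1u << 29) | (3u << 24) | 100.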

    // Clamp the size to fit with the priority masking scheme
    Prio = std::min(Prio, (unsigned)maxUIntN(24));
    assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");

    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  return Prio;
}

const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
// Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }

      // We can also split the virtual register in cold blocks.
      if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
        return 0;

      // Record the missed hint, we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}

//===----------------------------------------------------------------------===//
// Interference eviction
//===----------------------------------------------------------------------===//

bool RAGreedy::canReassign(const LiveInterval &VirtReg,
                           MCRegister FromReg) const {
  auto HasRegUnitInterference = [&](MCRegUnit Unit) {
    // Instantiate a "subquery", not to be confused with the Queries array.
    LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
    return SubQ.checkInterference();
  };

  for (MCRegister Reg :
       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
    if (Reg == FromReg)
      continue;
    // If no units have interference, reassignment is possible.
    if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
      LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                        << printReg(FromReg, TRI) << " to "
                        << printReg(Reg, TRI) << '\n');
      return true;
    }
  }
  return false;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // We usually have the interfering VRegs cached so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, since different SubRanges may have been
    // queried against it.
    const SmallVectorImpl<const LiveInterval *> &IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

std::optional<unsigned>
RAGreedy::getOrderLimit(const LiveInterval &VirtReg,
                        const AllocationOrder &Order,
                        unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return std::nullopt;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}

bool RAGreedy::canAllocatePhysReg(unsigned CostPerUseLimit,
                                  MCRegister PhysReg) const {
  if (RegCosts[PhysReg] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
// Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = BlockFrequency(0);
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;
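  // Blocks are staged into these fixed-size arrays and flushed to SpillPlacer
  // in batches of GroupSize, amortizing the cost of addConstraints/addLinks.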

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(ArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(ArrayRef(BCS, B));
  SpillPlacer->addLinks(ArrayRef(TBS, T));
  return true;
}

bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else {
      // Providing that the variable being spilled does not look like a loop
      // induction variable, which is expensive to spill around and better
      // pushed into a condition inside the loop if possible, provide a strong
      // negative bias on through blocks to prevent unwanted liveness on loop
      // backedges.
      bool PrefSpill = true;
      if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) {
        // Check that the current bundle is adding a Header + start+end of
        // loop-internal blocks. If the block is indeed a header, don't mark
        // the NewBlocks as PrefSpill to allow the variable to be live in
        // Header<->Latch.
        MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0]));
        if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] &&
            all_of(NewBlocks.drop_front(), [&](unsigned Block) {
              return L == Loops->getLoopFor(MF->getBlockNumbered(Block));
            }))
          PrefSpill = false;
      }
      if (PrefSpill)
        SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    }
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = BlockFrequency(0);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = BlockFrequency(0);
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

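    // A spill or copy is needed at a block boundary whenever the chosen
    // register-in/register-out state disagrees with the placement's
    // preference for that boundary.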
    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safe guard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region", &errs());
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::max();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "
                      << printBlockFreq(*MBFI, BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned
RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            unsigned &BestCand) {
  // Discard bad candidates before we run out of interference cache cursors.
  // This will only affect register classes with a lot of registers (>32).
  if (NumCands == IntfCache.getMaxCursors()) {
    unsigned WorstCount = ~0u;
    unsigned Worst = 0;
    for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
      if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
        continue;
      unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
      if (Count < WorstCount) {
        Worst = CandIndex;
        WorstCount = Count;
      }
    }
    --NumCands;
    GlobalCand[Worst] = GlobalCand[NumCands];
    if (BestCand == NumCands)
      BestCand = Worst;
  }

  if (GlobalCand.size() <= NumCands)
    GlobalCand.resize(NumCands + 1);
  GlobalSplitCandidate &Cand = GlobalCand[NumCands];
  Cand.reset(IntfCache, PhysReg);

  SpillPlacer->prepare(Cand.LiveBundles);
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
    return BestCand;
  }
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
                    << "\tstatic = " << printBlockFreq(*MBFI, Cost));
  if (Cost >= BestCost) {
    LLVM_DEBUG({
      if (BestCand == NoCand)
        dbgs() << " worse than no bundles\n";
      else
        dbgs() << " worse than "
               << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
    });
    return BestCand;
  }
  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return BestCand;
  }

  SpillPlacer->finish();

  // No live bundles, defer to splitSingleBlocks().
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << " no bundles.\n");
    return BestCand;
  }

  Cost += calcGlobalSplitCost(Cand, Order);
  LLVM_DEBUG({
    dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles";
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  if (Cost < BestCost) {
    BestCand = NumCands;
    BestCost = Cost;
  }
  ++NumCands;

  return BestCand;
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
                                      BestCand);
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

// VirtReg has a physical Hint; this function tries to split VirtReg around
// Hint if we can place new COPY instructions in cold blocks.
bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                     const LiveInterval &VirtReg,
                                     SmallVectorImpl<Register> &NewVRegs,
                                     AllocationOrder &Order) {
  // Splitting VirtReg may generate COPY instructions in multiple cold basic
  // blocks and increase code size, so we avoid it when the function is
  // optimized for size.
  if (MF->getFunction().hasOptSize())
    return false;

  // Don't allow repeated splitting as a safe guard against looping.
  if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
    return false;
1241
1243 Register Reg = VirtReg.reg();
1244
1245 // Compute the cost of assigning a non Hint physical register to VirtReg.
1246 // We define it as the total frequency of broken COPY instructions to/from
1247 // Hint register, and after split, they can be deleted.
1248 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
1249 if (!TII->isFullCopyInstr(Instr))
1250 continue;
1251 Register OtherReg = Instr.getOperand(1).getReg();
1252 if (OtherReg == Reg) {
1253 OtherReg = Instr.getOperand(0).getReg();
1254 if (OtherReg == Reg)
1255 continue;
1256 // Check if VirtReg interferes with OtherReg after this COPY instruction.
1257 if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
1258 continue;
1259 }
1260 MCRegister OtherPhysReg =
1261 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
1262 if (OtherPhysReg == Hint)
1263 Cost += MBFI->getBlockFreq(Instr.getParent());
1264 }
1265
  // Decrease the cost so it will be split in colder blocks.
  BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
  Cost *= Threshold;
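  // With the default threshold of 75, the split is pursued only if its region
  // cost stays below 75% of the total frequency of the hint copies it breaks.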
  if (Cost == BlockFrequency(0))
    return false;

  unsigned NumCands = 0;
  unsigned BestCand = NoCand;
  SA->analyze(&VirtReg);
  calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
  if (BestCand == NoCand)
    return false;

  doRegionSplit(VirtReg, BestCand, false /*HasCompact*/, NewVRegs);
  return true;
}

//===----------------------------------------------------------------------===//
// Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges, that will be split by tryLocalSplit if
/// they don't allocate.
unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks", &errs());
  return 0;
}

//===----------------------------------------------------------------------===//
// Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
                                       const TargetRegisterInfo &TRI,
                                       const MachineInstr &FirstMI,
                                       Register Reg) {
  LaneBitmask Mask;
  SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
  (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);

  for (auto [MI, OpIdx] : Ops) {
    const MachineOperand &MO = MI->getOperand(OpIdx);
    assert(MO.isReg() && MO.getReg() == Reg);
    unsigned SubReg = MO.getSubReg();
    if (SubReg == 0 && MO.isUse()) {
      if (MO.isUndef())
        continue;
      return MRI.getMaxLaneMaskForVReg(Reg);
    }

    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
    if (MO.isDef()) {
      if (!MO.isUndef())
        Mask |= ~SubRegMask;
    } else
      Mask |= SubRegMask;
  }

  return Mask;
}

/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
                            const MachineInstr *MI, const LiveInterval &VirtReg,
                            const TargetRegisterInfo *TRI, SlotIndex Use,
                            const TargetInstrInfo *TII) {
  // Early check the common case. Beware of the semi-formed bundles SplitKit
  // creates by setting the bundle flag on copies without a matching BUNDLE.

  auto DestSrc = TII->isCopyInstr(*MI);
  if (DestSrc && !MI->isBundled() &&
      DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
    return false;

  // FIXME: We're only considering uses, but should we consider defs too?
  LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());

  LaneBitmask LiveAtMask;
  for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
    if (S.liveAt(Use))
      LiveAtMask |= S.LaneMask;
  }

  // If the live lanes aren't different from the lanes used by the instruction,
  // this doesn't help.
  return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                       AllocationOrder &Order,
                                       SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.

  bool SplitSubClass = true;
  if (!RegClassInfo.isProperSubClass(CurRC)) {
    if (!VirtReg.hasSubRanges())
      return 0;
    SplitSubClass = false;
  }

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
      if (TII->isFullCopyInstr(*MI) ||
          (SplitSubClass &&
           SuperRCNumAllocatableRegs ==
               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                   TII, TRI, RegClassInfo)) ||
          // TODO: Handle split for subranges with subclass constraints?
          (!SplitSubClass && VirtReg.hasSubRanges() &&
           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
        LLVM_DEBUG(dbgs() << "    skip:\t" << Use << '\t' << *MI);
        continue;
      }
    }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}

//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size() - 1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[Unit].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap + 1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap + 1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    const LiveRange &LR = LIS->getRegUnit(Unit);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap + 1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap + 1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return 0;

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size() - 1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq().getFrequency());
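  // blockFreq is this block's frequency normalized so the function entry
  // block weighs 1.0; it scales the estimated spill weight of candidate
  // split intervals below.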
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned Gap : RegMaskGaps)
        GapWeight[Gap] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }
1748
1749 // Didn't find any candidates?
1750 if (BestBefore == NumGaps)
1751 return 0;
1752
1753 LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1754 << Uses[BestAfter] << ", " << BestDiff << ", "
1755 << (BestAfter - BestBefore + 1) << " instrs\n");
1756
1757 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1758 SE->reset(LREdit);
1759
1760 SE->openIntv();
1761 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1762 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
1763 SE->useIntv(SegStart, SegStop);
1764 SmallVector<unsigned, 8> IntvMap;
1765 SE->finish(&IntvMap);
1766 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1767 // If the new range has the same number of instructions as before, mark it as
1768 // RS_Split2 so the next split will be forced to make progress. Otherwise,
1769 // leave the new intervals as RS_New so they can compete.
1770 bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1771 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1772 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1773 if (NewGaps >= NumGaps) {
1774 LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1775 assert(!ProgressRequired && "Didn't make progress when it was required.");
1776 for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1777 if (IntvMap[I] == 1) {
1778 ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1779 LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1780 }
1781 LLVM_DEBUG(dbgs() << '\n');
1782 }
1783 ++NumLocalSplits;
1784
1785 return 0;
1786}
1787
1788//===----------------------------------------------------------------------===//
1789// Live Range Splitting
1790//===----------------------------------------------------------------------===//
1791
1792/// trySplit - Try to split VirtReg or one of its interferences, making it
1793/// assignable.
1794 /// @return a PhysReg when VirtReg may be assigned, and/or new virtual registers in NewVRegs.
1795unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
1796 SmallVectorImpl<Register> &NewVRegs,
1797 const SmallVirtRegSet &FixedRegisters) {
1798 // Ranges must be Split2 or less.
1799 if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1800 return 0;
1801
1802 // Local intervals are handled separately.
1803 if (LIS->intervalIsInOneMBB(VirtReg)) {
1804 NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1805                    TimerGroupDescription, TimePassesIsEnabled);
1806 SA->analyze(&VirtReg);
1807 Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1808 if (PhysReg || !NewVRegs.empty())
1809 return PhysReg;
1810 return tryInstructionSplit(VirtReg, Order, NewVRegs);
1811 }
1812
1813 NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1814                    TimerGroupDescription, TimePassesIsEnabled);
1815
1816 SA->analyze(&VirtReg);
1817
1818 // First try to split around a region spanning multiple blocks. RS_Split2
1819 // ranges already made dubious progress with region splitting, so they go
1820 // straight to single block splitting.
1821 if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1822 MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1823 if (PhysReg || !NewVRegs.empty())
1824 return PhysReg;
1825 }
1826
1827 // Then isolate blocks.
1828 return tryBlockSplit(VirtReg, Order, NewVRegs);
1829}
1830
1831//===----------------------------------------------------------------------===//
1832// Last Chance Recoloring
1833//===----------------------------------------------------------------------===//
1834
1835/// Return true if \p reg has any tied def operand.
1836static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
1837 for (const MachineOperand &MO : MRI->def_operands(reg))
1838 if (MO.isTied())
1839 return true;
1840
1841 return false;
1842}
1843
1844 /// Return true if the existing assignment of \p Intf overlaps \p PhysReg
1845 /// but is not identical to it.
1846 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1847                                          const VirtRegMap &VRM,
1848 MCRegister PhysReg,
1849 const LiveInterval &Intf) {
1850 MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1851 if (PhysReg == AssignedReg)
1852 return false;
1853 return TRI.regsOverlap(PhysReg, AssignedReg);
1854}
1855
1856/// mayRecolorAllInterferences - Check if the virtual registers that
1857/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
1858/// recolored to free \p PhysReg.
1859/// When true is returned, \p RecoloringCandidates has been augmented with all
1860/// the live intervals that need to be recolored in order to free \p PhysReg
1861/// for \p VirtReg.
1862/// \p FixedRegisters contains all the virtual registers that cannot be
1863/// recolored.
1864bool RAGreedy::mayRecolorAllInterferences(
1865 MCRegister PhysReg, const LiveInterval &VirtReg,
1866 SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
1867 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1868
1869 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1870 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
1871 // If there is LastChanceRecoloringMaxInterference or more interferences,
1872 // chances are one would not be recolorable.
1873 if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
1874         LastChanceRecoloringMaxInterference &&
1875     !ExhaustiveSearch) {
1876 LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
1877 CutOffInfo |= CO_Interf;
1878 return false;
1879 }
1880 for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
1881 // If Intf is done and sits on the same register class as VirtReg, it
1882 // would not be recolorable as it is in the same state as
1883 // VirtReg. However there are at least two exceptions.
1884 //
1885 // If VirtReg has tied defs and Intf doesn't, then
1886 // there is still a point in examining if it can be recolorable.
1887 //
1888 // Additionally, if the register class has overlapping tuple members, it
1889 // may still be recolorable using a different tuple. This is more likely
1890 // if the existing assignment aliases with the candidate.
1891 //
1892 if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1893 MRI->getRegClass(Intf->reg()) == CurRC &&
1894 !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
1895 !(hasTiedDef(MRI, VirtReg.reg()) &&
1896 !hasTiedDef(MRI, Intf->reg()))) ||
1897 FixedRegisters.count(Intf->reg())) {
1898 LLVM_DEBUG(
1899 dbgs() << "Early abort: the interference is not recolorable.\n");
1900 return false;
1901 }
1902 RecoloringCandidates.insert(Intf);
1903 }
1904 }
1905 return true;
1906}
1907
1908/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
1909/// its interferences.
1910/// Last chance recoloring chooses a color for \p VirtReg and recolors every
1911/// virtual register that was using it. The recoloring process may recursively
1912/// use the last chance recoloring. Therefore, when a virtual register has been
1913/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
1914/// be last-chance-recolored again during this recoloring "session".
1915/// E.g.,
1916/// Let
1917/// vA can use {R1, R2 }
1918/// vB can use { R2, R3}
1919/// vC can use {R1 }
1920/// Where vA, vB, and vC cannot be split anymore (they are reloads for
1921/// instance) and they all interfere.
1922///
1923/// vA is assigned R1
1924/// vB is assigned R2
1925/// vC tries to evict vA but vA is already done.
1926/// Regular register allocation fails.
1927///
1928/// Last chance recoloring kicks in:
1929/// vC does as if vA was evicted => vC uses R1.
1930/// vC is marked as fixed.
1931/// vA needs to find a color.
1932/// None are available.
1933/// vA cannot evict vC: vC is a fixed virtual register now.
1934/// vA does as if vB was evicted => vA uses R2.
1935/// vB needs to find a color.
1936/// R3 is available.
1937/// Recoloring => vC = R1, vA = R2, vB = R3
1938///
1939/// \p Order defines the preferred allocation order for \p VirtReg.
1940 /// \p NewRegs will contain any new virtual registers that have been created
1941 /// (split, spill) during the process and that must be assigned.
1942/// \p FixedRegisters contains all the virtual registers that cannot be
1943/// recolored.
1944///
1945/// \p RecolorStack tracks the original assignments of successfully recolored
1946/// registers.
1947///
1948/// \p Depth gives the current depth of the last chance recoloring.
1949/// \return a physical register that can be used for VirtReg or ~0u if none
1950/// exists.
1951unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
1952 AllocationOrder &Order,
1953 SmallVectorImpl<Register> &NewVRegs,
1954 SmallVirtRegSet &FixedRegisters,
1955 RecoloringStack &RecolorStack,
1956 unsigned Depth) {
1957 if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
1958 return ~0u;
1959
1960 LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
1961
1962 const ssize_t EntryStackSize = RecolorStack.size();
1963
1964 // Ranges must be Done.
1965 assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
1966 "Last chance recoloring should really be last chance");
1967 // Set the max depth to LastChanceRecoloringMaxDepth.
1968 // We may want to reconsider that if we end up with too large a search space
1969 // for targets with hundreds of registers.
1970 // Indeed, in that case we may want to cut the search space earlier.
1971 if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
1972 LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
1973 CutOffInfo |= CO_Depth;
1974 return ~0u;
1975 }
1976
1977 // Set of Live intervals that will need to be recolored.
1978 SmallLISet RecoloringCandidates;
1979
1980 // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
1981 // this recoloring "session".
1982 assert(!FixedRegisters.count(VirtReg.reg()));
1983 FixedRegisters.insert(VirtReg.reg());
1984 SmallVector<Register, 4> CurrentNewVRegs;
1985
1986 for (MCRegister PhysReg : Order) {
1987 assert(PhysReg.isValid());
1988 LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
1989 << printReg(PhysReg, TRI) << '\n');
1990 RecoloringCandidates.clear();
1991 CurrentNewVRegs.clear();
1992
1993 // It is only possible to recolor virtual register interference.
1994 if (Matrix->checkInterference(VirtReg, PhysReg) >
1995 LiveRegMatrix::IK_VirtReg) {
1996 LLVM_DEBUG(
1997 dbgs() << "Some interferences are not with virtual registers.\n");
1998
1999 continue;
2000 }
2001
2002 // Early give up on this PhysReg if it is obvious we cannot recolor all
2003 // the interferences.
2004 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2005 FixedRegisters)) {
2006 LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
2007 continue;
2008 }
2009
2010 // RecoloringCandidates contains all the virtual registers that interfere
2011 // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
2012 // recoloring and perform the actual recoloring.
2013 PQueue RecoloringQueue;
2014 for (const LiveInterval *RC : RecoloringCandidates) {
2015 Register ItVirtReg = RC->reg();
2016 enqueue(RecoloringQueue, RC);
2017 assert(VRM->hasPhys(ItVirtReg) &&
2018 "Interferences are supposed to be with allocated variables");
2019
2020 // Record the current allocation.
2021 RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
2022
2023 // Unassign it in the matrix so it no longer appears as interference.
2024 Matrix->unassign(*RC);
2025 }
2026
2027 // Act as if VirtReg were assigned to PhysReg so that the underlying
2028 // recoloring has the right information about the interferences and
2029 // available colors.
2030 Matrix->assign(VirtReg, PhysReg);
2031
2032 // Save the current recoloring state.
2033 // If we cannot recolor all the interferences, we will have to start again
2034 // at this point for the next physical register.
2035 SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2036 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
2037 FixedRegisters, RecolorStack, Depth)) {
2038 // Push the queued vregs into the main queue.
2039 for (Register NewVReg : CurrentNewVRegs)
2040 NewVRegs.push_back(NewVReg);
2041 // Do not mess up with the global assignment process.
2042 // I.e., VirtReg must be unassigned.
2043 Matrix->unassign(VirtReg);
2044 return PhysReg;
2045 }
2046
2047 LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2048 << printReg(PhysReg, TRI) << '\n');
2049
2050 // The recoloring attempt failed, undo the changes.
2051 FixedRegisters = SaveFixedRegisters;
2052 Matrix->unassign(VirtReg);
2053
2054 // For a newly created vreg which is also in RecoloringCandidates,
2055 // don't add it to NewVRegs because its physical register will be restored
2056 // below. Other vregs in CurrentNewVRegs are created by calling
2057 // selectOrSplit and should be added into NewVRegs.
2058 for (Register R : CurrentNewVRegs) {
2059 if (RecoloringCandidates.count(&LIS->getInterval(R)))
2060 continue;
2061 NewVRegs.push_back(R);
2062 }
2063
2064 // Roll back our unsuccessful recoloring. Also roll back any successful
2065 // recolorings in any recursive recoloring attempts, since it's possible
2066 // they would have introduced conflicts with assignments we will be
2067 // restoring further up the stack. Perform all unassignments prior to
2068 // reassigning, since sub-recolorings may have conflicted with the registers
2069 // we are going to restore to their original assignments.
2070 for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
2071 const LiveInterval *LI;
2072 MCRegister PhysReg;
2073 std::tie(LI, PhysReg) = RecolorStack[I];
2074
2075 if (VRM->hasPhys(LI->reg()))
2076 Matrix->unassign(*LI);
2077 }
2078
2079 for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
2080 const LiveInterval *LI;
2081 MCRegister PhysReg;
2082 std::tie(LI, PhysReg) = RecolorStack[I];
2083 if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
2084 Matrix->assign(*LI, PhysReg);
2085 }
2086
2087 // Pop the stack of recoloring attempts.
2088 RecolorStack.resize(EntryStackSize);
2089 }
2090
2091 // Last chance recoloring did not work either, give up.
2092 return ~0u;
2093}
2094
2095/// tryRecoloringCandidates - Try to assign a new color to every register
2096 /// in \p RecoloringQueue.
2097 /// \p NewRegs will contain any new virtual registers created during the
2098 /// recoloring process.
2099/// \p FixedRegisters[in/out] contains all the registers that have been
2100/// recolored.
2101/// \return true if all virtual registers in RecoloringQueue were successfully
2102/// recolored, false otherwise.
2103bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2104 SmallVectorImpl<Register> &NewVRegs,
2105 SmallVirtRegSet &FixedRegisters,
2106 RecoloringStack &RecolorStack,
2107 unsigned Depth) {
2108 while (!RecoloringQueue.empty()) {
2109 const LiveInterval *LI = dequeue(RecoloringQueue);
2110 LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2111 MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2112 RecolorStack, Depth + 1);
2113 // When splitting happens, the live-range may actually be empty.
2114 // In that case, it is okay to continue the recoloring even
2115 // if we did not find an alternative color for it. Indeed,
2116 // there will not be anything to color for LI in the end.
2117 if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2118 return false;
2119
2120 if (!PhysReg) {
2121 assert(LI->empty() && "Only empty live-ranges do not require a register");
2122 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2123 << " succeeded. Empty LI.\n");
2124 continue;
2125 }
2126 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2127 << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2128
2129 Matrix->assign(*LI, PhysReg);
2130 FixedRegisters.insert(LI->reg());
2131 }
2132 return true;
2133}
2134
2135//===----------------------------------------------------------------------===//
2136// Main Entry Point
2137//===----------------------------------------------------------------------===//
2138
2139 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
2140                                    SmallVectorImpl<Register> &NewVRegs) {
2141 CutOffInfo = CO_None;
2142 LLVMContext &Ctx = MF->getFunction().getContext();
2143 SmallVirtRegSet FixedRegisters;
2144 RecoloringStack RecolorStack;
2145 MCRegister Reg =
2146 selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2147 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2148 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2149 if (CutOffEncountered == CO_Depth)
2150 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2151 "reached. Use -fexhaustive-register-search to skip "
2152 "cutoffs");
2153 else if (CutOffEncountered == CO_Interf)
2154 Ctx.emitError("register allocation failed: maximum interference for "
2155 "recoloring reached. Use -fexhaustive-register-search "
2156 "to skip cutoffs");
2157 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2158 Ctx.emitError("register allocation failed: maximum interference and "
2159 "depth for recoloring reached. Use "
2160 "-fexhaustive-register-search to skip cutoffs");
2161 }
2162 return Reg;
2163}
2164
2165/// Using a CSR for the first time has a cost because it causes push|pop
2166/// to be added to prologue|epilogue. Splitting a cold section of the live
2167/// range can have lower cost than using the CSR for the first time;
2168/// Spilling a live range in the cold path can have lower cost than using
2169/// the CSR for the first time. Returns the physical register if we decide
2170 /// to use the CSR; otherwise returns 0.
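/// Illustrative trade-off (assumed numbers, not from the source): if the
/// one-time CSR cost is 20 and spilling the cold part of the range would
/// cost 5, the allocator prefers the spill and keeps the CSR unused; if
/// the spill would cost 30, it pays the CSR cost and assigns the register.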
2171MCRegister RAGreedy::tryAssignCSRFirstTime(
2172 const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2173 uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2174 if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2175 // We choose spill over using the CSR for the first time if the spill cost
2176 // is lower than CSRCost.
2177 SA->analyze(&VirtReg);
2178 if (calcSpillCost() >= CSRCost)
2179 return PhysReg;
2180
2181 // We are going to spill, set CostPerUseLimit to 1 to make sure that
2182 // we will not use a callee-saved register in tryEvict.
2183 CostPerUseLimit = 1;
2184 return 0;
2185 }
2186 if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2187 // We choose pre-splitting over using the CSR for the first time if
2188 // the cost of splitting is lower than CSRCost.
2189 SA->analyze(&VirtReg);
2190 unsigned NumCands = 0;
2191 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2192 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2193 NumCands, true /*IgnoreCSR*/);
2194 if (BestCand == NoCand)
2195 // Use the CSR if we can't find a region split below CSRCost.
2196 return PhysReg;
2197
2198 // Perform the actual pre-splitting.
2199 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2200 return 0;
2201 }
2202 return PhysReg;
2203}
2204
2205 void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
2206 // Do not keep invalid information around.
2207 SetOfBrokenHints.remove(&LI);
2208}
2209
2210void RAGreedy::initializeCSRCost() {
2211 // We use the larger of the command-line option and the value reported
2212 // by TRI.
2213 CSRCost = BlockFrequency(
2214 std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2215 if (!CSRCost.getFrequency())
2216 return;
2217
2218 // Raw cost is relative to Entry == 2^14; scale it appropriately.
2219 uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency();
2220 if (!ActualEntry) {
2221 CSRCost = BlockFrequency(0);
2222 return;
2223 }
2224 uint64_t FixedEntry = 1 << 14;
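// Worked example (assumed entry frequencies): with ActualEntry == 2^13 the
// cost is halved via BranchProbability(2^13, 2^14); with ActualEntry ==
// 2^15 it is doubled by dividing by BranchProbability(2^14, 2^15). Only
// when ActualEntry no longer fits in 32 bits does the code fall back to
// plain integer scaling below.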
2225 if (ActualEntry < FixedEntry)
2226 CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2227 else if (ActualEntry <= UINT32_MAX)
2228 // Invert the fraction and divide.
2229 CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2230 else
2231 // Can't use BranchProbability in general, since it takes 32-bit numbers.
2232 CSRCost =
2233 BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry));
2234}
2235
2236/// Collect the hint info for \p Reg.
2237/// The results are stored into \p Out.
2238/// \p Out is not cleared before being populated.
2239void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2240 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2241 if (!TII->isFullCopyInstr(Instr))
2242 continue;
2243 // Look for the other end of the copy.
2244 Register OtherReg = Instr.getOperand(0).getReg();
2245 if (OtherReg == Reg) {
2246 OtherReg = Instr.getOperand(1).getReg();
2247 if (OtherReg == Reg)
2248 continue;
2249 }
2250 // Get the current assignment.
2251 MCRegister OtherPhysReg =
2252 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2253 // Push the collected information.
2254 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2255 OtherPhysReg));
2256 }
2257}
2258
2259/// Using the given \p List, compute the cost of the broken hints if
2260/// \p PhysReg was used.
2261/// \return The cost of \p List for \p PhysReg.
2262BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2263 MCRegister PhysReg) {
2264 BlockFrequency Cost = BlockFrequency(0);
2265 for (const HintInfo &Info : List) {
2266 if (Info.PhysReg != PhysReg)
2267 Cost += Info.Freq;
2268 }
2269 return Cost;
2270}
2271
2272/// Using the register assigned to \p VirtReg, try to recolor
2273/// all the live ranges that are copy-related with \p VirtReg.
2274/// The recoloring is then propagated to all the live-ranges that have
2275/// been recolored and so on, until no more copies can be coalesced or
2276/// it is not profitable.
2277/// For a given live range, profitability is determined by the sum of the
2278/// frequencies of the non-identity copies it would introduce with the old
2279/// and new register.
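/// Illustrative profitability check (assumed frequencies): suppose the
/// range currently holds R2 and has copy hints to R1 with frequencies 10
/// and 5, plus one hint to R2 with frequency 8. Keeping R2 breaks the two
/// R1 hints (cost 15); recoloring to R1 breaks only the R2 hint (cost 8),
/// so the recoloring is applied.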
2280void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2281 // We have a broken hint, check if it is possible to fix it by
2282 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2283 // some register and PhysReg may be available for the other live-ranges.
2284 SmallSet<Register, 4> Visited;
2285 SmallVector<unsigned, 2> RecoloringCandidates;
2286 HintsInfo Info;
2287 Register Reg = VirtReg.reg();
2288 MCRegister PhysReg = VRM->getPhys(Reg);
2289 // Start the recoloring algorithm from the input live-interval, then
2290 // it will propagate to the ones that are copy-related with it.
2291 Visited.insert(Reg);
2292 RecoloringCandidates.push_back(Reg);
2293
2294 LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2295 << '(' << printReg(PhysReg, TRI) << ")\n");
2296
2297 do {
2298 Reg = RecoloringCandidates.pop_back_val();
2299
2300 // We cannot recolor physical registers.
2301 if (Reg.isPhysical())
2302 continue;
2303
2304 // This may be a skipped register.
2305 if (!VRM->hasPhys(Reg)) {
2307 "We have an unallocated variable which should have been handled");
2308 continue;
2309 }
2310
2311 // Get the live interval mapped with this virtual register to be able
2312 // to check for the interference with the new color.
2313 LiveInterval &LI = LIS->getInterval(Reg);
2314 MCRegister CurrPhys = VRM->getPhys(Reg);
2315 // Check that the new color matches the register class constraints and
2316 // that it is free for this live range.
2317 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2318 Matrix->checkInterference(LI, PhysReg)))
2319 continue;
2320
2321 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2322 << ") is recolorable.\n");
2323
2324 // Gather the hint info.
2325 Info.clear();
2326 collectHintInfo(Reg, Info);
2327 // Check if recoloring the live-range will increase the cost of the
2328 // non-identity copies.
2329 if (CurrPhys != PhysReg) {
2330 LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2331 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2332 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2333 LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost)
2334 << "\nNew Cost: "
2335 << printBlockFreq(*MBFI, NewCopiesCost) << '\n');
2336 if (OldCopiesCost < NewCopiesCost) {
2337 LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2338 continue;
2339 }
2340 // At this point, the cost is either lower or equal. If it is
2341 // equal, we consider this is profitable because it may expose
2342 // more recoloring opportunities.
2343 LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2344 // Recolor the live-range.
2345 Matrix->unassign(LI);
2346 Matrix->assign(LI, PhysReg);
2347 }
2348 // Push all copy-related live-ranges to keep reconciling the broken
2349 // hints.
2350 for (const HintInfo &HI : Info) {
2351 if (Visited.insert(HI.Reg).second)
2352 RecoloringCandidates.push_back(HI.Reg);
2353 }
2354 } while (!RecoloringCandidates.empty());
2355}
2356
2357/// Try to recolor broken hints.
2358/// Broken hints may be repaired by recoloring when an evicted variable
2359/// freed up a register for a larger live-range.
2360/// Consider the following example:
2361/// BB1:
2362/// a =
2363/// b =
2364/// BB2:
2365/// ...
2366/// = b
2367/// = a
2368/// Let us assume b gets split:
2369/// BB1:
2370/// a =
2371/// b =
2372/// BB2:
2373/// c = b
2374/// ...
2375/// d = c
2376/// = d
2377/// = a
2378 /// Because of how the allocation works, b, c, and d may be assigned different
2379/// colors. Now, if a gets evicted later:
2380/// BB1:
2381/// a =
2382/// st a, SpillSlot
2383/// b =
2384/// BB2:
2385/// c = b
2386/// ...
2387/// d = c
2388/// = d
2389/// e = ld SpillSlot
2390/// = e
2391 /// It is likely that we can assign the same register to b, c, and d,
2392/// getting rid of 2 copies.
2393void RAGreedy::tryHintsRecoloring() {
2394 for (const LiveInterval *LI : SetOfBrokenHints) {
2395 assert(LI->reg().isVirtual() &&
2396 "Recoloring is possible only for virtual registers");
2397 // Some dead defs may be around (e.g., because of debug uses).
2398 // Ignore those.
2399 if (!VRM->hasPhys(LI->reg()))
2400 continue;
2401 tryHintRecoloring(*LI);
2402 }
2403}
2404
2405MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2406 SmallVectorImpl<Register> &NewVRegs,
2407 SmallVirtRegSet &FixedRegisters,
2408 RecoloringStack &RecolorStack,
2409 unsigned Depth) {
2410 uint8_t CostPerUseLimit = uint8_t(~0u);
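// uint8_t(~0u) means "no cost limit" here: eviction may consider every
// register until tryAssignCSRFirstTime lowers the limit to steer the
// allocation away from unused callee-saved registers.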
2411 // First try assigning a free register.
2412 auto Order =
2413 AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
2414 if (MCRegister PhysReg =
2415 tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2416 // When NewVRegs is not empty, we may have made decisions such as evicting
2417 // a virtual register, go with the earlier decisions and use the physical
2418 // register.
2419 if (CSRCost.getFrequency() &&
2420 EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2421 MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2422 CostPerUseLimit, NewVRegs);
2423 if (CSRReg || !NewVRegs.empty())
2424 // Return now if we decide to use a CSR or create new vregs due to
2425 // pre-splitting.
2426 return CSRReg;
2427 } else
2428 return PhysReg;
2429 }
2430 // Non-empty NewVRegs means VirtReg has been split.
2431 if (!NewVRegs.empty())
2432 return 0;
2433
2434 LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2435 LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2436 << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2437
2438 // Try to evict a less worthy live range, but only for ranges from the primary
2439 // queue. The RS_Split ranges already failed to do this, and they should not
2440 // get a second chance until they have been split.
2441 if (Stage != RS_Split)
2442 if (Register PhysReg =
2443 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2444 FixedRegisters)) {
2445 Register Hint = MRI->getSimpleHint(VirtReg.reg());
2446 // If VirtReg has a hint and that hint is broken record this
2447 // virtual register as a recoloring candidate for broken hint.
2448 // Indeed, since we evicted a variable in its neighborhood it is
2449 // likely we can at least partially recolor some of the
2450 // copy-related live-ranges.
2451 if (Hint && Hint != PhysReg)
2452 SetOfBrokenHints.insert(&VirtReg);
2453 return PhysReg;
2454 }
2455
2456 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2457
2458 // The first time we see a live range, don't try to split or spill.
2459 // Wait until the second time, when all smaller ranges have been allocated.
2460 // This gives a better picture of the interference to split around.
2461 if (Stage < RS_Split) {
2462 ExtraInfo->setStage(VirtReg, RS_Split);
2463 LLVM_DEBUG(dbgs() << "wait for second round\n");
2464 NewVRegs.push_back(VirtReg.reg());
2465 return 0;
2466 }
2467
2468 if (Stage < RS_Spill && !VirtReg.empty()) {
2469 // Try splitting VirtReg or interferences.
2470 unsigned NewVRegSizeBefore = NewVRegs.size();
2471 Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2472 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2473 return PhysReg;
2474 }
2475
2476 // If we couldn't allocate a register from spilling, there is probably some
2477 // invalid inline assembly. The base class will report it.
2478 if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2479 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2480 RecolorStack, Depth);
2481 }
2482
2483 // Finally spill VirtReg itself.
2484 if ((EnableDeferredSpilling ||
2485      TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
2486 ExtraInfo->getStage(VirtReg) < RS_Memory) {
2487 // TODO: This is experimental and in particular, we do not model
2488 // the live range splitting done by spilling correctly.
2489 // We would need a deep integration with the spiller to do the
2490 // right thing here. Anyway, that is still good for early testing.
2491 ExtraInfo->setStage(VirtReg, RS_Memory);
2492 LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
2493 NewVRegs.push_back(VirtReg.reg());
2494 } else {
2495 NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2496                    TimerGroupDescription, TimePassesIsEnabled);
2497 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2498 spiller().spill(LRE);
2499 ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2500
2501 // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
2502 // the new regs are kept in LDV (still mapping to the old register), until
2503 // we rewrite spilled locations in LDV at a later stage.
2504 for (Register r : spiller().getSpilledRegs())
2505 DebugVars->splitRegister(r, LRE.regs(), *LIS);
2506 for (Register r : spiller().getReplacedRegs())
2507 DebugVars->splitRegister(r, LRE.regs(), *LIS);
2508
2509 if (VerifyEnabled)
2510 MF->verify(this, "After spilling", &errs());
2511 }
2512
2513 // The live virtual register requesting allocation was spilled, so tell
2514 // the caller not to allocate anything during this round.
2515 return 0;
2516}
2517
2518void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2519 using namespace ore;
2520 if (Spills) {
2521 R << NV("NumSpills", Spills) << " spills ";
2522 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2523 }
2524 if (FoldedSpills) {
2525 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2526 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2527 << " total folded spills cost ";
2528 }
2529 if (Reloads) {
2530 R << NV("NumReloads", Reloads) << " reloads ";
2531 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2532 }
2533 if (FoldedReloads) {
2534 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2535 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2536 << " total folded reloads cost ";
2537 }
2538 if (ZeroCostFoldedReloads)
2539 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2540 << " zero cost folded reloads ";
2541 if (Copies) {
2542 R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2543 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2544 }
2545}
2546
2547RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2548 RAGreedyStats Stats;
2549 const MachineFrameInfo &MFI = MF->getFrameInfo();
2550 int FI;
2551
2552 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2553 return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2554 A->getPseudoValue())->getFrameIndex());
2555 };
2556 auto isPatchpointInstr = [](const MachineInstr &MI) {
2557 return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2558 MI.getOpcode() == TargetOpcode::STACKMAP ||
2559 MI.getOpcode() == TargetOpcode::STATEPOINT;
2560 };
2561 for (MachineInstr &MI : MBB) {
2562 auto DestSrc = TII->isCopyInstr(MI);
2563 if (DestSrc) {
2564 const MachineOperand &Dest = *DestSrc->Destination;
2565 const MachineOperand &Src = *DestSrc->Source;
2566 Register SrcReg = Src.getReg();
2567 Register DestReg = Dest.getReg();
2568 // Only count `COPY`s with a virtual register as source or destination.
2569 if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2570 if (SrcReg.isVirtual()) {
2571 SrcReg = VRM->getPhys(SrcReg);
2572 if (SrcReg && Src.getSubReg())
2573 SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2574 }
2575 if (DestReg.isVirtual()) {
2576 DestReg = VRM->getPhys(DestReg);
2577 if (DestReg && Dest.getSubReg())
2578 DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2579 }
2580 if (SrcReg != DestReg)
2581 ++Stats.Copies;
2582 }
2583 continue;
2584 }
2585
2586 SmallVector<const MachineMemOperand *, 2> Accesses;
2587 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2588 ++Stats.Reloads;
2589 continue;
2590 }
2591 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2592 ++Stats.Spills;
2593 continue;
2594 }
2595 if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2596 llvm::any_of(Accesses, isSpillSlotAccess)) {
2597 if (!isPatchpointInstr(MI)) {
2598 Stats.FoldedReloads += Accesses.size();
2599 continue;
2600 }
2601 // For statepoints there may be folded and zero-cost folded stack reloads.
2602 std::pair<unsigned, unsigned> NonZeroCostRange =
2603 TII->getPatchpointUnfoldableRange(MI);
2604 SmallSet<unsigned, 16> FoldedReloads;
2605 SmallSet<unsigned, 16> ZeroCostFoldedReloads;
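// Sketch of the partition (derived from the surrounding code, not
// original commentary): frame-index operands inside NonZeroCostRange are
// genuine folded reloads that cost a memory access, while frame indices
// outside it belong to the statepoint's deopt/GC bookkeeping and are
// counted as zero-cost folded reloads.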
2606 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2607 MachineOperand &MO = MI.getOperand(Idx);
2608 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2609 continue;
2610 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2611 FoldedReloads.insert(MO.getIndex());
2612 else
2613 ZeroCostFoldedReloads.insert(MO.getIndex());
2614 }
2615 // If a stack slot is used in a folded reload, it is not zero cost.
2616 for (unsigned Slot : FoldedReloads)
2617 ZeroCostFoldedReloads.erase(Slot);
2618 Stats.FoldedReloads += FoldedReloads.size();
2619 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2620 continue;
2621 }
2622 Accesses.clear();
2623 if (TII->hasStoreToStackSlot(MI, Accesses) &&
2624 llvm::any_of(Accesses, isSpillSlotAccess)) {
2625 Stats.FoldedSpills += Accesses.size();
2626 }
2627 }
2628 // Scale the collected statistics by the relative frequency of this basic
2629 // block to turn the counts into costs.
2630 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2631 Stats.ReloadsCost = RelFreq * Stats.Reloads;
2632 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2633 Stats.SpillsCost = RelFreq * Stats.Spills;
2634 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2635 Stats.CopiesCost = RelFreq * Stats.Copies;
2636 return Stats;
2637}
2638
2639RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2640 RAGreedyStats Stats;
2641
2642 // Sum up the spill and reloads in subloops.
2643 for (MachineLoop *SubLoop : *L)
2644 Stats.add(reportStats(SubLoop));
2645
2646 for (MachineBasicBlock *MBB : L->getBlocks())
2647 // Handle blocks that were not included in subloops.
2648 if (Loops->getLoopFor(MBB) == L)
2649 Stats.add(computeStats(*MBB));
2650
2651 if (!Stats.isEmpty()) {
2652 using namespace ore;
2653
2654 ORE->emit([&]() {
2655 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2656 L->getStartLoc(), L->getHeader());
2657 Stats.report(R);
2658 R << "generated in loop";
2659 return R;
2660 });
2661 }
2662 return Stats;
2663}
2664
2665void RAGreedy::reportStats() {
2666 if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2667 return;
2668 RAGreedyStats Stats;
2669 for (MachineLoop *L : *Loops)
2670 Stats.add(reportStats(L));
2671 // Process non-loop blocks.
2672 for (MachineBasicBlock &MBB : *MF)
2673 if (!Loops->getLoopFor(&MBB))
2674 Stats.add(computeStats(MBB));
2675 if (!Stats.isEmpty()) {
2676 using namespace ore;
2677
2678 ORE->emit([&]() {
2679 DebugLoc Loc;
2680 if (auto *SP = MF->getFunction().getSubprogram())
2681 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2682 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2683 &MF->front());
2684 Stats.report(R);
2685 R << "generated in function";
2686 return R;
2687 });
2688 }
2689}
2690
2691bool RAGreedy::hasVirtRegAlloc() {
2692 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2693 Register Reg = Register::index2VirtReg(I);
2694 if (MRI->reg_nodbg_empty(Reg))
2695 continue;
2696 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2697 if (!RC)
2698 continue;
2699 if (shouldAllocateRegister(Reg))
2700 return true;
2701 }
2702
2703 return false;
2704}
2705
2706 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2707 LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2708 << "********** Function: " << mf.getName() << '\n');
2709
2710 MF = &mf;
2711 TII = MF->getSubtarget().getInstrInfo();
2712
2713 if (VerifyEnabled)
2714 MF->verify(this, "Before greedy register allocator", &errs());
2715
2716 RegAllocBase::init(getAnalysis<VirtRegMapWrapperLegacy>().getVRM(),
2717 getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
2718 getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM());
2719
2720 // Early return if there is no virtual register to be allocated to a
2721 // physical register.
2722 if (!hasVirtRegAlloc())
2723 return false;
2724
2725 Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
2726 // Renumber to get accurate and consistent results from
2727 // SlotIndexes::getApproxInstrDistance.
2728 Indexes->packIndexes();
2729 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
2730 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2731 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
2732 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2733 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
2734 SpillPlacer = &getAnalysis<SpillPlacementWrapperLegacy>().getResult();
2735 DebugVars = &getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();
2736
2737 initializeCSRCost();
2738
2739 RegCosts = TRI->getRegisterCosts(*MF);
2740 RegClassPriorityTrumpsGlobalness =
2741 GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2742     ? GreedyRegClassPriorityTrumpsGlobalness
2743     : TRI->regClassPriorityTrumpsGlobalness(*MF);
2744
2745 ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2746                              ? GreedyReverseLocalAssignment
2747                              : TRI->reverseLocalAssignment();
2748
2749 ExtraInfo.emplace();
2750 EvictAdvisor =
2751 getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
2752 PriorityAdvisor =
2753 getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
2754
2755 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2756 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
2757
2758 VRAI->calculateSpillWeightsAndHints();
2759
2760 LLVM_DEBUG(LIS->dump());
2761
2762 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2763 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2764
2765 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2766 GlobalCand.resize(32); // This will grow as needed.
2767 SetOfBrokenHints.clear();
2768
2769 allocatePhysRegs();
2770 tryHintsRecoloring();
2771
2772 if (VerifyEnabled)
2773 MF->verify(this, "Before post optimization", &errs());
2774 postOptimization();
2775 reportStats();
2776
2777 releaseMemory();
2778 return true;
2779}
unsigned SubReg
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Size
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
#define DEBUG_TYPE
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
This file implements an indexed map.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static cl::opt< bool > GreedyRegClassPriorityTrumpsGlobalness("greedy-regclass-priority-trumps-globalness", cl::desc("Change the greedy register allocator's live range priority " "calculation to make the AllocationPriority of the register class " "more important then whether the range is global"), cl::Hidden)
static cl::opt< bool > ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring"), cl::Hidden)
const float Hysteresis
static cl::opt< unsigned > LastChanceRecoloringMaxInterference("lcr-max-interf", cl::Hidden, cl::desc("Last chance recoloring maximum number of considered" " interference at a time"), cl::init(8))
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg)
Return true if reg has any tied def operand.
static bool readsLaneSubset(const MachineRegisterInfo &MRI, const MachineInstr *MI, const LiveInterval &VirtReg, const TargetRegisterInfo *TRI, SlotIndex Use, const TargetInstrInfo *TII)
Return true if MI at \P Use reads a subset of the lanes live in VirtReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, const VirtRegMap &VRM, MCRegister PhysReg, const LiveInterval &Intf)
Return true if the existing assignment of Intf overlaps, but is not the same, as PhysReg.
static cl::opt< unsigned > CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, cl::desc("Last chance recoloring max depth"), cl::init(5))
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator)
static cl::opt< unsigned long > GrowRegionComplexityBudget("grow-region-complexity-budget", cl::desc("growRegion() does not scale with the number of BB edges, so " "limit its budget and bail out once we reach the limit."), cl::init(10000), cl::Hidden)
Greedy Register Allocator
static cl::opt< SplitEditor::ComplementSpillMode > SplitSpillMode("split-spill-mode", cl::Hidden, cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed))
static unsigned getNumAllocatableRegsForConstraints(const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI)
Get the number of allocatable registers that match the constraints of Reg on MI and that are also in ...
static cl::opt< bool > EnableDeferredSpilling("enable-deferred-spilling", cl::Hidden, cl::desc("Instead of spilling a variable right away, defer the actual " "code insertion to the end of the allocation. That way the " "allocator might still find a suitable coloring for this " "variable because of other evicted variables."), cl::init(false))
static cl::opt< unsigned > SplitThresholdForRegWithHint("split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " "percentate"), cl::init(75), cl::Hidden)
static cl::opt< bool > GreedyReverseLocalAssignment("greedy-reverse-local-assignment", cl::desc("Reverse allocation order of local live ranges, such that " "shorter local live ranges will tend to be allocated first"), cl::Hidden)
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineInstr &FirstMI, Register Reg)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
SI optimize exec mask operations pre RA
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
bool isHint(Register Reg) const
Return true if Reg is a preferred physical register.
ArrayRef< MCPhysReg > getOrder() const
Get the allocation order without reordered hints.
static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo, const LiveRegMatrix *Matrix)
Create a new AllocationOrder for VirtReg.
Iterator end() const
Iterator begin() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & reset()
Definition: BitVector.h:392
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
A debug info location.
Definition: DebugLoc.h:33
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition: EdgeBundles.h:53
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition: EdgeBundles.h:47
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition: EdgeBundles.h:50
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:707
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Cursor - The primary query interface for the block interference cache.
SlotIndex first()
first - Return the starting index of the first interfering range in the current block.
SlotIndex last()
last - Return the ending index of the last interfering range in the current block.
bool hasInterference()
hasInterference - Return true if the current block has any interference.
void moveToBlock(unsigned MBBNum)
moveTo - Move cursor to basic block MBBNum.
void init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, LiveIntervals *lis, const TargetRegisterInfo *tri)
init - Prepare cache for a new function.
unsigned getMaxCursors() const
getMaxCursors - Return the maximum number of concurrent cursors that can be supported.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void splitRegister(Register OldReg, ArrayRef< Register > NewRegs, LiveIntervals &LIS)
splitRegister - Move any user variables in OldReg to the live ranges in NewRegs where they are live.
Query interferences between a single live virtual register and a live interval union.
const SmallVectorImpl< const LiveInterval * > & interferingVRegs(unsigned MaxInterferingRegs=std::numeric_limits< unsigned >::max())
SegmentIter find(SlotIndex x)
LiveSegments::iterator SegmentIter
A live range for subregisters.
Definition: LiveInterval.h:694
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
Register reg() const
Definition: LiveInterval.h:718
bool isSpillable() const
isSpillable - Can this interval be spilled?
Definition: LiveInterval.h:826
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:810
unsigned getSize() const
getSize - Returns the sum of sizes of all the LiveRange's.
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:782
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
ArrayRef< SlotIndex > getRegMaskSlotsInBlock(unsigned MBBNum) const
Returns a sorted array of slot indices of all instructions with register mask operands in the basic b...
LiveInterval & getInterval(Register Reg)
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
bool empty() const
unsigned size() const
Register get(unsigned idx) const
ArrayRef< Register > regs() const
iterator end() const
iterator begin() const
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
bool empty() const
Definition: LiveInterval.h:382
iterator end()
Definition: LiveInterval.h:216
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool checkRegMaskInterference(const LiveInterval &VirtReg, MCRegister PhysReg=MCRegister::NoRegister)
Check for regmask interference only.
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
LiveIntervalUnion::Query & query(const LiveRange &LR, MCRegister RegUnit)
Query a line of the assigned virtual register matrix directly.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
@ IK_VirtReg
Virtual register interference.
Definition: LiveRegMatrix.h:94
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
LiveIntervalUnion * getLiveUnions()
Directly access the live interval unions per regunit.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
iterator_range< MCRegUnitIterator > regunits(MCRegister Reg) const
Returns an iterator range over all regunits for Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
double getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
Analysis pass which computes a MachineDominatorTree.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isImplicitDef() const
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to be more informative.
Diagnostic information for missed-optimization remarks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getSimpleHint(Register VReg) const
getSimpleHint - same as getRegAllocationHint except it will only return a target independent hint.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool reg_nodbg_empty(Register RegNo) const
reg_nodbg_empty - Return true if the only instructions using or defining Reg are Debug instructions.
iterator_range< def_iterator > def_operands(Register Reg) const
LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
iterator_range< reg_instr_nodbg_iterator > reg_nodbg_instructions(Register Reg) const
void LRE_DidCloneVirtReg(Register New, Register Old)
Spiller & spiller() override
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl< Register > &) override
bool runOnMachineFunction(MachineFunction &mf) override
Perform register allocation.
const LiveInterval * dequeue() override
dequeue - Return the next unassigned register, or NULL.
RAGreedy(const RegAllocFilterFunc F=nullptr)
void enqueueImpl(const LiveInterval *LI) override
enqueue - Add VirtReg to the priority queue of unassigned registers.
static char ID
void getAnalysisUsage(AnalysisUsage &AU) const override
RAGreedy analysis usage.
void aboutToRemoveInterval(const LiveInterval &) override
Method called when the allocator is about to remove a LiveInterval.
RegAllocBase provides the register allocation driver and interface that can be extended to add interesting heuristics.
Definition: RegAllocBase.h:62
void enqueue(const LiveInterval *LI)
enqueue - Add VirtReg to the priority queue of unassigned registers.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat)
SmallPtrSet< MachineInstr *, 32 > DeadRemats
Inst which is a def of an original reg and whose defs are already all dead after remat is saved in DeadRemats.
Definition: RegAllocBase.h:82
const TargetRegisterInfo * TRI
Definition: RegAllocBase.h:66
LiveIntervals * LIS
Definition: RegAllocBase.h:69
static const char TimerGroupName[]
Definition: RegAllocBase.h:133
static const char TimerGroupDescription[]
Definition: RegAllocBase.h:134
LiveRegMatrix * Matrix
Definition: RegAllocBase.h:70
virtual void postOptimization()
VirtRegMap * VRM
Definition: RegAllocBase.h:68
RegisterClassInfo RegClassInfo
Definition: RegAllocBase.h:71
MachineRegisterInfo * MRI
Definition: RegAllocBase.h:67
bool shouldAllocateRegister(Register Reg)
Get whether a given register should be allocated.
Definition: RegAllocBase.h:93
static bool VerifyEnabled
VerifyEnabled - True when -verify-regalloc is given.
Definition: RegAllocBase.h:141
ImmutableAnalysis abstraction for fetching the Eviction Advisor.
std::optional< unsigned > getOrderLimit(const LiveInterval &VirtReg, const AllocationOrder &Order, unsigned CostPerUseLimit) const
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const
Returns true if the given PhysReg is a callee saved register and has not been used for allocation yet.
bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const
bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const
unsigned getLastCostChange(const TargetRegisterClass *RC) const
Get the position of the last cost change in getOrder(RC).
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
uint8_t getMinCost(const TargetRegisterClass *RC) const
Get the minimum register cost in RC's allocation order.
MCRegister getLastCalleeSavedAlias(MCRegister PhysReg) const
getLastCalleeSavedAlias - Returns the last callee saved register that overlaps PhysReg, or NoRegister if it does not overlap any callee saved register.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
MCRegister asMCReg() const
Utility to check-convert this value to an MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
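A small hedged example of the Register predicates above; Reg stands for any llvm::Register value:

  // Dispatch on the register namespace before using namespace-specific APIs.
  if (Reg.isVirtual()) {
    // Virtual registers are what the allocator assigns or spills.
  } else if (Reg.isPhysical()) {
    MCRegister PhysReg = Reg.asMCReg(); // Only valid for physical registers.
    (void)PhysReg;
  }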
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:176
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
Definition: SlotIndexes.h:182
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:130
SlotIndex getBoundaryIndex() const
Returns the boundary index associated with this index.
Definition: SlotIndexes.h:231
SlotIndex getBaseIndex() const
Returns the base index associated with this index.
Definition: SlotIndexes.h:224
@ InstrDist
The default distance between instructions as returned by distance().
Definition: SlotIndexes.h:112
int getApproxInstrDistance(SlotIndex other) const
Return the scaled distance from this index to the given one, where all slots on the same instruction have zero distance.
Definition: SlotIndexes.h:203
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
SlotIndex getLastIndex()
Returns the base index of the last slot in this analysis.
Definition: SlotIndexes.h:368
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:460
void packIndexes()
Renumber all indexes using the default instruction distance.
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:362
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction for the given index, or null if the given index has no instruction associated with it.
Definition: SlotIndexes.h:397
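A brief sketch of the SlotIndex helpers above, assuming Indexes is a SlotIndexes analysis result and A, B are valid indexes:

  // Compare instruction order, then map an index back to its instruction.
  if (SlotIndex::isEarlierInstr(A, B)) {
    // A's instruction strictly precedes B's in the function.
    if (MachineInstr *MI = Indexes.getInstructionFromIndex(A.getBaseIndex()))
      dbgs() << "earlier: " << *MI; // Null is returned for indexes with no instruction.
  }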
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
bool erase(const T &V)
Definition: SmallSet.h:193
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
size_type size() const
Definition: SmallSet.h:170
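A minimal usage sketch of the SmallSet interface listed above:

  // Track already-visited registers; linear storage while small, a real
  // set once it grows past the inline capacity (16 here).
  SmallSet<Register, 16> Visited;
  if (Visited.insert(SomeReg).second) {
    // First time SomeReg is seen; insert() reports prior absence.
  }
  if (Visited.count(SomeReg)) // 1 if present, 0 otherwise.
    Visited.erase(SomeReg);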
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication based on element size.
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
void addConstraints(ArrayRef< BlockConstraint > LiveBlocks)
addConstraints - Add constraints and biases.
bool finish()
finish - Compute the optimal spill code placement given the constraints.
void addPrefSpill(ArrayRef< unsigned > Blocks, bool Strong)
addPrefSpill - Add PrefSpill constraints to all blocks listed.
void prepare(BitVector &RegBundles)
prepare - Reset state and prepare for a new spill placement computation.
bool scanActiveBundles()
scanActiveBundles - Perform an initial scan of all bundles activated by addConstraints and addLinks, updating their state.
void addLinks(ArrayRef< unsigned > Links)
addLinks - Add transparent blocks with the given numbers.
void iterate()
iterate - Update the network iteratively until convergence, or new bundles are found.
@ MustSpill
A register is impossible, variable must be spilled.
@ DontCare
Block doesn't care / variable not live.
@ PrefReg
Block entry/exit prefers a register.
@ PrefSpill
Block entry/exit prefers a stack slot.
ArrayRef< unsigned > getRecentPositive()
getRecentPositive - Return an array of bundles that became positive during the previous call to scanActiveBundles or iterate.
BlockFrequency getBlockFrequency(unsigned Number) const
getBlockFrequency - Return the estimated block execution frequency per function invocation.
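The SpillPlacement entries above imply a call protocol; here is a condensed, hypothetical sketch (the constraint and link data are placeholders, and real clients such as region splitting interleave getRecentPositive() with further constraints):

  BitVector RegBundles;
  SpillPl.prepare(RegBundles);        // Reset state for a new computation.
  SpillPl.addConstraints(LiveBlocks); // Entry/exit preferences per block.
  SpillPl.addLinks(TransparentBlks);  // Blocks the value merely flows through.
  if (SpillPl.scanActiveBundles())    // Any bundles initially positive?
    SpillPl.iterate();                // Propagate until convergence.
  bool PerfectFit = SpillPl.finish(); // Final placement, given constraints.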
virtual void spill(LiveRangeEdit &LRE)=0
spill - Spill the LRE.getParent() live interval.
SplitAnalysis - Analyze a LiveInterval, looking for live range splitting opportunities.
Definition: SplitKit.h:96
SplitEditor - Edit machine code and LiveIntervals for live range splitting.
Definition: SplitKit.h:263
@ SM_Partition
SM_Partition(Default) - Try to create the complement interval so it doesn't overlap any other intervals.
Definition: SplitKit.h:286
@ SM_Speed
SM_Speed - Overlap intervals to minimize the expected execution frequency of the inserted copies.
Definition: SplitKit.h:298
@ SM_Size
SM_Size - Overlap intervals to minimize the number of inserted COPY instructions.
Definition: SplitKit.h:293
TargetInstrInfo - Interface to description of machine instruction set.
virtual std::pair< unsigned, unsigned > getPatchpointUnfoldableRange(const MachineInstr &MI) const
For a patchpoint, stackmap, or statepoint intrinsic, return the range of operands which can't be folded.
bool isFullCopyInstr(const MachineInstr &MI) const
virtual bool hasStoreToStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a store to a stack slot, return true along with the FrameIndex of the stack slot and the machine mem operands containing the reference.
virtual Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physical register number of the destination along with the FrameIndex of the loaded stack slot.
virtual Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct store to a stack slot, return the virtual or physical register number of the source along with the FrameIndex of the stored stack slot.
std::optional< DestSourcePair > isCopyInstr(const MachineInstr &MI) const
If the specified machine instruction is an instruction that moves/copies a value from one register to another, return the destination and source registers as machine operands.
virtual bool hasLoadFromStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a load from a stack slot, return true along with the FrameIndex of the stack slot and the machine mem operands containing the reference.
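A hedged sketch combining the stack-slot queries above with MachineFrameInfo::isSpillSlotObjectIndex(); TII, MFI, and MI are assumed to be the target instruction info, the frame info, and an instruction to classify:

  // Recognize reloads and spills by asking the target about MI.
  int FI = 0;
  if (Register DstReg = TII.isLoadFromStackSlot(MI, FI)) {
    if (MFI.isSpillSlotObjectIndex(FI)) {
      // MI reloads DstReg from a spill slot.
    }
  } else if (Register SrcReg = TII.isStoreToStackSlot(MI, FI)) {
    // MI stores SrcReg to frame index FI (a spill if FI is a spill slot).
    (void)SrcReg;
  }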
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
const uint8_t AllocationPriority
Classes with a higher priority value are assigned first by register allocators using a greedy heuristic.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual bool shouldUseDeferredSpillingForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Deferred spilling delays the spill insertion of a virtual register after every other allocation.
virtual bool shouldRegionSplitForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Region split has a high compile time cost, especially for large live ranges.
virtual bool shouldUseLastChanceRecoloringForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Last chance recoloring has a high compile time cost especially for targets with a lot of registers.
virtual unsigned getCSRFirstUseCost() const
Allow the target to override the cost of using a callee-saved register for the first time.
LaneBitmask getCoveringLanes() const
The lane masks returned by getSubRegIndexLaneMask() above can only be used to determine if sub-registers overlap - they can't be used to determine if a set of sub-registers completely covers another sub-register.
ArrayRef< uint8_t > getRegisterCosts(const MachineFunction &MF) const
Get a list of cost values for all registers that correspond to the index returned by RegisterCostTableIndex.
virtual bool reverseLocalAssignment() const
Allow the target to reverse allocation order of local live ranges.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
bool regsOverlap(Register RegA, Register RegB) const
Returns true if the two registers are equal or alias each other.
const char * getRegClassName(const TargetRegisterClass *Class) const
Returns the name of the register class.
virtual bool regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const
When prioritizing live ranges in register allocation, if this hook returns true then the AllocationPriority of the register class takes precedence over whether the range is local to a basic block or global.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasKnownPreference(Register VirtReg) const
Returns true if VirtReg has a known preferred register.
Definition: VirtRegMap.cpp:121
MCRegister getPhys(Register virtReg) const
Returns the physical register mapped to the specified virtual register.
Definition: VirtRegMap.h:90
bool hasPhys(Register virtReg) const
Returns true if the specified virtual register is mapped to a physical register.
Definition: VirtRegMap.h:86
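A short sketch of the VirtRegMap queries above; VRM and VirtReg are assumed to be the map for the function being allocated and a virtual register:

  // Ask whether a virtual register already has a physical assignment.
  if (VRM.hasPhys(VirtReg)) {
    MCRegister PhysReg = VRM.getPhys(VirtReg); // The current mapping.
    (void)PhysReg;
  } else if (VRM.hasKnownPreference(VirtReg)) {
    // No assignment yet, but there is a hint worth trying first.
  }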
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::function< bool(const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, const Register Reg)> RegAllocFilterFunc
Filter function for register classes during regalloc.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized builds.
bool TimePassesIsEnabled
If the user specifies the -time-passes argument on an LLVM tool command line then the value of this boolean will be true, otherwise false.
char & RAGreedyID
Greedy register allocator.
Spiller * createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
Create and return a spiller that will insert spill code directly instead of deferring through VirtRegMap.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
@ RS_Split2
Attempt more aggressive live range splitting that is guaranteed to make progress.
@ RS_Spill
Live range will be spilled. No more splitting will be attempted.
@ RS_Split
Attempt live range splitting if assignment is impossible.
@ RS_New
Newly created live range that has never been queued.
@ RS_Done
There is nothing more we can do to this live range.
@ RS_Assign
Only attempt assignment and eviction. Then requeue as RS_Split.
@ RS_Memory
Live range is in memory.
VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
const float huge_valf
Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
Definition: MathExtras.cpp:28
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1978
InstructionCost Cost
Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current function's entry frequency.
static float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr)
Normalize the spill weight of a live interval.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
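The printable helpers compose directly into debug streams; a hypothetical one-liner, with TRI and MBB assumed to be the register info and a valid block:

  LLVM_DEBUG(dbgs() << "considering " << printReg(VirtReg, TRI) << " in "
                    << printMBBReference(MBB) << '\n');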
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
Definition: MathExtras.h:219
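As a worked example of the helper above: maxUIntN(N) returns 2^N - 1, so

  uint64_t Max16 = maxUIntN(16); // 65535
  uint64_t Max32 = maxUIntN(32); // 4294967295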
constexpr bool any() const
Definition: LaneBitmask.h:53
This class is basically a combination of TimeRegion and Timer.
Definition: Timer.h:163
BlockConstraint - Entry and exit constraints for a basic block.
BorderConstraint Exit
Constraint on block exit.
bool ChangesValue
True when this block changes the value of the live range.
BorderConstraint Entry
Constraint on block entry.
unsigned Number
Basic block number (from MBB::getNumber()).
Additional information about basic blocks where the current variable is live.
Definition: SplitKit.h:121
SlotIndex FirstDef
First non-phi valno->def, or SlotIndex().
Definition: SplitKit.h:125
bool LiveOut
Current reg is live out.
Definition: SplitKit.h:127
bool LiveIn
Current reg is live in.
Definition: SplitKit.h:126
MachineBasicBlock * MBB
Definition: SplitKit.h:122
SlotIndex LastInstr
Last instr accessing current reg.
Definition: SplitKit.h:124
SlotIndex FirstInstr
First instr accessing current reg.
Definition: SplitKit.h:123