LLVM 20.0.0git
RegAllocGreedy.cpp
Go to the documentation of this file.
1//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the RAGreedy function pass for register allocation in
10// optimized builds.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RegAllocGreedy.h"
15#include "AllocationOrder.h"
16#include "InterferenceCache.h"
17#include "RegAllocBase.h"
20#include "SpillPlacement.h"
21#include "SplitKit.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/IndexedMap.h"
25#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/Function.h"
61#include "llvm/IR/LLVMContext.h"
64#include "llvm/Pass.h"
68#include "llvm/Support/Debug.h"
70#include "llvm/Support/Timer.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <utility>
76
77using namespace llvm;
78
79#define DEBUG_TYPE "regalloc"
80
81STATISTIC(NumGlobalSplits, "Number of split global live ranges");
82STATISTIC(NumLocalSplits, "Number of split local live ranges");
83STATISTIC(NumEvicted, "Number of interferences evicted");
84
86 "split-spill-mode", cl::Hidden,
87 cl::desc("Spill mode for splitting live ranges"),
88 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
89 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
90 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
92
95 cl::desc("Last chance recoloring max depth"),
96 cl::init(5));
97
99 "lcr-max-interf", cl::Hidden,
100 cl::desc("Last chance recoloring maximum number of considered"
101 " interference at a time"),
102 cl::init(8));
103
105 "exhaustive-register-search", cl::NotHidden,
106 cl::desc("Exhaustive Search for registers bypassing the depth "
107 "and interference cutoffs of last chance recoloring"),
108 cl::Hidden);
109
111 "enable-deferred-spilling", cl::Hidden,
112 cl::desc("Instead of spilling a variable right away, defer the actual "
113 "code insertion to the end of the allocation. That way the "
114 "allocator might still find a suitable coloring for this "
115 "variable because of other evicted variables."),
116 cl::init(false));
117
118// FIXME: Find a good default for this flag and remove the flag.
120CSRFirstTimeCost("regalloc-csr-first-time-cost",
121 cl::desc("Cost for first time use of callee-saved register."),
122 cl::init(0), cl::Hidden);
123
125 "grow-region-complexity-budget",
126 cl::desc("growRegion() does not scale with the number of BB edges, so "
127 "limit its budget and bail out once we reach the limit."),
128 cl::init(10000), cl::Hidden);
129
131 "greedy-regclass-priority-trumps-globalness",
132 cl::desc("Change the greedy register allocator's live range priority "
133 "calculation to make the AllocationPriority of the register class "
134 "more important then whether the range is global"),
135 cl::Hidden);
136
138 "greedy-reverse-local-assignment",
139 cl::desc("Reverse allocation order of local live ranges, such that "
140 "shorter local live ranges will tend to be allocated first"),
141 cl::Hidden);
142
144 "split-threshold-for-reg-with-hint",
145 cl::desc("The threshold for splitting a virtual register with a hint, in "
146 "percentate"),
147 cl::init(75), cl::Hidden);
148
149static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
151
152char RAGreedy::ID = 0;
154
156 "Greedy Register Allocator", false, false)
160INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
161INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
174
175#ifndef NDEBUG
176const char *const RAGreedy::StageName[] = {
177 "RS_New",
178 "RS_Assign",
179 "RS_Split",
180 "RS_Split2",
181 "RS_Spill",
182 "RS_Memory",
183 "RS_Done"
184};
185#endif
186
187// Hysteresis to use when comparing floats.
188// This helps stabilize decisions based on float comparisons.
189const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
190
192 return new RAGreedy();
193}
194
196 return new RAGreedy(Ftor);
197}
198
201
203 AU.setPreservesCFG();
228}
229
230//===----------------------------------------------------------------------===//
231// LiveRangeEdit delegate methods
232//===----------------------------------------------------------------------===//
233
234bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
235 LiveInterval &LI = LIS->getInterval(VirtReg);
236 if (VRM->hasPhys(VirtReg)) {
237 Matrix->unassign(LI);
239 return true;
240 }
241 // Unassigned virtreg is probably in the priority queue.
242 // RegAllocBase will erase it after dequeueing.
243 // Nonetheless, clear the live-range so that the debug
244 // dump will show the right state for that VirtReg.
245 LI.clear();
246 return false;
247}
248
249void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
250 if (!VRM->hasPhys(VirtReg))
251 return;
252
253 // Register is assigned, put it back on the queue for reassignment.
254 LiveInterval &LI = LIS->getInterval(VirtReg);
255 Matrix->unassign(LI);
257}
258
259void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
260 ExtraInfo->LRE_DidCloneVirtReg(New, Old);
261}
262
264 // Cloning a register we haven't even heard about yet? Just ignore it.
265 if (!Info.inBounds(Old))
266 return;
267
268 // LRE may clone a virtual register because dead code elimination causes it to
269 // be split into connected components. The new components are much smaller
270 // than the original, so they should get a new chance at being assigned.
271 // same stage as the parent.
272 Info[Old].Stage = RS_Assign;
273 Info.grow(New.id());
274 Info[New] = Info[Old];
275}
276
278 SpillerInstance.reset();
279 GlobalCand.clear();
280}
281
282void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
283
284void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
285 // Prioritize live ranges by size, assigning larger ranges first.
286 // The queue holds (size, reg) pairs.
287 const Register Reg = LI->reg();
288 assert(Reg.isVirtual() && "Can only enqueue virtual registers");
289
290 auto Stage = ExtraInfo->getOrInitStage(Reg);
291 if (Stage == RS_New) {
292 Stage = RS_Assign;
293 ExtraInfo->setStage(Reg, Stage);
294 }
295
296 unsigned Ret = PriorityAdvisor->getPriority(*LI);
297
298 // The virtual register number is a tie breaker for same-sized ranges.
299 // Give lower vreg numbers higher priority to assign them first.
300 CurQueue.push(std::make_pair(Ret, ~Reg));
301}
302
303unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
304 const unsigned Size = LI.getSize();
305 const Register Reg = LI.reg();
306 unsigned Prio;
307 LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);
308
309 if (Stage == RS_Split) {
310 // Unsplit ranges that couldn't be allocated immediately are deferred until
311 // everything else has been allocated.
312 Prio = Size;
313 } else if (Stage == RS_Memory) {
314 // Memory operand should be considered last.
315 // Change the priority such that Memory operand are assigned in
316 // the reverse order that they came in.
317 // TODO: Make this a member variable and probably do something about hints.
318 static unsigned MemOp = 0;
319 Prio = MemOp++;
320 } else {
321 // Giant live ranges fall back to the global assignment heuristic, which
322 // prevents excessive spilling in pathological cases.
323 const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
324 bool ForceGlobal = RC.GlobalPriority ||
325 (!ReverseLocalAssignment &&
328 unsigned GlobalBit = 0;
329
330 if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
331 LIS->intervalIsInOneMBB(LI)) {
332 // Allocate original local ranges in linear instruction order. Since they
333 // are singly defined, this produces optimal coloring in the absence of
334 // global interference and other constraints.
335 if (!ReverseLocalAssignment)
336 Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
337 else {
338 // Allocating bottom up may allow many short LRGs to be assigned first
339 // to one of the cheap registers. This could be much faster for very
340 // large blocks on targets with many physical registers.
341 Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
342 }
343 } else {
344 // Allocate global and split ranges in long->short order. Long ranges that
345 // don't fit should be spilled (or split) ASAP so they don't create
346 // interference. Mark a bit to prioritize global above local ranges.
347 Prio = Size;
348 GlobalBit = 1;
349 }
350
351 // Priority bit layout:
352 // 31 RS_Assign priority
353 // 30 Preference priority
354 // if (RegClassPriorityTrumpsGlobalness)
355 // 29-25 AllocPriority
356 // 24 GlobalBit
357 // else
358 // 29 Global bit
359 // 28-24 AllocPriority
360 // 0-23 Size/Instr distance
361
362 // Clamp the size to fit with the priority masking scheme
363 Prio = std::min(Prio, (unsigned)maxUIntN(24));
364 assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");
365
366 if (RegClassPriorityTrumpsGlobalness)
367 Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
368 else
369 Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
370
371 // Mark a higher bit to prioritize global and local above RS_Split.
372 Prio |= (1u << 31);
373
374 // Boost ranges that have a physical register hint.
375 if (VRM->hasKnownPreference(Reg))
376 Prio |= (1u << 30);
377 }
378
379 return Prio;
380}
381
382const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
383
384const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
385 if (CurQueue.empty())
386 return nullptr;
387 LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
388 CurQueue.pop();
389 return LI;
390}
391
392//===----------------------------------------------------------------------===//
393// Direct Assignment
394//===----------------------------------------------------------------------===//
395
396/// tryAssign - Try to assign VirtReg to an available register.
397MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
398 AllocationOrder &Order,
400 const SmallVirtRegSet &FixedRegisters) {
401 MCRegister PhysReg;
402 for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
403 assert(*I);
404 if (!Matrix->checkInterference(VirtReg, *I)) {
405 if (I.isHint())
406 return *I;
407 else
408 PhysReg = *I;
409 }
410 }
411 if (!PhysReg.isValid())
412 return PhysReg;
413
414 // PhysReg is available, but there may be a better choice.
415
416 // If we missed a simple hint, try to cheaply evict interference from the
417 // preferred register.
418 if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
419 if (Order.isHint(Hint)) {
420 MCRegister PhysHint = Hint.asMCReg();
421 LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
422
423 if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
424 FixedRegisters)) {
425 evictInterference(VirtReg, PhysHint, NewVRegs);
426 return PhysHint;
427 }
428
429 // We can also split the virtual register in cold blocks.
430 if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
431 return 0;
432
433 // Record the missed hint, we may be able to recover
434 // at the end if the surrounding allocation changed.
435 SetOfBrokenHints.insert(&VirtReg);
436 }
437
438 // Try to evict interference from a cheaper alternative.
439 uint8_t Cost = RegCosts[PhysReg];
440
441 // Most registers have 0 additional cost.
442 if (!Cost)
443 return PhysReg;
444
445 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
446 << (unsigned)Cost << '\n');
447 MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
448 return CheapReg ? CheapReg : PhysReg;
449}
450
451//===----------------------------------------------------------------------===//
452// Interference eviction
453//===----------------------------------------------------------------------===//
454
456 MCRegister FromReg) const {
457 auto HasRegUnitInterference = [&](MCRegUnit Unit) {
458 // Instantiate a "subquery", not to be confused with the Queries array.
459 LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
460 return SubQ.checkInterference();
461 };
462
463 for (MCRegister Reg :
465 if (Reg == FromReg)
466 continue;
467 // If no units have interference, reassignment is possible.
468 if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
469 LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
470 << printReg(FromReg, TRI) << " to "
471 << printReg(Reg, TRI) << '\n');
472 return true;
473 }
474 }
475 return false;
476}
477
478/// evictInterference - Evict any interferring registers that prevent VirtReg
479/// from being assigned to Physreg. This assumes that canEvictInterference
480/// returned true.
481void RAGreedy::evictInterference(const LiveInterval &VirtReg,
482 MCRegister PhysReg,
483 SmallVectorImpl<Register> &NewVRegs) {
484 // Make sure that VirtReg has a cascade number, and assign that cascade
485 // number to every evicted register. These live ranges than then only be
486 // evicted by a newer cascade, preventing infinite loops.
487 unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
488
489 LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
490 << " interference: Cascade " << Cascade << '\n');
491
492 // Collect all interfering virtregs first.
494 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
495 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
496 // We usually have the interfering VRegs cached so collectInterferingVRegs()
497 // should be fast, we may need to recalculate if when different physregs
498 // overlap the same register unit so we had different SubRanges queried
499 // against it.
501 Intfs.append(IVR.begin(), IVR.end());
502 }
503
504 // Evict them second. This will invalidate the queries.
505 for (const LiveInterval *Intf : Intfs) {
506 // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
507 if (!VRM->hasPhys(Intf->reg()))
508 continue;
509
510 Matrix->unassign(*Intf);
511 assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
512 VirtReg.isSpillable() < Intf->isSpillable()) &&
513 "Cannot decrease cascade number, illegal eviction");
514 ExtraInfo->setCascade(Intf->reg(), Cascade);
515 ++NumEvicted;
516 NewVRegs.push_back(Intf->reg());
517 }
518}
519
520/// Returns true if the given \p PhysReg is a callee saved register and has not
521/// been used for allocation yet.
524 if (!CSR)
525 return false;
526
527 return !Matrix->isPhysRegUsed(PhysReg);
528}
529
530std::optional<unsigned>
532 const AllocationOrder &Order,
533 unsigned CostPerUseLimit) const {
534 unsigned OrderLimit = Order.getOrder().size();
535
536 if (CostPerUseLimit < uint8_t(~0u)) {
537 // Check of any registers in RC are below CostPerUseLimit.
538 const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
539 uint8_t MinCost = RegClassInfo.getMinCost(RC);
540 if (MinCost >= CostPerUseLimit) {
541 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
542 << MinCost << ", no cheaper registers to be found.\n");
543 return std::nullopt;
544 }
545
546 // It is normal for register classes to have a long tail of registers with
547 // the same cost. We don't need to look at them if they're too expensive.
548 if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
549 OrderLimit = RegClassInfo.getLastCostChange(RC);
550 LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
551 << " regs.\n");
552 }
553 }
554 return OrderLimit;
555}
556
558 MCRegister PhysReg) const {
559 if (RegCosts[PhysReg] >= CostPerUseLimit)
560 return false;
561 // The first use of a callee-saved register in a function has cost 1.
562 // Don't start using a CSR when the CostPerUseLimit is low.
563 if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
565 dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
567 << '\n');
568 return false;
569 }
570 return true;
571}
572
573/// tryEvict - Try to evict all interferences for a physreg.
574/// @param VirtReg Currently unassigned virtual register.
575/// @param Order Physregs to try.
576/// @return Physreg to assign VirtReg, or 0.
577MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
578 AllocationOrder &Order,
580 uint8_t CostPerUseLimit,
581 const SmallVirtRegSet &FixedRegisters) {
584
585 MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
586 VirtReg, Order, CostPerUseLimit, FixedRegisters);
587 if (BestPhys.isValid())
588 evictInterference(VirtReg, BestPhys, NewVRegs);
589 return BestPhys;
590}
591
592//===----------------------------------------------------------------------===//
593// Region Splitting
594//===----------------------------------------------------------------------===//
595
596/// addSplitConstraints - Fill out the SplitConstraints vector based on the
597/// interference pattern in Physreg and its aliases. Add the constraints to
598/// SpillPlacement and return the static cost of this split in Cost, assuming
599/// that all preferences in SplitConstraints are met.
600/// Return false if there are no bundles with positive bias.
601bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
603 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
604
605 // Reset interference dependent info.
606 SplitConstraints.resize(UseBlocks.size());
607 BlockFrequency StaticCost = BlockFrequency(0);
608 for (unsigned I = 0; I != UseBlocks.size(); ++I) {
609 const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
610 SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
611
612 BC.Number = BI.MBB->getNumber();
613 Intf.moveToBlock(BC.Number);
615 BC.Exit = (BI.LiveOut &&
619 BC.ChangesValue = BI.FirstDef.isValid();
620
621 if (!Intf.hasInterference())
622 continue;
623
624 // Number of spill code instructions to insert.
625 unsigned Ins = 0;
626
627 // Interference for the live-in value.
628 if (BI.LiveIn) {
629 if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
631 ++Ins;
632 } else if (Intf.first() < BI.FirstInstr) {
634 ++Ins;
635 } else if (Intf.first() < BI.LastInstr) {
636 ++Ins;
637 }
638
639 // Abort if the spill cannot be inserted at the MBB' start
640 if (((BC.Entry == SpillPlacement::MustSpill) ||
643 SA->getFirstSplitPoint(BC.Number)))
644 return false;
645 }
646
647 // Interference for the live-out value.
648 if (BI.LiveOut) {
649 if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
651 ++Ins;
652 } else if (Intf.last() > BI.LastInstr) {
654 ++Ins;
655 } else if (Intf.last() > BI.FirstInstr) {
656 ++Ins;
657 }
658 }
659
660 // Accumulate the total frequency of inserted spill code.
661 while (Ins--)
662 StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
663 }
664 Cost = StaticCost;
665
666 // Add constraints for use-blocks. Note that these are the only constraints
667 // that may add a positive bias, it is downhill from here.
668 SpillPlacer->addConstraints(SplitConstraints);
669 return SpillPlacer->scanActiveBundles();
670}
671
672/// addThroughConstraints - Add constraints and links to SpillPlacer from the
673/// live-through blocks in Blocks.
674bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
676 const unsigned GroupSize = 8;
677 SpillPlacement::BlockConstraint BCS[GroupSize];
678 unsigned TBS[GroupSize];
679 unsigned B = 0, T = 0;
680
681 for (unsigned Number : Blocks) {
682 Intf.moveToBlock(Number);
683
684 if (!Intf.hasInterference()) {
685 assert(T < GroupSize && "Array overflow");
686 TBS[T] = Number;
687 if (++T == GroupSize) {
688 SpillPlacer->addLinks(ArrayRef(TBS, T));
689 T = 0;
690 }
691 continue;
692 }
693
694 assert(B < GroupSize && "Array overflow");
695 BCS[B].Number = Number;
696
697 // Abort if the spill cannot be inserted at the MBB' start
699 auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
700 if (FirstNonDebugInstr != MBB->end() &&
702 SA->getFirstSplitPoint(Number)))
703 return false;
704 // Interference for the live-in value.
705 if (Intf.first() <= Indexes->getMBBStartIdx(Number))
707 else
709
710 // Interference for the live-out value.
711 if (Intf.last() >= SA->getLastSplitPoint(Number))
713 else
715
716 if (++B == GroupSize) {
717 SpillPlacer->addConstraints(ArrayRef(BCS, B));
718 B = 0;
719 }
720 }
721
722 SpillPlacer->addConstraints(ArrayRef(BCS, B));
723 SpillPlacer->addLinks(ArrayRef(TBS, T));
724 return true;
725}
726
727bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
728 // Keep track of through blocks that have not been added to SpillPlacer.
729 BitVector Todo = SA->getThroughBlocks();
730 SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
731 unsigned AddedTo = 0;
732#ifndef NDEBUG
733 unsigned Visited = 0;
734#endif
735
736 unsigned long Budget = GrowRegionComplexityBudget;
737 while (true) {
738 ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
739 // Find new through blocks in the periphery of PrefRegBundles.
740 for (unsigned Bundle : NewBundles) {
741 // Look at all blocks connected to Bundle in the full graph.
742 ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
743 // Limit compilation time by bailing out after we use all our budget.
744 if (Blocks.size() >= Budget)
745 return false;
746 Budget -= Blocks.size();
747 for (unsigned Block : Blocks) {
748 if (!Todo.test(Block))
749 continue;
750 Todo.reset(Block);
751 // This is a new through block. Add it to SpillPlacer later.
752 ActiveBlocks.push_back(Block);
753#ifndef NDEBUG
754 ++Visited;
755#endif
756 }
757 }
758 // Any new blocks to add?
759 if (ActiveBlocks.size() == AddedTo)
760 break;
761
762 // Compute through constraints from the interference, or assume that all
763 // through blocks prefer spilling when forming compact regions.
764 auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
765 if (Cand.PhysReg) {
766 if (!addThroughConstraints(Cand.Intf, NewBlocks))
767 return false;
768 } else {
769 // Providing that the variable being spilled does not look like a loop
770 // induction variable, which is expensive to spill around and better
771 // pushed into a condition inside the loop if possible, provide a strong
772 // negative bias on through blocks to prevent unwanted liveness on loop
773 // backedges.
774 bool PrefSpill = true;
775 if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) {
776 // Check that the current bundle is adding a Header + start+end of
777 // loop-internal blocks. If the block is indeed a header, don't make
778 // the NewBlocks as PrefSpill to allow the variable to be live in
779 // Header<->Latch.
780 MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0]));
781 if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] &&
782 all_of(NewBlocks.drop_front(), [&](unsigned Block) {
783 return L == Loops->getLoopFor(MF->getBlockNumbered(Block));
784 }))
785 PrefSpill = false;
786 }
787 if (PrefSpill)
788 SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
789 }
790 AddedTo = ActiveBlocks.size();
791
792 // Perhaps iterating can enable more bundles?
793 SpillPlacer->iterate();
794 }
795 LLVM_DEBUG(dbgs() << ", v=" << Visited);
796 return true;
797}
798
799/// calcCompactRegion - Compute the set of edge bundles that should be live
800/// when splitting the current live range into compact regions. Compact
801/// regions can be computed without looking at interference. They are the
802/// regions formed by removing all the live-through blocks from the live range.
803///
804/// Returns false if the current live range is already compact, or if the
805/// compact regions would form single block regions anyway.
806bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
807 // Without any through blocks, the live range is already compact.
808 if (!SA->getNumThroughBlocks())
809 return false;
810
811 // Compact regions don't correspond to any physreg.
812 Cand.reset(IntfCache, MCRegister::NoRegister);
813
814 LLVM_DEBUG(dbgs() << "Compact region bundles");
815
816 // Use the spill placer to determine the live bundles. GrowRegion pretends
817 // that all the through blocks have interference when PhysReg is unset.
818 SpillPlacer->prepare(Cand.LiveBundles);
819
820 // The static split cost will be zero since Cand.Intf reports no interference.
822 if (!addSplitConstraints(Cand.Intf, Cost)) {
823 LLVM_DEBUG(dbgs() << ", none.\n");
824 return false;
825 }
826
827 if (!growRegion(Cand)) {
828 LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
829 return false;
830 }
831
832 SpillPlacer->finish();
833
834 if (!Cand.LiveBundles.any()) {
835 LLVM_DEBUG(dbgs() << ", none.\n");
836 return false;
837 }
838
839 LLVM_DEBUG({
840 for (int I : Cand.LiveBundles.set_bits())
841 dbgs() << " EB#" << I;
842 dbgs() << ".\n";
843 });
844 return true;
845}
846
847/// calcSpillCost - Compute how expensive it would be to split the live range in
848/// SA around all use blocks instead of forming bundle regions.
849BlockFrequency RAGreedy::calcSpillCost() {
851 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
852 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
853 unsigned Number = BI.MBB->getNumber();
854 // We normally only need one spill instruction - a load or a store.
855 Cost += SpillPlacer->getBlockFrequency(Number);
856
857 // Unless the value is redefined in the block.
858 if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
859 Cost += SpillPlacer->getBlockFrequency(Number);
860 }
861 return Cost;
862}
863
864/// calcGlobalSplitCost - Return the global split cost of following the split
865/// pattern in LiveBundles. This cost should be added to the local cost of the
866/// interference pattern in SplitConstraints.
867///
868BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
869 const AllocationOrder &Order) {
870 BlockFrequency GlobalCost = BlockFrequency(0);
871 const BitVector &LiveBundles = Cand.LiveBundles;
872 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
873 for (unsigned I = 0; I != UseBlocks.size(); ++I) {
874 const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
875 SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
876 bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
877 bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
878 unsigned Ins = 0;
879
880 Cand.Intf.moveToBlock(BC.Number);
881
882 if (BI.LiveIn)
883 Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
884 if (BI.LiveOut)
885 Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
886 while (Ins--)
887 GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
888 }
889
890 for (unsigned Number : Cand.ActiveBlocks) {
891 bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
892 bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
893 if (!RegIn && !RegOut)
894 continue;
895 if (RegIn && RegOut) {
896 // We need double spill code if this block has interference.
897 Cand.Intf.moveToBlock(Number);
898 if (Cand.Intf.hasInterference()) {
899 GlobalCost += SpillPlacer->getBlockFrequency(Number);
900 GlobalCost += SpillPlacer->getBlockFrequency(Number);
901 }
902 continue;
903 }
904 // live-in / stack-out or stack-in live-out.
905 GlobalCost += SpillPlacer->getBlockFrequency(Number);
906 }
907 return GlobalCost;
908}
909
910/// splitAroundRegion - Split the current live range around the regions
911/// determined by BundleCand and GlobalCand.
912///
913/// Before calling this function, GlobalCand and BundleCand must be initialized
914/// so each bundle is assigned to a valid candidate, or NoCand for the
915/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
916/// objects must be initialized for the current live range, and intervals
917/// created for the used candidates.
918///
919/// @param LREdit The LiveRangeEdit object handling the current split.
920/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
921/// must appear in this list.
922void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
923 ArrayRef<unsigned> UsedCands) {
924 // These are the intervals created for new global ranges. We may create more
925 // intervals for local ranges.
926 const unsigned NumGlobalIntvs = LREdit.size();
927 LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
928 << " globals.\n");
929 assert(NumGlobalIntvs && "No global intervals configured");
930
931 // Isolate even single instructions when dealing with a proper sub-class.
932 // That guarantees register class inflation for the stack interval because it
933 // is all copies.
934 Register Reg = SA->getParent().reg();
935 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
936
937 // First handle all the blocks with uses.
938 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
939 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
940 unsigned Number = BI.MBB->getNumber();
941 unsigned IntvIn = 0, IntvOut = 0;
942 SlotIndex IntfIn, IntfOut;
943 if (BI.LiveIn) {
944 unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
945 if (CandIn != NoCand) {
946 GlobalSplitCandidate &Cand = GlobalCand[CandIn];
947 IntvIn = Cand.IntvIdx;
948 Cand.Intf.moveToBlock(Number);
949 IntfIn = Cand.Intf.first();
950 }
951 }
952 if (BI.LiveOut) {
953 unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
954 if (CandOut != NoCand) {
955 GlobalSplitCandidate &Cand = GlobalCand[CandOut];
956 IntvOut = Cand.IntvIdx;
957 Cand.Intf.moveToBlock(Number);
958 IntfOut = Cand.Intf.last();
959 }
960 }
961
962 // Create separate intervals for isolated blocks with multiple uses.
963 if (!IntvIn && !IntvOut) {
964 LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
965 if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
966 SE->splitSingleBlock(BI);
967 continue;
968 }
969
970 if (IntvIn && IntvOut)
971 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
972 else if (IntvIn)
973 SE->splitRegInBlock(BI, IntvIn, IntfIn);
974 else
975 SE->splitRegOutBlock(BI, IntvOut, IntfOut);
976 }
977
978 // Handle live-through blocks. The relevant live-through blocks are stored in
979 // the ActiveBlocks list with each candidate. We need to filter out
980 // duplicates.
981 BitVector Todo = SA->getThroughBlocks();
982 for (unsigned UsedCand : UsedCands) {
983 ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
984 for (unsigned Number : Blocks) {
985 if (!Todo.test(Number))
986 continue;
987 Todo.reset(Number);
988
989 unsigned IntvIn = 0, IntvOut = 0;
990 SlotIndex IntfIn, IntfOut;
991
992 unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
993 if (CandIn != NoCand) {
994 GlobalSplitCandidate &Cand = GlobalCand[CandIn];
995 IntvIn = Cand.IntvIdx;
996 Cand.Intf.moveToBlock(Number);
997 IntfIn = Cand.Intf.first();
998 }
999
1000 unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
1001 if (CandOut != NoCand) {
1002 GlobalSplitCandidate &Cand = GlobalCand[CandOut];
1003 IntvOut = Cand.IntvIdx;
1004 Cand.Intf.moveToBlock(Number);
1005 IntfOut = Cand.Intf.last();
1006 }
1007 if (!IntvIn && !IntvOut)
1008 continue;
1009 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
1010 }
1011 }
1012
1013 ++NumGlobalSplits;
1014
1016 SE->finish(&IntvMap);
1017 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1018
1019 unsigned OrigBlocks = SA->getNumLiveBlocks();
1020
1021 // Sort out the new intervals created by splitting. We get four kinds:
1022 // - Remainder intervals should not be split again.
1023 // - Candidate intervals can be assigned to Cand.PhysReg.
1024 // - Block-local splits are candidates for local splitting.
1025 // - DCE leftovers should go back on the queue.
1026 for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
1027 const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
1028
1029 // Ignore old intervals from DCE.
1030 if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
1031 continue;
1032
1033 // Remainder interval. Don't try splitting again, spill if it doesn't
1034 // allocate.
1035 if (IntvMap[I] == 0) {
1036 ExtraInfo->setStage(Reg, RS_Spill);
1037 continue;
1038 }
1039
1040 // Global intervals. Allow repeated splitting as long as the number of live
1041 // blocks is strictly decreasing.
1042 if (IntvMap[I] < NumGlobalIntvs) {
1043 if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
1044 LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
1045 << " blocks as original.\n");
1046 // Don't allow repeated splitting as a safe guard against looping.
1047 ExtraInfo->setStage(Reg, RS_Split2);
1048 }
1049 continue;
1050 }
1051
1052 // Other intervals are treated as new. This includes local intervals created
1053 // for blocks with multiple uses, and anything created by DCE.
1054 }
1055
1056 if (VerifyEnabled)
1057 MF->verify(this, "After splitting live range around region");
1058}
1059
1060MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
1061 AllocationOrder &Order,
1062 SmallVectorImpl<Register> &NewVRegs) {
1063 if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
1065 unsigned NumCands = 0;
1066 BlockFrequency SpillCost = calcSpillCost();
1067 BlockFrequency BestCost;
1068
1069 // Check if we can split this live range around a compact region.
1070 bool HasCompact = calcCompactRegion(GlobalCand.front());
1071 if (HasCompact) {
1072 // Yes, keep GlobalCand[0] as the compact region candidate.
1073 NumCands = 1;
1074 BestCost = BlockFrequency::max();
1075 } else {
1076 // No benefit from the compact region, our fallback will be per-block
1077 // splitting. Make sure we find a solution that is cheaper than spilling.
1078 BestCost = SpillCost;
1079 LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "
1080 << printBlockFreq(*MBFI, BestCost) << '\n');
1081 }
1082
1083 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
1084 NumCands, false /*IgnoreCSR*/);
1085
1086 // No solutions found, fall back to single block splitting.
1087 if (!HasCompact && BestCand == NoCand)
1089
1090 return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
1091}
1092
1093unsigned
1094RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
1095 AllocationOrder &Order,
1096 BlockFrequency &BestCost,
1097 unsigned &NumCands,
1098 unsigned &BestCand) {
1099 // Discard bad candidates before we run out of interference cache cursors.
1100 // This will only affect register classes with a lot of registers (>32).
1101 if (NumCands == IntfCache.getMaxCursors()) {
1102 unsigned WorstCount = ~0u;
1103 unsigned Worst = 0;
1104 for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
1105 if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
1106 continue;
1107 unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
1108 if (Count < WorstCount) {
1109 Worst = CandIndex;
1110 WorstCount = Count;
1111 }
1112 }
1113 --NumCands;
1114 GlobalCand[Worst] = GlobalCand[NumCands];
1115 if (BestCand == NumCands)
1116 BestCand = Worst;
1117 }
1118
1119 if (GlobalCand.size() <= NumCands)
1120 GlobalCand.resize(NumCands+1);
1121 GlobalSplitCandidate &Cand = GlobalCand[NumCands];
1122 Cand.reset(IntfCache, PhysReg);
1123
1124 SpillPlacer->prepare(Cand.LiveBundles);
1126 if (!addSplitConstraints(Cand.Intf, Cost)) {
1127 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
1128 return BestCand;
1129 }
1130 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
1131 << "\tstatic = " << printBlockFreq(*MBFI, Cost));
1132 if (Cost >= BestCost) {
1133 LLVM_DEBUG({
1134 if (BestCand == NoCand)
1135 dbgs() << " worse than no bundles\n";
1136 else
1137 dbgs() << " worse than "
1138 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
1139 });
1140 return BestCand;
1141 }
1142 if (!growRegion(Cand)) {
1143 LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
1144 return BestCand;
1145 }
1146
1147 SpillPlacer->finish();
1148
1149 // No live bundles, defer to splitSingleBlocks().
1150 if (!Cand.LiveBundles.any()) {
1151 LLVM_DEBUG(dbgs() << " no bundles.\n");
1152 return BestCand;
1153 }
1154
1155 Cost += calcGlobalSplitCost(Cand, Order);
1156 LLVM_DEBUG({
1157 dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles";
1158 for (int I : Cand.LiveBundles.set_bits())
1159 dbgs() << " EB#" << I;
1160 dbgs() << ".\n";
1161 });
1162 if (Cost < BestCost) {
1163 BestCand = NumCands;
1164 BestCost = Cost;
1165 }
1166 ++NumCands;
1167
1168 return BestCand;
1169}
1170
1171unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
1172 AllocationOrder &Order,
1173 BlockFrequency &BestCost,
1174 unsigned &NumCands,
1175 bool IgnoreCSR) {
1176 unsigned BestCand = NoCand;
1177 for (MCPhysReg PhysReg : Order) {
1178 assert(PhysReg);
1179 if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
1180 continue;
1181
1182 calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
1183 BestCand);
1184 }
1185
1186 return BestCand;
1187}
1188
1189unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
1190 bool HasCompact,
1191 SmallVectorImpl<Register> &NewVRegs) {
1192 SmallVector<unsigned, 8> UsedCands;
1193 // Prepare split editor.
1194 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1195 SE->reset(LREdit, SplitSpillMode);
1196
1197 // Assign all edge bundles to the preferred candidate, or NoCand.
1198 BundleCand.assign(Bundles->getNumBundles(), NoCand);
1199
1200 // Assign bundles for the best candidate region.
1201 if (BestCand != NoCand) {
1202 GlobalSplitCandidate &Cand = GlobalCand[BestCand];
1203 if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
1204 UsedCands.push_back(BestCand);
1205 Cand.IntvIdx = SE->openIntv();
1206 LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
1207 << B << " bundles, intv " << Cand.IntvIdx << ".\n");
1208 (void)B;
1209 }
1210 }
1211
1212 // Assign bundles for the compact region.
1213 if (HasCompact) {
1214 GlobalSplitCandidate &Cand = GlobalCand.front();
1215 assert(!Cand.PhysReg && "Compact region has no physreg");
1216 if (unsigned B = Cand.getBundles(BundleCand, 0)) {
1217 UsedCands.push_back(0);
1218 Cand.IntvIdx = SE->openIntv();
1219 LLVM_DEBUG(dbgs() << "Split for compact region in " << B
1220 << " bundles, intv " << Cand.IntvIdx << ".\n");
1221 (void)B;
1222 }
1223 }
1224
1225 splitAroundRegion(LREdit, UsedCands);
1226 return 0;
1227}
1228
1229// VirtReg has a physical Hint, this function tries to split VirtReg around
1230// Hint if we can place new COPY instructions in cold blocks.
1231bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
1232 const LiveInterval &VirtReg,
1233 SmallVectorImpl<Register> &NewVRegs,
1234 AllocationOrder &Order) {
1235 // Split the VirtReg may generate COPY instructions in multiple cold basic
1236 // blocks, and increase code size. So we avoid it when the function is
1237 // optimized for size.
1238 if (MF->getFunction().hasOptSize())
1239 return false;
1240
1241 // Don't allow repeated splitting as a safe guard against looping.
1242 if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
1243 return false;
1244
1246 Register Reg = VirtReg.reg();
1247
1248 // Compute the cost of assigning a non Hint physical register to VirtReg.
1249 // We define it as the total frequency of broken COPY instructions to/from
1250 // Hint register, and after split, they can be deleted.
1251 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
1252 if (!TII->isFullCopyInstr(Instr))
1253 continue;
1254 Register OtherReg = Instr.getOperand(1).getReg();
1255 if (OtherReg == Reg) {
1256 OtherReg = Instr.getOperand(0).getReg();
1257 if (OtherReg == Reg)
1258 continue;
1259 // Check if VirtReg interferes with OtherReg after this COPY instruction.
1260 if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
1261 continue;
1262 }
1263 MCRegister OtherPhysReg =
1264 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
1265 if (OtherPhysReg == Hint)
1266 Cost += MBFI->getBlockFreq(Instr.getParent());
1267 }
1268
1269 // Decrease the cost so it will be split in colder blocks.
1271 Cost *= Threshold;
1272 if (Cost == BlockFrequency(0))
1273 return false;
1274
1275 unsigned NumCands = 0;
1276 unsigned BestCand = NoCand;
1277 SA->analyze(&VirtReg);
1278 calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
1279 if (BestCand == NoCand)
1280 return false;
1281
1282 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
1283 return true;
1284}
1285
1286//===----------------------------------------------------------------------===//
1287// Per-Block Splitting
1288//===----------------------------------------------------------------------===//
1289
1290/// tryBlockSplit - Split a global live range around every block with uses. This
1291/// creates a lot of local live ranges, that will be split by tryLocalSplit if
1292/// they don't allocate.
1293unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
1294 AllocationOrder &Order,
1295 SmallVectorImpl<Register> &NewVRegs) {
1296 assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
1297 Register Reg = VirtReg.reg();
1298 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1299 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1300 SE->reset(LREdit, SplitSpillMode);
1301 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1302 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
1303 if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1304 SE->splitSingleBlock(BI);
1305 }
1306 // No blocks were split.
1307 if (LREdit.empty())
1308 return 0;
1309
1310 // We did split for some blocks.
1312 SE->finish(&IntvMap);
1313
1314 // Tell LiveDebugVariables about the new ranges.
1315 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1316
1317 // Sort out the new intervals created by splitting. The remainder interval
1318 // goes straight to spilling, the new local ranges get to stay RS_New.
1319 for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
1320 const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
1321 if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
1322 ExtraInfo->setStage(LI, RS_Spill);
1323 }
1324
1325 if (VerifyEnabled)
1326 MF->verify(this, "After splitting live range around basic blocks");
1327 return 0;
1328}
1329
1330//===----------------------------------------------------------------------===//
1331// Per-Instruction Splitting
1332//===----------------------------------------------------------------------===//
1333
1334/// Get the number of allocatable registers that match the constraints of \p Reg
1335/// on \p MI and that are also in \p SuperRC.
1337 const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
1338 const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
1339 const RegisterClassInfo &RCI) {
1340 assert(SuperRC && "Invalid register class");
1341
1342 const TargetRegisterClass *ConstrainedRC =
1343 MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
1344 /* ExploreBundle */ true);
1345 if (!ConstrainedRC)
1346 return 0;
1347 return RCI.getNumAllocatableRegs(ConstrainedRC);
1348}
1349
1351 const TargetRegisterInfo &TRI,
1352 const MachineInstr &FirstMI,
1353 Register Reg) {
1354 LaneBitmask Mask;
1356 (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);
1357
1358 for (auto [MI, OpIdx] : Ops) {
1359 const MachineOperand &MO = MI->getOperand(OpIdx);
1360 assert(MO.isReg() && MO.getReg() == Reg);
1361 unsigned SubReg = MO.getSubReg();
1362 if (SubReg == 0 && MO.isUse()) {
1363 if (MO.isUndef())
1364 continue;
1365 return MRI.getMaxLaneMaskForVReg(Reg);
1366 }
1367
1369 if (MO.isDef()) {
1370 if (!MO.isUndef())
1371 Mask |= ~SubRegMask;
1372 } else
1373 Mask |= SubRegMask;
1374 }
1375
1376 return Mask;
1377}
1378
1379/// Return true if \p MI at \P Use reads a subset of the lanes live in \p
1380/// VirtReg.
1382 const MachineInstr *MI, const LiveInterval &VirtReg,
1384 const TargetInstrInfo *TII) {
1385 // Early check the common case. Beware of the semi-formed bundles SplitKit
1386 // creates by setting the bundle flag on copies without a matching BUNDLE.
1387
1388 auto DestSrc = TII->isCopyInstr(*MI);
1389 if (DestSrc && !MI->isBundled() &&
1390 DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
1391 return false;
1392
1393 // FIXME: We're only considering uses, but should be consider defs too?
1394 LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());
1395
1396 LaneBitmask LiveAtMask;
1397 for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
1398 if (S.liveAt(Use))
1399 LiveAtMask |= S.LaneMask;
1400 }
1401
1402 // If the live lanes aren't different from the lanes used by the instruction,
1403 // this doesn't help.
1404 return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
1405}
1406
1407/// tryInstructionSplit - Split a live range around individual instructions.
1408/// This is normally not worthwhile since the spiller is doing essentially the
1409/// same thing. However, when the live range is in a constrained register
1410/// class, it may help to insert copies such that parts of the live range can
1411/// be moved to a larger register class.
1412///
1413/// This is similar to spilling to a larger register class.
1414unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
1415 AllocationOrder &Order,
1416 SmallVectorImpl<Register> &NewVRegs) {
1417 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1418 // There is no point to this if there are no larger sub-classes.
1419
1420 bool SplitSubClass = true;
1421 if (!RegClassInfo.isProperSubClass(CurRC)) {
1422 if (!VirtReg.hasSubRanges())
1423 return 0;
1424 SplitSubClass = false;
1425 }
1426
1427 // Always enable split spill mode, since we're effectively spilling to a
1428 // register.
1429 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1430 SE->reset(LREdit, SplitEditor::SM_Size);
1431
1432 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1433 if (Uses.size() <= 1)
1434 return 0;
1435
1436 LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
1437 << " individual instrs.\n");
1438
1439 const TargetRegisterClass *SuperRC =
1440 TRI->getLargestLegalSuperClass(CurRC, *MF);
1441 unsigned SuperRCNumAllocatableRegs =
1443 // Split around every non-copy instruction if this split will relax
1444 // the constraints on the virtual register.
1445 // Otherwise, splitting just inserts uncoalescable copies that do not help
1446 // the allocation.
1447 for (const SlotIndex Use : Uses) {
1448 if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
1449 if (TII->isFullCopyInstr(*MI) ||
1450 (SplitSubClass &&
1451 SuperRCNumAllocatableRegs ==
1452 getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
1453 TII, TRI, RegClassInfo)) ||
1454 // TODO: Handle split for subranges with subclass constraints?
1455 (!SplitSubClass && VirtReg.hasSubRanges() &&
1456 !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
1457 LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
1458 continue;
1459 }
1460 }
1461 SE->openIntv();
1462 SlotIndex SegStart = SE->enterIntvBefore(Use);
1463 SlotIndex SegStop = SE->leaveIntvAfter(Use);
1464 SE->useIntv(SegStart, SegStop);
1465 }
1466
1467 if (LREdit.empty()) {
1468 LLVM_DEBUG(dbgs() << "All uses were copies.\n");
1469 return 0;
1470 }
1471
1473 SE->finish(&IntvMap);
1474 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1475 // Assign all new registers to RS_Spill. This was the last chance.
1476 ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
1477 return 0;
1478}
1479
1480//===----------------------------------------------------------------------===//
1481// Local Splitting
1482//===----------------------------------------------------------------------===//
1483
1484/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
1485/// in order to use PhysReg between two entries in SA->UseSlots.
1486///
1487/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
1488///
1489void RAGreedy::calcGapWeights(MCRegister PhysReg,
1490 SmallVectorImpl<float> &GapWeight) {
1491 assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1492 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1493 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1494 const unsigned NumGaps = Uses.size()-1;
1495
1496 // Start and end points for the interference check.
1497 SlotIndex StartIdx =
1499 SlotIndex StopIdx =
1501
1502 GapWeight.assign(NumGaps, 0.0f);
1503
1504 // Add interference from each overlapping register.
1505 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1506 if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
1508 continue;
1509
1510 // We know that VirtReg is a continuous interval from FirstInstr to
1511 // LastInstr, so we don't need InterferenceQuery.
1512 //
1513 // Interference that overlaps an instruction is counted in both gaps
1514 // surrounding the instruction. The exception is interference before
1515 // StartIdx and after StopIdx.
1516 //
1518 Matrix->getLiveUnions()[Unit].find(StartIdx);
1519 for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
1520 // Skip the gaps before IntI.
1521 while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
1522 if (++Gap == NumGaps)
1523 break;
1524 if (Gap == NumGaps)
1525 break;
1526
1527 // Update the gaps covered by IntI.
1528 const float weight = IntI.value()->weight();
1529 for (; Gap != NumGaps; ++Gap) {
1530 GapWeight[Gap] = std::max(GapWeight[Gap], weight);
1531 if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
1532 break;
1533 }
1534 if (Gap == NumGaps)
1535 break;
1536 }
1537 }
1538
1539 // Add fixed interference.
1540 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1541 const LiveRange &LR = LIS->getRegUnit(Unit);
1542 LiveRange::const_iterator I = LR.find(StartIdx);
1544
1545 // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
1546 for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
1547 while (Uses[Gap+1].getBoundaryIndex() < I->start)
1548 if (++Gap == NumGaps)
1549 break;
1550 if (Gap == NumGaps)
1551 break;
1552
1553 for (; Gap != NumGaps; ++Gap) {
1554 GapWeight[Gap] = huge_valf;
1555 if (Uses[Gap+1].getBaseIndex() >= I->end)
1556 break;
1557 }
1558 if (Gap == NumGaps)
1559 break;
1560 }
1561 }
1562}
1563
1564/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1565/// basic block.
1566///
1567unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
1568 AllocationOrder &Order,
1569 SmallVectorImpl<Register> &NewVRegs) {
1570 // TODO: the function currently only handles a single UseBlock; it should be
1571 // possible to generalize.
1572 if (SA->getUseBlocks().size() != 1)
1573 return 0;
1574
1575 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1576
1577 // Note that it is possible to have an interval that is live-in or live-out
1578 // while only covering a single block - A phi-def can use undef values from
1579 // predecessors, and the block could be a single-block loop.
1580 // We don't bother doing anything clever about such a case, we simply assume
1581 // that the interval is continuous from FirstInstr to LastInstr. We should
1582 // make sure that we don't do anything illegal to such an interval, though.
1583
1584 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1585 if (Uses.size() <= 2)
1586 return 0;
1587 const unsigned NumGaps = Uses.size()-1;
1588
1589 LLVM_DEBUG({
1590 dbgs() << "tryLocalSplit: ";
1591 for (const auto &Use : Uses)
1592 dbgs() << ' ' << Use;
1593 dbgs() << '\n';
1594 });
1595
1596 // If VirtReg is live across any register mask operands, compute a list of
1597 // gaps with register masks.
1598 SmallVector<unsigned, 8> RegMaskGaps;
1599 if (Matrix->checkRegMaskInterference(VirtReg)) {
1600 // Get regmask slots for the whole block.
1602 LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1603 // Constrain to VirtReg's live range.
1604 unsigned RI =
1605 llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
1606 unsigned RE = RMS.size();
1607 for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
1608 // Look for Uses[I] <= RMS <= Uses[I + 1].
1610 if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
1611 continue;
1612 // Skip a regmask on the same instruction as the last use. It doesn't
1613 // overlap the live range.
1614 if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
1615 break;
1616 LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
1617 << Uses[I + 1]);
1618 RegMaskGaps.push_back(I);
1619 // Advance ri to the next gap. A regmask on one of the uses counts in
1620 // both gaps.
1621 while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
1622 ++RI;
1623 }
1624 LLVM_DEBUG(dbgs() << '\n');
1625 }
1626
1627 // Since we allow local split results to be split again, there is a risk of
1628 // creating infinite loops. It is tempting to require that the new live
1629 // ranges have less instructions than the original. That would guarantee
1630 // convergence, but it is too strict. A live range with 3 instructions can be
1631 // split 2+3 (including the COPY), and we want to allow that.
1632 //
1633 // Instead we use these rules:
1634 //
1635 // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
1636 // noop split, of course).
1637 // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1638 // the new ranges must have fewer instructions than before the split.
1639 // 3. New ranges with the same number of instructions are marked RS_Split2,
1640 // smaller ranges are marked RS_New.
1641 //
1642 // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1643 // excessive splitting and infinite loops.
1644 //
1645 bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
1646
1647 // Best split candidate.
1648 unsigned BestBefore = NumGaps;
1649 unsigned BestAfter = 0;
1650 float BestDiff = 0;
1651
1652 const float blockFreq =
1653 SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1654 (1.0f / MBFI->getEntryFreq().getFrequency());
1655 SmallVector<float, 8> GapWeight;
1656
1657 for (MCPhysReg PhysReg : Order) {
1658 assert(PhysReg);
1659 // Keep track of the largest spill weight that would need to be evicted in
1660 // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
1661 calcGapWeights(PhysReg, GapWeight);
1662
1663 // Remove any gaps with regmask clobbers.
1664 if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1665 for (unsigned Gap : RegMaskGaps)
1666 GapWeight[Gap] = huge_valf;
1667
1668 // Try to find the best sequence of gaps to close.
1669 // The new spill weight must be larger than any gap interference.
1670
1671 // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1672 unsigned SplitBefore = 0, SplitAfter = 1;
1673
1674 // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1675 // It is the spill weight that needs to be evicted.
1676 float MaxGap = GapWeight[0];
1677
1678 while (true) {
1679 // Live before/after split?
1680 const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
1681 const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
1682
1683 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
1684 << '-' << Uses[SplitAfter] << " I=" << MaxGap);
1685
1686 // Stop before the interval gets so big we wouldn't be making progress.
1687 if (!LiveBefore && !LiveAfter) {
1688 LLVM_DEBUG(dbgs() << " all\n");
1689 break;
1690 }
1691 // Should the interval be extended or shrunk?
1692 bool Shrink = true;
1693
1694 // How many gaps would the new range have?
1695 unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
1696
1697 // Legally, without causing looping?
1698 bool Legal = !ProgressRequired || NewGaps < NumGaps;
1699
1700 if (Legal && MaxGap < huge_valf) {
1701 // Estimate the new spill weight. Each instruction reads or writes the
1702 // register. Conservatively assume there are no read-modify-write
1703 // instructions.
1704 //
1705 // Try to guess the size of the new interval.
1706 const float EstWeight = normalizeSpillWeight(
1707 blockFreq * (NewGaps + 1),
1708 Uses[SplitBefore].distance(Uses[SplitAfter]) +
1709 (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
1710 1);
1711 // Would this split be possible to allocate?
1712 // Never allocate all gaps, we wouldn't be making progress.
1713 LLVM_DEBUG(dbgs() << " w=" << EstWeight);
1714 if (EstWeight * Hysteresis >= MaxGap) {
1715 Shrink = false;
1716 float Diff = EstWeight - MaxGap;
1717 if (Diff > BestDiff) {
1718 LLVM_DEBUG(dbgs() << " (best)");
1719 BestDiff = Hysteresis * Diff;
1720 BestBefore = SplitBefore;
1721 BestAfter = SplitAfter;
1722 }
1723 }
1724 }
1725
1726 // Try to shrink.
1727 if (Shrink) {
1728 if (++SplitBefore < SplitAfter) {
1729 LLVM_DEBUG(dbgs() << " shrink\n");
1730 // Recompute the max when necessary.
1731 if (GapWeight[SplitBefore - 1] >= MaxGap) {
1732 MaxGap = GapWeight[SplitBefore];
1733 for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
1734 MaxGap = std::max(MaxGap, GapWeight[I]);
1735 }
1736 continue;
1737 }
1738 MaxGap = 0;
1739 }
1740
1741 // Try to extend the interval.
1742 if (SplitAfter >= NumGaps) {
1743 LLVM_DEBUG(dbgs() << " end\n");
1744 break;
1745 }
1746
1747 LLVM_DEBUG(dbgs() << " extend\n");
1748 MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1749 }
1750 }
1751
1752 // Didn't find any candidates?
1753 if (BestBefore == NumGaps)
1754 return 0;
1755
1756 LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1757 << Uses[BestAfter] << ", " << BestDiff << ", "
1758 << (BestAfter - BestBefore + 1) << " instrs\n");
1759
1760 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1761 SE->reset(LREdit);
1762
1763 SE->openIntv();
1764 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1765 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
1766 SE->useIntv(SegStart, SegStop);
1768 SE->finish(&IntvMap);
1769 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1770 // If the new range has the same number of instructions as before, mark it as
1771 // RS_Split2 so the next split will be forced to make progress. Otherwise,
1772 // leave the new intervals as RS_New so they can compete.
1773 bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1774 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1775 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1776 if (NewGaps >= NumGaps) {
1777 LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1778 assert(!ProgressRequired && "Didn't make progress when it was required.");
1779 for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1780 if (IntvMap[I] == 1) {
1781 ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1782 LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1783 }
1784 LLVM_DEBUG(dbgs() << '\n');
1785 }
1786 ++NumLocalSplits;
1787
1788 return 0;
1789}
1790
1791//===----------------------------------------------------------------------===//
1792// Live Range Splitting
1793//===----------------------------------------------------------------------===//
1794
1795/// trySplit - Try to split VirtReg or one of its interferences, making it
1796/// assignable.
1797/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
1798unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
1799 SmallVectorImpl<Register> &NewVRegs,
1800 const SmallVirtRegSet &FixedRegisters) {
1801 // Ranges must be Split2 or less.
1802 if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1803 return 0;
1804
1805 // Local intervals are handled separately.
1806 if (LIS->intervalIsInOneMBB(VirtReg)) {
1807 NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1809 SA->analyze(&VirtReg);
1810 Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1811 if (PhysReg || !NewVRegs.empty())
1812 return PhysReg;
1813 return tryInstructionSplit(VirtReg, Order, NewVRegs);
1814 }
1815
1816 NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1818
1819 SA->analyze(&VirtReg);
1820
1821 // First try to split around a region spanning multiple blocks. RS_Split2
1822 // ranges already made dubious progress with region splitting, so they go
1823 // straight to single block splitting.
1824 if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1825 MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1826 if (PhysReg || !NewVRegs.empty())
1827 return PhysReg;
1828 }
1829
1830 // Then isolate blocks.
1831 return tryBlockSplit(VirtReg, Order, NewVRegs);
1832}
1833
1834//===----------------------------------------------------------------------===//
1835// Last Chance Recoloring
1836//===----------------------------------------------------------------------===//
1837
1838/// Return true if \p reg has any tied def operand.
1839static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
1840 for (const MachineOperand &MO : MRI->def_operands(reg))
1841 if (MO.isTied())
1842 return true;
1843
1844 return false;
1845}
1846
1847/// Return true if the existing assignment of \p Intf overlaps, but is not the
1848/// same, as \p PhysReg.
1850 const VirtRegMap &VRM,
1851 MCRegister PhysReg,
1852 const LiveInterval &Intf) {
1853 MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1854 if (PhysReg == AssignedReg)
1855 return false;
1856 return TRI.regsOverlap(PhysReg, AssignedReg);
1857}
1858
1859/// mayRecolorAllInterferences - Check if the virtual registers that
1860/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
1861/// recolored to free \p PhysReg.
1862/// When true is returned, \p RecoloringCandidates has been augmented with all
1863/// the live intervals that need to be recolored in order to free \p PhysReg
1864/// for \p VirtReg.
1865/// \p FixedRegisters contains all the virtual registers that cannot be
1866/// recolored.
1867bool RAGreedy::mayRecolorAllInterferences(
1868 MCRegister PhysReg, const LiveInterval &VirtReg,
1869 SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
1870 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1871
1872 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1873 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
1874 // If there is LastChanceRecoloringMaxInterference or more interferences,
1875 // chances are one would not be recolorable.
1879 LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
1880 CutOffInfo |= CO_Interf;
1881 return false;
1882 }
1883 for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
1884 // If Intf is done and sits on the same register class as VirtReg, it
1885 // would not be recolorable as it is in the same state as
1886 // VirtReg. However there are at least two exceptions.
1887 //
1888 // If VirtReg has tied defs and Intf doesn't, then
1889 // there is still a point in examining if it can be recolorable.
1890 //
1891 // Additionally, if the register class has overlapping tuple members, it
1892 // may still be recolorable using a different tuple. This is more likely
1893 // if the existing assignment aliases with the candidate.
1894 //
1895 if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1896 MRI->getRegClass(Intf->reg()) == CurRC &&
1897 !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
1898 !(hasTiedDef(MRI, VirtReg.reg()) &&
1899 !hasTiedDef(MRI, Intf->reg()))) ||
1900 FixedRegisters.count(Intf->reg())) {
1901 LLVM_DEBUG(
1902 dbgs() << "Early abort: the interference is not recolorable.\n");
1903 return false;
1904 }
1905 RecoloringCandidates.insert(Intf);
1906 }
1907 }
1908 return true;
1909}
1910
1911/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
1912/// its interferences.
1913/// Last chance recoloring chooses a color for \p VirtReg and recolors every
1914/// virtual register that was using it. The recoloring process may recursively
1915/// use the last chance recoloring. Therefore, when a virtual register has been
1916/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
1917/// be last-chance-recolored again during this recoloring "session".
1918/// E.g.,
1919/// Let
1920/// vA can use {R1, R2 }
1921/// vB can use { R2, R3}
1922/// vC can use {R1 }
1923/// Where vA, vB, and vC cannot be split anymore (they are reloads for
1924/// instance) and they all interfere.
1925///
1926/// vA is assigned R1
1927/// vB is assigned R2
1928/// vC tries to evict vA but vA is already done.
1929/// Regular register allocation fails.
1930///
1931/// Last chance recoloring kicks in:
1932/// vC does as if vA was evicted => vC uses R1.
1933/// vC is marked as fixed.
1934/// vA needs to find a color.
1935/// None are available.
1936/// vA cannot evict vC: vC is a fixed virtual register now.
1937/// vA does as if vB was evicted => vA uses R2.
1938/// vB needs to find a color.
1939/// R3 is available.
1940/// Recoloring => vC = R1, vA = R2, vB = R3
1941///
1942/// \p Order defines the preferred allocation order for \p VirtReg.
1943/// \p NewRegs will contain any new virtual register that have been created
1944/// (split, spill) during the process and that must be assigned.
1945/// \p FixedRegisters contains all the virtual registers that cannot be
1946/// recolored.
1947///
1948/// \p RecolorStack tracks the original assignments of successfully recolored
1949/// registers.
1950///
1951/// \p Depth gives the current depth of the last chance recoloring.
1952/// \return a physical register that can be used for VirtReg or ~0u if none
1953/// exists.
1954unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
1955 AllocationOrder &Order,
1956 SmallVectorImpl<Register> &NewVRegs,
1957 SmallVirtRegSet &FixedRegisters,
1958 RecoloringStack &RecolorStack,
1959 unsigned Depth) {
1960 if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
1961 return ~0u;
1962
1963 LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
1964
1965 const ssize_t EntryStackSize = RecolorStack.size();
1966
1967 // Ranges must be Done.
1968 assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
1969 "Last chance recoloring should really be last chance");
1970 // Set the max depth to LastChanceRecoloringMaxDepth.
1971 // We may want to reconsider that if we end up with a too large search space
1972 // for target with hundreds of registers.
1973 // Indeed, in that case we may want to cut the search space earlier.
1975 LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
1976 CutOffInfo |= CO_Depth;
1977 return ~0u;
1978 }
1979
1980 // Set of Live intervals that will need to be recolored.
1981 SmallLISet RecoloringCandidates;
1982
1983 // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
1984 // this recoloring "session".
1985 assert(!FixedRegisters.count(VirtReg.reg()));
1986 FixedRegisters.insert(VirtReg.reg());
1987 SmallVector<Register, 4> CurrentNewVRegs;
1988
1989 for (MCRegister PhysReg : Order) {
1990 assert(PhysReg.isValid());
1991 LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
1992 << printReg(PhysReg, TRI) << '\n');
1993 RecoloringCandidates.clear();
1994 CurrentNewVRegs.clear();
1995
1996 // It is only possible to recolor virtual register interference.
1997 if (Matrix->checkInterference(VirtReg, PhysReg) >
1999 LLVM_DEBUG(
2000 dbgs() << "Some interferences are not with virtual registers.\n");
2001
2002 continue;
2003 }
2004
2005 // Early give up on this PhysReg if it is obvious we cannot recolor all
2006 // the interferences.
2007 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2008 FixedRegisters)) {
2009 LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
2010 continue;
2011 }
2012
2013 // RecoloringCandidates contains all the virtual registers that interfere
2014 // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
2015 // recoloring and perform the actual recoloring.
2016 PQueue RecoloringQueue;
2017 for (const LiveInterval *RC : RecoloringCandidates) {
2018 Register ItVirtReg = RC->reg();
2019 enqueue(RecoloringQueue, RC);
2020 assert(VRM->hasPhys(ItVirtReg) &&
2021 "Interferences are supposed to be with allocated variables");
2022
2023 // Record the current allocation.
2024 RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
2025
2026 // unset the related struct.
2027 Matrix->unassign(*RC);
2028 }
2029
2030 // Do as if VirtReg was assigned to PhysReg so that the underlying
2031 // recoloring has the right information about the interferes and
2032 // available colors.
2033 Matrix->assign(VirtReg, PhysReg);
2034
2035 // Save the current recoloring state.
2036 // If we cannot recolor all the interferences, we will have to start again
2037 // at this point for the next physical register.
2038 SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2039 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
2040 FixedRegisters, RecolorStack, Depth)) {
2041 // Push the queued vregs into the main queue.
2042 for (Register NewVReg : CurrentNewVRegs)
2043 NewVRegs.push_back(NewVReg);
2044 // Do not mess up with the global assignment process.
2045 // I.e., VirtReg must be unassigned.
2046 Matrix->unassign(VirtReg);
2047 return PhysReg;
2048 }
2049
2050 LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2051 << printReg(PhysReg, TRI) << '\n');
2052
2053 // The recoloring attempt failed, undo the changes.
2054 FixedRegisters = SaveFixedRegisters;
2055 Matrix->unassign(VirtReg);
2056
2057 // For a newly created vreg which is also in RecoloringCandidates,
2058 // don't add it to NewVRegs because its physical register will be restored
2059 // below. Other vregs in CurrentNewVRegs are created by calling
2060 // selectOrSplit and should be added into NewVRegs.
2061 for (Register R : CurrentNewVRegs) {
2062 if (RecoloringCandidates.count(&LIS->getInterval(R)))
2063 continue;
2064 NewVRegs.push_back(R);
2065 }
2066
2067 // Roll back our unsuccessful recoloring. Also roll back any successful
2068 // recolorings in any recursive recoloring attempts, since it's possible
2069 // they would have introduced conflicts with assignments we will be
2070 // restoring further up the stack. Perform all unassignments prior to
2071 // reassigning, since sub-recolorings may have conflicted with the registers
2072 // we are going to restore to their original assignments.
2073 for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
2074 const LiveInterval *LI;
2075 MCRegister PhysReg;
2076 std::tie(LI, PhysReg) = RecolorStack[I];
2077
2078 if (VRM->hasPhys(LI->reg()))
2079 Matrix->unassign(*LI);
2080 }
2081
2082 for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
2083 const LiveInterval *LI;
2084 MCRegister PhysReg;
2085 std::tie(LI, PhysReg) = RecolorStack[I];
2086 if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
2087 Matrix->assign(*LI, PhysReg);
2088 }
2089
2090 // Pop the stack of recoloring attempts.
2091 RecolorStack.resize(EntryStackSize);
2092 }
2093
2094 // Last chance recoloring did not worked either, give up.
2095 return ~0u;
2096}
2097
2098/// tryRecoloringCandidates - Try to assign a new color to every register
2099/// in \RecoloringQueue.
2100/// \p NewRegs will contain any new virtual register created during the
2101/// recoloring process.
2102/// \p FixedRegisters[in/out] contains all the registers that have been
2103/// recolored.
2104/// \return true if all virtual registers in RecoloringQueue were successfully
2105/// recolored, false otherwise.
2106bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2107 SmallVectorImpl<Register> &NewVRegs,
2108 SmallVirtRegSet &FixedRegisters,
2109 RecoloringStack &RecolorStack,
2110 unsigned Depth) {
2111 while (!RecoloringQueue.empty()) {
2112 const LiveInterval *LI = dequeue(RecoloringQueue);
2113 LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2114 MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2115 RecolorStack, Depth + 1);
2116 // When splitting happens, the live-range may actually be empty.
2117 // In that case, this is okay to continue the recoloring even
2118 // if we did not find an alternative color for it. Indeed,
2119 // there will not be anything to color for LI in the end.
2120 if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2121 return false;
2122
2123 if (!PhysReg) {
2124 assert(LI->empty() && "Only empty live-range do not require a register");
2125 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2126 << " succeeded. Empty LI.\n");
2127 continue;
2128 }
2129 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2130 << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2131
2132 Matrix->assign(*LI, PhysReg);
2133 FixedRegisters.insert(LI->reg());
2134 }
2135 return true;
2136}
2137
2138//===----------------------------------------------------------------------===//
2139// Main Entry Point
2140//===----------------------------------------------------------------------===//
2141
2143 SmallVectorImpl<Register> &NewVRegs) {
2144 CutOffInfo = CO_None;
2145 LLVMContext &Ctx = MF->getFunction().getContext();
2146 SmallVirtRegSet FixedRegisters;
2147 RecoloringStack RecolorStack;
2148 MCRegister Reg =
2149 selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2150 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2151 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2152 if (CutOffEncountered == CO_Depth)
2153 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2154 "reached. Use -fexhaustive-register-search to skip "
2155 "cutoffs");
2156 else if (CutOffEncountered == CO_Interf)
2157 Ctx.emitError("register allocation failed: maximum interference for "
2158 "recoloring reached. Use -fexhaustive-register-search "
2159 "to skip cutoffs");
2160 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2161 Ctx.emitError("register allocation failed: maximum interference and "
2162 "depth for recoloring reached. Use "
2163 "-fexhaustive-register-search to skip cutoffs");
2164 }
2165 return Reg;
2166}
2167
2168/// Using a CSR for the first time has a cost because it causes push|pop
2169/// to be added to prologue|epilogue. Splitting a cold section of the live
2170/// range can have lower cost than using the CSR for the first time;
2171/// Spilling a live range in the cold path can have lower cost than using
2172/// the CSR for the first time. Returns the physical register if we decide
2173/// to use the CSR; otherwise return 0.
2174MCRegister RAGreedy::tryAssignCSRFirstTime(
2175 const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2176 uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2177 if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2178 // We choose spill over using the CSR for the first time if the spill cost
2179 // is lower than CSRCost.
2180 SA->analyze(&VirtReg);
2181 if (calcSpillCost() >= CSRCost)
2182 return PhysReg;
2183
2184 // We are going to spill, set CostPerUseLimit to 1 to make sure that
2185 // we will not use a callee-saved register in tryEvict.
2186 CostPerUseLimit = 1;
2187 return 0;
2188 }
2189 if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2190 // We choose pre-splitting over using the CSR for the first time if
2191 // the cost of splitting is lower than CSRCost.
2192 SA->analyze(&VirtReg);
2193 unsigned NumCands = 0;
2194 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2195 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2196 NumCands, true /*IgnoreCSR*/);
2197 if (BestCand == NoCand)
2198 // Use the CSR if we can't find a region split below CSRCost.
2199 return PhysReg;
2200
2201 // Perform the actual pre-splitting.
2202 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2203 return 0;
2204 }
2205 return PhysReg;
2206}
2207
2209 // Do not keep invalid information around.
2210 SetOfBrokenHints.remove(&LI);
2211}
2212
2213void RAGreedy::initializeCSRCost() {
2214 // We use the larger one out of the command-line option and the value report
2215 // by TRI.
2216 CSRCost = BlockFrequency(
2217 std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2218 if (!CSRCost.getFrequency())
2219 return;
2220
2221 // Raw cost is relative to Entry == 2^14; scale it appropriately.
2222 uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency();
2223 if (!ActualEntry) {
2224 CSRCost = BlockFrequency(0);
2225 return;
2226 }
2227 uint64_t FixedEntry = 1 << 14;
2228 if (ActualEntry < FixedEntry)
2229 CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2230 else if (ActualEntry <= UINT32_MAX)
2231 // Invert the fraction and divide.
2232 CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2233 else
2234 // Can't use BranchProbability in general, since it takes 32-bit numbers.
2235 CSRCost =
2236 BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry));
2237}
2238
2239/// Collect the hint info for \p Reg.
2240/// The results are stored into \p Out.
2241/// \p Out is not cleared before being populated.
2242void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2243 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2244 if (!TII->isFullCopyInstr(Instr))
2245 continue;
2246 // Look for the other end of the copy.
2247 Register OtherReg = Instr.getOperand(0).getReg();
2248 if (OtherReg == Reg) {
2249 OtherReg = Instr.getOperand(1).getReg();
2250 if (OtherReg == Reg)
2251 continue;
2252 }
2253 // Get the current assignment.
2254 MCRegister OtherPhysReg =
2255 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2256 // Push the collected information.
2257 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2258 OtherPhysReg));
2259 }
2260}
2261
2262/// Using the given \p List, compute the cost of the broken hints if
2263/// \p PhysReg was used.
2264/// \return The cost of \p List for \p PhysReg.
2265BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2266 MCRegister PhysReg) {
2268 for (const HintInfo &Info : List) {
2269 if (Info.PhysReg != PhysReg)
2270 Cost += Info.Freq;
2271 }
2272 return Cost;
2273}
2274
2275/// Using the register assigned to \p VirtReg, try to recolor
2276/// all the live ranges that are copy-related with \p VirtReg.
2277/// The recoloring is then propagated to all the live-ranges that have
2278/// been recolored and so on, until no more copies can be coalesced or
2279/// it is not profitable.
2280/// For a given live range, profitability is determined by the sum of the
2281/// frequencies of the non-identity copies it would introduce with the old
2282/// and new register.
2283void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2284 // We have a broken hint, check if it is possible to fix it by
2285 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2286 // some register and PhysReg may be available for the other live-ranges.
2287 SmallSet<Register, 4> Visited;
2288 SmallVector<unsigned, 2> RecoloringCandidates;
2289 HintsInfo Info;
2290 Register Reg = VirtReg.reg();
2291 MCRegister PhysReg = VRM->getPhys(Reg);
2292 // Start the recoloring algorithm from the input live-interval, then
2293 // it will propagate to the ones that are copy-related with it.
2294 Visited.insert(Reg);
2295 RecoloringCandidates.push_back(Reg);
2296
2297 LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2298 << '(' << printReg(PhysReg, TRI) << ")\n");
2299
2300 do {
2301 Reg = RecoloringCandidates.pop_back_val();
2302
2303 // We cannot recolor physical register.
2304 if (Reg.isPhysical())
2305 continue;
2306
2307 // This may be a skipped register.
2308 if (!VRM->hasPhys(Reg)) {
2310 "We have an unallocated variable which should have been handled");
2311 continue;
2312 }
2313
2314 // Get the live interval mapped with this virtual register to be able
2315 // to check for the interference with the new color.
2316 LiveInterval &LI = LIS->getInterval(Reg);
2317 MCRegister CurrPhys = VRM->getPhys(Reg);
2318 // Check that the new color matches the register class constraints and
2319 // that it is free for this live range.
2320 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2321 Matrix->checkInterference(LI, PhysReg)))
2322 continue;
2323
2324 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2325 << ") is recolorable.\n");
2326
2327 // Gather the hint info.
2328 Info.clear();
2329 collectHintInfo(Reg, Info);
2330 // Check if recoloring the live-range will increase the cost of the
2331 // non-identity copies.
2332 if (CurrPhys != PhysReg) {
2333 LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2334 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2335 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2336 LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost)
2337 << "\nNew Cost: "
2338 << printBlockFreq(*MBFI, NewCopiesCost) << '\n');
2339 if (OldCopiesCost < NewCopiesCost) {
2340 LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2341 continue;
2342 }
2343 // At this point, the cost is either cheaper or equal. If it is
2344 // equal, we consider this is profitable because it may expose
2345 // more recoloring opportunities.
2346 LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2347 // Recolor the live-range.
2348 Matrix->unassign(LI);
2349 Matrix->assign(LI, PhysReg);
2350 }
2351 // Push all copy-related live-ranges to keep reconciling the broken
2352 // hints.
2353 for (const HintInfo &HI : Info) {
2354 if (Visited.insert(HI.Reg).second)
2355 RecoloringCandidates.push_back(HI.Reg);
2356 }
2357 } while (!RecoloringCandidates.empty());
2358}
2359
2360/// Try to recolor broken hints.
2361/// Broken hints may be repaired by recoloring when an evicted variable
2362/// freed up a register for a larger live-range.
2363/// Consider the following example:
2364/// BB1:
2365/// a =
2366/// b =
2367/// BB2:
2368/// ...
2369/// = b
2370/// = a
2371/// Let us assume b gets split:
2372/// BB1:
2373/// a =
2374/// b =
2375/// BB2:
2376/// c = b
2377/// ...
2378/// d = c
2379/// = d
2380/// = a
2381/// Because of how the allocation work, b, c, and d may be assigned different
2382/// colors. Now, if a gets evicted later:
2383/// BB1:
2384/// a =
2385/// st a, SpillSlot
2386/// b =
2387/// BB2:
2388/// c = b
2389/// ...
2390/// d = c
2391/// = d
2392/// e = ld SpillSlot
2393/// = e
2394/// This is likely that we can assign the same register for b, c, and d,
2395/// getting rid of 2 copies.
2396void RAGreedy::tryHintsRecoloring() {
2397 for (const LiveInterval *LI : SetOfBrokenHints) {
2398 assert(LI->reg().isVirtual() &&
2399 "Recoloring is possible only for virtual registers");
2400 // Some dead defs may be around (e.g., because of debug uses).
2401 // Ignore those.
2402 if (!VRM->hasPhys(LI->reg()))
2403 continue;
2404 tryHintRecoloring(*LI);
2405 }
2406}
2407
2408MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2409 SmallVectorImpl<Register> &NewVRegs,
2410 SmallVirtRegSet &FixedRegisters,
2411 RecoloringStack &RecolorStack,
2412 unsigned Depth) {
2413 uint8_t CostPerUseLimit = uint8_t(~0u);
2414 // First try assigning a free register.
2415 auto Order =
2417 if (MCRegister PhysReg =
2418 tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2419 // When NewVRegs is not empty, we may have made decisions such as evicting
2420 // a virtual register, go with the earlier decisions and use the physical
2421 // register.
2422 if (CSRCost.getFrequency() &&
2423 EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2424 MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2425 CostPerUseLimit, NewVRegs);
2426 if (CSRReg || !NewVRegs.empty())
2427 // Return now if we decide to use a CSR or create new vregs due to
2428 // pre-splitting.
2429 return CSRReg;
2430 } else
2431 return PhysReg;
2432 }
2433 // Non emtpy NewVRegs means VirtReg has been split.
2434 if (!NewVRegs.empty())
2435 return 0;
2436
2437 LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2438 LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2439 << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2440
2441 // Try to evict a less worthy live range, but only for ranges from the primary
2442 // queue. The RS_Split ranges already failed to do this, and they should not
2443 // get a second chance until they have been split.
2444 if (Stage != RS_Split)
2445 if (Register PhysReg =
2446 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2447 FixedRegisters)) {
2448 Register Hint = MRI->getSimpleHint(VirtReg.reg());
2449 // If VirtReg has a hint and that hint is broken record this
2450 // virtual register as a recoloring candidate for broken hint.
2451 // Indeed, since we evicted a variable in its neighborhood it is
2452 // likely we can at least partially recolor some of the
2453 // copy-related live-ranges.
2454 if (Hint && Hint != PhysReg)
2455 SetOfBrokenHints.insert(&VirtReg);
2456 return PhysReg;
2457 }
2458
2459 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2460
2461 // The first time we see a live range, don't try to split or spill.
2462 // Wait until the second time, when all smaller ranges have been allocated.
2463 // This gives a better picture of the interference to split around.
2464 if (Stage < RS_Split) {
2465 ExtraInfo->setStage(VirtReg, RS_Split);
2466 LLVM_DEBUG(dbgs() << "wait for second round\n");
2467 NewVRegs.push_back(VirtReg.reg());
2468 return 0;
2469 }
2470
2471 if (Stage < RS_Spill) {
2472 // Try splitting VirtReg or interferences.
2473 unsigned NewVRegSizeBefore = NewVRegs.size();
2474 Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2475 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2476 return PhysReg;
2477 }
2478
2479 // If we couldn't allocate a register from spilling, there is probably some
2480 // invalid inline assembly. The base class will report it.
2481 if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2482 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2483 RecolorStack, Depth);
2484 }
2485
2486 // Finally spill VirtReg itself.
2488 TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
2489 ExtraInfo->getStage(VirtReg) < RS_Memory) {
2490 // TODO: This is experimental and in particular, we do not model
2491 // the live range splitting done by spilling correctly.
2492 // We would need a deep integration with the spiller to do the
2493 // right thing here. Anyway, that is still good for early testing.
2494 ExtraInfo->setStage(VirtReg, RS_Memory);
2495 LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
2496 NewVRegs.push_back(VirtReg.reg());
2497 } else {
2498 NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2500 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2501 spiller().spill(LRE);
2502 ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2503
2504 // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
2505 // the new regs are kept in LDV (still mapping to the old register), until
2506 // we rewrite spilled locations in LDV at a later stage.
2507 DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
2508
2509 if (VerifyEnabled)
2510 MF->verify(this, "After spilling");
2511 }
2512
2513 // The live virtual register requesting allocation was spilled, so tell
2514 // the caller not to allocate anything during this round.
2515 return 0;
2516}
2517
2518void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2519 using namespace ore;
2520 if (Spills) {
2521 R << NV("NumSpills", Spills) << " spills ";
2522 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2523 }
2524 if (FoldedSpills) {
2525 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2526 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2527 << " total folded spills cost ";
2528 }
2529 if (Reloads) {
2530 R << NV("NumReloads", Reloads) << " reloads ";
2531 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2532 }
2533 if (FoldedReloads) {
2534 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2535 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2536 << " total folded reloads cost ";
2537 }
2538 if (ZeroCostFoldedReloads)
2539 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2540 << " zero cost folded reloads ";
2541 if (Copies) {
2542 R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2543 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2544 }
2545}
2546
2547RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2548 RAGreedyStats Stats;
2549 const MachineFrameInfo &MFI = MF->getFrameInfo();
2550 int FI;
2551
2552 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2553 return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2554 A->getPseudoValue())->getFrameIndex());
2555 };
2556 auto isPatchpointInstr = [](const MachineInstr &MI) {
2557 return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2558 MI.getOpcode() == TargetOpcode::STACKMAP ||
2559 MI.getOpcode() == TargetOpcode::STATEPOINT;
2560 };
2561 for (MachineInstr &MI : MBB) {
2562 auto DestSrc = TII->isCopyInstr(MI);
2563 if (DestSrc) {
2564 const MachineOperand &Dest = *DestSrc->Destination;
2565 const MachineOperand &Src = *DestSrc->Source;
2566 Register SrcReg = Src.getReg();
2567 Register DestReg = Dest.getReg();
2568 // Only count `COPY`s with a virtual register as source or destination.
2569 if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2570 if (SrcReg.isVirtual()) {
2571 SrcReg = VRM->getPhys(SrcReg);
2572 if (SrcReg && Src.getSubReg())
2573 SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2574 }
2575 if (DestReg.isVirtual()) {
2576 DestReg = VRM->getPhys(DestReg);
2577 if (DestReg && Dest.getSubReg())
2578 DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2579 }
2580 if (SrcReg != DestReg)
2581 ++Stats.Copies;
2582 }
2583 continue;
2584 }
2585
2587 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2588 ++Stats.Reloads;
2589 continue;
2590 }
2591 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2592 ++Stats.Spills;
2593 continue;
2594 }
2595 if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2596 llvm::any_of(Accesses, isSpillSlotAccess)) {
2597 if (!isPatchpointInstr(MI)) {
2598 Stats.FoldedReloads += Accesses.size();
2599 continue;
2600 }
2601 // For statepoint there may be folded and zero cost folded stack reloads.
2602 std::pair<unsigned, unsigned> NonZeroCostRange =
2604 SmallSet<unsigned, 16> FoldedReloads;
2605 SmallSet<unsigned, 16> ZeroCostFoldedReloads;
2606 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2607 MachineOperand &MO = MI.getOperand(Idx);
2608 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2609 continue;
2610 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2611 FoldedReloads.insert(MO.getIndex());
2612 else
2613 ZeroCostFoldedReloads.insert(MO.getIndex());
2614 }
2615 // If stack slot is used in folded reload it is not zero cost then.
2616 for (unsigned Slot : FoldedReloads)
2617 ZeroCostFoldedReloads.erase(Slot);
2618 Stats.FoldedReloads += FoldedReloads.size();
2619 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2620 continue;
2621 }
2622 Accesses.clear();
2623 if (TII->hasStoreToStackSlot(MI, Accesses) &&
2624 llvm::any_of(Accesses, isSpillSlotAccess)) {
2625 Stats.FoldedSpills += Accesses.size();
2626 }
2627 }
2628 // Set cost of collected statistic by multiplication to relative frequency of
2629 // this basic block.
2630 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2631 Stats.ReloadsCost = RelFreq * Stats.Reloads;
2632 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2633 Stats.SpillsCost = RelFreq * Stats.Spills;
2634 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2635 Stats.CopiesCost = RelFreq * Stats.Copies;
2636 return Stats;
2637}
2638
2639RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2640 RAGreedyStats Stats;
2641
2642 // Sum up the spill and reloads in subloops.
2643 for (MachineLoop *SubLoop : *L)
2644 Stats.add(reportStats(SubLoop));
2645
2646 for (MachineBasicBlock *MBB : L->getBlocks())
2647 // Handle blocks that were not included in subloops.
2648 if (Loops->getLoopFor(MBB) == L)
2649 Stats.add(computeStats(*MBB));
2650
2651 if (!Stats.isEmpty()) {
2652 using namespace ore;
2653
2654 ORE->emit([&]() {
2655 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2656 L->getStartLoc(), L->getHeader());
2657 Stats.report(R);
2658 R << "generated in loop";
2659 return R;
2660 });
2661 }
2662 return Stats;
2663}
2664
2665void RAGreedy::reportStats() {
2666 if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2667 return;
2668 RAGreedyStats Stats;
2669 for (MachineLoop *L : *Loops)
2670 Stats.add(reportStats(L));
2671 // Process non-loop blocks.
2672 for (MachineBasicBlock &MBB : *MF)
2673 if (!Loops->getLoopFor(&MBB))
2674 Stats.add(computeStats(MBB));
2675 if (!Stats.isEmpty()) {
2676 using namespace ore;
2677
2678 ORE->emit([&]() {
2679 DebugLoc Loc;
2680 if (auto *SP = MF->getFunction().getSubprogram())
2681 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2682 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2683 &MF->front());
2684 Stats.report(R);
2685 R << "generated in function";
2686 return R;
2687 });
2688 }
2689}
2690
2691bool RAGreedy::hasVirtRegAlloc() {
2692 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2694 if (MRI->reg_nodbg_empty(Reg))
2695 continue;
2696 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2697 if (!RC)
2698 continue;
2699 if (shouldAllocateRegister(Reg))
2700 return true;
2701 }
2702
2703 return false;
2704}
2705
2707 LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2708 << "********** Function: " << mf.getName() << '\n');
2709
2710 MF = &mf;
2711 TII = MF->getSubtarget().getInstrInfo();
2712
2713 if (VerifyEnabled)
2714 MF->verify(this, "Before greedy register allocator");
2715
2716 RegAllocBase::init(getAnalysis<VirtRegMap>(),
2717 getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
2718 getAnalysis<LiveRegMatrix>());
2719
2720 // Early return if there is no virtual register to be allocated to a
2721 // physical register.
2722 if (!hasVirtRegAlloc())
2723 return false;
2724
2725 Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
2726 // Renumber to get accurate and consistent results from
2727 // SlotIndexes::getApproxInstrDistance.
2728 Indexes->packIndexes();
2729 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
2730 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2731 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
2732 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2733 Bundles = &getAnalysis<EdgeBundles>();
2734 SpillPlacer = &getAnalysis<SpillPlacement>();
2735 DebugVars = &getAnalysis<LiveDebugVariables>();
2736
2737 initializeCSRCost();
2738
2739 RegCosts = TRI->getRegisterCosts(*MF);
2740 RegClassPriorityTrumpsGlobalness =
2741 GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2744
2745 ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2748
2749 ExtraInfo.emplace();
2750 EvictAdvisor =
2751 getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
2752 PriorityAdvisor =
2753 getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
2754
2755 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2756 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
2757
2758 VRAI->calculateSpillWeightsAndHints();
2759
2760 LLVM_DEBUG(LIS->dump());
2761
2762 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2763 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2764
2765 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2766 GlobalCand.resize(32); // This will grow as needed.
2767 SetOfBrokenHints.clear();
2768
2770 tryHintsRecoloring();
2771
2772 if (VerifyEnabled)
2773 MF->verify(this, "Before post optimization");
2775 reportStats();
2776
2777 releaseMemory();
2778 return true;
2779}
unsigned SubReg
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Rewrite Partial Register Uses
#define DEBUG_TYPE
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
This file implements an indexed map.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static cl::opt< bool > GreedyRegClassPriorityTrumpsGlobalness("greedy-regclass-priority-trumps-globalness", cl::desc("Change the greedy register allocator's live range priority " "calculation to make the AllocationPriority of the register class " "more important then whether the range is global"), cl::Hidden)
static cl::opt< bool > ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring"), cl::Hidden)
const float Hysteresis
static cl::opt< unsigned > LastChanceRecoloringMaxInterference("lcr-max-interf", cl::Hidden, cl::desc("Last chance recoloring maximum number of considered" " interference at a time"), cl::init(8))
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg)
Return true if reg has any tied def operand.
static bool readsLaneSubset(const MachineRegisterInfo &MRI, const MachineInstr *MI, const LiveInterval &VirtReg, const TargetRegisterInfo *TRI, SlotIndex Use, const TargetInstrInfo *TII)
Return true if MI at \P Use reads a subset of the lanes live in VirtReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, const VirtRegMap &VRM, MCRegister PhysReg, const LiveInterval &Intf)
Return true if the existing assignment of Intf overlaps, but is not the same, as PhysReg.
static cl::opt< unsigned > CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, cl::desc("Last chance recoloring max depth"), cl::init(5))
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator)
static cl::opt< unsigned long > GrowRegionComplexityBudget("grow-region-complexity-budget", cl::desc("growRegion() does not scale with the number of BB edges, so " "limit its budget and bail out once we reach the limit."), cl::init(10000), cl::Hidden)
Greedy Register Allocator
static cl::opt< SplitEditor::ComplementSpillMode > SplitSpillMode("split-spill-mode", cl::Hidden, cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed))
static unsigned getNumAllocatableRegsForConstraints(const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI)
Get the number of allocatable registers that match the constraints of Reg on MI and that are also in ...
static cl::opt< bool > EnableDeferredSpilling("enable-deferred-spilling", cl::Hidden, cl::desc("Instead of spilling a variable right away, defer the actual " "code insertion to the end of the allocation. That way the " "allocator might still find a suitable coloring for this " "variable because of other evicted variables."), cl::init(false))
static cl::opt< unsigned > SplitThresholdForRegWithHint("split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " "percentate"), cl::init(75), cl::Hidden)
static cl::opt< bool > GreedyReverseLocalAssignment("greedy-reverse-local-assignment", cl::desc("Reverse allocation order of local live ranges, such that " "shorter local live ranges will tend to be allocated first"), cl::Hidden)
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineInstr &FirstMI, Register Reg)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
SI optimize exec mask operations pre RA
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
bool isHint(Register Reg) const
Return true if Reg is a preferred physical register.
ArrayRef< MCPhysReg > getOrder() const
Get the allocation order without reordered hints.
static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo, const LiveRegMatrix *Matrix)
Create a new AllocationOrder for VirtReg.
Iterator end() const
Iterator begin() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & reset()
Definition: BitVector.h:392
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
A debug info location.
Definition: DebugLoc.h:33
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition: EdgeBundles.h:47
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition: EdgeBundles.h:41
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition: EdgeBundles.h:44
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:705
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
Cursor - The primary query interface for the block interference cache.
SlotIndex first()
first - Return the starting index of the first interfering range in the current block.
SlotIndex last()
last - Return the ending index of the last interfering range in the current block.
bool hasInterference()
hasInterference - Return true if the current block has any interference.
void moveToBlock(unsigned MBBNum)
moveTo - Move cursor to basic block MBBNum.
void init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, LiveIntervals *lis, const TargetRegisterInfo *tri)
init - Prepare cache for a new function.
unsigned getMaxCursors() const
getMaxCursors - Return the maximum number of concurrent cursors that can be supported.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void splitRegister(Register OldReg, ArrayRef< Register > NewRegs, LiveIntervals &LIS)
splitRegister - Move any user variables in OldReg to the live ranges in NewRegs where they are live.
Query interferences between a single live virtual register and a live interval union.
const SmallVectorImpl< const LiveInterval * > & interferingVRegs(unsigned MaxInterferingRegs=std::numeric_limits< unsigned >::max())
SegmentIter find(SlotIndex x)
LiveSegments::iterator SegmentIter
A live range for subregisters.
Definition: LiveInterval.h:694
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
Register reg() const
Definition: LiveInterval.h:718
bool isSpillable() const
isSpillable - Can this interval be spilled?
Definition: LiveInterval.h:826
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:810
unsigned getSize() const
getSize - Returns the sum of sizes of all the LiveRange's.
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:782
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
ArrayRef< SlotIndex > getRegMaskSlotsInBlock(unsigned MBBNum) const
Returns a sorted array of slot indices of all instructions with register mask operands in the basic b...
LiveInterval & getInterval(Register Reg)
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
bool empty() const
unsigned size() const
Register get(unsigned idx) const
ArrayRef< Register > regs() const
iterator end() const
iterator begin() const
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
bool empty() const
Definition: LiveInterval.h:382
iterator end()
Definition: LiveInterval.h:216
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool checkRegMaskInterference(const LiveInterval &VirtReg, MCRegister PhysReg=MCRegister::NoRegister)
Check for regmask interference only.
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
LiveIntervalUnion::Query & query(const LiveRange &LR, MCRegister RegUnit)
Query a line of the assigned virtual register matrix directly.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
@ IK_VirtReg
Virtual register interference.
Definition: LiveRegMatrix.h:90
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
LiveIntervalUnion * getLiveUnions()
Directly access the live interval unions per regunit.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
iterator_range< MCRegUnitIterator > regunits(MCRegister Reg) const
Returns an iterator range over all regunits for Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
double getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
Analysis pass which computes a MachineDominatorTree.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
Function & getFunction()
Return the LLVM function that this machine code represents.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isImplicitDef() const
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to be more informative.
Diagnostic information for missed-optimization remarks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getSimpleHint(Register VReg) const
getSimpleHint - same as getRegAllocationHint except it will only return a target independent hint.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool reg_nodbg_empty(Register RegNo) const
reg_nodbg_empty - Return true if the only instructions using or defining Reg are Debug instructions.
iterator_range< def_iterator > def_operands(Register Reg) const
LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
iterator_range< reg_instr_nodbg_iterator > reg_nodbg_instructions(Register Reg) const
void LRE_DidCloneVirtReg(Register New, Register Old)
Spiller & spiller() override
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl< Register > &) override
bool runOnMachineFunction(MachineFunction &mf) override
Perform register allocation.
const LiveInterval * dequeue() override
dequeue - Return the next unassigned register, or NULL.
RAGreedy(const RegAllocFilterFunc F=nullptr)
void enqueueImpl(const LiveInterval *LI) override
enqueue - Add VirtReg to the priority queue of unassigned registers.
static char ID
void getAnalysisUsage(AnalysisUsage &AU) const override
RAGreedy analysis usage.
void aboutToRemoveInterval(const LiveInterval &) override
Method called when the allocator is about to remove a LiveInterval.
RegAllocBase provides the register allocation driver and interface that can be extended to add intere...
Definition: RegAllocBase.h:62
void enqueue(const LiveInterval *LI)
enqueue - Add VirtReg to the priority queue of unassigned registers.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat)
SmallPtrSet< MachineInstr *, 32 > DeadRemats
Inst which is a def of an original reg and whose defs are already all dead after remat is saved in De...
Definition: RegAllocBase.h:82
const TargetRegisterInfo * TRI
Definition: RegAllocBase.h:66
LiveIntervals * LIS
Definition: RegAllocBase.h:69
static const char TimerGroupName[]
Definition: RegAllocBase.h:127
static const char TimerGroupDescription[]
Definition: RegAllocBase.h:128
LiveRegMatrix * Matrix
Definition: RegAllocBase.h:70
virtual void postOptimization()
VirtRegMap * VRM
Definition: RegAllocBase.h:68
RegisterClassInfo RegClassInfo
Definition: RegAllocBase.h:71
MachineRegisterInfo * MRI
Definition: RegAllocBase.h:67
bool shouldAllocateRegister(Register Reg)
Get whether a given register should be allocated.
Definition: RegAllocBase.h:93
static bool VerifyEnabled
VerifyEnabled - True when -verify-regalloc is given.
Definition: RegAllocBase.h:135
ImmutableAnalysis abstraction for fetching the Eviction Advisor.
std::optional< unsigned > getOrderLimit(const LiveInterval &VirtReg, const AllocationOrder &Order, unsigned CostPerUseLimit) const
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const
Returns true if the given PhysReg is a callee saved register and has not been used for allocation yet...
bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const
bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const
unsigned getLastCostChange(const TargetRegisterClass *RC) const
Get the position of the last cost change in getOrder(RC).
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
uint8_t getMinCost(const TargetRegisterClass *RC) const
Get the minimum register cost in RC's allocation order.
MCRegister getLastCalleeSavedAlias(MCRegister PhysReg) const
getLastCalleeSavedAlias - Returns the last callee saved register that overlaps PhysReg,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:176
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
Definition: SlotIndexes.h:182
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:130
SlotIndex getBoundaryIndex() const
Returns the boundary index for associated with this index.
Definition: SlotIndexes.h:231
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
Definition: SlotIndexes.h:224
@ InstrDist
The default distance between instructions as returned by distance().
Definition: SlotIndexes.h:112
int getApproxInstrDistance(SlotIndex other) const
Return the scaled distance from this index to the given one, where all slots on the same instruction ...
Definition: SlotIndexes.h:203
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
SlotIndex getLastIndex()
Returns the base index of the last slot in this analysis.
Definition: SlotIndexes.h:368
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:460
void packIndexes()
Renumber all indexes using the default instruction distance.
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:362
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction for the given index, or null if the given index has no instruction associated...
Definition: SlotIndexes.h:397
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool erase(const T &V)
Definition: SmallSet.h:207
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_type size() const
Definition: SmallSet.h:161
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:718
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:697
void resize(size_type N)
Definition: SmallVector.h:652
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
void addConstraints(ArrayRef< BlockConstraint > LiveBlocks)
addConstraints - Add constraints and biases.
bool finish()
finish - Compute the optimal spill code placement given the constraints.
void addPrefSpill(ArrayRef< unsigned > Blocks, bool Strong)
addPrefSpill - Add PrefSpill constraints to all blocks listed.
void prepare(BitVector &RegBundles)
prepare - Reset state and prepare for a new spill placement computation.
bool scanActiveBundles()
scanActiveBundles - Perform an initial scan of all bundles activated by addConstraints and addLinks,...
void addLinks(ArrayRef< unsigned > Links)
addLinks - Add transparent blocks with the given numbers.
void iterate()
iterate - Update the network iteratively until convergence, or new bundles are found.
@ MustSpill
A register is impossible, variable must be spilled.
@ DontCare
Block doesn't care / variable not live.
@ PrefReg
Block entry/exit prefers a register.
@ PrefSpill
Block entry/exit prefers a stack slot.
ArrayRef< unsigned > getRecentPositive()
getRecentPositive - Return an array of bundles that became positive during the previous call to scanA...
BlockFrequency getBlockFrequency(unsigned Number) const
getBlockFrequency - Return the estimated block execution frequency per function invocation.
virtual void spill(LiveRangeEdit &LRE)=0
spill - Spill the LRE.getParent() live interval.
SplitAnalysis - Analyze a LiveInterval, looking for live range splitting opportunities.
Definition: SplitKit.h:96
SplitEditor - Edit machine code and LiveIntervals for live range splitting.
Definition: SplitKit.h:263
@ SM_Partition
SM_Partition(Default) - Try to create the complement interval so it doesn't overlap any other interva...
Definition: SplitKit.h:286
@ SM_Speed
SM_Speed - Overlap intervals to minimize the expected execution frequency of the inserted copies.
Definition: SplitKit.h:298
@ SM_Size
SM_Size - Overlap intervals to minimize the number of inserted COPY instructions.
Definition: SplitKit.h:293
TargetInstrInfo - Interface to description of machine instruction set.
virtual std::pair< unsigned, unsigned > getPatchpointUnfoldableRange(const MachineInstr &MI) const
For a patchpoint, stackmap, or statepoint intrinsic, return the range of operands which can't be fold...
bool isFullCopyInstr(const MachineInstr &MI) const
virtual bool hasStoreToStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a store to a stack slot, return true along with the FrameInd...
virtual Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physic...
virtual Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct store to a stack slot, return the virtual or physica...
std::optional< DestSourcePair > isCopyInstr(const MachineInstr &MI) const
If the specific machine instruction is a instruction that moves/copies value from one register to ano...
virtual bool hasLoadFromStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a load from a stack slot, return true along with the FrameIn...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
const uint8_t AllocationPriority
Classes with a higher priority value are assigned first by register allocators using a greedy heurist...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool shouldUseDeferredSpillingForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Deferred spilling delays the spill insertion of a virtual register after every other allocation.
virtual bool shouldRegionSplitForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Region split has a high compile time cost especially for large live range.
virtual bool shouldUseLastChanceRecoloringForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Last chance recoloring has a high compile time cost especially for targets with a lot of registers.
virtual unsigned getCSRFirstUseCost() const
Allow the target to override the cost of using a callee-saved register for the first time.
LaneBitmask getCoveringLanes() const
The lane masks returned by getSubRegIndexLaneMask() above can only be used to determine if sub-regist...
ArrayRef< uint8_t > getRegisterCosts(const MachineFunction &MF) const
Get a list of cost values for all registers that correspond to the index returned by RegisterCostTabl...
virtual bool reverseLocalAssignment() const
Allow the target to reverse allocation order of local live ranges.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same...
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
bool regsOverlap(Register RegA, Register RegB) const
Returns true if the two registers are equal or alias each other.
const char * getRegClassName(const TargetRegisterClass *Class) const
Returns the name of the register class.
virtual bool regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const
When prioritizing live ranges in register allocation, if this hook returns true then the AllocationPr...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasKnownPreference(Register VirtReg) const
returns true if VirtReg has a known preferred register.
Definition: VirtRegMap.cpp:118
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
Definition: VirtRegMap.h:105
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
Definition: VirtRegMap.h:99
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::function< bool(const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, const Register Reg)> RegAllocFilterFunc
Filter function for register classes during regalloc.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
bool TimePassesIsEnabled
If the user specifies the -time-passes argument on an LLVM tool command line then the value of this b...
char & RAGreedyID
Greedy register allocator.
Spiller * createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
Create and return a spiller that will insert spill code directly instead of deferring though VirtRegM...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
@ RS_Split2
Attempt more aggressive live range splitting that is guaranteed to make progress.
@ RS_Spill
Live range will be spilled. No more splitting will be attempted.
@ RS_Split
Attempt live range splitting if assignment is impossible.
@ RS_New
Newly created live range that has never been queued.
@ RS_Done
There is nothing more we can do to this live range.
@ RS_Assign
Only attempt assignment and eviction. Then requeue as RS_Split.
@ RS_Memory
Live range is in memory.
VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
const float huge_valf
Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
Definition: MathExtras.cpp:28
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1954
InstructionCost Cost
Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
static float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr)
Normalize the spill weight of a live interval.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
Definition: MathExtras.h:219
constexpr bool any() const
Definition: LaneBitmask.h:53
This class is basically a combination of TimeRegion and Timer.
Definition: Timer.h:163
BlockConstraint - Entry and exit constraints for a basic block.
BorderConstraint Exit
Constraint on block exit.
bool ChangesValue
True when this block changes the value of the live range.
BorderConstraint Entry
Constraint on block entry.
unsigned Number
Basic block number (from MBB::getNumber()).
Additional information about basic blocks where the current variable is live.
Definition: SplitKit.h:121
SlotIndex FirstDef
First non-phi valno->def, or SlotIndex().
Definition: SplitKit.h:125
bool LiveOut
Current reg is live out.
Definition: SplitKit.h:127
bool LiveIn
Current reg is live in.
Definition: SplitKit.h:126
MachineBasicBlock * MBB
Definition: SplitKit.h:122
SlotIndex LastInstr
Last instr accessing current reg.
Definition: SplitKit.h:124
SlotIndex FirstInstr
First instr accessing current reg.
Definition: SplitKit.h:123