LLVM 20.0.0git
RegAllocPBQP.cpp
Go to the documentation of this file.
1//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
10// register allocator for LLVM. This allocator works by constructing a PBQP
11// problem representing the register allocation problem under consideration,
12// solving this using a PBQP solver, and mapping the solution back to a
13// register assignment. If any variables are selected for spilling then spill
14// code is inserted and the process repeated.
15//
16// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
17// for register allocation. For more information on PBQP for register
18// allocation, see the following papers:
19//
20// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
21// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
22// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
23//
24// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
25// architectures. In Proceedings of the Joint Conference on Languages,
26// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
27// NY, USA, 139-148.
28//
29//===----------------------------------------------------------------------===//
30
32#include "RegisterCoalescer.h"
33#include "llvm/ADT/ArrayRef.h"
34#include "llvm/ADT/BitVector.h"
35#include "llvm/ADT/DenseMap.h"
36#include "llvm/ADT/DenseSet.h"
37#include "llvm/ADT/STLExtras.h"
40#include "llvm/ADT/StringRef.h"
64#include "llvm/Config/llvm-config.h"
65#include "llvm/IR/Function.h"
66#include "llvm/IR/Module.h"
67#include "llvm/Pass.h"
70#include "llvm/Support/Debug.h"
74#include <algorithm>
75#include <cassert>
76#include <cstddef>
77#include <limits>
78#include <map>
79#include <memory>
80#include <queue>
81#include <set>
82#include <sstream>
83#include <string>
84#include <system_error>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "regalloc"
92
94RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
96
97static cl::opt<bool>
98PBQPCoalescing("pbqp-coalescing",
99 cl::desc("Attempt coalescing during PBQP register allocation."),
100 cl::init(false), cl::Hidden);
101
102#ifndef NDEBUG
103static cl::opt<bool>
104PBQPDumpGraphs("pbqp-dump-graphs",
105 cl::desc("Dump graphs for each function/round in the compilation unit."),
106 cl::init(false), cl::Hidden);
107#endif
108
109namespace {
110
111///
112/// PBQP based allocators solve the register allocation problem by mapping
113/// register allocation problems to Partitioned Boolean Quadratic
114/// Programming problems.
115class RegAllocPBQP : public MachineFunctionPass {
116public:
117 static char ID;
118
119 /// Construct a PBQP register allocator.
120 RegAllocPBQP(char *cPassID = nullptr)
121 : MachineFunctionPass(ID), customPassID(cPassID) {
126 }
127
128 /// Return the pass name.
129 StringRef getPassName() const override { return "PBQP Register Allocator"; }
130
131 /// PBQP analysis usage.
132 void getAnalysisUsage(AnalysisUsage &au) const override;
133
134 /// Perform register allocation
135 bool runOnMachineFunction(MachineFunction &MF) override;
136
139 MachineFunctionProperties::Property::NoPHIs);
140 }
141
144 MachineFunctionProperties::Property::IsSSA);
145 }
146
147private:
148 using RegSet = std::set<Register>;
149
150 char *customPassID;
151
152 RegSet VRegsToAlloc, EmptyIntervalVRegs;
153
154 /// Inst which is a def of an original reg and whose defs are already all
155 /// dead after remat is saved in DeadRemats. The deletion of such inst is
156 /// postponed till all the allocations are done, so its remat expr is
157 /// always available for the remat of all the siblings of the original reg.
159
160 /// Finds the initial set of vreg intervals to allocate.
161 void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
162
163 /// Constructs an initial graph.
164 void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
165
166 /// Spill the given VReg.
167 void spillVReg(Register VReg, SmallVectorImpl<Register> &NewIntervals,
169 Spiller &VRegSpiller);
170
171 /// Given a solved PBQP problem maps this solution back to a register
172 /// assignment.
173 bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
174 const PBQP::Solution &Solution,
175 VirtRegMap &VRM,
176 Spiller &VRegSpiller);
177
178 /// Postprocessing before final spilling. Sets basic block "live in"
179 /// variables.
180 void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
181 VirtRegMap &VRM) const;
182
183 void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
184};
185
186char RegAllocPBQP::ID = 0;
187
188/// Set spill costs for each node in the PBQP reg-alloc graph.
189class SpillCosts : public PBQPRAConstraint {
190public:
191 void apply(PBQPRAGraph &G) override {
192 LiveIntervals &LIS = G.getMetadata().LIS;
193
194 // A minimum spill costs, so that register constraints can be set
195 // without normalization in the [0.0:MinSpillCost( interval.
196 const PBQP::PBQPNum MinSpillCost = 10.0;
197
198 for (auto NId : G.nodeIds()) {
199 PBQP::PBQPNum SpillCost =
200 LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight();
201 if (SpillCost == 0.0)
202 SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
203 else
204 SpillCost += MinSpillCost;
205 PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
206 NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
207 G.setNodeCosts(NId, std::move(NodeCosts));
208 }
209 }
210};
211
212/// Add interference edges between overlapping vregs.
213class Interference : public PBQPRAConstraint {
214private:
215 using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
216 using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
217 using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
218 using DisjointAllowedRegsCache = DenseSet<IKey>;
219 using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
220 using IEdgeCache = DenseSet<IEdgeKey>;
221
222 bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
224 const DisjointAllowedRegsCache &D) const {
225 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
226 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
227
228 if (NRegs == MRegs)
229 return false;
230
231 if (NRegs < MRegs)
232 return D.contains(IKey(NRegs, MRegs));
233
234 return D.contains(IKey(MRegs, NRegs));
235 }
236
237 void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
239 DisjointAllowedRegsCache &D) {
240 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
241 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
242
243 assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
244
245 if (NRegs < MRegs)
246 D.insert(IKey(NRegs, MRegs));
247 else
248 D.insert(IKey(MRegs, NRegs));
249 }
250
251 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
252 // for the fast interference graph construction algorithm. The last is there
253 // to save us from looking up node ids via the VRegToNode map in the graph
254 // metadata.
255 using IntervalInfo =
256 std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
257
258 static SlotIndex getStartPoint(const IntervalInfo &I) {
259 return std::get<0>(I)->segments[std::get<1>(I)].start;
260 }
261
262 static SlotIndex getEndPoint(const IntervalInfo &I) {
263 return std::get<0>(I)->segments[std::get<1>(I)].end;
264 }
265
266 static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
267 return std::get<2>(I);
268 }
269
270 static bool lowestStartPoint(const IntervalInfo &I1,
271 const IntervalInfo &I2) {
272 // Condition reversed because priority queue has the *highest* element at
273 // the front, rather than the lowest.
274 return getStartPoint(I1) > getStartPoint(I2);
275 }
276
277 static bool lowestEndPoint(const IntervalInfo &I1,
278 const IntervalInfo &I2) {
279 SlotIndex E1 = getEndPoint(I1);
280 SlotIndex E2 = getEndPoint(I2);
281
282 if (E1 < E2)
283 return true;
284
285 if (E1 > E2)
286 return false;
287
288 // If two intervals end at the same point, we need a way to break the tie or
289 // the set will assume they're actually equal and refuse to insert a
290 // "duplicate". Just compare the vregs - fast and guaranteed unique.
291 return std::get<0>(I1)->reg() < std::get<0>(I2)->reg();
292 }
293
294 static bool isAtLastSegment(const IntervalInfo &I) {
295 return std::get<1>(I) == std::get<0>(I)->size() - 1;
296 }
297
298 static IntervalInfo nextSegment(const IntervalInfo &I) {
299 return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
300 }
301
302public:
303 void apply(PBQPRAGraph &G) override {
304 // The following is loosely based on the linear scan algorithm introduced in
305 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
306 // isn't linear, because the size of the active set isn't bound by the
307 // number of registers, but rather the size of the largest clique in the
308 // graph. Still, we expect this to be better than N^2.
309 LiveIntervals &LIS = G.getMetadata().LIS;
310
311 // Interferenc matrices are incredibly regular - they're only a function of
312 // the allowed sets, so we cache them to avoid the overhead of constructing
313 // and uniquing them.
314 IMatrixCache C;
315
316 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
317 // cache locally edges we have already seen.
318 IEdgeCache EC;
319
320 // Cache known disjoint allowed registers pairs
321 DisjointAllowedRegsCache D;
322
323 using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
324 using IntervalQueue =
325 std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
326 decltype(&lowestStartPoint)>;
327 IntervalSet Active(lowestEndPoint);
328 IntervalQueue Inactive(lowestStartPoint);
329
330 // Start by building the inactive set.
331 for (auto NId : G.nodeIds()) {
332 Register VReg = G.getNodeMetadata(NId).getVReg();
333 LiveInterval &LI = LIS.getInterval(VReg);
334 assert(!LI.empty() && "PBQP graph contains node for empty interval");
335 Inactive.push(std::make_tuple(&LI, 0, NId));
336 }
337
338 while (!Inactive.empty()) {
339 // Tentatively grab the "next" interval - this choice may be overriden
340 // below.
341 IntervalInfo Cur = Inactive.top();
342
343 // Retire any active intervals that end before Cur starts.
344 IntervalSet::iterator RetireItr = Active.begin();
345 while (RetireItr != Active.end() &&
346 (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
347 // If this interval has subsequent segments, add the next one to the
348 // inactive list.
349 if (!isAtLastSegment(*RetireItr))
350 Inactive.push(nextSegment(*RetireItr));
351
352 ++RetireItr;
353 }
354 Active.erase(Active.begin(), RetireItr);
355
356 // One of the newly retired segments may actually start before the
357 // Cur segment, so re-grab the front of the inactive list.
358 Cur = Inactive.top();
359 Inactive.pop();
360
361 // At this point we know that Cur overlaps all active intervals. Add the
362 // interference edges.
363 PBQP::GraphBase::NodeId NId = getNodeId(Cur);
364 for (const auto &A : Active) {
365 PBQP::GraphBase::NodeId MId = getNodeId(A);
366
367 // Do not add an edge when the nodes' allowed registers do not
368 // intersect: there is obviously no interference.
369 if (haveDisjointAllowedRegs(G, NId, MId, D))
370 continue;
371
372 // Check that we haven't already added this edge
373 IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
374 if (EC.count(EK))
375 continue;
376
377 // This is a new edge - add it to the graph.
378 if (!createInterferenceEdge(G, NId, MId, C))
379 setDisjointAllowedRegs(G, NId, MId, D);
380 else
381 EC.insert(EK);
382 }
383
384 // Finally, add Cur to the Active set.
385 Active.insert(Cur);
386 }
387 }
388
389private:
390 // Create an Interference edge and add it to the graph, unless it is
391 // a null matrix, meaning the nodes' allowed registers do not have any
392 // interference. This case occurs frequently between integer and floating
393 // point registers for example.
394 // return true iff both nodes interferes.
395 bool createInterferenceEdge(PBQPRAGraph &G,
397 IMatrixCache &C) {
398 const TargetRegisterInfo &TRI =
399 *G.getMetadata().MF.getSubtarget().getRegisterInfo();
400 const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
401 const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
402
403 // Try looking the edge costs up in the IMatrixCache first.
404 IKey K(&NRegs, &MRegs);
405 IMatrixCache::iterator I = C.find(K);
406 if (I != C.end()) {
407 G.addEdgeBypassingCostAllocator(NId, MId, I->second);
408 return true;
409 }
410
411 PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
412 bool NodesInterfere = false;
413 for (unsigned I = 0; I != NRegs.size(); ++I) {
414 MCRegister PRegN = NRegs[I];
415 for (unsigned J = 0; J != MRegs.size(); ++J) {
416 MCRegister PRegM = MRegs[J];
417 if (TRI.regsOverlap(PRegN, PRegM)) {
418 M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
419 NodesInterfere = true;
420 }
421 }
422 }
423
424 if (!NodesInterfere)
425 return false;
426
427 PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
428 C[K] = G.getEdgeCostsPtr(EId);
429
430 return true;
431 }
432};
433
434class Coalescing : public PBQPRAConstraint {
435public:
436 void apply(PBQPRAGraph &G) override {
437 MachineFunction &MF = G.getMetadata().MF;
438 MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
440
441 // Scan the machine function and add a coalescing cost whenever CoalescerPair
442 // gives the Ok.
443 for (const auto &MBB : MF) {
444 for (const auto &MI : MBB) {
445 // Skip not-coalescable or already coalesced copies.
446 if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
447 continue;
448
449 Register DstReg = CP.getDstReg();
450 Register SrcReg = CP.getSrcReg();
451
453
454 if (CP.isPhys()) {
455 if (!MF.getRegInfo().isAllocatable(DstReg))
456 continue;
457
458 PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
459
460 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
461 G.getNodeMetadata(NId).getAllowedRegs();
462
463 unsigned PRegOpt = 0;
464 while (PRegOpt < Allowed.size() && Allowed[PRegOpt].id() != DstReg)
465 ++PRegOpt;
466
467 if (PRegOpt < Allowed.size()) {
468 PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
469 NewCosts[PRegOpt + 1] -= CBenefit;
470 G.setNodeCosts(NId, std::move(NewCosts));
471 }
472 } else {
473 PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
474 PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
475 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
476 &G.getNodeMetadata(N1Id).getAllowedRegs();
477 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
478 &G.getNodeMetadata(N2Id).getAllowedRegs();
479
480 PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
481 if (EId == G.invalidEdgeId()) {
482 PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
483 Allowed2->size() + 1, 0);
484 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
485 G.addEdge(N1Id, N2Id, std::move(Costs));
486 } else {
487 if (G.getEdgeNode1Id(EId) == N2Id) {
488 std::swap(N1Id, N2Id);
489 std::swap(Allowed1, Allowed2);
490 }
491 PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
492 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
493 G.updateEdgeCosts(EId, std::move(Costs));
494 }
495 }
496 }
497 }
498 }
499
500private:
501 void addVirtRegCoalesce(
502 PBQPRAGraph::RawMatrix &CostMat,
503 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
504 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
505 PBQP::PBQPNum Benefit) {
506 assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
507 assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
508 for (unsigned I = 0; I != Allowed1.size(); ++I) {
509 MCRegister PReg1 = Allowed1[I];
510 for (unsigned J = 0; J != Allowed2.size(); ++J) {
511 MCRegister PReg2 = Allowed2[J];
512 if (PReg1 == PReg2)
513 CostMat[I + 1][J + 1] -= Benefit;
514 }
515 }
516 }
517};
518
519/// PBQP-specific implementation of weight normalization.
520class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo {
521 float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override {
522 // All intervals have a spill weight that is mostly proportional to the
523 // number of uses, with uses in loops having a bigger weight.
524 return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1);
525 }
526
527public:
528 PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
529 const MachineLoopInfo &Loops,
530 const MachineBlockFrequencyInfo &MBFI)
531 : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {}
532};
533} // end anonymous namespace
534
535// Out-of-line destructor/anchor for PBQPRAConstraint.
537
538void PBQPRAConstraint::anchor() {}
539
540void PBQPRAConstraintList::anchor() {}
541
542void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
543 au.setPreservesCFG();
550 //au.addRequiredID(SplitCriticalEdgesID);
551 if (customPassID)
552 au.addRequiredID(*customPassID);
564}
565
566void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
567 LiveIntervals &LIS) {
568 const MachineRegisterInfo &MRI = MF.getRegInfo();
569
570 // Iterate over all live ranges.
571 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
573 if (MRI.reg_nodbg_empty(Reg))
574 continue;
575 VRegsToAlloc.insert(Reg);
576 }
577}
578
580 const TargetRegisterInfo &TRI,
581 const MachineFunction &MF) {
582 const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
583 for (unsigned i = 0; CSR[i] != 0; ++i)
584 if (TRI.regsOverlap(Reg, CSR[i]))
585 return true;
586 return false;
587}
588
589void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
590 Spiller &VRegSpiller) {
591 MachineFunction &MF = G.getMetadata().MF;
592
593 LiveIntervals &LIS = G.getMetadata().LIS;
594 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
595 const TargetRegisterInfo &TRI =
596 *G.getMetadata().MF.getSubtarget().getRegisterInfo();
597
598 std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
599
600 std::map<Register, std::vector<MCRegister>> VRegAllowedMap;
601
602 while (!Worklist.empty()) {
603 Register VReg = Worklist.back();
604 Worklist.pop_back();
605
606 LiveInterval &VRegLI = LIS.getInterval(VReg);
607
608 // If this is an empty interval move it to the EmptyIntervalVRegs set then
609 // continue.
610 if (VRegLI.empty()) {
611 EmptyIntervalVRegs.insert(VRegLI.reg());
612 VRegsToAlloc.erase(VRegLI.reg());
613 continue;
614 }
615
616 const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
617
618 // Record any overlaps with regmask operands.
619 BitVector RegMaskOverlaps;
620 LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
621
622 // Compute an initial allowed set for the current vreg.
623 std::vector<MCRegister> VRegAllowed;
624 ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
625 for (MCPhysReg R : RawPRegOrder) {
626 MCRegister PReg(R);
627 if (MRI.isReserved(PReg))
628 continue;
629
630 // vregLI crosses a regmask operand that clobbers preg.
631 if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
632 continue;
633
634 // vregLI overlaps fixed regunit interference.
635 bool Interference = false;
636 for (MCRegUnit Unit : TRI.regunits(PReg)) {
637 if (VRegLI.overlaps(LIS.getRegUnit(Unit))) {
638 Interference = true;
639 break;
640 }
641 }
642 if (Interference)
643 continue;
644
645 // preg is usable for this virtual register.
646 VRegAllowed.push_back(PReg);
647 }
648
649 // Check for vregs that have no allowed registers. These should be
650 // pre-spilled and the new vregs added to the worklist.
651 if (VRegAllowed.empty()) {
653 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
654 llvm::append_range(Worklist, NewVRegs);
655 continue;
656 }
657
658 VRegAllowedMap[VReg.id()] = std::move(VRegAllowed);
659 }
660
661 for (auto &KV : VRegAllowedMap) {
662 auto VReg = KV.first;
663
664 // Move empty intervals to the EmptyIntervalVReg set.
665 if (LIS.getInterval(VReg).empty()) {
666 EmptyIntervalVRegs.insert(VReg);
667 VRegsToAlloc.erase(VReg);
668 continue;
669 }
670
671 auto &VRegAllowed = KV.second;
672
673 PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
674
675 // Tweak cost of callee saved registers, as using then force spilling and
676 // restoring them. This would only happen in the prologue / epilogue though.
677 for (unsigned i = 0; i != VRegAllowed.size(); ++i)
678 if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
679 NodeCosts[1 + i] += 1.0;
680
681 PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
682 G.getNodeMetadata(NId).setVReg(VReg);
683 G.getNodeMetadata(NId).setAllowedRegs(
684 G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
685 G.getMetadata().setNodeIdForVReg(VReg, NId);
686 }
687}
688
689void RegAllocPBQP::spillVReg(Register VReg,
690 SmallVectorImpl<Register> &NewIntervals,
692 VirtRegMap &VRM, Spiller &VRegSpiller) {
693 VRegsToAlloc.erase(VReg);
694 LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
695 nullptr, &DeadRemats);
696 VRegSpiller.spill(LRE);
697
699 (void)TRI;
700 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
701 << LRE.getParent().weight() << ", New vregs: ");
702
703 // Copy any newly inserted live intervals into the list of regs to
704 // allocate.
705 for (const Register &R : LRE) {
706 const LiveInterval &LI = LIS.getInterval(R);
707 assert(!LI.empty() && "Empty spill range.");
708 LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " ");
709 VRegsToAlloc.insert(LI.reg());
710 }
711
712 LLVM_DEBUG(dbgs() << ")\n");
713}
714
715bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
716 const PBQP::Solution &Solution,
717 VirtRegMap &VRM,
718 Spiller &VRegSpiller) {
719 MachineFunction &MF = G.getMetadata().MF;
720 LiveIntervals &LIS = G.getMetadata().LIS;
722 (void)TRI;
723
724 // Set to true if we have any spills
725 bool AnotherRoundNeeded = false;
726
727 // Clear the existing allocation.
728 VRM.clearAllVirt();
729
730 // Iterate over the nodes mapping the PBQP solution to a register
731 // assignment.
732 for (auto NId : G.nodeIds()) {
733 Register VReg = G.getNodeMetadata(NId).getVReg();
734 unsigned AllocOpt = Solution.getSelection(NId);
735
736 if (AllocOpt != PBQP::RegAlloc::getSpillOptionIdx()) {
737 MCRegister PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1];
738 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
739 << TRI.getName(PReg) << "\n");
740 assert(PReg != 0 && "Invalid preg selected.");
741 VRM.assignVirt2Phys(VReg, PReg);
742 } else {
743 // Spill VReg. If this introduces new intervals we'll need another round
744 // of allocation.
746 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
747 AnotherRoundNeeded |= !NewVRegs.empty();
748 }
749 }
750
751 return !AnotherRoundNeeded;
752}
753
754void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
755 LiveIntervals &LIS,
756 VirtRegMap &VRM) const {
758
759 // First allocate registers for the empty intervals.
760 for (const Register &R : EmptyIntervalVRegs) {
761 LiveInterval &LI = LIS.getInterval(R);
762
763 Register PReg = MRI.getSimpleHint(LI.reg());
764
765 if (PReg == 0) {
766 const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg());
767 const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
768 for (MCRegister CandidateReg : RawPRegOrder) {
769 if (!VRM.getRegInfo().isReserved(CandidateReg)) {
770 PReg = CandidateReg;
771 break;
772 }
773 }
774 assert(PReg &&
775 "No un-reserved physical registers in this register class");
776 }
777
778 VRM.assignVirt2Phys(LI.reg(), PReg);
779 }
780}
781
782void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
783 VRegSpiller.postOptimization();
784 /// Remove dead defs because of rematerialization.
785 for (auto *DeadInst : DeadRemats) {
786 LIS.RemoveMachineInstrFromMaps(*DeadInst);
787 DeadInst->eraseFromParent();
788 }
789 DeadRemats.clear();
790}
791
792bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
793 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
795 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
796
797 VirtRegMap &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
798
799 PBQPVirtRegAuxInfo VRAI(
800 MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
801 VRAI.calculateSpillWeightsAndHints();
802
803 // FIXME: we create DefaultVRAI here to match existing behavior pre-passing
804 // the VRAI through the spiller to the live range editor. However, it probably
805 // makes more sense to pass the PBQP VRAI. The existing behavior had
806 // LiveRangeEdit make its own VirtRegAuxInfo object.
807 VirtRegAuxInfo DefaultVRAI(
808 MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
809 std::unique_ptr<Spiller> VRegSpiller(
810 createInlineSpiller(*this, MF, VRM, DefaultVRAI));
811
813
814 LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
815
816 // Allocator main loop:
817 //
818 // * Map current regalloc problem to a PBQP problem
819 // * Solve the PBQP problem
820 // * Map the solution back to a register allocation
821 // * Spill if necessary
822 //
823 // This process is continued till no more spills are generated.
824
825 // Find the vreg intervals in need of allocation.
826 findVRegIntervalsToAlloc(MF, LIS);
827
828#ifndef NDEBUG
829 const Function &F = MF.getFunction();
830 std::string FullyQualifiedName =
831 F.getParent()->getModuleIdentifier() + "." + F.getName().str();
832#endif
833
834 // If there are non-empty intervals allocate them using pbqp.
835 if (!VRegsToAlloc.empty()) {
836 const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
837 std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
838 std::make_unique<PBQPRAConstraintList>();
839 ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>());
840 ConstraintsRoot->addConstraint(std::make_unique<Interference>());
841 if (PBQPCoalescing)
842 ConstraintsRoot->addConstraint(std::make_unique<Coalescing>());
843 ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
844
845 bool PBQPAllocComplete = false;
846 unsigned Round = 0;
847
848 while (!PBQPAllocComplete) {
849 LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
850 (void) Round;
851
853 initializeGraph(G, VRM, *VRegSpiller);
854 ConstraintsRoot->apply(G);
855
856#ifndef NDEBUG
857 if (PBQPDumpGraphs) {
858 std::ostringstream RS;
859 RS << Round;
860 std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
861 ".pbqpgraph";
862 std::error_code EC;
863 raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_TextWithCRLF);
864 LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
865 << GraphFileName << "\"\n");
866 G.dump(OS);
867 }
868#endif
869
871 PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
872 ++Round;
873 }
874 }
875
876 // Finalise allocation, allocate empty ranges.
877 finalizeAlloc(MF, LIS, VRM);
878 postOptimization(*VRegSpiller, LIS);
879 VRegsToAlloc.clear();
880 EmptyIntervalVRegs.clear();
881
882 LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
883
884 return true;
885}
886
887/// Create Printable object for node and register info.
890 return Printable([NId, &G](raw_ostream &OS) {
891 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
892 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
893 Register VReg = G.getNodeMetadata(NId).getVReg();
894 const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
895 OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
896 });
897}
898
899#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
901 for (auto NId : nodeIds()) {
902 const Vector &Costs = getNodeCosts(NId);
903 assert(Costs.getLength() != 0 && "Empty vector in graph.");
904 OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
905 }
906 OS << '\n';
907
908 for (auto EId : edgeIds()) {
909 NodeId N1Id = getEdgeNode1Id(EId);
910 NodeId N2Id = getEdgeNode2Id(EId);
911 assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
912 const Matrix &M = getEdgeCosts(EId);
913 assert(M.getRows() != 0 && "No rows in matrix.");
914 assert(M.getCols() != 0 && "No cols in matrix.");
915 OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
916 OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
917 OS << M << '\n';
918 }
919}
920
922 dump(dbgs());
923}
924#endif
925
927 OS << "graph {\n";
928 for (auto NId : nodeIds()) {
929 OS << " node" << NId << " [ label=\""
930 << PrintNodeInfo(NId, *this) << "\\n"
931 << getNodeCosts(NId) << "\" ]\n";
932 }
933
934 OS << " edge [ len=" << nodeIds().size() << " ]\n";
935 for (auto EId : edgeIds()) {
936 OS << " node" << getEdgeNode1Id(EId)
937 << " -- node" << getEdgeNode2Id(EId)
938 << " [ label=\"";
939 const Matrix &EdgeCosts = getEdgeCosts(EId);
940 for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
941 OS << EdgeCosts.getRowAsVector(i) << "\\n";
942 }
943 OS << "\" ]\n";
944 }
945 OS << "}\n";
946}
947
949 return new RegAllocPBQP(customPassID);
950}
951
954}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
Hexagon Hardware Loops
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Branch Coalescing
static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, const PBQP::RegAlloc::PBQPRAGraph &G)
Create Printable object for node and register info.
static cl::opt< bool > PBQPDumpGraphs("pbqp-dump-graphs", cl::desc("Dump graphs for each function/round in the compilation unit."), cl::init(false), cl::Hidden)
static cl::opt< bool > PBQPCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden)
static bool isACalleeSavedRegister(MCRegister Reg, const TargetRegisterInfo &TRI, const MachineFunction &MF)
static RegisterRegAlloc RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:270
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool test(unsigned Idx) const
Definition: BitVector.h:461
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition: BitVector.h:156
A helper class for register coalescers.
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
float weight() const
Definition: LiveInterval.h:719
Register reg() const
Definition: LiveInterval.h:718
bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
bool empty() const
Definition: LiveInterval.h:382
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:448
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
double getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual MachineFunctionProperties getClearedProperties() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void freezeReservedRegs()
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Abstract base for classes implementing PBQP register allocation constraints (e.g.
virtual ~PBQPRAConstraint()=0
virtual void apply(PBQPRAGraph &G)=0
typename SolverT::GraphMetadata GraphMetadata
Definition: Graph.h:59
typename SolverT::Matrix Matrix
Definition: Graph.h:54
NodeId getEdgeNode2Id(EdgeId EId) const
Get the second node connected to this edge.
Definition: Graph.h:552
NodeId getEdgeNode1Id(EdgeId EId) const
Get the first node connected to this edge.
Definition: Graph.h:545
const Matrix & getEdgeCosts(EdgeId EId) const
Get an edge's cost matrix.
Definition: Graph.h:530
typename SolverT::Vector Vector
Definition: Graph.h:53
typename SolverT::RawMatrix RawMatrix
Definition: Graph.h:52
typename SolverT::RawVector RawVector
Definition: Graph.h:51
const Vector & getNodeCosts(NodeId NId) const
Get a node's cost vector.
Definition: Graph.h:488
Holds a vector of the allowed physical regs for a vreg.
Definition: RegAllocPBQP.h:94
void printDot(raw_ostream &OS) const
Print a representation of this graph in DOT format.
void dump() const
Dump this graph to dbgs().
Represents a solution to a PBQP problem.
Definition: Solution.h:26
unsigned getSelection(GraphBase::NodeId nodeId) const
Get a node's selection.
Definition: Solution.h:45
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Simple wrapper around std::function<void(raw_ostream&)>.
Definition: Printable.h:38
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr unsigned id() const
Definition: Register.h:103
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Spiller interface.
Definition: Spiller.h:27
virtual void spill(LiveRangeEdit &LRE)=0
spill - Spill the LRE.getParent() live interval.
virtual void postOptimization()
Definition: Spiller.h:43
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
ArrayRef< MCPhysReg > getRawAllocationOrder(const MachineFunction &MF) const
Returns the preferred order for allocating registers from this register class in MF.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual std::unique_ptr< PBQPRAConstraint > getCustomPBQPConstraints() const
Return PBQPConstraint(s) for the target.
Calculate auxiliary information for a virtual register such as its spill weight and allocation hint.
virtual float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr)
Weight normalization function.
void clearAllVirt()
clears all virtual to physical register mappings
Definition: VirtRegMap.h:124
MachineRegisterInfo & getRegInfo() const
Definition: VirtRegMap.h:79
void assignVirt2Phys(Register virtReg, MCPhysReg physReg)
creates a mapping for the specified virtual register to the specified physical register
Definition: VirtRegMap.cpp:86
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:460
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Solution solve(PBQPRAGraph &G)
Definition: RegAllocPBQP.h:520
unsigned getSpillOptionIdx()
Spill option index.
Definition: RegAllocPBQP.h:48
float PBQPNum
Definition: Math.h:22
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition: FileSystem.h:767
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void initializeVirtRegMapWrapperLegacyPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
Spiller * createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
Create and return a spiller that will insert spill code directly instead of deferring though VirtRegM...
void initializeLiveStacksWrapperLegacyPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createDefaultPBQPRegisterAllocator()
PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean Quadratic Prograaming (PBQ...
void initializeLiveIntervalsWrapperPassPass(PassRegistry &)
FunctionPass * createPBQPRegisterAllocator(char *customPassID=nullptr)
Create a PBQP register allocator instance.
void initializeSlotIndexesWrapperPassPass(PassRegistry &)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860