LLVM 20.0.0git
RegAllocPBQP.cpp
Go to the documentation of this file.
1//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
10// register allocator for LLVM. This allocator works by constructing a PBQP
11// problem representing the register allocation problem under consideration,
12// solving this using a PBQP solver, and mapping the solution back to a
13// register assignment. If any variables are selected for spilling then spill
14// code is inserted and the process repeated.
15//
16// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
17// for register allocation. For more information on PBQP for register
18// allocation, see the following papers:
19//
20// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
21// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
22// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
23//
24// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
25// architectures. In Proceedings of the Joint Conference on Languages,
26// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
27// NY, USA, 139-148.
28//
29//===----------------------------------------------------------------------===//
30
32#include "RegisterCoalescer.h"
33#include "llvm/ADT/ArrayRef.h"
34#include "llvm/ADT/BitVector.h"
35#include "llvm/ADT/DenseMap.h"
36#include "llvm/ADT/DenseSet.h"
37#include "llvm/ADT/STLExtras.h"
40#include "llvm/ADT/StringRef.h"
64#include "llvm/Config/llvm-config.h"
65#include "llvm/IR/Function.h"
66#include "llvm/IR/Module.h"
68#include "llvm/Pass.h"
71#include "llvm/Support/Debug.h"
75#include <algorithm>
76#include <cassert>
77#include <cstddef>
78#include <limits>
79#include <map>
80#include <memory>
81#include <queue>
82#include <set>
83#include <sstream>
84#include <string>
85#include <system_error>
86#include <tuple>
87#include <utility>
88#include <vector>
89
90using namespace llvm;
91
92#define DEBUG_TYPE "regalloc"
93
95RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
97
98static cl::opt<bool>
99PBQPCoalescing("pbqp-coalescing",
100 cl::desc("Attempt coalescing during PBQP register allocation."),
101 cl::init(false), cl::Hidden);
102
103#ifndef NDEBUG
104static cl::opt<bool>
105PBQPDumpGraphs("pbqp-dump-graphs",
106 cl::desc("Dump graphs for each function/round in the compilation unit."),
107 cl::init(false), cl::Hidden);
108#endif
109
110namespace {
111
112///
113/// PBQP based allocators solve the register allocation problem by mapping
114/// register allocation problems to Partitioned Boolean Quadratic
115/// Programming problems.
116class RegAllocPBQP : public MachineFunctionPass {
117public:
118 static char ID;
119
120 /// Construct a PBQP register allocator.
121 RegAllocPBQP(char *cPassID = nullptr)
122 : MachineFunctionPass(ID), customPassID(cPassID) {
127 }
128
129 /// Return the pass name.
130 StringRef getPassName() const override { return "PBQP Register Allocator"; }
131
132 /// PBQP analysis usage.
133 void getAnalysisUsage(AnalysisUsage &au) const override;
134
135 /// Perform register allocation
136 bool runOnMachineFunction(MachineFunction &MF) override;
137
140 MachineFunctionProperties::Property::NoPHIs);
141 }
142
145 MachineFunctionProperties::Property::IsSSA);
146 }
147
148private:
149 using RegSet = std::set<Register>;
150
151 char *customPassID;
152
153 RegSet VRegsToAlloc, EmptyIntervalVRegs;
154
155 /// Inst which is a def of an original reg and whose defs are already all
156 /// dead after remat is saved in DeadRemats. The deletion of such inst is
157 /// postponed till all the allocations are done, so its remat expr is
158 /// always available for the remat of all the siblings of the original reg.
160
161 /// Finds the initial set of vreg intervals to allocate.
162 void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
163
164 /// Constructs an initial graph.
165 void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
166
167 /// Spill the given VReg.
168 void spillVReg(Register VReg, SmallVectorImpl<Register> &NewIntervals,
170 Spiller &VRegSpiller);
171
172 /// Given a solved PBQP problem maps this solution back to a register
173 /// assignment.
174 bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
175 const PBQP::Solution &Solution,
176 VirtRegMap &VRM,
177 Spiller &VRegSpiller);
178
179 /// Postprocessing before final spilling. Sets basic block "live in"
180 /// variables.
181 void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
182 VirtRegMap &VRM) const;
183
184 void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
185};
186
187char RegAllocPBQP::ID = 0;
188
189/// Set spill costs for each node in the PBQP reg-alloc graph.
190class SpillCosts : public PBQPRAConstraint {
191public:
192 void apply(PBQPRAGraph &G) override {
193 LiveIntervals &LIS = G.getMetadata().LIS;
194
195 // A minimum spill costs, so that register constraints can be set
196 // without normalization in the [0.0:MinSpillCost( interval.
197 const PBQP::PBQPNum MinSpillCost = 10.0;
198
199 for (auto NId : G.nodeIds()) {
200 PBQP::PBQPNum SpillCost =
201 LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight();
202 if (SpillCost == 0.0)
203 SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
204 else
205 SpillCost += MinSpillCost;
206 PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
207 NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
208 G.setNodeCosts(NId, std::move(NodeCosts));
209 }
210 }
211};
212
213/// Add interference edges between overlapping vregs.
214class Interference : public PBQPRAConstraint {
215private:
216 using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
217 using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
218 using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
219 using DisjointAllowedRegsCache = DenseSet<IKey>;
220 using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
221 using IEdgeCache = DenseSet<IEdgeKey>;
222
223 bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
225 const DisjointAllowedRegsCache &D) const {
226 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
227 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
228
229 if (NRegs == MRegs)
230 return false;
231
232 if (NRegs < MRegs)
233 return D.contains(IKey(NRegs, MRegs));
234
235 return D.contains(IKey(MRegs, NRegs));
236 }
237
238 void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
240 DisjointAllowedRegsCache &D) {
241 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
242 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
243
244 assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
245
246 if (NRegs < MRegs)
247 D.insert(IKey(NRegs, MRegs));
248 else
249 D.insert(IKey(MRegs, NRegs));
250 }
251
252 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
253 // for the fast interference graph construction algorithm. The last is there
254 // to save us from looking up node ids via the VRegToNode map in the graph
255 // metadata.
256 using IntervalInfo =
257 std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
258
259 static SlotIndex getStartPoint(const IntervalInfo &I) {
260 return std::get<0>(I)->segments[std::get<1>(I)].start;
261 }
262
263 static SlotIndex getEndPoint(const IntervalInfo &I) {
264 return std::get<0>(I)->segments[std::get<1>(I)].end;
265 }
266
267 static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
268 return std::get<2>(I);
269 }
270
271 static bool lowestStartPoint(const IntervalInfo &I1,
272 const IntervalInfo &I2) {
273 // Condition reversed because priority queue has the *highest* element at
274 // the front, rather than the lowest.
275 return getStartPoint(I1) > getStartPoint(I2);
276 }
277
278 static bool lowestEndPoint(const IntervalInfo &I1,
279 const IntervalInfo &I2) {
280 SlotIndex E1 = getEndPoint(I1);
281 SlotIndex E2 = getEndPoint(I2);
282
283 if (E1 < E2)
284 return true;
285
286 if (E1 > E2)
287 return false;
288
289 // If two intervals end at the same point, we need a way to break the tie or
290 // the set will assume they're actually equal and refuse to insert a
291 // "duplicate". Just compare the vregs - fast and guaranteed unique.
292 return std::get<0>(I1)->reg() < std::get<0>(I2)->reg();
293 }
294
295 static bool isAtLastSegment(const IntervalInfo &I) {
296 return std::get<1>(I) == std::get<0>(I)->size() - 1;
297 }
298
299 static IntervalInfo nextSegment(const IntervalInfo &I) {
300 return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
301 }
302
303public:
304 void apply(PBQPRAGraph &G) override {
305 // The following is loosely based on the linear scan algorithm introduced in
306 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
307 // isn't linear, because the size of the active set isn't bound by the
308 // number of registers, but rather the size of the largest clique in the
309 // graph. Still, we expect this to be better than N^2.
310 LiveIntervals &LIS = G.getMetadata().LIS;
311
312 // Interferenc matrices are incredibly regular - they're only a function of
313 // the allowed sets, so we cache them to avoid the overhead of constructing
314 // and uniquing them.
315 IMatrixCache C;
316
317 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
318 // cache locally edges we have already seen.
319 IEdgeCache EC;
320
321 // Cache known disjoint allowed registers pairs
322 DisjointAllowedRegsCache D;
323
324 using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
325 using IntervalQueue =
326 std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
327 decltype(&lowestStartPoint)>;
328 IntervalSet Active(lowestEndPoint);
329 IntervalQueue Inactive(lowestStartPoint);
330
331 // Start by building the inactive set.
332 for (auto NId : G.nodeIds()) {
333 Register VReg = G.getNodeMetadata(NId).getVReg();
334 LiveInterval &LI = LIS.getInterval(VReg);
335 assert(!LI.empty() && "PBQP graph contains node for empty interval");
336 Inactive.push(std::make_tuple(&LI, 0, NId));
337 }
338
339 while (!Inactive.empty()) {
340 // Tentatively grab the "next" interval - this choice may be overriden
341 // below.
342 IntervalInfo Cur = Inactive.top();
343
344 // Retire any active intervals that end before Cur starts.
345 IntervalSet::iterator RetireItr = Active.begin();
346 while (RetireItr != Active.end() &&
347 (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
348 // If this interval has subsequent segments, add the next one to the
349 // inactive list.
350 if (!isAtLastSegment(*RetireItr))
351 Inactive.push(nextSegment(*RetireItr));
352
353 ++RetireItr;
354 }
355 Active.erase(Active.begin(), RetireItr);
356
357 // One of the newly retired segments may actually start before the
358 // Cur segment, so re-grab the front of the inactive list.
359 Cur = Inactive.top();
360 Inactive.pop();
361
362 // At this point we know that Cur overlaps all active intervals. Add the
363 // interference edges.
364 PBQP::GraphBase::NodeId NId = getNodeId(Cur);
365 for (const auto &A : Active) {
366 PBQP::GraphBase::NodeId MId = getNodeId(A);
367
368 // Do not add an edge when the nodes' allowed registers do not
369 // intersect: there is obviously no interference.
370 if (haveDisjointAllowedRegs(G, NId, MId, D))
371 continue;
372
373 // Check that we haven't already added this edge
374 IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
375 if (EC.count(EK))
376 continue;
377
378 // This is a new edge - add it to the graph.
379 if (!createInterferenceEdge(G, NId, MId, C))
380 setDisjointAllowedRegs(G, NId, MId, D);
381 else
382 EC.insert(EK);
383 }
384
385 // Finally, add Cur to the Active set.
386 Active.insert(Cur);
387 }
388 }
389
390private:
391 // Create an Interference edge and add it to the graph, unless it is
392 // a null matrix, meaning the nodes' allowed registers do not have any
393 // interference. This case occurs frequently between integer and floating
394 // point registers for example.
395 // return true iff both nodes interferes.
396 bool createInterferenceEdge(PBQPRAGraph &G,
398 IMatrixCache &C) {
399 const TargetRegisterInfo &TRI =
400 *G.getMetadata().MF.getSubtarget().getRegisterInfo();
401 const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
402 const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
403
404 // Try looking the edge costs up in the IMatrixCache first.
405 IKey K(&NRegs, &MRegs);
406 IMatrixCache::iterator I = C.find(K);
407 if (I != C.end()) {
408 G.addEdgeBypassingCostAllocator(NId, MId, I->second);
409 return true;
410 }
411
412 PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
413 bool NodesInterfere = false;
414 for (unsigned I = 0; I != NRegs.size(); ++I) {
415 MCRegister PRegN = NRegs[I];
416 for (unsigned J = 0; J != MRegs.size(); ++J) {
417 MCRegister PRegM = MRegs[J];
418 if (TRI.regsOverlap(PRegN, PRegM)) {
419 M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
420 NodesInterfere = true;
421 }
422 }
423 }
424
425 if (!NodesInterfere)
426 return false;
427
428 PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
429 C[K] = G.getEdgeCostsPtr(EId);
430
431 return true;
432 }
433};
434
435class Coalescing : public PBQPRAConstraint {
436public:
437 void apply(PBQPRAGraph &G) override {
438 MachineFunction &MF = G.getMetadata().MF;
439 MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
441
442 // Scan the machine function and add a coalescing cost whenever CoalescerPair
443 // gives the Ok.
444 for (const auto &MBB : MF) {
445 for (const auto &MI : MBB) {
446 // Skip not-coalescable or already coalesced copies.
447 if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
448 continue;
449
450 Register DstReg = CP.getDstReg();
451 Register SrcReg = CP.getSrcReg();
452
454
455 if (CP.isPhys()) {
456 if (!MF.getRegInfo().isAllocatable(DstReg))
457 continue;
458
459 PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
460
461 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
462 G.getNodeMetadata(NId).getAllowedRegs();
463
464 unsigned PRegOpt = 0;
465 while (PRegOpt < Allowed.size() && Allowed[PRegOpt].id() != DstReg)
466 ++PRegOpt;
467
468 if (PRegOpt < Allowed.size()) {
469 PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
470 NewCosts[PRegOpt + 1] -= CBenefit;
471 G.setNodeCosts(NId, std::move(NewCosts));
472 }
473 } else {
474 PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
475 PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
476 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
477 &G.getNodeMetadata(N1Id).getAllowedRegs();
478 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
479 &G.getNodeMetadata(N2Id).getAllowedRegs();
480
481 PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
482 if (EId == G.invalidEdgeId()) {
483 PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
484 Allowed2->size() + 1, 0);
485 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
486 G.addEdge(N1Id, N2Id, std::move(Costs));
487 } else {
488 if (G.getEdgeNode1Id(EId) == N2Id) {
489 std::swap(N1Id, N2Id);
490 std::swap(Allowed1, Allowed2);
491 }
492 PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
493 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
494 G.updateEdgeCosts(EId, std::move(Costs));
495 }
496 }
497 }
498 }
499 }
500
501private:
502 void addVirtRegCoalesce(
503 PBQPRAGraph::RawMatrix &CostMat,
504 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
505 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
506 PBQP::PBQPNum Benefit) {
507 assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
508 assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
509 for (unsigned I = 0; I != Allowed1.size(); ++I) {
510 MCRegister PReg1 = Allowed1[I];
511 for (unsigned J = 0; J != Allowed2.size(); ++J) {
512 MCRegister PReg2 = Allowed2[J];
513 if (PReg1 == PReg2)
514 CostMat[I + 1][J + 1] -= Benefit;
515 }
516 }
517 }
518};
519
520/// PBQP-specific implementation of weight normalization.
521class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo {
522 float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override {
523 // All intervals have a spill weight that is mostly proportional to the
524 // number of uses, with uses in loops having a bigger weight.
525 return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1);
526 }
527
528public:
529 PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
530 const MachineLoopInfo &Loops,
531 const MachineBlockFrequencyInfo &MBFI)
532 : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {}
533};
534} // end anonymous namespace
535
536// Out-of-line destructor/anchor for PBQPRAConstraint.
538
539void PBQPRAConstraint::anchor() {}
540
541void PBQPRAConstraintList::anchor() {}
542
543void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
544 au.setPreservesCFG();
551 //au.addRequiredID(SplitCriticalEdgesID);
552 if (customPassID)
553 au.addRequiredID(*customPassID);
565}
566
567void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
568 LiveIntervals &LIS) {
569 const MachineRegisterInfo &MRI = MF.getRegInfo();
570
571 // Iterate over all live ranges.
572 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
574 if (MRI.reg_nodbg_empty(Reg))
575 continue;
576 VRegsToAlloc.insert(Reg);
577 }
578}
579
581 const TargetRegisterInfo &TRI,
582 const MachineFunction &MF) {
583 const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
584 for (unsigned i = 0; CSR[i] != 0; ++i)
585 if (TRI.regsOverlap(Reg, CSR[i]))
586 return true;
587 return false;
588}
589
590void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
591 Spiller &VRegSpiller) {
592 MachineFunction &MF = G.getMetadata().MF;
593
594 LiveIntervals &LIS = G.getMetadata().LIS;
595 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
596 const TargetRegisterInfo &TRI =
597 *G.getMetadata().MF.getSubtarget().getRegisterInfo();
598
599 std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
600
601 std::map<Register, std::vector<MCRegister>> VRegAllowedMap;
602
603 while (!Worklist.empty()) {
604 Register VReg = Worklist.back();
605 Worklist.pop_back();
606
607 LiveInterval &VRegLI = LIS.getInterval(VReg);
608
609 // If this is an empty interval move it to the EmptyIntervalVRegs set then
610 // continue.
611 if (VRegLI.empty()) {
612 EmptyIntervalVRegs.insert(VRegLI.reg());
613 VRegsToAlloc.erase(VRegLI.reg());
614 continue;
615 }
616
617 const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
618
619 // Record any overlaps with regmask operands.
620 BitVector RegMaskOverlaps;
621 LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
622
623 // Compute an initial allowed set for the current vreg.
624 std::vector<MCRegister> VRegAllowed;
625 ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
626 for (MCPhysReg R : RawPRegOrder) {
627 MCRegister PReg(R);
628 if (MRI.isReserved(PReg))
629 continue;
630
631 // vregLI crosses a regmask operand that clobbers preg.
632 if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
633 continue;
634
635 // vregLI overlaps fixed regunit interference.
636 bool Interference = false;
637 for (MCRegUnit Unit : TRI.regunits(PReg)) {
638 if (VRegLI.overlaps(LIS.getRegUnit(Unit))) {
639 Interference = true;
640 break;
641 }
642 }
643 if (Interference)
644 continue;
645
646 // preg is usable for this virtual register.
647 VRegAllowed.push_back(PReg);
648 }
649
650 // Check for vregs that have no allowed registers. These should be
651 // pre-spilled and the new vregs added to the worklist.
652 if (VRegAllowed.empty()) {
654 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
655 llvm::append_range(Worklist, NewVRegs);
656 continue;
657 }
658
659 VRegAllowedMap[VReg.id()] = std::move(VRegAllowed);
660 }
661
662 for (auto &KV : VRegAllowedMap) {
663 auto VReg = KV.first;
664
665 // Move empty intervals to the EmptyIntervalVReg set.
666 if (LIS.getInterval(VReg).empty()) {
667 EmptyIntervalVRegs.insert(VReg);
668 VRegsToAlloc.erase(VReg);
669 continue;
670 }
671
672 auto &VRegAllowed = KV.second;
673
674 PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
675
676 // Tweak cost of callee saved registers, as using then force spilling and
677 // restoring them. This would only happen in the prologue / epilogue though.
678 for (unsigned i = 0; i != VRegAllowed.size(); ++i)
679 if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
680 NodeCosts[1 + i] += 1.0;
681
682 PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
683 G.getNodeMetadata(NId).setVReg(VReg);
684 G.getNodeMetadata(NId).setAllowedRegs(
685 G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
686 G.getMetadata().setNodeIdForVReg(VReg, NId);
687 }
688}
689
690void RegAllocPBQP::spillVReg(Register VReg,
691 SmallVectorImpl<Register> &NewIntervals,
693 VirtRegMap &VRM, Spiller &VRegSpiller) {
694 VRegsToAlloc.erase(VReg);
695 LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
696 nullptr, &DeadRemats);
697 VRegSpiller.spill(LRE);
698
700 (void)TRI;
701 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
702 << LRE.getParent().weight() << ", New vregs: ");
703
704 // Copy any newly inserted live intervals into the list of regs to
705 // allocate.
706 for (const Register &R : LRE) {
707 const LiveInterval &LI = LIS.getInterval(R);
708 assert(!LI.empty() && "Empty spill range.");
709 LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " ");
710 VRegsToAlloc.insert(LI.reg());
711 }
712
713 LLVM_DEBUG(dbgs() << ")\n");
714}
715
716bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
717 const PBQP::Solution &Solution,
718 VirtRegMap &VRM,
719 Spiller &VRegSpiller) {
720 MachineFunction &MF = G.getMetadata().MF;
721 LiveIntervals &LIS = G.getMetadata().LIS;
723 (void)TRI;
724
725 // Set to true if we have any spills
726 bool AnotherRoundNeeded = false;
727
728 // Clear the existing allocation.
729 VRM.clearAllVirt();
730
731 // Iterate over the nodes mapping the PBQP solution to a register
732 // assignment.
733 for (auto NId : G.nodeIds()) {
734 Register VReg = G.getNodeMetadata(NId).getVReg();
735 unsigned AllocOpt = Solution.getSelection(NId);
736
737 if (AllocOpt != PBQP::RegAlloc::getSpillOptionIdx()) {
738 MCRegister PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1];
739 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
740 << TRI.getName(PReg) << "\n");
741 assert(PReg != 0 && "Invalid preg selected.");
742 VRM.assignVirt2Phys(VReg, PReg);
743 } else {
744 // Spill VReg. If this introduces new intervals we'll need another round
745 // of allocation.
747 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
748 AnotherRoundNeeded |= !NewVRegs.empty();
749 }
750 }
751
752 return !AnotherRoundNeeded;
753}
754
755void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
756 LiveIntervals &LIS,
757 VirtRegMap &VRM) const {
759
760 // First allocate registers for the empty intervals.
761 for (const Register &R : EmptyIntervalVRegs) {
762 LiveInterval &LI = LIS.getInterval(R);
763
764 Register PReg = MRI.getSimpleHint(LI.reg());
765
766 if (PReg == 0) {
767 const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg());
768 const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
769 for (MCRegister CandidateReg : RawPRegOrder) {
770 if (!VRM.getRegInfo().isReserved(CandidateReg)) {
771 PReg = CandidateReg;
772 break;
773 }
774 }
775 assert(PReg &&
776 "No un-reserved physical registers in this register class");
777 }
778
779 VRM.assignVirt2Phys(LI.reg(), PReg);
780 }
781}
782
783void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
784 VRegSpiller.postOptimization();
785 /// Remove dead defs because of rematerialization.
786 for (auto *DeadInst : DeadRemats) {
787 LIS.RemoveMachineInstrFromMaps(*DeadInst);
788 DeadInst->eraseFromParent();
789 }
790 DeadRemats.clear();
791}
792
793bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
794 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
796 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
797
798 VirtRegMap &VRM = getAnalysis<VirtRegMap>();
799
800 PBQPVirtRegAuxInfo VRAI(
801 MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
802 VRAI.calculateSpillWeightsAndHints();
803
804 // FIXME: we create DefaultVRAI here to match existing behavior pre-passing
805 // the VRAI through the spiller to the live range editor. However, it probably
806 // makes more sense to pass the PBQP VRAI. The existing behavior had
807 // LiveRangeEdit make its own VirtRegAuxInfo object.
808 VirtRegAuxInfo DefaultVRAI(
809 MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
810 std::unique_ptr<Spiller> VRegSpiller(
811 createInlineSpiller(*this, MF, VRM, DefaultVRAI));
812
814
815 LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
816
817 // Allocator main loop:
818 //
819 // * Map current regalloc problem to a PBQP problem
820 // * Solve the PBQP problem
821 // * Map the solution back to a register allocation
822 // * Spill if necessary
823 //
824 // This process is continued till no more spills are generated.
825
826 // Find the vreg intervals in need of allocation.
827 findVRegIntervalsToAlloc(MF, LIS);
828
829#ifndef NDEBUG
830 const Function &F = MF.getFunction();
831 std::string FullyQualifiedName =
832 F.getParent()->getModuleIdentifier() + "." + F.getName().str();
833#endif
834
835 // If there are non-empty intervals allocate them using pbqp.
836 if (!VRegsToAlloc.empty()) {
837 const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
838 std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
839 std::make_unique<PBQPRAConstraintList>();
840 ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>());
841 ConstraintsRoot->addConstraint(std::make_unique<Interference>());
842 if (PBQPCoalescing)
843 ConstraintsRoot->addConstraint(std::make_unique<Coalescing>());
844 ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
845
846 bool PBQPAllocComplete = false;
847 unsigned Round = 0;
848
849 while (!PBQPAllocComplete) {
850 LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
851 (void) Round;
852
854 initializeGraph(G, VRM, *VRegSpiller);
855 ConstraintsRoot->apply(G);
856
857#ifndef NDEBUG
858 if (PBQPDumpGraphs) {
859 std::ostringstream RS;
860 RS << Round;
861 std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
862 ".pbqpgraph";
863 std::error_code EC;
864 raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_TextWithCRLF);
865 LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
866 << GraphFileName << "\"\n");
867 G.dump(OS);
868 }
869#endif
870
872 PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
873 ++Round;
874 }
875 }
876
877 // Finalise allocation, allocate empty ranges.
878 finalizeAlloc(MF, LIS, VRM);
879 postOptimization(*VRegSpiller, LIS);
880 VRegsToAlloc.clear();
881 EmptyIntervalVRegs.clear();
882
883 LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
884
885 return true;
886}
887
888/// Create Printable object for node and register info.
891 return Printable([NId, &G](raw_ostream &OS) {
892 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
893 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
894 Register VReg = G.getNodeMetadata(NId).getVReg();
895 const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
896 OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
897 });
898}
899
900#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
902 for (auto NId : nodeIds()) {
903 const Vector &Costs = getNodeCosts(NId);
904 assert(Costs.getLength() != 0 && "Empty vector in graph.");
905 OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
906 }
907 OS << '\n';
908
909 for (auto EId : edgeIds()) {
910 NodeId N1Id = getEdgeNode1Id(EId);
911 NodeId N2Id = getEdgeNode2Id(EId);
912 assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
913 const Matrix &M = getEdgeCosts(EId);
914 assert(M.getRows() != 0 && "No rows in matrix.");
915 assert(M.getCols() != 0 && "No cols in matrix.");
916 OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
917 OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
918 OS << M << '\n';
919 }
920}
921
923 dump(dbgs());
924}
925#endif
926
928 OS << "graph {\n";
929 for (auto NId : nodeIds()) {
930 OS << " node" << NId << " [ label=\""
931 << PrintNodeInfo(NId, *this) << "\\n"
932 << getNodeCosts(NId) << "\" ]\n";
933 }
934
935 OS << " edge [ len=" << nodeIds().size() << " ]\n";
936 for (auto EId : edgeIds()) {
937 OS << " node" << getEdgeNode1Id(EId)
938 << " -- node" << getEdgeNode2Id(EId)
939 << " [ label=\"";
940 const Matrix &EdgeCosts = getEdgeCosts(EId);
941 for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
942 OS << EdgeCosts.getRowAsVector(i) << "\\n";
943 }
944 OS << "\" ]\n";
945 }
946 OS << "}\n";
947}
948
950 return new RegAllocPBQP(customPassID);
951}
952
955}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:533
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
Hexagon Hardware Loops
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
Branch Coalescing
static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, const PBQP::RegAlloc::PBQPRAGraph &G)
Create Printable object for node and register info.
static cl::opt< bool > PBQPDumpGraphs("pbqp-dump-graphs", cl::desc("Dump graphs for each function/round in the compilation unit."), cl::init(false), cl::Hidden)
static cl::opt< bool > PBQPCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden)
static bool isACalleeSavedRegister(MCRegister Reg, const TargetRegisterInfo &TRI, const MachineFunction &MF)
static RegisterRegAlloc RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:270
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool test(unsigned Idx) const
Definition: BitVector.h:461
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition: BitVector.h:156
A helper class for register coalescers.
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
float weight() const
Definition: LiveInterval.h:719
Register reg() const
Definition: LiveInterval.h:718
bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
bool empty() const
Definition: LiveInterval.h:382
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:448
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
double getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual MachineFunctionProperties getClearedProperties() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void freezeReservedRegs()
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Abstract base for classes implementing PBQP register allocation constraints (e.g.
virtual ~PBQPRAConstraint()=0
virtual void apply(PBQPRAGraph &G)=0
typename SolverT::GraphMetadata GraphMetadata
Definition: Graph.h:59
typename SolverT::Matrix Matrix
Definition: Graph.h:54
NodeId getEdgeNode2Id(EdgeId EId) const
Get the second node connected to this edge.
Definition: Graph.h:552
NodeId getEdgeNode1Id(EdgeId EId) const
Get the first node connected to this edge.
Definition: Graph.h:545
const Matrix & getEdgeCosts(EdgeId EId) const
Get an edge's cost matrix.
Definition: Graph.h:530
typename SolverT::Vector Vector
Definition: Graph.h:53
typename SolverT::RawMatrix RawMatrix
Definition: Graph.h:52
typename SolverT::RawVector RawVector
Definition: Graph.h:51
const Vector & getNodeCosts(NodeId NId) const
Get a node's cost vector.
Definition: Graph.h:488
Holds a vector of the allowed physical regs for a vreg.
Definition: RegAllocPBQP.h:94
void printDot(raw_ostream &OS) const
Print a representation of this graph in DOT format.
void dump() const
Dump this graph to dbgs().
Represents a solution to a PBQP problem.
Definition: Solution.h:26
unsigned getSelection(GraphBase::NodeId nodeId) const
Get a node's selection.
Definition: Solution.h:45
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Simple wrapper around std::function<void(raw_ostream&)>.
Definition: Printable.h:38
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr unsigned id() const
Definition: Register.h:103
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Spiller interface.
Definition: Spiller.h:24
virtual void spill(LiveRangeEdit &LRE)=0
spill - Spill the LRE.getParent() live interval.
virtual void postOptimization()
Definition: Spiller.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
ArrayRef< MCPhysReg > getRawAllocationOrder(const MachineFunction &MF) const
Returns the preferred order for allocating registers from this register class in MF.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual std::unique_ptr< PBQPRAConstraint > getCustomPBQPConstraints() const
Return PBQPConstraint(s) for the target.
Calculate auxiliary information for a virtual register such as its spill weight and allocation hint.
virtual float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr)
Weight normalization function.
void clearAllVirt()
clears all virtual to physical register mappings
Definition: VirtRegMap.h:139
MachineRegisterInfo & getRegInfo() const
Definition: VirtRegMap.h:92
void assignVirt2Phys(Register virtReg, MCPhysReg physReg)
creates a mapping for the specified virtual register to the specified physical register
Definition: VirtRegMap.cpp:85
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:460
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Solution solve(PBQPRAGraph &G)
Definition: RegAllocPBQP.h:520
unsigned getSpillOptionIdx()
Spill option index.
Definition: RegAllocPBQP.h:48
float PBQPNum
Definition: Math.h:22
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition: FileSystem.h:767
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2098
void initializeVirtRegMapPass(PassRegistry &)
Spiller * createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
Create and return a spiller that will insert spill code directly instead of deferring though VirtRegM...
void initializeLiveStacksPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createDefaultPBQPRegisterAllocator()
PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean Quadratic Prograaming (PBQ...
void initializeLiveIntervalsWrapperPassPass(PassRegistry &)
FunctionPass * createPBQPRegisterAllocator(char *customPassID=nullptr)
Create a PBQP register allocator instance.
void initializeSlotIndexesWrapperPassPass(PassRegistry &)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860