#ifndef LLVM_CODEGEN_REGALLOCPBQP_H
#define LLVM_CODEGEN_REGALLOCPBQP_H
class MachineBlockFrequencyInfo;
class MachineFunction;
  MatrixMetadata(const Matrix &M)
      : UnsafeRows(new bool[M.getRows() - 1]()),
        UnsafeCols(new bool[M.getCols() - 1]()) {
    unsigned *ColCounts = new unsigned[M.getCols() - 1]();

    for (unsigned i = 1; i < M.getRows(); ++i) {
      unsigned RowCount = 0;
      for (unsigned j = 1; j < M.getCols(); ++j) {
        if (M[i][j] == std::numeric_limits<PBQPNum>::infinity()) {
          ++RowCount;
          ++ColCounts[j - 1];
          UnsafeRows[i - 1] = true;
          UnsafeCols[j - 1] = true;
        }
      }
      WorstRow = std::max(WorstRow, RowCount);
    }
    unsigned WorstColCountForCurRow =
        *std::max_element(ColCounts, ColCounts + M.getCols() - 1);
    WorstCol = std::max(WorstCol, WorstColCountForCurRow);
    delete[] ColCounts;
  }
  unsigned WorstRow = 0;
  unsigned WorstCol = 0;
  std::unique_ptr<bool[]> UnsafeRows;
  std::unique_ptr<bool[]> UnsafeCols;
  AllowedRegVector(const std::vector<MCRegister> &OptVec)
      : NumOpts(OptVec.size()), Opts(new MCRegister[OptVec.size()]) {
    std::copy(OptVec.begin(), OptVec.end(), Opts.get());
  }

  unsigned size() const { return NumOpts; }
  MCRegister operator[](size_t I) const { return Opts[I]; }

  bool operator==(const AllowedRegVector &Other) const {
    if (NumOpts != Other.NumOpts)
      return false;
    return std::equal(Opts.get(), Opts.get() + NumOpts, Other.Opts.get());
  }

  bool operator!=(const AllowedRegVector &Other) const {
    return !(*this == Other);
  }

  unsigned NumOpts = 0;
  std::unique_ptr<MCRegister[]> Opts;
  MCRegister *OEnd = OptRegs.Opts.get() + OptRegs.NumOpts;
  void setNodeIdForVReg(Register VReg, GraphBase::NodeId NId) {
    VRegToNodeId[VReg.id()] = NId;
  }

  GraphBase::NodeId getNodeIdForVReg(Register VReg) const {
    auto VRegItr = VRegToNodeId.find(VReg);
    if (VRegItr == VRegToNodeId.end())
      return GraphBase::invalidNodeId();
    return VRegItr->second;
  }
  AllowedRegVecPool AllowedRegVecs;
    NotProvablyAllocatable,
    ConservativelyAllocatable,
  NodeMetadata(const NodeMetadata &Other)
      : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
        OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg),
        AllowedRegs(Other.AllowedRegs)
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
        ,
        everConservativelyAllocatable(Other.everConservativelyAllocatable)
#endif
  {
    if (NumOpts > 0)
      std::copy(&Other.OptUnsafeEdges[0], &Other.OptUnsafeEdges[NumOpts],
                &OptUnsafeEdges[0]);
  }

  void setAllowedRegs(GraphMetadata::AllowedRegVecRef AllowedRegs) {
    this->AllowedRegs = std::move(AllowedRegs);
  }

  void setup(const Vector &Costs) {
    NumOpts = Costs.getLength() - 1;
    OptUnsafeEdges = std::unique_ptr<unsigned[]>(new unsigned[NumOpts]());
  }

  void setReductionState(ReductionState RS) {
    assert(RS >= this->RS && "A node's reduction state can not be downgraded");
    this->RS = RS;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
    // Remember that this node was at some point conservatively allocatable.
    if (RS == ConservativelyAllocatable)
      everConservativelyAllocatable = true;
#endif
  }
  void handleAddEdge(const MatrixMetadata &MD, bool Transpose) {
    DeniedOpts += Transpose ? MD.getWorstRow() : MD.getWorstCol();
    const bool *UnsafeOpts =
        Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();
    for (unsigned i = 0; i < NumOpts; ++i)
      OptUnsafeEdges[i] += UnsafeOpts[i];
  }

  void handleRemoveEdge(const MatrixMetadata &MD, bool Transpose) {
    DeniedOpts -= Transpose ? MD.getWorstRow() : MD.getWorstCol();
    const bool *UnsafeOpts =
        Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();
    for (unsigned i = 0; i < NumOpts; ++i)
      OptUnsafeEdges[i] -= UnsafeOpts[i];
  }

  bool isConservativelyAllocatable() const {
    return (DeniedOpts < NumOpts) ||
           (std::find(&OptUnsafeEdges[0], &OptUnsafeEdges[NumOpts], 0) !=
            &OptUnsafeEdges[NumOpts]);
  }
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  bool wasConservativelyAllocatable() const {
    return everConservativelyAllocatable;
  }
#endif

  unsigned NumOpts = 0;
  unsigned DeniedOpts = 0;
  std::unique_ptr<unsigned[]> OptUnsafeEdges;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  bool everConservativelyAllocatable = false;
#endif
    assert(G.getNodeCosts(NId).getLength() > 1 &&
           "PBQP Graph should not contain single or zero-option nodes");
    if (G.getNodeDegree(NId) == 3)
      moveToOptimallyReducibleNodes(NId);
    else if (NMd.getReductionState() == NodeMetadata::NotProvablyAllocatable &&
             NMd.isConservativelyAllocatable())
      moveToConservativelyAllocatableNodes(NId);
  void removeFromCurrentSet(NodeId NId) {
    switch (G.getNodeMetadata(NId).getReductionState()) {
    case NodeMetadata::Unprocessed: break;
    case NodeMetadata::OptimallyReducible:
      assert(OptimallyReducibleNodes.find(NId) !=
                 OptimallyReducibleNodes.end() &&
             "Node not in optimally reducible set.");
      OptimallyReducibleNodes.erase(NId);
      break;
    case NodeMetadata::ConservativelyAllocatable:
      assert(ConservativelyAllocatableNodes.find(NId) !=
                 ConservativelyAllocatableNodes.end() &&
             "Node not in conservatively allocatable set.");
      ConservativelyAllocatableNodes.erase(NId);
      break;
    case NodeMetadata::NotProvablyAllocatable:
      assert(NotProvablyAllocatableNodes.find(NId) !=
                 NotProvablyAllocatableNodes.end() &&
             "Node not in not-provably-allocatable set.");
      NotProvablyAllocatableNodes.erase(NId);
      break;
    }
  }
  void moveToOptimallyReducibleNodes(NodeId NId) {
    removeFromCurrentSet(NId);
    OptimallyReducibleNodes.insert(NId);
    G.getNodeMetadata(NId).setReductionState(
        NodeMetadata::OptimallyReducible);
  }

  void moveToConservativelyAllocatableNodes(NodeId NId) {
    removeFromCurrentSet(NId);
    ConservativelyAllocatableNodes.insert(NId);
    G.getNodeMetadata(NId).setReductionState(
        NodeMetadata::ConservativelyAllocatable);
  }

  void moveToNotProvablyAllocatableNodes(NodeId NId) {
    removeFromCurrentSet(NId);
    NotProvablyAllocatableNodes.insert(NId);
    G.getNodeMetadata(NId).setReductionState(
        NodeMetadata::NotProvablyAllocatable);
  }

  void setup() {
    // Set up the initial worklists, based on node degree and allocatability.
    for (auto NId : G.nodeIds()) {
      if (G.getNodeDegree(NId) < 3)
        moveToOptimallyReducibleNodes(NId);
      else if (G.getNodeMetadata(NId).isConservativelyAllocatable())
        moveToConservativelyAllocatableNodes(NId);
      else
        moveToNotProvablyAllocatableNodes(NId);
    }
  }
  std::vector<GraphBase::NodeId> reduce() {
    assert(!G.empty() && "Cannot reduce empty graph.");

    using NodeId = GraphBase::NodeId;
    std::vector<NodeId> NodeStack;

    // Consume the worklists in priority order.
    while (true) {
      if (!OptimallyReducibleNodes.empty()) {
        NodeSet::iterator NItr = OptimallyReducibleNodes.begin();
        NodeId NId = *NItr;
        OptimallyReducibleNodes.erase(NItr);
        NodeStack.push_back(NId);
        // Nodes of degree 0, 1 or 2 are reduced optimally here (R0/R1/R2).
      } else if (!ConservativelyAllocatableNodes.empty()) {
        NodeSet::iterator NItr = ConservativelyAllocatableNodes.begin();
        NodeId NId = *NItr;
        ConservativelyAllocatableNodes.erase(NItr);
        NodeStack.push_back(NId);
        G.disconnectAllNeighborsFromNode(NId);
      } else if (!NotProvablyAllocatableNodes.empty()) {
        NodeSet::iterator NItr =
            std::min_element(NotProvablyAllocatableNodes.begin(),
                             NotProvablyAllocatableNodes.end(),
                             SpillCostComparator(G));
        NodeId NId = *NItr;
        NotProvablyAllocatableNodes.erase(NItr);
        NodeStack.push_back(NId);
        G.disconnectAllNeighborsFromNode(NId);
      } else
        break;
    }

    return NodeStack;
  }
  class SpillCostComparator {
  public:
    SpillCostComparator(const Graph &G) : G(G) {}

    bool operator()(NodeId N1Id, NodeId N2Id) {
      PBQPNum N1SC = G.getNodeCosts(N1Id)[0];
      PBQPNum N2SC = G.getNodeCosts(N2Id)[0];
      if (N1SC == N2SC)
        return G.getNodeDegree(N1Id) < G.getNodeDegree(N2Id);
      return N1SC < N2SC;
    }

  private:
    const Graph &G;
  };
  using NodeSet = std::set<NodeId>;
  NodeSet OptimallyReducibleNodes;
  NodeSet ConservativelyAllocatableNodes;
  NodeSet NotProvablyAllocatableNodes;
  return RegAllocSolver.solve();
#endif // LLVM_CODEGEN_REGALLOCPBQP_H
void dump() const
Dump this graph to dbgs().
bool operator==(const AllowedRegVector &Other) const
NodeId getEdgeNode1Id(EdgeId EId) const
Get the first node connected to this edge.
FunctionPass * createPBQPRegisterAllocator(char *customPassID=nullptr)
Create a PBQP register allocator instance.
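For illustration, a minimal hedged sketch of constructing this pass so it can be added to a legacy codegen pipeline; the helper name makePBQPAllocator is illustrative and not part of this header. The same allocator can also be selected on the llc command line with -regalloc=pbqp.
#include "llvm/CodeGen/RegAllocPBQP.h"

// Illustrative helper: build the PBQP register allocator pass.
// nullptr means "no custom pass ID" (the parameter's default).
llvm::FunctionPass *makePBQPAllocator() {
  return llvm::createPBQPRegisterAllocator(nullptr);
}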
void handleAddEdge(EdgeId EId)
static NodeId invalidNodeId()
Returns a value representing an invalid (non-existent) node.
NodeId getEdgeNode2Id(EdgeId EId) const
Get the second node connected to this edge.
PBQPRAGraph(GraphMetadata Metadata)
typename RegAllocSolverImpl::GraphMetadata GraphMetadata
void handleRemoveNode(NodeId NId)
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
void handleAddNode(NodeId NId)
const Vector & getNodeCosts(NodeId NId) const
Get a node's cost vector.
This class implements an extremely fast bulk output stream that can only output to a stream.
void printDot(raw_ostream &OS) const
Print a representation of this graph in DOT format.
PBQP::Graph< RegAllocSolverImpl > Graph
const Matrix & getEdgeCosts(EdgeId EId) const
Get an edge's cost matrix.
void disconnectAllNeighborsFromNode(NodeId NId)
Convenience method to disconnect all neighbours from the given node.
void handleReconnectEdge(EdgeId EId, NodeId NId)
friend hash_code hash_value(const AllowedRegVector &)
Represents a solution to a PBQP problem.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
unsigned getLength() const
Return the length of the vector.
NodeEntry::AdjEdgeList::size_type getNodeDegree(NodeId NId) const
void applyR1(GraphT &G, typename GraphT::NodeId NId)
Reduce a node of degree one.
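Since the one-line description is terse, here is a minimal self-contained sketch of the R1 rule on plain vectors; this is illustrative only, not the PBQP::applyR1 implementation. A degree-one node X folds its cost vector and its edge matrix into its single neighbour Y, after which X can be removed and its choice recovered during backpropagation.
#include <algorithm>
#include <cstddef>
#include <limits>
#include <vector>

using PBQPNum = float;

// Fold degree-one node X (costs CX, edge matrix E of size |CX| x |CY|) into
// its only neighbour Y: each option j of Y absorbs the cheapest compatible
// choice for X.
void applyR1Sketch(const std::vector<PBQPNum> &CX,
                   const std::vector<std::vector<PBQPNum>> &E,
                   std::vector<PBQPNum> &CY) {
  for (std::size_t j = 0; j < CY.size(); ++j) {
    PBQPNum Best = std::numeric_limits<PBQPNum>::infinity();
    for (std::size_t i = 0; i < CX.size(); ++i)
      Best = std::min(Best, CX[i] + E[i][j]);
    CY[j] += Best;
  }
}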
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool empty() const
Returns true if the graph is empty.
NodeIdSet nodeIds() const
unsigned getSpillOptionIdx()
Spill option index.
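As a hedged illustration of what this index refers to: a node's cost vector carries the spill option at index 0 and its allowed physical registers at indices 1..N, which is why SpillCostComparator above reads getNodeCosts(NId)[0]. The function and parameter names below (buildNodeCosts, NumAllowedRegs, SpillCost) are illustrative, not part of this header.
#include "llvm/CodeGen/RegAllocPBQP.h"
using namespace llvm;

// Illustrative only: a node cost vector with the spill option first
// (index 0) and one entry per allowed physical register after it.
PBQP::Vector buildNodeCosts(unsigned NumAllowedRegs, PBQP::PBQPNum SpillCost) {
  PBQP::Vector Costs(NumAllowedRegs + 1, 0);
  Costs[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
  return Costs;
}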
hash_code hash_value(const AllowedRegVector &OptRegs)
Solution backpropagate(GraphT &G, StackT stack)
void handleDisconnectEdge(EdgeId EId, NodeId NId)
MCRegister operator[](size_t I) const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Holds a vector of the allowed physical regs for a vreg.
AllowedRegVector()=default
Wrapper class representing virtual and physical registers.
void handleSetNodeCosts(NodeId NId, const Vector &newCosts)
Solution solve(PBQPRAGraph &G)
NodeMetadata & getNodeMetadata(NodeId NId)
std::shared_ptr< const AllowedRegVector > PoolRef
void applyR2(GraphT &G, typename GraphT::NodeId NId)
PoolRef getValue(ValueKeyT ValueKey)
void setSolver(SolverT &S)
Lock this graph to the given solver instance in preparation for running the solver.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
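For context, a small hedged example of composing these two helpers the same way hash_value(const AllowedRegVector &) does earlier in this header; the helper name hashAllowedRegs is illustrative.
#include "llvm/ADT/Hashing.h"
#include "llvm/MC/MCRegister.h"

// Illustrative helper: hash a [Begin, End) range of registers together with
// its length, mirroring hash_value(const AllowedRegVector &).
llvm::hash_code hashAllowedRegs(const llvm::MCRegister *Begin,
                                const llvm::MCRegister *End) {
  return llvm::hash_combine(End - Begin,
                            llvm::hash_combine_range(Begin, End));
}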
static cl::opt< RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser< RegisterRegAlloc > > RegAlloc("regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator), cl::desc("Register allocator to use"))
bool operator!=(const AllowedRegVector &Other) const
RegAllocSolverImpl(Graph &G)
void unsetSolver()
Release from solver instance.
AllowedRegVector(const std::vector< MCRegister > &OptVec)
const Metadata & getMetadata() const
void handleUpdateCosts(EdgeId EId, const Matrix &NewCosts)
Wrapper class representing physical registers. Should be passed by value.
An opaque object representing a hash code.