Line data Source code
1 : //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This implements bottom-up and top-down register pressure reduction list
11 : // schedulers, using standard algorithms. The basic approach uses a priority
12 : // queue of available nodes to schedule. One at a time, nodes are taken from
13 : // the priority queue (thus in priority order), checked for legality to
14 : // schedule, and emitted if legal.
15 : //
16 : //===----------------------------------------------------------------------===//
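// A rough, illustrative sketch of the bottom-up loop implemented below (not
// part of the scheduler itself; the real ListScheduleBottomUp() additionally
// handles stalls, hazards, and physical-register interference):
//
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = PickNodeToScheduleBottomUp(); // highest-priority legal node
//     AdvancePastStalls(SU);                    // honor latency/resource limits
//     ScheduleNodeBottomUp(SU);                 // emit SU, release predecessors
//   }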
17 :
18 : #include "ScheduleDAGSDNodes.h"
19 : #include "llvm/ADT/ArrayRef.h"
20 : #include "llvm/ADT/DenseMap.h"
21 : #include "llvm/ADT/STLExtras.h"
22 : #include "llvm/ADT/SmallSet.h"
23 : #include "llvm/ADT/SmallVector.h"
24 : #include "llvm/ADT/Statistic.h"
25 : #include "llvm/CodeGen/ISDOpcodes.h"
26 : #include "llvm/CodeGen/MachineFunction.h"
27 : #include "llvm/CodeGen/MachineOperand.h"
28 : #include "llvm/CodeGen/MachineRegisterInfo.h"
29 : #include "llvm/CodeGen/ScheduleDAG.h"
30 : #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
31 : #include "llvm/CodeGen/SchedulerRegistry.h"
32 : #include "llvm/CodeGen/SelectionDAGISel.h"
33 : #include "llvm/CodeGen/SelectionDAGNodes.h"
34 : #include "llvm/CodeGen/TargetInstrInfo.h"
35 : #include "llvm/CodeGen/TargetLowering.h"
36 : #include "llvm/CodeGen/TargetOpcodes.h"
37 : #include "llvm/CodeGen/TargetRegisterInfo.h"
38 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
39 : #include "llvm/Config/llvm-config.h"
40 : #include "llvm/IR/InlineAsm.h"
41 : #include "llvm/MC/MCInstrDesc.h"
42 : #include "llvm/MC/MCRegisterInfo.h"
43 : #include "llvm/Support/Casting.h"
44 : #include "llvm/Support/CodeGen.h"
45 : #include "llvm/Support/CommandLine.h"
46 : #include "llvm/Support/Compiler.h"
47 : #include "llvm/Support/Debug.h"
48 : #include "llvm/Support/ErrorHandling.h"
49 : #include "llvm/Support/MachineValueType.h"
50 : #include "llvm/Support/raw_ostream.h"
51 : #include <algorithm>
52 : #include <cassert>
53 : #include <cstdint>
54 : #include <cstdlib>
55 : #include <iterator>
56 : #include <limits>
57 : #include <memory>
58 : #include <utility>
59 : #include <vector>
60 :
61 : using namespace llvm;
62 :
63 : #define DEBUG_TYPE "pre-RA-sched"
64 :
65 : STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
66 : STATISTIC(NumUnfolds, "Number of nodes unfolded");
67 : STATISTIC(NumDups, "Number of duplicated nodes");
68 : STATISTIC(NumPRCopies, "Number of physical register copies");
69 :
70 : static RegisterScheduler
71 : burrListDAGScheduler("list-burr",
72 : "Bottom-up register reduction list scheduling",
73 : createBURRListDAGScheduler);
74 :
75 : static RegisterScheduler
76 : sourceListDAGScheduler("source",
77 : "Similar to list-burr but schedules in source "
78 : "order when possible",
79 : createSourceListDAGScheduler);
80 :
81 : static RegisterScheduler
82 : hybridListDAGScheduler("list-hybrid",
83 : "Bottom-up register pressure aware list scheduling "
84 : "which tries to balance latency and register pressure",
85 : createHybridListDAGScheduler);
86 :
87 : static RegisterScheduler
88 : ILPListDAGScheduler("list-ilp",
89 : "Bottom-up register pressure aware list scheduling "
90 : "which tries to balance ILP and register pressure",
91 : createILPListDAGScheduler);
92 :
93 : static cl::opt<bool> DisableSchedCycles(
94 : "disable-sched-cycles", cl::Hidden, cl::init(false),
95 : cl::desc("Disable cycle-level precision during preRA scheduling"));
96 :
97 : // Temporary sched=list-ilp flags until the heuristics are robust.
98 : // Some options are also available under sched=list-hybrid.
99 : static cl::opt<bool> DisableSchedRegPressure(
100 : "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
101 : cl::desc("Disable regpressure priority in sched=list-ilp"));
102 : static cl::opt<bool> DisableSchedLiveUses(
103 : "disable-sched-live-uses", cl::Hidden, cl::init(true),
104 : cl::desc("Disable live use priority in sched=list-ilp"));
105 : static cl::opt<bool> DisableSchedVRegCycle(
106 : "disable-sched-vrcycle", cl::Hidden, cl::init(false),
107 : cl::desc("Disable virtual register cycle interference checks"));
108 : static cl::opt<bool> DisableSchedPhysRegJoin(
109 : "disable-sched-physreg-join", cl::Hidden, cl::init(false),
110 : cl::desc("Disable physreg def-use affinity"));
111 : static cl::opt<bool> DisableSchedStalls(
112 : "disable-sched-stalls", cl::Hidden, cl::init(true),
113 : cl::desc("Disable no-stall priority in sched=list-ilp"));
114 : static cl::opt<bool> DisableSchedCriticalPath(
115 : "disable-sched-critical-path", cl::Hidden, cl::init(false),
116 : cl::desc("Disable critical path priority in sched=list-ilp"));
117 : static cl::opt<bool> DisableSchedHeight(
118 : "disable-sched-height", cl::Hidden, cl::init(false),
119 : cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 : static cl::opt<bool> Disable2AddrHack(
121 : "disable-2addr-hack", cl::Hidden, cl::init(true),
122 : cl::desc("Disable scheduler's two-address hack"));
123 :
124 : static cl::opt<int> MaxReorderWindow(
125 : "max-sched-reorder", cl::Hidden, cl::init(6),
126 : cl::desc("Number of instructions to allow ahead of the critical path "
127 : "in sched=list-ilp"));
128 :
129 : static cl::opt<unsigned> AvgIPC(
130 : "sched-avg-ipc", cl::Hidden, cl::init(1),
131 : cl::desc("Average inst/cycle when no target itinerary exists."));
132 :
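// Illustration only: the schedulers and tuning flags registered above are
// selected from the llc command line, e.g. (hypothetical invocation, where
// foo.ll is a placeholder input module):
//
//   llc -pre-RA-sched=list-ilp -max-sched-reorder=4 foo.ll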
133 : namespace {
134 :
135 : //===----------------------------------------------------------------------===//
136 : /// ScheduleDAGRRList - The actual register reduction list scheduler
137 : /// implementation. This supports both top-down and bottom-up scheduling.
138 : ///
139 : class ScheduleDAGRRList : public ScheduleDAGSDNodes {
140 : private:
141 : /// NeedLatency - True if the scheduler will make use of latency information.
142 : bool NeedLatency;
143 :
144 : /// AvailableQueue - The priority queue to use for the available SUnits.
145 : SchedulingPriorityQueue *AvailableQueue;
146 :
147 : /// PendingQueue - This contains all of the instructions whose operands have
148 : /// been issued, but their results are not ready yet (due to the latency of
149 : /// the operation). Once the operands become available, the instruction is
150 : /// added to the AvailableQueue.
151 : std::vector<SUnit *> PendingQueue;
152 :
153 : /// HazardRec - The hazard recognizer to use.
154 : ScheduleHazardRecognizer *HazardRec;
155 :
156 : /// CurCycle - The current scheduler state corresponds to this cycle.
157 : unsigned CurCycle = 0;
158 :
159 : /// MinAvailableCycle - Cycle of the soonest available instruction.
160 : unsigned MinAvailableCycle;
161 :
162 : /// IssueCount - Count instructions issued in this cycle
163 : /// Currently valid only for bottom-up scheduling.
164 : unsigned IssueCount;
165 :
166 : /// LiveRegDefs - A set of physical registers and their definitions that are
167 : /// "live". These nodes must be scheduled before any other nodes that modify
168 : /// the registers can be scheduled.
169 : unsigned NumLiveRegs;
170 : std::unique_ptr<SUnit*[]> LiveRegDefs;
171 : std::unique_ptr<SUnit*[]> LiveRegGens;
172 :
173 : // Collect interferences between physical register use/defs.
174 : // Each interference is an SUnit and set of physical registers.
175 : SmallVector<SUnit*, 4> Interferences;
176 :
177 : using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
178 :
179 : LRegsMapT LRegsMap;
180 :
181 : /// Topo - A topological ordering for SUnits which permits fast IsReachable
182 : /// and similar queries.
183 : ScheduleDAGTopologicalSort Topo;
184 :
185 : // Hack to keep track of the inverse of FindCallSeqStart without more crazy
186 : // DAG crawling.
187 : DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
188 :
189 : public:
190 1269002 : ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
191 : SchedulingPriorityQueue *availqueue,
192 : CodeGenOpt::Level OptLevel)
193 1269002 : : ScheduleDAGSDNodes(mf),
194 : NeedLatency(needlatency), AvailableQueue(availqueue),
195 1269002 : Topo(SUnits, nullptr) {
196 1269002 : const TargetSubtargetInfo &STI = mf.getSubtarget();
197 1269002 : if (DisableSchedCycles || !NeedLatency)
198 1241781 : HazardRec = new ScheduleHazardRecognizer();
199 : else
200 27221 : HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
201 1269002 : }
202 :
203 3807006 : ~ScheduleDAGRRList() override {
204 1269002 : delete HazardRec;
205 1269002 : delete AvailableQueue;
206 2538004 : }
207 1269002 :
208 : void Schedule() override;
209 :
210 1269002 : ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
211 2538004 :
212 1269002 : /// IsReachable - Checks if SU is reachable from TargetSU.
213 1269002 : bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
214 1269002 : return Topo.IsReachable(SU, TargetSU);
215 : }
216 :
217 : /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
218 0 : /// create a cycle.
219 : bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
220 : return Topo.WillCreateCycle(SU, TargetSU);
221 : }
222 0 :
223 : /// AddPred - adds a predecessor edge to SUnit SU.
224 : /// This returns true if this is a new predecessor.
225 : /// Updates the topological ordering if required.
226 : void AddPred(SUnit *SU, const SDep &D) {
227 : Topo.AddPred(SU, D.getSUnit());
228 3423 : SU->addPred(D);
229 : }
230 :
231 : /// RemovePred - removes a predecessor edge from SUnit SU.
232 : /// This returns true if an edge was removed.
233 : /// Updates the topological ordering if required.
234 3600 : void RemovePred(SUnit *SU, const SDep &D) {
235 7200 : Topo.RemovePred(SU, D.getSUnit());
236 3600 : SU->removePred(D);
237 3600 : }
238 :
239 : private:
240 : bool isReady(SUnit *SU) {
241 : return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
242 1344 : AvailableQueue->isReady(SU);
243 2688 : }
244 1344 :
245 1344 : void ReleasePred(SUnit *SU, const SDep *PredEdge);
246 : void ReleasePredecessors(SUnit *SU);
247 : void ReleasePending();
248 0 : void AdvanceToCycle(unsigned NextCycle);
249 15049218 : void AdvancePastStalls(SUnit *SU);
250 0 : void EmitNode(SUnit *SU);
251 : void ScheduleNodeBottomUp(SUnit*);
252 : void CapturePred(SDep *PredEdge);
253 : void UnscheduleNodeBottomUp(SUnit*);
254 : void RestoreHazardCheckerBottomUp();
255 : void BacktrackBottomUp(SUnit*, SUnit*);
256 : SUnit *TryUnfoldSU(SUnit *);
257 : SUnit *CopyAndMoveSuccessors(SUnit*);
258 : void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
259 : const TargetRegisterClass*,
260 : const TargetRegisterClass*,
261 : SmallVectorImpl<SUnit*>&);
262 : bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
263 :
264 : void releaseInterferences(unsigned Reg = 0);
265 :
266 : SUnit *PickNodeToScheduleBottomUp();
267 : void ListScheduleBottomUp();
268 :
269 : /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
270 : /// Updates the topological ordering if required.
271 : SUnit *CreateNewSUnit(SDNode *N) {
272 : unsigned NumSUnits = SUnits.size();
273 : SUnit *NewNode = newSUnit(N);
274 : // Update the topological ordering.
275 : if (NewNode->NodeNum >= NumSUnits)
276 : Topo.InitDAGTopologicalSorting();
277 : return NewNode;
278 : }
279 376 :
280 376 : /// CreateClone - Creates a new SUnit from an existing one.
281 376 : /// Updates the topological ordering if required.
282 : SUnit *CreateClone(SUnit *N) {
283 376 : unsigned NumSUnits = SUnits.size();
284 376 : SUnit *NewNode = Clone(N);
285 376 : // Update the topological ordering.
286 : if (NewNode->NodeNum >= NumSUnits)
287 : Topo.InitDAGTopologicalSorting();
288 : return NewNode;
289 : }
290 274 :
291 274 : /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
292 274 : /// need actual latency information but the hybrid scheduler does.
293 : bool forceUnitLatencies() const override {
294 274 : return !NeedLatency;
295 274 : }
296 274 : };
297 :
298 : } // end anonymous namespace
299 :
300 : /// GetCostForDef - Looks up the register class and cost for a given definition.
301 16340128 : /// Typically this just means looking up the representative register class,
302 36122992 : /// but for untyped values (MVT::Untyped) it means inspecting the node's
303 : /// opcode to determine what register class is being generated.
304 : static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
305 : const TargetLowering *TLI,
306 : const TargetInstrInfo *TII,
307 : const TargetRegisterInfo *TRI,
308 : unsigned &RegClass, unsigned &Cost,
309 : const MachineFunction &MF) {
310 : MVT VT = RegDefPos.GetValue();
311 :
312 491120 : // Special handling for untyped values. These values can only come from
313 : // the expansion of custom DAG-to-DAG patterns.
314 : if (VT == MVT::Untyped) {
315 : const SDNode *Node = RegDefPos.GetNode();
316 :
317 : // Special handling for CopyFromReg of untyped values.
318 : if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
319 : unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
320 : const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
321 : RegClass = RC->getID();
322 491120 : Cost = 1;
323 1976 : return;
324 : }
325 :
326 1976 : unsigned Opcode = Node->getMachineOpcode();
327 160 : if (Opcode == TargetOpcode::REG_SEQUENCE) {
328 160 : unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
329 160 : const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
330 160 : RegClass = RC->getID();
331 421 : Cost = 1;
332 : return;
333 : }
334 :
335 1816 : unsigned Idx = RegDefPos.GetIdx();
336 522 : const MCInstrDesc Desc = TII->get(Opcode);
337 261 : const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
338 261 : RegClass = RC->getID();
339 261 : // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
340 261 : // better way to determine it.
341 : Cost = 1;
342 : } else {
343 1555 : RegClass = TLI->getRepRegClassFor(VT)->getID();
344 1555 : Cost = TLI->getRepRegClassCostFor(VT);
345 1555 : }
346 1555 : }
347 :
348 : /// Schedule - Schedule the DAG using list scheduling.
349 1555 : void ScheduleDAGRRList::Schedule() {
350 : LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
351 489144 : << " '" << BB->getName() << "' **********\n");
352 489144 :
353 : CurCycle = 0;
354 : IssueCount = 0;
355 : MinAvailableCycle =
356 : DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
357 1269002 : NumLiveRegs = 0;
358 : // Allocate slots for each physical register, plus one for a special register
359 : // to track the virtual resource of a calling sequence.
360 : LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
361 1269002 : LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
362 1269002 : CallSeqEndForStart.clear();
363 1269002 : assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
364 1269002 :
365 1269002 : // Build the scheduling graph.
366 : BuildSchedGraph(nullptr);
367 :
368 399161731 : LLVM_DEBUG(dump());
369 399161731 : Topo.InitDAGTopologicalSorting();
370 1269002 :
371 : AvailableQueue->initNodes(SUnits);
372 :
373 : HazardRec->Reset();
374 1269002 :
375 : // Execute the actual scheduling loop.
376 : ListScheduleBottomUp();
377 1269002 :
378 : AvailableQueue->releaseState();
379 1269002 :
380 : LLVM_DEBUG({
381 1269002 : dbgs() << "*** Final schedule ***\n";
382 : dumpSchedule();
383 : dbgs() << '\n';
384 1269002 : });
385 : }
386 1269001 :
387 : //===----------------------------------------------------------------------===//
388 : // Bottom-Up Scheduling
389 : //===----------------------------------------------------------------------===//
390 :
391 : /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
392 : /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
393 1269001 : void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
394 : SUnit *PredSU = PredEdge->getSUnit();
395 :
396 : #ifndef NDEBUG
397 : if (PredSU->NumSuccsLeft == 0) {
398 : dbgs() << "*** Scheduling failed! ***\n";
399 : dumpNode(*PredSU);
400 : dbgs() << " has been released too many times!\n";
401 19782864 : llvm_unreachable(nullptr);
402 19782864 : }
403 : #endif
404 : --PredSU->NumSuccsLeft;
405 :
406 : if (!forceUnitLatencies()) {
407 : // Updating predecessor's height. This is now the cycle when the
408 : // predecessor can be scheduled without causing a pipeline stall.
409 : PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
410 : }
411 :
412 19782864 : // If all the node's successors are scheduled, this node is ready
413 : // to be scheduled. Ignore the special EntrySU node.
414 19782864 : if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
415 : PredSU->isAvailable = true;
416 :
417 229184 : unsigned Height = PredSU->getHeight();
418 : if (Height < MinAvailableCycle)
419 : MinAvailableCycle = Height;
420 :
421 : if (isReady(PredSU)) {
422 19782864 : AvailableQueue->push(PredSU);
423 15049218 : }
424 : // CapturePred and others may have left the node in the pending queue, avoid
425 : // adding it twice.
426 15049218 : else if (!PredSU->isPending) {
427 8912324 : PredSU->isPending = true;
428 : PendingQueue.push_back(PredSU);
429 15049218 : }
430 15049218 : }
431 : }
432 :
433 : /// IsChainDependent - Test if Outer is reachable from Inner through
434 0 : /// chain dependencies.
435 0 : static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
436 0 : unsigned NestLevel,
437 : const TargetInstrInfo *TII) {
438 : SDNode *N = Outer;
439 19782864 : while (true) {
440 : if (N == Inner)
441 : return true;
442 : // For a TokenFactor, examine each operand. There may be multiple ways
443 2319 : // to get to the CALLSEQ_BEGIN, but we need to find the path with the
444 : // most nesting in order to ensure that we find the corresponding match.
445 : if (N->getOpcode() == ISD::TokenFactor) {
446 : for (const SDValue &Op : N->op_values())
447 : if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
448 4712 : return true;
449 : return false;
450 : }
451 : // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
452 : if (N->isMachineOpcode()) {
453 4711 : if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
454 1754 : ++NestLevel;
455 1330 : } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
456 : if (NestLevel == 0)
457 : return false;
458 : --NestLevel;
459 : }
460 4287 : }
461 3746 : // Otherwise, find the chain and continue climbing.
462 0 : for (const SDValue &Op : N->op_values())
463 3746 : if (Op.getValueType() == MVT::Other) {
464 1894 : N = Op.getNode();
465 : goto found_chain_operand;
466 0 : }
467 : return false;
468 : found_chain_operand:;
469 : if (N->getOpcode() == ISD::EntryToken)
470 11700 : return false;
471 11700 : }
472 : }
473 :
474 : /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
475 : /// the corresponding (lowered) CALLSEQ_BEGIN node.
476 : ///
477 : /// NestLevel and MaxNest are used in recursion to indicate the current level
478 : /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
479 : /// level seen so far.
480 : ///
481 : /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
482 : /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
483 : static SDNode *
484 : FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
485 : const TargetInstrInfo *TII) {
486 : while (true) {
487 : // For a TokenFactor, examine each operand. There may be multiple ways
488 : // to get to the CALLSEQ_BEGIN, but we need to find the path with the
489 : // most nesting in order to ensure that we find the corresponding match.
490 : if (N->getOpcode() == ISD::TokenFactor) {
491 : SDNode *Best = nullptr;
492 1114591 : unsigned BestMaxNest = MaxNest;
493 : for (const SDValue &Op : N->op_values()) {
494 : unsigned MyNestLevel = NestLevel;
495 : unsigned MyMaxNest = MaxNest;
496 : if (SDNode *New = FindCallSeqStart(Op.getNode(),
497 : MyNestLevel, MyMaxNest, TII))
498 5347447 : if (!Best || (MyMaxNest > BestMaxNest)) {
499 : Best = New;
500 41849 : BestMaxNest = MyMaxNest;
501 157127 : }
502 115278 : }
503 115278 : assert(Best);
504 115278 : MaxNest = BestMaxNest;
505 : return Best;
506 115241 : }
507 : // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
508 41849 : if (N->isMachineOpcode()) {
509 : if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
510 : ++NestLevel;
511 : MaxNest = std::max(MaxNest, NestLevel);
512 41849 : } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
513 41849 : assert(NestLevel != 0);
514 : --NestLevel;
515 : if (NestLevel == 0)
516 5305598 : return N;
517 3246347 : }
518 999314 : }
519 999314 : // Otherwise, find the chain and continue climbing.
520 2247033 : for (const SDValue &Op : N->op_values())
521 : if (Op.getValueType() == MVT::Other) {
522 1072706 : N = Op.getNode();
523 1072706 : goto found_chain_operand;
524 1072705 : }
525 : return nullptr;
526 : found_chain_operand:;
527 : if (N->getOpcode() == ISD::EntryToken)
528 11379035 : return nullptr;
529 11379035 : }
530 : }
531 :
532 : /// Call ReleasePred for each predecessor, then update register live def/gen.
533 : /// Always update LiveRegDefs for a register dependence even if the current SU
534 : /// also defines the register. This effectively creates one large live range
535 4232893 : /// across a sequence of two-address nodes. This is important because the
536 : /// entire chain must be scheduled together. Example:
537 : ///
538 : /// flags = (3) add
539 : /// flags = (2) addc flags
540 : /// flags = (1) addc flags
541 : ///
542 : /// results in
543 : ///
544 : /// LiveRegDefs[flags] = 3
545 : /// LiveRegGens[flags] = 1
546 : ///
547 : /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
548 : /// interference on flags.
549 : void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
550 : // Bottom up: release predecessors
551 : for (SDep &Pred : SU->Preds) {
552 : ReleasePred(SU, &Pred);
553 : if (Pred.isAssignedRegDep()) {
554 : // This is a physical register dependency and it's impossible or
555 : // expensive to copy the register. Make sure nothing that can
556 : // clobber the register is scheduled between the predecessor and
557 17587990 : // this node.
558 : SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
559 37370852 : assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
560 19782864 : "interference on register dependence");
561 : LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
562 : if (!LiveRegGens[Pred.getReg()]) {
563 : ++NumLiveRegs;
564 : LiveRegGens[Pred.getReg()] = SU;
565 : }
566 228489 : }
567 : }
568 :
569 228489 : // If we're scheduling a lowered CALLSEQ_END, find the corresponding
570 228489 : // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
571 223485 : // these nodes, to prevent other calls from being interscheduled with them.
572 446970 : unsigned CallResource = TRI->getNumRegs();
573 : if (!LiveRegDefs[CallResource])
574 : for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
575 : if (Node->isMachineOpcode() &&
576 : Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
577 : unsigned NestLevel = 0;
578 : unsigned MaxNest = 0;
579 : SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
580 17587988 : assert(N && "Must find call sequence start");
581 35175976 :
582 16046011 : SUnit *Def = &SUnits[N->getNodeId()];
583 14776870 : CallSeqEndForStart[Def] = SU;
584 8899277 :
585 999313 : ++NumLiveRegs;
586 999313 : LiveRegDefs[CallResource] = Def;
587 999313 : LiveRegGens[CallResource] = SU;
588 : break;
589 : }
590 999313 : }
591 999313 :
592 : /// Check to see if any of the pending instructions are ready to issue. If
593 999313 : /// so, add them to the available queue.
594 999313 : void ScheduleDAGRRList::ReleasePending() {
595 999313 : if (DisableSchedCycles) {
596 : assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
597 : return;
598 17587988 : }
599 :
600 : // If the available queue is empty, it is safe to reset MinAvailableCycle.
601 : if (AvailableQueue->empty())
602 16288428 : MinAvailableCycle = std::numeric_limits<unsigned>::max();
603 16288428 :
604 : // Check to see if any of the pending instructions are ready to issue. If
605 : // so, add them to the available queue.
606 : for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
607 : unsigned ReadyCycle = PendingQueue[i]->getHeight();
608 : if (ReadyCycle < MinAvailableCycle)
609 16288428 : MinAvailableCycle = ReadyCycle;
610 10078210 :
611 : if (PendingQueue[i]->isAvailable) {
612 : if (!isReady(PendingQueue[i]))
613 : continue;
614 32576856 : AvailableQueue->push(PendingQueue[i]);
615 0 : }
616 0 : PendingQueue[i]->isPending = false;
617 0 : PendingQueue[i] = PendingQueue.back();
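// Swap-erase: replace this slot with the last pending entry, shrink the
// queue, and revisit the same index on the next iteration.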
618 : PendingQueue.pop_back();
619 0 : --i; --e;
620 : }
621 : }
622 0 :
623 : /// Move the scheduler state forward by the specified number of Cycles.
624 0 : void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
625 0 : if (NextCycle <= CurCycle)
626 : return;
627 0 :
628 : IssueCount = 0;
629 : AvailableQueue->setCurCycle(NextCycle);
630 : if (!HazardRec->isEnabled()) {
631 : // Bypass lots of virtual calls in case of long latency.
632 47880727 : CurCycle = NextCycle;
633 47880727 : }
634 : else {
635 : for (; CurCycle != NextCycle; ++CurCycle) {
636 16287476 : HazardRec->RecedeCycle();
637 16287476 : }
638 16287476 : }
639 : // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
640 16236782 : // available Q to release pending nodes at least once before popping.
641 : ReleasePending();
642 : }
643 168868 :
644 118174 : /// Move the scheduler state forward until the specified node's dependents are
645 : /// ready and can be scheduled with no resource conflicts.
646 : void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
647 : if (DisableSchedCycles)
648 : return;
649 16287476 :
650 : // FIXME: Nodes such as CopyFromReg probably should not advance the current
651 : // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
652 : // has predecessors the cycle will be advanced when they are scheduled.
653 : // But given the crude nature of modeling latency though such nodes, we
654 16318988 : // currently need to treat these nodes like real instructions.
655 16318988 : // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
656 :
657 : unsigned ReadyCycle = SU->getHeight();
658 :
659 : // Bump CurCycle to account for latency. We assume the latency of other
660 : // available instructions may be hidden by the stall (not a full pipe stall).
661 : // This updates the hazard recognizer's cycle before reserving resources for
662 : // this instruction.
663 : AdvanceToCycle(ReadyCycle);
664 :
665 : // Calls are scheduled in their preceding cycle, so don't conflict with
666 : // hazards from instructions after the call. EmitNode will reset the
667 : // scoreboard state before emitting the call.
668 : if (SU->isCall)
669 : return;
670 :
671 16318987 : // FIXME: For resource conflicts in very long non-pipelined stages, we
672 : // should probably skip ahead here to avoid useless scoreboard checks.
673 : int Stalls = 0;
674 : while (true) {
675 : ScheduleHazardRecognizer::HazardType HT =
676 16318988 : HazardRec->getHazardType(SU, -Stalls);
677 :
678 : if (HT == ScheduleHazardRecognizer::NoHazard)
679 : break;
680 :
681 : ++Stalls;
682 : }
683 : AdvanceToCycle(CurCycle + Stalls);
684 15329800 : }
685 :
686 15329800 : /// Record this SUnit in the HazardRecognizer.
687 : /// Does not update CurCycle.
688 : void ScheduleDAGRRList::EmitNode(SUnit *SU) {
689 9741 : if (!HazardRec->isEnabled())
690 9741 : return;
691 15320059 :
692 : // Check for phys reg copy.
693 : if (!SU->getNode())
694 : return;
695 :
696 0 : switch (SU->getNode()->getOpcode()) {
697 0 : default:
698 0 : assert(SU->getNode()->isMachineOpcode() &&
699 : "This target-independent node should not be scheduled.");
700 : break;
701 0 : case ISD::MERGE_VALUES:
702 0 : case ISD::TokenFactor:
703 : case ISD::LIFETIME_START:
704 0 : case ISD::LIFETIME_END:
705 : case ISD::CopyToReg:
706 : case ISD::CopyFromReg:
707 : case ISD::EH_LABEL:
708 : // Noops don't affect the scoreboard state. Copies are likely to be
709 : // removed.
710 : return;
711 : case ISD::INLINEASM:
712 : // For inline asm, clear the pipeline state.
713 : HazardRec->Reset();
714 : return;
715 : }
716 : if (SU->isCall) {
717 : // Calls are scheduled with their preceding instructions. For bottom-up
718 : // scheduling, clear the pipeline state before emitting.
719 0 : HazardRec->Reset();
720 : }
721 0 :
722 0 : HazardRec->EmitInstruction(SU);
723 : }
724 0 :
725 : static void resetVRegCycle(SUnit *SU);
726 :
727 0 : /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
728 : /// count of its predecessors. If a predecessor pending count is zero, add it to
729 : /// the Available queue.
730 0 : void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
731 : LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
732 : LLVM_DEBUG(dumpNode(*SU));
733 :
734 : #ifndef NDEBUG
735 : if (CurCycle < SU->getHeight())
736 : LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
737 : << "] pipeline stall!\n");
738 16318988 : #endif
739 :
740 : // FIXME: Do not modify node height. It may interfere with
741 : // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
742 : // node its ready cycle can aid heuristics, and after scheduling it can
743 : // indicate the scheduled cycle.
744 : SU->setHeightToAtLeast(CurCycle);
745 :
746 : // Reserve resources for the scheduled instruction.
747 : EmitNode(SU);
748 :
749 : Sequence.push_back(SU);
750 :
751 : AvailableQueue->scheduledNode(SU);
752 16318988 :
753 : // If HazardRec is disabled, and each inst counts as one cycle, then
754 : // advance CurCycle before ReleasePredecessors to avoid useless pushes to
755 16318988 : // PendingQueue for schedulers that implement HasReadyFilter.
756 : if (!HazardRec->isEnabled() && AvgIPC < 2)
757 16318987 : AdvanceToCycle(CurCycle + 1);
758 :
759 16318988 : // Update liveness of predecessors before successors to avoid treating a
760 : // two-address node as a live range def.
761 : ReleasePredecessors(SU);
762 :
763 : // Release all the implicit physical register defs that are live.
764 16318987 : for (SDep &Succ : SU->Succs) {
765 16236716 : // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
766 : if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
767 : assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
768 : --NumLiveRegs;
769 16318987 : LiveRegDefs[Succ.getReg()] = nullptr;
770 : LiveRegGens[Succ.getReg()] = nullptr;
771 : releaseInterferences(Succ.getReg());
772 36151353 : }
773 : }
774 454348 : // Release the special call resource dependence, if this is the beginning
775 : // of a call.
776 222532 : unsigned CallResource = TRI->getNumRegs();
777 445064 : if (LiveRegDefs[CallResource] == SU)
778 222532 : for (const SDNode *SUNode = SU->getNode(); SUNode;
779 222532 : SUNode = SUNode->getGluedNode()) {
780 : if (SUNode->isMachineOpcode() &&
781 : SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
782 : assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
783 : --NumLiveRegs;
784 16318987 : LiveRegDefs[CallResource] = nullptr;
785 32637974 : LiveRegGens[CallResource] = nullptr;
786 1002616 : releaseInterferences(CallResource);
787 : }
788 1002616 : }
789 1000984 :
790 : resetVRegCycle(SU);
791 999314 :
792 999314 : SU->isScheduled = true;
793 999314 :
794 999314 : // Conditions under which the scheduler should eagerly advance the cycle:
795 : // (1) No available instructions
796 : // (2) All pipelines full, so available instructions must have hazards.
797 : //
798 16318987 : // If HazardRec is disabled, the cycle was pre-advanced before calling
799 : // ReleasePredecessors. In that case, IssueCount should remain 0.
800 16318987 : //
801 : // Check AvailableQueue after ReleasePredecessors in case of zero latency.
802 : if (HazardRec->isEnabled() || AvgIPC > 1) {
803 : if (SU->getNode() && SU->getNode()->isMachineOpcode())
804 : ++IssueCount;
805 : if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
806 : || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
807 : AdvanceToCycle(CurCycle + 1);
808 : }
809 : }
810 16318987 :
811 82272 : /// CapturePred - This does the opposite of ReleasePred. Since SU is being
812 58164 : /// unscheduled, increase the succ left count of its predecessors. Remove
813 82272 : /// them from AvailableQueue if necessary.
814 159579 : void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
815 4965 : SUnit *PredSU = PredEdge->getSUnit();
816 : if (PredSU->isAvailable) {
817 16318987 : PredSU->isAvailable = false;
818 : if (!PredSU->isPending)
819 : AvailableQueue->remove(PredSU);
820 : }
821 :
822 0 : assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
823 : "NumSuccsLeft will overflow!");
824 0 : ++PredSU->NumSuccsLeft;
825 0 : }
826 0 :
827 0 : /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
828 : /// its predecessor states to reflect the change.
829 : void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
830 : LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
831 : LLVM_DEBUG(dumpNode(*SU));
832 0 :
833 0 : for (SDep &Pred : SU->Preds) {
834 : CapturePred(&Pred);
835 : if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
836 : assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
837 8840 : assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
838 : "Physical register dependency violated?");
839 : --NumLiveRegs;
840 : LiveRegDefs[Pred.getReg()] = nullptr;
841 22127 : LiveRegGens[Pred.getReg()] = nullptr;
842 13287 : releaseInterferences(Pred.getReg());
843 3876 : }
844 : }
845 :
846 : // Reclaim the special call resource dependence, if this is the beginning
847 952 : // of a call.
848 1904 : unsigned CallResource = TRI->getNumRegs();
849 952 : for (const SDNode *SUNode = SU->getNode(); SUNode;
850 952 : SUNode = SUNode->getGluedNode()) {
851 : if (SUNode->isMachineOpcode() &&
852 : SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
853 : SUnit *SeqEnd = CallSeqEndForStart[SU];
854 : assert(SeqEnd && "Call sequence start/end must be known");
855 : assert(!LiveRegDefs[CallResource]);
856 8840 : assert(!LiveRegGens[CallResource]);
857 10752 : ++NumLiveRegs;
858 : LiveRegDefs[CallResource] = SU;
859 10719 : LiveRegGens[CallResource] = SeqEnd;
860 8618 : }
861 1 : }
862 :
863 : // Release the special call resource dependence, if this is the end
864 : // of a call.
865 1 : if (LiveRegGens[CallResource] == SU)
866 2 : for (const SDNode *SUNode = SU->getNode(); SUNode;
867 1 : SUNode = SUNode->getGluedNode()) {
868 : if (SUNode->isMachineOpcode() &&
869 : SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
870 : assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
871 : assert(LiveRegDefs[CallResource]);
872 : assert(LiveRegGens[CallResource]);
873 17680 : --NumLiveRegs;
874 0 : LiveRegDefs[CallResource] = nullptr;
875 : LiveRegGens[CallResource] = nullptr;
876 0 : releaseInterferences(CallResource);
877 0 : }
878 : }
879 :
880 : for (auto &Succ : SU->Succs) {
881 0 : if (Succ.isAssignedRegDep()) {
882 0 : auto Reg = Succ.getReg();
883 0 : if (!LiveRegDefs[Reg])
884 0 : ++NumLiveRegs;
885 : // This becomes the nearest def. Note that an earlier def may still be
886 : // pending if this is a two-address node.
887 : LiveRegDefs[Reg] = SU;
888 71536 :
889 : // Update LiveRegGen only if was empty before this unscheduling.
890 : // This is to avoid incorrect updating LiveRegGen set in previous run.
891 1108 : if (!LiveRegGens[Reg]) {
892 0 : // Find the successor with the lowest height.
893 : LiveRegGens[Reg] = Succ.getSUnit();
894 : for (auto &Succ2 : SU->Succs) {
895 554 : if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
896 : Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
897 : LiveRegGens[Reg] = Succ2.getSUnit();
898 : }
899 554 : }
900 : }
901 0 : }
902 0 : if (SU->getHeight() < MinAvailableCycle)
903 0 : MinAvailableCycle = SU->getHeight();
904 0 :
905 0 : SU->setHeightDirty();
906 : SU->isScheduled = false;
907 : SU->isAvailable = true;
908 : if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
909 : // Don't make available until backtracking is complete.
910 8840 : SU->isPending = true;
911 8231 : PendingQueue.push_back(SU);
912 : }
913 8840 : else {
914 8840 : AvailableQueue->push(SU);
915 8840 : }
916 8840 : AvailableQueue->unscheduledNode(SU);
917 : }
918 0 :
919 0 : /// After backtracking, the hazard checker needs to be restored to a state
920 : /// corresponding the current cycle.
921 : void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
922 8840 : HazardRec->Reset();
923 :
924 8840 : unsigned LookAhead = std::min((unsigned)Sequence.size(),
925 8840 : HazardRec->getMaxLookAhead());
926 : if (LookAhead == 0)
927 : return;
928 :
929 952 : std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
930 952 : unsigned HazardCycle = (*I)->getHeight();
931 : for (auto E = Sequence.end(); I != E; ++I) {
932 952 : SUnit *SU = *I;
933 952 : for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
934 952 : HazardRec->RecedeCycle();
935 : }
936 : EmitNode(SU);
937 : }
938 10 : }
939 244 :
940 : /// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes back through
941 : /// BtSU so that SU can be scheduled.
942 154 : void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
943 : SUnit *OldSU = Sequence.back();
944 234 : while (true) {
945 : Sequence.pop_back();
946 : // FIXME: use ready cycle instead of height
947 : CurCycle = OldSU->getHeight();
948 : UnscheduleNodeBottomUp(OldSU);
949 : AvailableQueue->setCurCycle(CurCycle);
950 0 : if (OldSU == BtSU)
951 0 : break;
952 : OldSU = Sequence.back();
953 0 : }
954 :
955 0 : assert(!SU->isSucc(OldSU) && "Something is wrong!");
956 0 :
957 0 : RestoreHazardCheckerBottomUp();
958 0 :
959 : ReleasePending();
960 0 :
961 : ++NumBacktracks;
962 : }
963 :
964 : static bool isOperandOf(const SUnit *SU, SDNode *N) {
965 0 : for (const SDNode *SUNode = SU->getNode(); SUNode;
966 : SUNode = SUNode->getGluedNode()) {
967 0 : if (SUNode->isOperandOf(N))
968 : return true;
969 : }
970 0 : return false;
971 : }
972 201 :
973 : /// TryUnfoldSU - Attempt to unfold a load that was folded into SU.
974 : SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
975 201 : SDNode *N = SU->getNode();
976 : // Use while over if to ease fall through.
977 : SmallVector<SDNode *, 2> NewNodes;
978 : if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
979 : return nullptr;
980 :
981 : // unfolding an x86 DEC64m operation results in store, dec, load which
982 144 : // can't be handled here so quit
983 144 : if (NewNodes.size() == 3)
984 : return nullptr;
985 :
986 144 : assert(NewNodes.size() == 2 && "Expected a load folding node!");
987 :
988 : N = NewNodes[1];
989 : SDNode *LoadNode = NewNodes[0];
990 : unsigned NumVals = N->getNumValues();
991 144 : unsigned OldNumVals = SU->getNode()->getNumValues();
992 :
993 : // LoadNode may already exist. This can happen when there is another
994 : // load from the same location and producing the same type of value
995 : // but it has different alignment or volatileness.
996 137 : bool isNewLoad = true;
997 137 : SUnit *LoadSU;
998 137 : if (LoadNode->getNodeId() != -1) {
999 137 : LoadSU = &SUnits[LoadNode->getNodeId()];
1000 : // If LoadSU has already been scheduled, we should clone it but
1001 : // this would negate the benefit to unfolding so just return SU.
1002 : if (LoadSU->isScheduled)
1003 : return SU;
1004 : isNewLoad = false;
1005 : } else {
1006 137 : LoadSU = CreateNewSUnit(LoadNode);
1007 2 : LoadNode->setNodeId(LoadSU->NodeNum);
1008 :
1009 : InitNumRegDefsLeft(LoadSU);
1010 2 : computeLatency(LoadSU);
1011 : }
1012 :
1013 : bool isNewN = true;
1014 135 : SUnit *NewSU;
1015 135 : // This can only happen when isNewLoad is false.
1016 : if (N->getNodeId() != -1) {
1017 135 : NewSU = &SUnits[N->getNodeId()];
1018 135 : // If NewSU has already been scheduled, we need to clone it, but this
1019 : // negates the benefit to unfolding so just return SU.
1020 : if (NewSU->isScheduled)
1021 : return SU;
1022 : isNewN = false;
1023 : } else {
1024 136 : NewSU = CreateNewSUnit(N);
1025 1 : N->setNodeId(NewSU->NodeNum);
1026 :
1027 : const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1028 1 : for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1029 : if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1030 : NewSU->isTwoAddress = true;
1031 : break;
1032 135 : }
1033 135 : }
1034 : if (MCID.isCommutable())
1035 270 : NewSU->isCommutable = true;
1036 480 :
1037 : InitNumRegDefsLeft(NewSU);
1038 60 : computeLatency(NewSU);
1039 60 : }
1040 :
1041 : LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1042 270 :
1043 116 : // Now that we are committed to unfolding replace DAG Uses.
1044 : for (unsigned i = 0; i != NumVals; ++i)
1045 135 : DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1046 135 : DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1047 : SDValue(LoadNode, 1));
1048 :
1049 : // Record all the edges to and from the old SU, by category.
1050 : SmallVector<SDep, 4> ChainPreds;
1051 : SmallVector<SDep, 4> ChainSuccs;
1052 333 : SmallVector<SDep, 4> LoadPreds;
1053 591 : SmallVector<SDep, 4> NodePreds;
1054 408 : SmallVector<SDep, 4> NodeSuccs;
1055 : for (SDep &Pred : SU->Preds) {
1056 : if (Pred.isCtrl())
1057 : ChainPreds.push_back(Pred);
1058 : else if (isOperandOf(Pred.getSUnit(), LoadNode))
1059 : LoadPreds.push_back(Pred);
1060 : else
1061 : NodePreds.push_back(Pred);
1062 : }
1063 447 : for (SDep &Succ : SU->Succs) {
1064 311 : if (Succ.isCtrl())
1065 110 : ChainSuccs.push_back(Succ);
1066 201 : else
1067 140 : NodeSuccs.push_back(Succ);
1068 : }
1069 61 :
1070 : // Now assign edges to the newly-created nodes.
1071 582 : for (const SDep &Pred : ChainPreds) {
1072 446 : RemovePred(SU, Pred);
1073 245 : if (isNewLoad)
1074 : AddPred(LoadSU, Pred);
1075 201 : }
1076 : for (const SDep &Pred : LoadPreds) {
1077 : RemovePred(SU, Pred);
1078 : if (isNewLoad)
1079 246 : AddPred(LoadSU, Pred);
1080 110 : }
1081 110 : for (const SDep &Pred : NodePreds) {
1082 110 : RemovePred(SU, Pred);
1083 : AddPred(NewSU, Pred);
1084 276 : }
1085 140 : for (SDep D : NodeSuccs) {
1086 140 : SUnit *SuccDep = D.getSUnit();
1087 140 : D.setSUnit(SU);
1088 : RemovePred(SuccDep, D);
1089 197 : D.setSUnit(NewSU);
1090 61 : AddPred(SuccDep, D);
1091 61 : // Balance register pressure.
1092 : if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1093 337 : !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1094 : --NewSU->NumRegDefsLeft;
1095 : }
1096 201 : for (SDep D : ChainSuccs) {
1097 : SUnit *SuccDep = D.getSUnit();
1098 201 : D.setSUnit(SU);
1099 : RemovePred(SuccDep, D);
1100 201 : if (isNewLoad) {
1101 201 : D.setSUnit(LoadSU);
1102 0 : AddPred(SuccDep, D);
1103 : }
1104 381 : }
1105 :
1106 : // Add a data dependency to reflect that NewSU reads the value defined
1107 245 : // by LoadSU.
1108 245 : SDep D(LoadSU, SDep::Data, 0);
1109 : D.setLatency(LoadSU->Latency);
1110 245 : AddPred(NewSU, D);
1111 :
1112 : if (isNewLoad)
1113 : AvailableQueue->addNode(LoadSU);
1114 : if (isNewN)
1115 : AvailableQueue->addNode(NewSU);
1116 :
1117 136 : ++NumUnfolds;
1118 136 :
1119 : if (NewSU->NumSuccsLeft == 0)
1120 136 : NewSU->isAvailable = true;
1121 135 :
1122 136 : return NewSU;
1123 135 : }
1124 :
1125 : /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1126 : /// successors to the newly created node.
1127 136 : SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1128 126 : SDNode *N = SU->getNode();
1129 : if (!N)
1130 : return nullptr;
1131 :
1132 : LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1133 : LLVM_DEBUG(dumpNode(*SU));
1134 :
1135 453 : if (N->getGluedNode() &&
1136 453 : !TII->canCopyGluedNodeDuringSchedule(N)) {
1137 453 : LLVM_DEBUG(
1138 : dbgs()
1139 : << "Giving up because it has incoming glue and the target does not "
1140 : "want to copy it\n");
1141 : return nullptr;
1142 : }
1143 48 :
1144 48 : SUnit *NewSU;
1145 : bool TryUnfold = false;
1146 : for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1147 : MVT VT = N->getSimpleValueType(i);
1148 : if (VT == MVT::Glue) {
1149 : LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1150 : return nullptr;
1151 : } else if (VT == MVT::Other)
1152 : TryUnfold = true;
1153 : }
1154 1135 : for (const SDValue &Op : N->op_values()) {
1155 : MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1156 728 : if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1157 : LLVM_DEBUG(
1158 : dbgs() << "Giving up because one of the operands is glue and "
1159 728 : "the target does not want to copy it\n");
1160 : return nullptr;
1161 : }
1162 1936 : }
1163 1529 :
1164 1529 : // If possible unfold instruction.
1165 : if (TryUnfold) {
1166 : SUnit *UnfoldSU = TryUnfoldSU(SU);
1167 : if (!UnfoldSU)
1168 : return nullptr;
1169 : SU = UnfoldSU;
1170 : N = SU->getNode();
1171 : // If this can be scheduled don't bother duplicating and just return
1172 : if (SU->NumSuccsLeft == 0)
1173 407 : return SU;
1174 144 : }
1175 144 :
1176 : LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1177 : NewSU = CreateClone(SU);
1178 :
1179 : // New SUnit has the exact same predecessors.
1180 137 : for (SDep &Pred : SU->Preds)
1181 : if (!Pred.isArtificial())
1182 : AddPred(NewSU, Pred);
1183 :
1184 : // Only copy scheduled successors. Cut them from old node's successor
1185 274 : // list and move them over.
1186 : SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1187 : for (SDep &Succ : SU->Succs) {
1188 661 : if (Succ.isArtificial())
1189 : continue;
1190 387 : SUnit *SuccSU = Succ.getSUnit();
1191 : if (SuccSU->isScheduled) {
1192 : SDep D = Succ;
1193 : D.setSUnit(NewSU);
1194 : AddPred(SuccSU, D);
1195 1252 : D.setSUnit(SU);
1196 : DelDeps.push_back(std::make_pair(SuccSU, D));
1197 : }
1198 : }
1199 978 : for (auto &DelDep : DelDeps)
1200 415 : RemovePred(DelDep.first, DelDep.second);
1201 :
1202 415 : AvailableQueue->updateNode(SU);
1203 : AvailableQueue->addNode(NewSU);
1204 415 :
1205 : ++NumDups;
1206 : return NewSU;
1207 689 : }
1208 415 :
1209 : /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1210 274 : /// scheduled successors of the given SUnit to the last copy.
1211 274 : void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1212 : const TargetRegisterClass *DestRC,
1213 : const TargetRegisterClass *SrcRC,
1214 : SmallVectorImpl<SUnit*> &Copies) {
1215 : SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1216 : CopyFromSU->CopySrcRC = SrcRC;
1217 : CopyFromSU->CopyDstRC = DestRC;
1218 :
1219 53 : SUnit *CopyToSU = CreateNewSUnit(nullptr);
1220 : CopyToSU->CopySrcRC = DestRC;
1221 : CopyToSU->CopyDstRC = SrcRC;
1222 :
1223 53 : // Only copy scheduled successors. Cut them from old node's successor
1224 53 : // list and move them over.
1225 53 : SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1226 : for (SDep &Succ : SU->Succs) {
1227 53 : if (Succ.isArtificial())
1228 53 : continue;
1229 53 : SUnit *SuccSU = Succ.getSUnit();
1230 : if (SuccSU->isScheduled) {
1231 : SDep D = Succ;
1232 : D.setSUnit(CopyToSU);
1233 : AddPred(SuccSU, D);
1234 236 : DelDeps.push_back(std::make_pair(SuccSU, Succ));
1235 : }
1236 : else {
1237 : // Avoid scheduling the def-side copy before other successors. Otherwise
1238 183 : // we could introduce another physreg interference on the copy and
1239 93 : // continue inserting copies indefinitely.
1240 93 : AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1241 93 : }
1242 93 : }
1243 : for (auto &DelDep : DelDeps)
1244 : RemovePred(DelDep.first, DelDep.second);
1245 :
1246 : SDep FromDep(SU, SDep::Data, Reg);
1247 : FromDep.setLatency(SU->Latency);
1248 180 : AddPred(CopyFromSU, FromDep);
1249 : SDep ToDep(CopyFromSU, SDep::Data, 0);
1250 : ToDep.setLatency(CopyFromSU->Latency);
1251 146 : AddPred(CopyToSU, ToDep);
1252 93 :
1253 : AvailableQueue->updateNode(SU);
1254 : AvailableQueue->addNode(CopyFromSU);
1255 53 : AvailableQueue->addNode(CopyToSU);
1256 53 : Copies.push_back(CopyFromSU);
1257 53 : Copies.push_back(CopyToSU);
1258 53 :
1259 53 : ++NumPRCopies;
1260 : }
1261 53 :
1262 53 : /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1263 53 : /// definition of the specified node.
1264 53 : /// FIXME: Move to SelectionDAG?
1265 53 : static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1266 : const TargetInstrInfo *TII) {
1267 : unsigned NumRes;
1268 53 : if (N->getOpcode() == ISD::CopyFromReg) {
1269 : // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1270 : NumRes = 1;
1271 : } else {
1272 : const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1273 453 : assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1274 : NumRes = MCID.getNumDefs();
1275 : for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1276 453 : if (Reg == *ImpDef)
1277 : break;
1278 : ++NumRes;
1279 : }
1280 439 : }
1281 : return N->getSimpleValueType(NumRes);
1282 439 : }
1283 441 :
1284 441 : /// CheckForLiveRegDef - Return true and update live register vector if the
1285 : /// specified register def of the specified SUnit clobbers any "live" registers.
1286 2 : static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1287 : SUnit **LiveRegDefs,
1288 : SmallSet<unsigned, 4> &RegAdded,
1289 453 : SmallVectorImpl<unsigned> &LRegs,
1290 : const TargetRegisterInfo *TRI) {
1291 : for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1292 :
1293 : // Check if Ref is live.
1294 3331846 : if (!LiveRegDefs[*AliasI]) continue;
1295 :
1296 : // Allow multiple uses of the same def.
1297 : if (LiveRegDefs[*AliasI] == SU) continue;
1298 :
1299 16573388 : // Add Reg to the set of interfering live regs.
1300 : if (RegAdded.insert(*AliasI).second) {
1301 : LRegs.push_back(*AliasI);
1302 13241542 : }
1303 : }
1304 : }
1305 238836 :
1306 : /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1307 : /// by RegMask, and add them to LRegs.
1308 11592 : static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1309 17996 : ArrayRef<SUnit*> LiveRegDefs,
1310 : SmallSet<unsigned, 4> &RegAdded,
1311 : SmallVectorImpl<unsigned> &LRegs) {
1312 3331846 : // Look at all live registers. Skip Reg0 and the special CallResource.
1313 : for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1314 : if (!LiveRegDefs[i]) continue;
1315 : if (LiveRegDefs[i] == SU) continue;
1316 1078 : if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1317 : if (RegAdded.insert(i).second)
1318 : LRegs.push_back(i);
1319 : }
1320 : }
1321 310153 :
1322 618150 : /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1323 87 : static const uint32_t *getNodeRegMask(const SDNode *N) {
1324 87 : for (const SDValue &Op : N->op_values())
1325 87 : if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1326 8 : return RegOp->getRegMask();
1327 : return nullptr;
1328 1078 : }
1329 :
1330 : /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1331 : /// scheduling of the given node to satisfy live physical register dependencies.
1332 12100047 : /// If the specific node is the last one that's available to schedule, do
1333 9591337 : /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1334 1082 : bool ScheduleDAGRRList::
1335 : DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1336 : if (NumLiveRegs == 0)
1337 : return false;
1338 :
1339 : SmallSet<unsigned, 4> RegAdded;
1340 : // If this node would clobber any "live" register, then it's not ready.
1341 : //
1342 16328529 : // If SU is the currently live definition of the same register that it uses,
1343 : // then we are free to schedule it.
1344 16328529 : for (SDep &Pred : SU->Preds) {
1345 : if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1346 : CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1347 2775795 : RegAdded, LRegs, TRI);
1348 : }
1349 :
1350 : for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1351 : if (Node->getOpcode() == ISD::INLINEASM) {
1352 4896872 : // Inline asm can clobber physical defs.
1353 20216 : unsigned NumOps = Node->getNumOperands();
1354 15980 : if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1355 : --NumOps; // Ignore the glue operand.
1356 :
1357 : for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1358 2810863 : unsigned Flags =
1359 2810795 : cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1360 : unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1361 4 :
1362 8 : ++i; // Skip the ID value.
1363 : if (InlineAsm::isRegDefKind(Flags) ||
1364 : InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1365 14 : InlineAsm::isClobberKind(Flags)) {
1366 : // Check for def of register or earlyclobber register.
1367 30 : for (; NumVals; --NumVals, ++i) {
1368 : unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1369 : if (TargetRegisterInfo::isPhysicalRegister(Reg))
1370 10 : CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1371 5 : }
1372 15 : } else
1373 : i += NumVals;
1374 : }
1375 16 : continue;
1376 16 : }
1377 8 :
1378 6 : if (!Node->isMachineOpcode())
1379 : continue;
1380 : // If we're in the middle of scheduling a call, don't begin scheduling
1381 2 : // another call. Also, don't allow any physical registers to be live across
1382 : // the call.
1383 : if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1384 : // Check the special calling-sequence resource.
1385 : unsigned CallResource = TRI->getNumRegs();
1386 2810791 : if (LiveRegDefs[CallResource]) {
1387 : SDNode *Gen = LiveRegGens[CallResource]->getNode();
1388 : while (SDNode *Glued = Gen->getGluedNode())
1389 : Gen = Glued;
1390 : if (!IsChainDependent(Gen, Node, 0, TII) &&
1391 2509784 : RegAdded.insert(CallResource).second)
1392 : LRegs.push_back(CallResource);
1393 1077 : }
1394 2154 : }
1395 989 : if (const uint32_t *RegMask = getNodeRegMask(Node))
1396 : CheckForLiveRegDefMasked(SU, RegMask,
1397 : makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1398 989 : RegAdded, LRegs);
1399 988 :
1400 988 : const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1401 : if (MCID.hasOptionalDef()) {
1402 : // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1403 2509784 : // This operand can be either a def of CPSR, if the S bit is set; or a use
1404 1078 : // of %noreg. When the OptionalDef is set to a valid register, we need to
1405 1078 : // handle it in the same way as an ImplicitDef.
1406 : for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1407 : if (MCID.OpInfo[i].isOptionalDef()) {
1408 5019568 : const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1409 5019568 : unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1410 : CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1411 : }
1412 : }
1413 : if (!MCID.ImplicitDefs)
1414 10787 : continue;
1415 3889 : for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1416 880 : CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1417 440 : }
1418 880 :
1419 : return !LRegs.empty();
1420 : }
1421 2509784 :
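/// releaseInterferences - Release nodes recorded in Interferences whose set of
/// blocking registers includes Reg (or all of them when Reg is 0), making them
/// candidates for the AvailableQueue again.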
1422 : void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1423 4670939 : // Add the nodes that aren't ready back onto the available list.
1424 6646826 : for (unsigned i = Interferences.size(); i > 0; --i) {
1425 : SUnit *SU = Interferences[i-1];
1426 : LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1427 2775795 : if (Reg) {
1428 : SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1429 : if (!is_contained(LRegs, Reg))
1430 1222798 : continue;
1431 : }
1432 1232792 : SU->isPending = false;
1433 9994 : // The interfering node may no longer be available due to backtracking.
1434 9994 : // Furthermore, it may have been made available again, in which case it is
1435 9994 : // now already in the AvailableQueue.
1436 : if (SU->isAvailable && !SU->NodeQueueId) {
1437 9994 : LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1438 2 : AvailableQueue->push(SU);
1439 : }
1440 9992 : if (i < Interferences.size())
1441 : Interferences[i-1] = Interferences.back();
1442 : Interferences.pop_back();
1443 : LRegsMap.erase(LRegsPos);
1444 9992 : }
1445 : }
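
The loop above removes matching entries while walking Interferences backwards, filling each vacated slot from the back of the vector so every erase is O(1) and nothing is shifted. A standalone sketch of the same swap-with-last-and-pop idiom, assuming only the standard library; the helper name and the predicate are illustrative, not part of this file:

#include <vector>

// Erase every element matching Pred in O(n), without preserving order:
// walk backwards and overwrite a removed slot with the current last element.
template <class T, class Predicate>
void eraseUnorderedIf(std::vector<T> &V, Predicate Pred) {
  for (auto I = V.size(); I > 0; --I) {
    if (!Pred(V[I - 1]))
      continue;
    if (I < V.size())
      V[I - 1] = V.back();
    V.pop_back();
  }
}

// Usage: eraseUnorderedIf(Nums, [](int N) { return N % 2 == 0; });  // drop even values
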
1446 8325 :
1447 : /// Return a node that can be scheduled in this cycle. Requirements:
1448 19984 : /// (1) Ready: latency has been satisfied
1449 0 : /// (2) No Hazards: resources are available
1450 : /// (3) No Interferences: may unschedule to break register interferences.
1451 : SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1452 : SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1453 1222798 : auto FindAvailableNode = [&]() {
1454 : while (CurSU) {
1455 : SmallVector<unsigned, 4> LRegs;
1456 : if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1457 : break;
1458 : LLVM_DEBUG(dbgs() << " Interfering reg ";
1459 16318987 : if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1460 16318987 : else dbgs() << printReg(LRegs[0], TRI);
1461 : dbgs() << " SU #" << CurSU->NodeNum << '\n');
1462 : std::pair<LRegsMapT::iterator, bool> LRegsPair =
1463 : LRegsMap.insert(std::make_pair(CurSU, LRegs));
1464 : if (LRegsPair.second) {
1465 : CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1466 : Interferences.push_back(CurSU);
1467 : }
1468 : else {
1469 : assert(CurSU->isPending && "Interferences are pending");
1470 : // Update the interference with current live regs.
1471 : LRegsPair.first->second = LRegs;
1472 : }
1473 : CurSU = AvailableQueue->pop();
1474 : }
1475 : };
1476 : FindAvailableNode();
1477 : if (CurSU)
1478 : return CurSU;
1479 :
1480 : // All candidates are delayed due to live physical reg dependencies.
1481 : // Try backtracking, code duplication, or inserting cross class copies
1482 : // to resolve it.
1483 16318988 : for (SUnit *TrySU : Interferences) {
1484 16318988 : SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1485 16318988 :
1486 : // Try unscheduling up to the point where it's safe to schedule
1487 : // this node.
1488 : SUnit *BtSU = nullptr;
1489 : unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1490 : for (unsigned Reg : LRegs) {
1491 3876 : if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1492 3423 : BtSU = LiveRegGens[Reg];
1493 : LiveCycle = BtSU->getHeight();
1494 : }
1495 : }
1496 : if (!WillCreateCycle(TrySU, BtSU)) {
1497 : // BacktrackBottomUp mutates Interferences!
1498 6846 : BacktrackBottomUp(TrySU, BtSU);
1499 6846 :
1500 3423 : // Force the current node to be scheduled before the node that
1501 : // requires the physical reg dep.
1502 : if (BtSU->isAvailable) {
1503 : BtSU->isAvailable = false;
1504 3423 : if (!BtSU->isPending)
1505 : AvailableQueue->remove(BtSU);
1506 952 : }
1507 : LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1508 : << ") to SU(" << TrySU->NodeNum << ")\n");
1509 : AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1510 952 :
1511 952 : // If one or more successors has been unscheduled, then the current
1512 952 : // node is no longer available.
1513 952 : if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1514 : LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1515 : CurSU = AvailableQueue->pop();
1516 : } else {
1517 952 : LLVM_DEBUG(dbgs() << "TrySU available\n");
1518 : // Available and in AvailableQueue
1519 : AvailableQueue->remove(TrySU);
1520 : CurSU = TrySU;
1521 952 : }
1522 : FindAvailableNode();
1523 299 : // Interferences has been mutated. We must break.
1524 : break;
1525 : }
1526 : }
1527 653 :
1528 653 : if (!CurSU) {
1529 : // Can't backtrack. If it's too expensive to copy the value, then try
1530 : // duplicating the nodes that produce these "too expensive to copy"
1531 : // values to break the dependency. In case even that doesn't work,
1532 952 : // insert cross class copies.
1533 : // If it's not too expensive, i.e. cost != -1, issue copies.
1534 : SUnit *TrySU = Interferences[0];
1535 : SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1536 1405 : assert(LRegs.size() == 1 && "Can't handle this yet!");
1537 : unsigned Reg = LRegs[0];
1538 : SUnit *LRDef = LiveRegDefs[Reg];
1539 : MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1540 : const TargetRegisterClass *RC =
1541 : TRI->getMinimalPhysRegClass(Reg, VT);
1542 453 : const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1543 453 :
1544 : // If cross copy register class is the same as RC, then it must be possible
1545 : // to copy the value directly. Do not try to duplicate the def.
1546 : // If cross copy register class is not the same as RC, then it's possible to
1547 : // copy the value but it requires cross register class copies and it is
1548 : // expensive.
1549 453 : // If cross copy register class is null, then it's not possible to copy
1550 453 : // the value at all.
1551 : SUnit *NewDef = nullptr;
1552 : if (DestRC != RC) {
1553 : NewDef = CopyAndMoveSuccessors(LRDef);
1554 : if (!DestRC && !NewDef)
1555 : report_fatal_error("Can't handle live physical register dependency!");
1556 : }
1557 : if (!NewDef) {
1558 : // Issue copies, these can be expensive cross register class copies.
1559 : SmallVector<SUnit*, 2> Copies;
1560 453 : InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1561 453 : LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1562 453 : << " to SU #" << Copies.front()->NodeNum << "\n");
1563 0 : AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1564 : NewDef = Copies.back();
1565 453 : }
1566 :
1567 : LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1568 53 : << " to SU #" << TrySU->NodeNum << "\n");
1569 : LiveRegDefs[Reg] = NewDef;
1570 : AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1571 106 : TrySU->isAvailable = false;
1572 53 : CurSU = NewDef;
1573 : }
1574 : assert(CurSU && "Unable to resolve live physical register dependencies!");
1575 : return CurSU;
1576 : }
1577 453 :
1578 906 : /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1579 453 : /// schedulers.
1580 453 : void ScheduleDAGRRList::ListScheduleBottomUp() {
1581 : // Release any predecessors of the special Exit node.
1582 : ReleasePredecessors(&ExitSU);
1583 1405 :
1584 : // Add root to Available queue.
1585 : if (!SUnits.empty()) {
1586 : SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1587 : assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1588 1269002 : RootSU->isAvailable = true;
1589 : AvailableQueue->push(RootSU);
1590 1269002 : }
1591 :
1592 : // While Available queue is not empty, grab the node with the highest
1593 1269002 : // priority. If it is not ready put it back. Schedule the node.
1594 1265025 : Sequence.reserve(SUnits.size());
1595 : while (!AvailableQueue->empty() || !Interferences.empty()) {
1596 1265025 : LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1597 1265025 : AvailableQueue->dump(this));
1598 :
1599 : // Pick the best node to schedule taking all constraints into
1600 : // consideration.
1601 : SUnit *SU = PickNodeToScheduleBottomUp();
1602 2538004 :
1603 17587989 : AdvancePastStalls(SU);
1604 :
1605 : ScheduleNodeBottomUp(SU);
1606 :
1607 : while (AvailableQueue->empty() && !PendingQueue.empty()) {
1608 : // Advance the cycle to free resources. Skip ahead to the next ready SU.
1609 16318987 : assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1610 : "MinAvailableCycle uninitialized");
1611 16318988 : AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1612 : }
1613 16318988 : }
1614 :
1615 16318987 : // Reverse the order if it is bottom up.
1616 : std::reverse(Sequence.begin(), Sequence.end());
1617 :
1618 : #ifndef NDEBUG
1619 0 : VerifyScheduledSequence(/*isBottomUp=*/true);
1620 : #endif
1621 : }
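
Stripped of hazards, stalls and the interference handling above, ListScheduleBottomUp is the classic bottom-up list-scheduling skeleton: seed the available set with nodes that have no successors, repeatedly take the preferred available node, release its predecessors, and reverse the sequence at the end. A self-contained sketch of that skeleton on a toy DAG of integer nodes; the priority rule used here (smallest index first) is only a stand-in for the real Sethi-Ullman and register-pressure heuristics:

#include <algorithm>
#include <cstdio>
#include <functional>
#include <queue>
#include <vector>

// Succs[u] lists u's successors. Returns one valid top-down schedule.
static std::vector<int> listScheduleBottomUp(const std::vector<std::vector<int>> &Succs) {
  const int N = (int)Succs.size();
  std::vector<std::vector<int>> Preds(N);
  std::vector<int> SuccsLeft(N, 0);
  for (int U = 0; U < N; ++U) {
    SuccsLeft[U] = (int)Succs[U].size();
    for (int V : Succs[U])
      Preds[V].push_back(U);
  }
  // Min-heap on the node index: the "highest priority" available node first.
  std::priority_queue<int, std::vector<int>, std::greater<int>> Available;
  for (int U = 0; U < N; ++U)
    if (SuccsLeft[U] == 0)                 // bottom of the block: no successors
      Available.push(U);
  std::vector<int> Sequence;
  while (!Available.empty()) {
    int U = Available.top();
    Available.pop();
    Sequence.push_back(U);
    for (int P : Preds[U])                 // scheduling U may release its preds
      if (--SuccsLeft[P] == 0)
        Available.push(P);
  }
  std::reverse(Sequence.begin(), Sequence.end()); // bottom-up, so reverse at the end
  return Sequence;
}

int main() {
  // Nodes 0 and 1 feed 2; node 2 feeds 3 (the bottom of the block).
  std::vector<std::vector<int>> Succs = {{2}, {2}, {3}, {}};
  for (int U : listScheduleBottomUp(Succs))
    std::printf("%d ", U);                 // prints a valid top-down order: 1 0 2 3
  std::printf("\n");
  return 0;
}
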
1622 :
1623 : namespace {
1624 :
1625 : class RegReductionPQBase;
1626 :
1627 : struct queue_sort {
1628 : bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1629 1269001 : };
1630 :
1631 : #ifndef NDEBUG
1632 : template<class SF>
1633 : struct reverse_sort : public queue_sort {
1634 : SF &SortFunc;
1635 :
1636 : reverse_sort(SF &sf) : SortFunc(sf) {}
1637 :
1638 : bool operator()(SUnit* left, SUnit* right) const {
1639 : // reverse left/right rather than simply !SortFunc(left, right)
1640 : // to expose different paths in the comparison logic.
1641 : return SortFunc(right, left);
1642 : }
1643 : };
1644 : #endif // NDEBUG
1645 :
1646 : /// bu_ls_rr_sort - Priority function for bottom up register pressure
1647 : /// reduction scheduler.
1648 : struct bu_ls_rr_sort : public queue_sort {
1649 : enum {
1650 : IsBottomUp = true,
1651 : HasReadyFilter = false
1652 : };
1653 :
1654 : RegReductionPQBase *SPQ;
1655 :
1656 : bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1657 :
1658 : bool operator()(SUnit* left, SUnit* right) const;
1659 : };
1660 :
1661 : // src_ls_rr_sort - Priority function for source order scheduler.
1662 : struct src_ls_rr_sort : public queue_sort {
1663 : enum {
1664 9613 : IsBottomUp = true,
1665 : HasReadyFilter = false
1666 : };
1667 :
1668 : RegReductionPQBase *SPQ;
1669 :
1670 : src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1671 :
1672 : bool operator()(SUnit* left, SUnit* right) const;
1673 : };
1674 :
1675 : // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1676 : struct hybrid_ls_rr_sort : public queue_sort {
1677 : enum {
1678 1232168 : IsBottomUp = true,
1679 : HasReadyFilter = false
1680 : };
1681 :
1682 : RegReductionPQBase *SPQ;
1683 :
1684 : hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1685 :
1686 : bool isReady(SUnit *SU, unsigned CurCycle) const;
1687 :
1688 : bool operator()(SUnit* left, SUnit* right) const;
1689 : };
1690 :
1691 : // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1692 11341 : // scheduler.
1693 : struct ilp_ls_rr_sort : public queue_sort {
1694 : enum {
1695 : IsBottomUp = true,
1696 : HasReadyFilter = false
1697 : };
1698 :
1699 : RegReductionPQBase *SPQ;
1700 :
1701 : ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1702 :
1703 : bool isReady(SUnit *SU, unsigned CurCycle) const;
1704 :
1705 : bool operator()(SUnit* left, SUnit* right) const;
1706 : };
1707 :
1708 : class RegReductionPQBase : public SchedulingPriorityQueue {
1709 15880 : protected:
1710 : std::vector<SUnit *> Queue;
1711 : unsigned CurQueueId = 0;
1712 : bool TracksRegPressure;
1713 : bool SrcOrder;
1714 :
1715 : // SUnits - The SUnits for the current graph.
1716 : std::vector<SUnit> *SUnits;
1717 :
1718 : MachineFunction &MF;
1719 : const TargetInstrInfo *TII;
1720 : const TargetRegisterInfo *TRI;
1721 : const TargetLowering *TLI;
1722 : ScheduleDAGRRList *scheduleDAG = nullptr;
1723 :
1724 : // SethiUllmanNumbers - The SethiUllman number for each node.
1725 : std::vector<unsigned> SethiUllmanNumbers;
1726 :
1727 : /// RegPressure - Tracking current reg pressure per register class.
1728 : std::vector<unsigned> RegPressure;
1729 :
1730 : /// RegLimit - Tracking the number of allocatable registers per register
1731 : /// class.
1732 : std::vector<unsigned> RegLimit;
1733 :
1734 : public:
1735 : RegReductionPQBase(MachineFunction &mf,
1736 : bool hasReadyFilter,
1737 : bool tracksrp,
1738 : bool srcorder,
1739 : const TargetInstrInfo *tii,
1740 : const TargetRegisterInfo *tri,
1741 : const TargetLowering *tli)
1742 : : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1743 1269002 : SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1744 : if (TracksRegPressure) {
1745 : unsigned NumRC = TRI->getNumRegClasses();
1746 : RegLimit.resize(NumRC);
1747 : RegPressure.resize(NumRC);
1748 : std::fill(RegLimit.begin(), RegLimit.end(), 0);
1749 : std::fill(RegPressure.begin(), RegPressure.end(), 0);
1750 1269002 : for (const TargetRegisterClass *RC : TRI->regclasses())
1751 5076008 : RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1752 1269002 : }
1753 : }
1754 27221 :
1755 27221 : void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1756 : scheduleDAG = scheduleDag;
1757 : }
1758 2474883 :
1759 7342986 : ScheduleHazardRecognizer* getHazardRec() {
1760 : return scheduleDAG->getHazardRec();
1761 1269002 : }
1762 :
1763 0 : void initNodes(std::vector<SUnit> &sunits) override;
1764 1269002 :
1765 0 : void addNode(const SUnit *SU) override;
1766 :
1767 0 : void updateNode(const SUnit *SU) override;
1768 9609188 :
1769 : void releaseState() override {
1770 : SUnits = nullptr;
1771 : SethiUllmanNumbers.clear();
1772 : std::fill(RegPressure.begin(), RegPressure.end(), 0);
1773 : }
1774 :
1775 : unsigned getNodePriority(const SUnit *SU) const;
1776 :
1777 1269001 : unsigned getNodeOrdering(const SUnit *SU) const {
1778 1269001 : if (!SU->getNode()) return 0;
1779 :
1780 : return SU->getNode()->getIROrder();
1781 1269001 : }
1782 :
1783 : bool empty() const override { return Queue.empty(); }
1784 :
1785 0 : void push(SUnit *U) override {
1786 12156 : assert(!U->NodeQueueId && "Node in the queue already");
1787 : U->NodeQueueId = ++CurQueueId;
1788 12156 : Queue.push_back(U);
1789 : }
1790 :
1791 133028776 : void remove(SUnit *SU) override {
1792 : assert(!Queue.empty() && "Queue is empty!");
1793 16331408 : assert(SU->NodeQueueId != 0 && "Not in queue!");
1794 : std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1795 16331408 : if (I != std::prev(Queue.end()))
1796 16331408 : std::swap(*I, Queue.back());
1797 16331408 : Queue.pop_back();
1798 : SU->NodeQueueId = 0;
1799 3533 : }
1800 :
1801 : bool tracksRegPressure() const override { return TracksRegPressure; }
1802 :
1803 3533 : void dumpRegPressure() const;
1804 :
1805 : bool HighRegPressure(const SUnit *SU) const;
1806 3533 :
1807 3533 : bool MayReduceRegPressure(SUnit *SU) const;
1808 :
1809 201 : int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1810 :
1811 : void scheduledNode(SUnit *SU) override;
1812 :
1813 : void unscheduledNode(SUnit *SU) override;
1814 :
1815 : protected:
1816 : bool canClobber(const SUnit *SU, const SUnit *Op);
1817 : void AddPseudoTwoAddrDeps();
1818 : void PrescheduleNodesWithMultipleUses();
1819 : void CalculateSethiUllmanNumbers();
1820 : };
1821 :
1822 : template<class SF>
1823 : static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1824 : std::vector<SUnit *>::iterator Best = Q.begin();
1825 : for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1826 : if (Picker(*Best, *I))
1827 : Best = I;
1828 : SUnit *V = *Best;
1829 : if (Best != std::prev(Q.end()))
1830 : std::swap(*Best, Q.back());
1831 16327875 : Q.pop_back();
1832 : return V;
1833 35725813 : }
1834 19397937 :
1835 : template<class SF>
1836 16327876 : SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1837 16327876 : #ifndef NDEBUG
1838 : if (DAG->StressSched) {
1839 : reverse_sort<SF> RPicker(Picker);
1840 16327876 : return popFromQueueImpl(Q, RPicker);
1841 : }
1842 70657 : #endif
1843 : (void)DAG;
1844 179211 : return popFromQueueImpl(Q, Picker);
1845 108554 : }
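
The "queue" behind these helpers is an unordered std::vector: popFromQueueImpl scans it with the picker, swaps the winner to the back, and pops it, so pushes stay O(1) and no heap invariant has to be maintained while node priorities keep shifting. A standalone sketch of that pattern, assuming Better(a, b) returns true when b should be taken in preference to a, which mirrors how the picker is invoked above:

#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

// Pop the preferred element from an unordered, vector-backed queue.
template <class T, class Compare>
T popBest(std::vector<T> &Q, Compare Better) {
  assert(!Q.empty() && "popping from an empty queue");
  auto Best = Q.begin();
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    if (Better(*Best, *I))                // *I is preferred over the current best
      Best = I;
  T V = *Best;
  if (Best != std::prev(Q.end()))
    std::swap(*Best, Q.back());           // O(1) removal: overwrite with the last element
  Q.pop_back();
  return V;
}

// Usage: popBest(Nums, [](int A, int B) { return B > A; }) pops the largest value.
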
1846 :
1847 70657 : //===----------------------------------------------------------------------===//
1848 70657 : // RegReductionPriorityQueue Definition
1849 : //===----------------------------------------------------------------------===//
1850 : //
1851 70657 : // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1852 : // to reduce register pressure.
1853 16046194 : //
1854 : template<class SF>
1855 35034690 : class RegReductionPriorityQueue : public RegReductionPQBase {
1856 18988495 : SF Picker;
1857 :
1858 16046195 : public:
1859 16046195 : RegReductionPriorityQueue(MachineFunction &mf,
1860 : bool tracksrp,
1861 : bool srcorder,
1862 16046195 : const TargetInstrInfo *tii,
1863 : const TargetRegisterInfo *tri,
1864 86880 : const TargetLowering *tli)
1865 : : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1866 236001 : tii, tri, tli),
1867 149121 : Picker(this) {}
1868 :
1869 86880 : bool isBottomUp() const override { return SF::IsBottomUp; }
1870 86880 :
1871 : bool isReady(SUnit *U) const override {
1872 : return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1873 86880 : }
1874 :
1875 124144 : SUnit *pop() override {
1876 : if (Queue.empty()) return nullptr;
1877 275911 :
1878 151767 : SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1879 : V->NodeQueueId = 0;
1880 124144 : return V;
1881 124144 : }
1882 :
1883 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1884 124144 : LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1885 : // Emulate pop() without clobbering NodeQueueIds.
1886 : std::vector<SUnit *> DumpQueue = Queue;
1887 : SF DumpPicker = Picker;
1888 0 : while (!DumpQueue.empty()) {
1889 : SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1890 : dbgs() << "Height " << SU->getHeight() << ": ";
1891 : DAG->dumpNode(*SU);
1892 : }
1893 : }
1894 : #endif
1895 : };
1896 16327875 :
1897 : using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1898 0 : using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1899 : using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1900 : using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1901 :
1902 : } // end anonymous namespace
1903 :
1904 : //===----------------------------------------------------------------------===//
1905 : // Static Node Priority for Register Pressure Reduction
1906 0 : //===----------------------------------------------------------------------===//
1907 :
1908 0 : // Check for special nodes that bypass scheduling heuristics.
1909 : // Currently this pushes TokenFactor nodes down, but may be used for other
1910 : // pseudo-ops as well.
1911 : //
1912 : // Return -1 to schedule right above left, 1 for left above right.
1913 : // Return 0 if no bias exists.
1914 : static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1915 : bool LSchedLow = left->isScheduleLow;
1916 0 : bool RSchedLow = right->isScheduleLow;
1917 : if (LSchedLow != RSchedLow)
1918 0 : return LSchedLow < RSchedLow ? 1 : -1;
1919 : return 0;
1920 : }
1921 :
1922 : /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1923 : /// Smaller number is the higher priority.
1924 : static unsigned
1925 : CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1926 0 : if (SUNumbers[SU->NodeNum] != 0)
1927 : return SUNumbers[SU->NodeNum];
1928 0 :
1929 : // Use WorkList to avoid stack overflow on excessively large IRs.
1930 : struct WorkState {
1931 : WorkState(const SUnit *SU) : SU(SU) {}
1932 : const SUnit *SU;
1933 : unsigned PredsProcessed = 0;
1934 : };
1935 :
1936 0 : SmallVector<WorkState, 16> WorkList;
1937 : WorkList.push_back(SU);
1938 : while (!WorkList.empty()) {
1939 : auto &Temp = WorkList.back();
1940 : auto *TempSU = Temp.SU;
1941 : bool AllPredsKnown = true;
1942 : // Try to find a non-evaluated pred and push it into the processing stack.
1943 : for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1944 : auto &Pred = TempSU->Preds[P];
1945 : if (Pred.isCtrl()) continue; // ignore chain preds
1946 : SUnit *PredSU = Pred.getSUnit();
1947 : if (SUNumbers[PredSU->NodeNum] == 0) {
1948 : #ifndef NDEBUG
1949 : // In debug mode, check that we don't have such element in the stack.
1950 : for (auto It : WorkList)
1951 1269002 : assert(It.SU != PredSU && "Trying to push an element twice?");
1952 : #endif
1953 : // Next time start processing this one starting from the next pred.
1954 : Temp.PredsProcessed = P + 1;
1955 : WorkList.push_back(PredSU);
1956 : AllPredsKnown = false;
1957 : break;
1958 : }
1959 2538004 : }
1960 :
1961 0 : if (!AllPredsKnown)
1962 : continue;
1963 0 :
1964 0 : // Once all preds are known, we can calculate the answer for this one.
1965 : unsigned SethiUllmanNumber = 0;
1966 0 : unsigned Extra = 0;
1967 0 : for (const SDep &Pred : TempSU->Preds) {
1968 : if (Pred.isCtrl()) continue; // ignore chain preds
1969 0 : SUnit *PredSU = Pred.getSUnit();
1970 0 : unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1971 : assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1972 0 : if (PredSethiUllman > SethiUllmanNumber) {
1973 0 : SethiUllmanNumber = PredSethiUllman;
1974 : Extra = 0;
1975 0 : } else if (PredSethiUllman == SethiUllmanNumber)
1976 0 : ++Extra;
1977 : }
1978 :
1979 16328570 : SethiUllmanNumber += Extra;
1980 16328570 : if (SethiUllmanNumber == 0)
1981 : SethiUllmanNumber = 1;
1982 16327875 : SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1983 16327876 : WorkList.pop_back();
1984 16327876 : }
1985 :
1986 70679 : assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1987 70679 : return SUNumbers[SU->NodeNum];
1988 : }
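
The worklist above is an iterative, stack-safe form of the classical Sethi-Ullman recurrence: a node with no data predecessors gets 1, and any other node gets the maximum of its predecessors' numbers, plus one for each additional predecessor that ties with that maximum. A small recursive sketch of the same recurrence on a toy DAG; the graph, node indices and output are made up purely for illustration:

#include <cstdio>
#include <vector>

// Preds[i] lists the data predecessors of node i; Num memoizes results.
static unsigned sethiUllman(unsigned N,
                            const std::vector<std::vector<unsigned>> &Preds,
                            std::vector<unsigned> &Num) {
  if (Num[N])
    return Num[N];
  unsigned Max = 0, Ties = 0;
  for (unsigned P : Preds[N]) {
    unsigned V = sethiUllman(P, Preds, Num);
    if (V > Max) {
      Max = V;
      Ties = 0;
    } else if (V == Max) {
      ++Ties;
    }
  }
  unsigned Result = Max + Ties;
  Num[N] = Result ? Result : 1;           // nodes with no data preds get 1
  return Num[N];
}

int main() {
  // Node 2 uses leaves 0 and 1; node 4 uses node 2 and leaf 3.
  std::vector<std::vector<unsigned>> Preds = {{}, {}, {0, 1}, {}, {2, 3}};
  std::vector<unsigned> Num(Preds.size(), 0);
  std::printf("SethiUllman(4) = %u\n", sethiUllman(4, Preds, Num)); // prints 2
  return 0;
}
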
1989 70657 :
1990 70657 : /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1991 70657 : /// scheduling units.
1992 : void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1993 16046843 : SethiUllmanNumbers.assign(SUnits->size(), 0);
1994 16046843 :
1995 : for (const SUnit &SU : *SUnits)
1996 16046194 : CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1997 16046195 : }
1998 16046195 :
1999 : void RegReductionPQBase::addNode(const SUnit *SU) {
2000 86894 : unsigned SUSize = SethiUllmanNumbers.size();
2001 86894 : if (SUnits->size() > SUSize)
2002 : SethiUllmanNumbers.resize(SUSize*2, 0);
2003 86880 : CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2004 86880 : }
2005 86880 :
2006 : void RegReductionPQBase::updateNode(const SUnit *SU) {
2007 124154 : SethiUllmanNumbers[SU->NodeNum] = 0;
2008 124154 : CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2009 : }
2010 124144 :
2011 124144 : // Lower priority means schedule further down. For bottom-up scheduling, lower
2012 124144 : // priority SUs are scheduled before higher priority SUs.
2013 : unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2014 : assert(SU->NodeNum < SethiUllmanNumbers.size());
2015 : unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2016 : if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2017 : // CopyToReg should be close to its uses to facilitate coalescing and
2018 : // avoid spilling.
2019 : return 0;
2020 : if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2021 : Opc == TargetOpcode::SUBREG_TO_REG ||
2022 : Opc == TargetOpcode::INSERT_SUBREG)
2023 : // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2024 : // close to their uses to facilitate coalescing.
2025 : return 0;
2026 : if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2027 : // If SU does not have a register use, i.e. it doesn't produce a value
2028 : // that would be consumed (e.g. store), then it terminates a chain of
2029 : // computation. Give it a large SethiUllman number so it will be
2030 : // scheduled right before its predecessors so that it doesn't lengthen
2031 : // their live ranges.
2032 : return 0xffff;
2033 : if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2034 : // If SU does not have a register def, schedule it close to its uses
2035 : // because it does not lengthen any live ranges.
2036 : return 0;
2037 : #if 1
2038 : return SethiUllmanNumbers[SU->NodeNum];
2039 : #else
2040 : unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2041 : if (SU->isCallOp) {
2042 : // FIXME: This assumes all of the defs are used as call operands.
2043 : int NP = (int)Priority - SU->getNode()->getNumValues();
2044 : return (NP > 0) ? NP : 0;
2045 : }
2046 : return Priority;
2047 108554 : #endif
2048 108554 : }
2049 108554 :
2050 352 : //===----------------------------------------------------------------------===//
2051 : // Register Pressure Tracking
2052 : //===----------------------------------------------------------------------===//
2053 :
2054 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2055 : LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2056 : for (const TargetRegisterClass *RC : TRI->regclasses()) {
2057 16310610 : unsigned Id = RC->getID();
2058 32621220 : unsigned RP = RegPressure[Id];
2059 : if (!RP) continue;
2060 : LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2061 : << RegLimit[Id] << '\n');
2062 : }
2063 16310611 : }
2064 : #endif
2065 :
2066 : bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2067 : if (!TLI)
2068 9046265 : return false;
2069 9046265 :
2070 32621222 : for (const SDep &Pred : SU->Preds) {
2071 : if (Pred.isCtrl())
2072 23574957 : continue;
2073 : SUnit *PredSU = Pred.getSUnit();
2074 : // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2075 36079476 : // to cover the number of registers defined (they are all live).
2076 : if (PredSU->NumRegDefsLeft == 0) {
2077 19768865 : continue;
2078 : }
2079 17778940 : for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2080 : RegDefPos.IsValid(); RegDefPos.Advance()) {
2081 : unsigned RCId, Cost;
2082 : GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2083 :
2084 : if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2085 : return true;
2086 7264346 : }
2087 7264346 : }
2088 : return false;
2089 : }
2090 :
2091 : bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2092 : const SDNode *N = SU->getNode();
2093 :
2094 7264346 : if (!N->isMachineOpcode() || !SU->NumSuccs)
2095 : return false;
2096 :
2097 : unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2098 : for (unsigned i = 0; i != NumDefs; ++i) {
2099 36079476 : MVT VT = N->getSimpleValueType(i);
2100 19768865 : if (!N->hasAnyUseOfValue(i))
2101 : continue;
2102 8889470 : unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2103 : if (RegPressure[RCId] >= RegLimit[RCId])
2104 8889470 : return true;
2105 : }
2106 : return false;
2107 2088706 : }
2108 1920511 :
2109 : // Compute the register pressure contribution of this instruction by counting up
2110 : // for uses that are not live and down for defs. Only count register classes
2111 16310611 : // that are already under high pressure. As a side effect, compute the number of
2112 16310611 : // uses of registers that are already live.
2113 : //
2114 32621222 : // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2115 : // so could probably be factored.
2116 : int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2117 : LiveUses = 0;
2118 : int PDiff = 0;
2119 18092530 : for (const SDep &Pred : SU->Preds) {
2120 : if (Pred.isCtrl())
2121 : continue;
2122 : SUnit *PredSU = Pred.getSUnit();
2123 : // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2124 1269002 : // to cover the number of registers defined (they are all live).
2125 1269002 : if (PredSU->NumRegDefsLeft == 0) {
2126 : if (PredSU->getNode()->isMachineOpcode())
2127 17578636 : ++LiveUses;
2128 16309634 : continue;
2129 1269002 : }
2130 : for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2131 650 : RegDefPos.IsValid(); RegDefPos.Advance()) {
2132 650 : MVT VT = RegDefPos.GetValue();
2133 1300 : unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2134 295 : if (RegPressure[RCId] >= RegLimit[RCId])
2135 650 : ++PDiff;
2136 650 : }
2137 : }
2138 327 : const SDNode *N = SU->getNode();
2139 327 :
2140 327 : if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2141 327 : return PDiff;
2142 :
2143 : unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2144 : for (unsigned i = 0; i != NumDefs; ++i) {
2145 20103644 : MVT VT = N->getSimpleValueType(i);
2146 : if (!N->hasAnyUseOfValue(i))
2147 20103644 : continue;
2148 20103644 : unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2149 : if (RegPressure[RCId] >= RegLimit[RCId])
2150 : --PDiff;
2151 : }
2152 36218672 : return PDiff;
2153 18109336 : }
2154 :
2155 : void RegReductionPQBase::scheduledNode(SUnit *SU) {
2156 : if (!TracksRegPressure)
2157 : return;
2158 18109336 :
2159 : if (!SU->getNode())
2160 : return;
2161 :
2162 : for (const SDep &Pred : SU->Preds) {
2163 : if (Pred.isCtrl())
2164 : continue;
2165 14774422 : SUnit *PredSU = Pred.getSUnit();
2166 : // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2167 : // to cover the number of registers defined (they are all live).
2168 : if (PredSU->NumRegDefsLeft == 0) {
2169 : continue;
2170 22019426 : }
2171 : // FIXME: The ScheduleDAG currently loses information about which of a
2172 : // node's values is consumed by each dependence. Consequently, if the node
2173 : // defines multiple register classes, we don't know which to pressurize
2174 : // here. Instead the following loop consumes the register defs in an
2175 : // arbitrary order. At least it handles the common case of clustered loads
2176 : // to the same class. For precise liveness, each SDep needs to indicate the
2177 : // result number. But that tightly couples the ScheduleDAG with the
2178 : // SelectionDAG making updates tricky. A simpler hack would be to attach a
2179 : // value type or register class to SDep.
2180 : //
2181 : // The most important aspect of register tracking is balancing the increase
2182 : // here with the reduction further below. Note that this SU may use multiple
2183 : // defs in PredSU. They can't be determined here, but we've already
2184 : // compensated by reducing NumRegDefsLeft in PredSU during
2185 : // ScheduleDAGSDNodes::AddSchedEdges.
2186 : --PredSU->NumRegDefsLeft;
2187 : unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2188 : for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2189 : RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2190 : if (SkipRegDefs)
2191 : continue;
2192 :
2193 : unsigned RCId, Cost;
2194 : GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2195 : RegPressure[RCId] += Cost;
2196 : break;
2197 : }
2198 292004 : }
2199 292004 :
2200 : // We should have this assert, but there may be dead SDNodes that never
2201 : // materialize as SUnits, so they don't appear to generate liveness.
2202 609177 : //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2203 337297 : int SkipRegDefs = (int)SU->NumRegDefsLeft;
2204 : for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2205 : RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2206 : if (SkipRegDefs > 0)
2207 : continue;
2208 321333 : unsigned RCId, Cost;
2209 : GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2210 : if (RegPressure[RCId] < Cost) {
2211 369594 : // Register pressure tracking is imprecise. This can happen. But we try
2212 556868 : // hard not to let it happen because it likely results in poor scheduling.
2213 : LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2214 207398 : << ") has too many regdefs\n");
2215 : RegPressure[RCId] = 0;
2216 622194 : }
2217 20124 : else {
2218 : RegPressure[RCId] -= Cost;
2219 : }
2220 : }
2221 : LLVM_DEBUG(dumpRegPressure());
2222 : }
2223 :
2224 : void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2225 : if (!TracksRegPressure)
2226 : return;
2227 :
2228 : const SDNode *N = SU->getNode();
2229 : if (!N) return;
2230 :
2231 : if (!N->isMachineOpcode()) {
2232 : if (N->getOpcode() != ISD::CopyToReg)
2233 : return;
2234 : } else {
2235 : unsigned Opc = N->getMachineOpcode();
2236 : if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2237 : Opc == TargetOpcode::INSERT_SUBREG ||
2238 : Opc == TargetOpcode::SUBREG_TO_REG ||
2239 : Opc == TargetOpcode::REG_SEQUENCE ||
2240 : Opc == TargetOpcode::IMPLICIT_DEF)
2241 : return;
2242 : }
2243 :
2244 : for (const SDep &Pred : SU->Preds) {
2245 : if (Pred.isCtrl())
2246 : continue;
2247 : SUnit *PredSU = Pred.getSUnit();
2248 292100 : // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2249 292100 : // counts data deps.
2250 : if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2251 598843 : continue;
2252 306743 : const SDNode *PN = PredSU->getNode();
2253 : if (!PN->isMachineOpcode()) {
2254 : if (PN->getOpcode() == ISD::CopyFromReg) {
2255 : MVT VT = PN->getSimpleValueType(0);
2256 : unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2257 281889 : RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2258 67958 : }
2259 56278 : continue;
2260 67958 : }
2261 : unsigned POpc = PN->getMachineOpcode();
2262 427973 : if (POpc == TargetOpcode::IMPLICIT_DEF)
2263 642015 : continue;
2264 214042 : if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2265 214042 : POpc == TargetOpcode::INSERT_SUBREG ||
2266 642126 : POpc == TargetOpcode::SUBREG_TO_REG) {
2267 80982 : MVT VT = PN->getSimpleValueType(0);
2268 : unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2269 : RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2270 292100 : continue;
2271 : }
2272 292100 : unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2273 : for (unsigned i = 0; i != NumDefs; ++i) {
2274 : MVT VT = PN->getSimpleValueType(i);
2275 385509 : if (!PN->hasAnyUseOfValue(i))
2276 256676 : continue;
2277 128173 : unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2278 128173 : if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2279 6 : // Register pressure tracking is imprecise. This can happen.
2280 128167 : RegPressure[RCId] = 0;
2281 384501 : else
2282 24695 : RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2283 : }
2284 : }
2285 :
2286 : // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2287 16318987 : // may transfer data dependencies to CopyToReg.
2288 16318987 : if (SU->NumSuccs && N->isMachineOpcode()) {
2289 : unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2290 : for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2291 210746 : MVT VT = N->getSimpleValueType(i);
2292 : if (VT == MVT::Glue || VT == MVT::Other)
2293 : continue;
2294 439925 : if (!N->hasAnyUseOfValue(i))
2295 229181 : continue;
2296 : unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2297 : RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2298 : }
2299 : }
2300 163763 :
2301 : LLVM_DEBUG(dumpRegPressure());
2302 : }
2303 :
2304 : //===----------------------------------------------------------------------===//
2305 : // Dynamic Node Priority for Register Pressure Reduction
2306 : //===----------------------------------------------------------------------===//
2307 :
2308 : /// closestSucc - Returns the scheduled cycle of the successor which is
2309 : /// closest to the current cycle.
2310 : static unsigned closestSucc(const SUnit *SU) {
2311 : unsigned MaxHeight = 0;
2312 : for (const SDep &Succ : SU->Succs) {
2313 : if (Succ.isCtrl()) continue; // ignore chain succs
2314 : unsigned Height = Succ.getSUnit()->getHeight();
2315 : // If there are bunch of CopyToRegs stacked up, they should be considered
2316 : // to be at the same position.
2317 : if (Succ.getSUnit()->getNode() &&
2318 139910 : Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2319 139910 : Height = closestSucc(Succ.getSUnit())+1;
2320 140376 : if (Height > MaxHeight)
2321 140376 : MaxHeight = Height;
2322 140376 : }
2323 466 : return MaxHeight;
2324 : }
2325 :
2326 : /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2327 139910 : /// for scratch registers, i.e. number of data dependencies.
2328 139910 : static unsigned calcMaxScratches(const SUnit *SU) {
2329 : unsigned Scratches = 0;
2330 : for (const SDep &Pred : SU->Preds) {
2331 : if (Pred.isCtrl()) continue; // ignore chain preds
2332 : Scratches++;
2333 : }
2334 : return Scratches;
2335 210744 : }
2336 354561 :
2337 498378 : /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2338 143817 : /// CopyFromReg from a virtual register.
2339 5 : static bool hasOnlyLiveInOpers(const SUnit *SU) {
2340 : bool RetVal = false;
2341 143812 : for (const SDep &Pred : SU->Preds) {
2342 287624 : if (Pred.isCtrl()) continue;
2343 : const SUnit *PredSU = Pred.getSUnit();
2344 : if (PredSU->getNode() &&
2345 : PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2346 : unsigned Reg =
2347 3941 : cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2348 : if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2349 : RetVal = true;
2350 139871 : continue;
2351 : }
2352 : }
2353 : return false;
2354 : }
2355 : return RetVal;
2356 8840 : }
2357 8840 :
2358 : /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2359 : /// CopyToReg to a virtual register. This SU def is probably a liveout and
2360 156 : /// it has no other use. It should be scheduled closer to the terminator.
2361 156 : static bool hasOnlyLiveOutUses(const SUnit *SU) {
2362 : bool RetVal = false;
2363 156 : for (const SDep &Succ : SU->Succs) {
2364 38 : if (Succ.isCtrl()) continue;
2365 : const SUnit *SuccSU = Succ.getSUnit();
2366 : if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2367 : unsigned Reg =
2368 236 : cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2369 118 : if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2370 118 : RetVal = true;
2371 216 : continue;
2372 108 : }
2373 : }
2374 : return false;
2375 : }
2376 418 : return RetVal;
2377 301 : }
2378 :
2379 : // Set isVRegCycle for a node with only live in opers and live out uses. Also
2380 : // set isVRegCycle for its CopyFromReg operands.
2381 : //
2382 253 : // This is only relevant for single-block loops, in which case the VRegCycle
2383 : // node is likely an induction variable for which the operand and target virtual
2384 150 : // registers should be coalesced (e.g. pre/post increment values). Setting the
2385 150 : // isVRegCycle flag helps the scheduler prioritize other uses of the same
2386 24 : // CopyFromReg so that this node becomes the virtual register "kill". This
2387 24 : // avoids interference between the values live in and out of the block and
2388 24 : // eliminates a copy inside the loop.
2389 24 : static void initVRegCycle(SUnit *SU) {
2390 : if (DisableSchedVRegCycle)
2391 24 : return;
2392 :
2393 : if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2394 126 : return;
2395 :
2396 252 : LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2397 126 :
2398 126 : SU->isVRegCycle = true;
2399 2 :
2400 2 : for (const SDep &Pred : SU->Preds) {
2401 2 : if (Pred.isCtrl()) continue;
2402 : Pred.getSUnit()->isVRegCycle = true;
2403 : }
2404 248 : }
2405 234 :
2406 110 : // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2407 110 : // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2408 0 : static void resetVRegCycle(SUnit *SU) {
2409 110 : if (!SU->isVRegCycle)
2410 220 : return;
2411 :
2412 84 : for (const SDep &Pred : SU->Preds) {
2413 : if (Pred.isCtrl()) continue; // ignore chain preds
2414 68 : SUnit *PredSU = Pred.getSUnit();
2415 : if (PredSU->isVRegCycle) {
2416 : assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2417 : "VRegCycle def must be CopyFromReg");
2418 : Pred.getSUnit()->isVRegCycle = false;
2419 : }
2420 117 : }
2421 204 : }
2422 88 :
2423 : // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2424 20 : // means a node that defines the VRegCycle has not been scheduled yet.
2425 : static bool hasVRegCycleUse(const SUnit *SU) {
2426 20 : // If this SU also defines the VReg, don't hoist it as a "use".
2427 : if (SU->isVRegCycle)
2428 3 : return false;
2429 3 :
2430 : for (const SDep &Pred : SU->Preds) {
2431 : if (Pred.isCtrl()) continue; // ignore chain preds
2432 : if (Pred.getSUnit()->isVRegCycle &&
2433 : Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2434 : LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2435 : return true;
2436 : }
2437 : }
2438 : return false;
2439 : }
2440 :
2441 : // Check for either a dependence (latency) or resource (hazard) stall.
2442 16715603 : //
2443 : // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2444 36765604 : static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2445 20050001 : if ((int)SPQ->getCurCycle() < Height) return true;
2446 : if (SPQ->getHazardRec()->getHazardType(SU, 0)
2447 : != ScheduleHazardRecognizer::NoHazard)
2448 : return true;
2449 14435208 : return false;
2450 14435208 : }
2451 226951 :
2452 14435208 : // Return -1 if left has higher priority, 1 if right has higher priority.
2453 : // Return 0 if latency-based priority is equivalent.
2454 : static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2455 16715603 : RegReductionPQBase *SPQ) {
2456 : // Scheduling an instruction that uses a VReg whose postincrement has not yet
2457 : // been scheduled will induce a copy. Model this as an extra cycle of latency.
2458 : int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2459 : int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2460 : int LHeight = (int)left->getHeight() + LPenalty;
2461 : int RHeight = (int)right->getHeight() + RPenalty;
2462 16642518 :
2463 9768626 : bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2464 7491295 : BUHasStall(left, LHeight, SPQ);
2465 : bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2466 : BUHasStall(right, RHeight, SPQ);
2467 :
2468 : // If scheduling one of the node will cause a pipeline stall, delay it.
2469 : // If scheduling either one of the node will cause a pipeline stall, sort
2470 : // them according to their height.
2471 82971 : if (LStall) {
2472 : if (!RStall)
2473 137869 : return 1;
2474 92317 : if (LHeight != RHeight)
2475 : return LHeight > RHeight ? 1 : -1;
2476 57898 : } else if (RStall)
2477 57898 : return -1;
2478 :
2479 49428 : // If either node is scheduling for latency, sort them by height/depth
2480 24714 : // and latency.
2481 : if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2482 : right->SchedulingPref == Sched::ILP)) {
2483 : // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2484 : // is enabled, grouping instructions by cycle, then its height is already
2485 : // covered so only its depth matters. We also reach this point if both stall
2486 : // but have the same height.
2487 : if (!SPQ->getHazardRec()->isEnabled()) {
2488 : if (LHeight != RHeight)
2489 : return LHeight > RHeight ? 1 : -1;
2490 : }
2491 : int LDepth = left->getDepth() - LPenalty;
2492 : int RDepth = right->getDepth() - RPenalty;
2493 9726 : if (LDepth != RDepth) {
2494 : LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2495 13453 : << ") depth " << LDepth << " vs SU (" << right->NodeNum
2496 11377 : << ") depth " << RDepth << "\n");
2497 : return LDepth < RDepth ? 1 : -1;
2498 9349 : }
2499 : if (left->Latency != right->Latency)
2500 3408 : return left->Latency > right->Latency ? 1 : -1;
2501 1704 : }
2502 : return 0;
2503 : }
2504 :
2505 : static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2506 : // Schedule physical register definitions close to their use. This is
2507 : // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2508 : // long as shortening physreg live ranges is generally good, we can defer
2509 : // creating a subtarget hook.
2510 : if (!DisableSchedPhysRegJoin) {
2511 : bool LHasPhysReg = left->hasPhysRegDefs;
2512 : bool RHasPhysReg = right->hasPhysRegDefs;
2513 : if (LHasPhysReg != RHasPhysReg) {
2514 : #ifndef NDEBUG
2515 : static const char *const PhysRegMsg[] = { " has no physreg",
2516 : " defines a physreg" };
2517 : #endif
2518 : LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2519 : << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2520 : << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2521 82971 : return LHasPhysReg < RHasPhysReg;
2522 82971 : }
2523 : }
2524 :
2525 82971 : // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
2526 : unsigned LPriority = SPQ->getNodePriority(left);
2527 : unsigned RPriority = SPQ->getNodePriority(right);
2528 :
2529 : // Be really careful about hoisting call operands above previous calls.
2530 1234 : // Only allows it if it would reduce register pressure.
2531 : if (left->isCall && right->isCallOp) {
2532 2704 : unsigned RNumVals = right->getNode()->getNumValues();
2533 1470 : RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2534 1447 : }
2535 : if (right->isCall && left->isCallOp) {
2536 : unsigned LNumVals = left->getNode()->getNumValues();
2537 : LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2538 : }
2539 :
2540 16318987 : if (LPriority != RPriority)
2541 16318987 : return LPriority > RPriority;
2542 :
2543 : // One or both of the nodes are calls and their sethi-ullman numbers are the
2544 2704 : // same, then keep source order.
2545 1470 : if (left->isCall || right->isCall) {
2546 : unsigned LOrder = SPQ->getNodeOrdering(left);
2547 1447 : unsigned ROrder = SPQ->getNodeOrdering(right);
2548 :
2549 : // Prefer an ordering where the lower the non-zero order number, the higher
2550 1405 : // the preference.
2551 : if ((LOrder || ROrder) && LOrder != ROrder)
2552 : return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2553 : }
2554 :
2555 : // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2556 : // e.g.
2557 6647702 : // t1 = op t2, c1
2558 : // t3 = op t4, c2
2559 6647702 : //
2560 : // and the following instructions are both ready.
2561 : // t2 = op c3
2562 15398135 : // t4 = op c4
2563 8750730 : //
2564 6897747 : // Then schedule t2 = op first.
2565 904 : // i.e.
2566 : // t4 = op c4
2567 : // t2 = op c3
2568 : // t1 = op t2, c1
2569 : // t3 = op t4, c2
2570 : //
2571 : // This creates more short live intervals.
2572 : unsigned LDist = closestSucc(left);
2573 : unsigned RDist = closestSucc(right);
2574 : if (LDist != RDist)
2575 : return LDist < RDist;
2576 :
2577 6441707 : // How many registers becomes live when the node is scheduled.
2578 6399954 : unsigned LScratch = calcMaxScratches(left);
2579 : unsigned RScratch = calcMaxScratches(right);
2580 : if (LScratch != RScratch)
2581 : return LScratch > RScratch;
2582 :
2583 : // Comparing latency against a call makes little sense unless the node
2584 : // is register pressure-neutral.
2585 : if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2586 3323851 : return (left->NodeQueueId > right->NodeQueueId);
2587 :
2588 : // Do not compare latencies when one or both of the nodes are calls.
2589 : if (!DisableSchedCycles &&
2590 3323851 : !(left->isCall || right->isCall)) {
2591 3323851 : int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2592 3323851 : if (result != 0)
2593 3323851 : return result > 0;
2594 : }
2595 3323851 : else {
2596 : if (left->getHeight() != right->getHeight())
2597 3323851 : return left->getHeight() > right->getHeight();
2598 :
2599 : if (left->getDepth() != right->getDepth())
2600 : return left->getDepth() < right->getDepth();
2601 : }
2602 :
2603 3323851 : assert(left->NodeQueueId && right->NodeQueueId &&
2604 43501 : "NodeQueueId cannot be zero");
2605 : return (left->NodeQueueId > right->NodeQueueId);
2606 37285 : }
2607 9979 :
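
BURRSort above is a cascade of tie-breakers, consulted in order until one of them distinguishes the two nodes; the unique NodeQueueId at the end makes the resulting order strict and total. A generic standalone sketch of that cascading-comparator shape; the fields and preference directions are illustrative rather than a restatement of the exact rules above, and, following the pop loop's convention, returning true means the second argument should be taken first:

// Cand stands in for a scheduling unit with its precomputed heuristics.
struct Cand {
  unsigned Priority;   // primary key (e.g. a Sethi-Ullman-style number)
  unsigned Dist;       // secondary key (e.g. distance to the closest successor)
  unsigned Scratches;  // tertiary key (e.g. number of data predecessors)
  unsigned QueueId;    // unique per queue entry; final tie-breaker
};

// Each later key is consulted only when every earlier key compares equal.
bool takeRightFirst(const Cand &L, const Cand &R) {
  if (L.Priority != R.Priority)
    return R.Priority > L.Priority;   // higher priority wins
  if (L.Dist != R.Dist)
    return R.Dist < L.Dist;           // closer to a successor wins
  if (L.Scratches != R.Scratches)
    return R.Scratches > L.Scratches; // more data predecessors win
  return R.QueueId > L.QueueId;       // unique id, so identical keys never both "win"
}
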
2608 3280350 : // Bottom up
2609 : bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2610 : if (int res = checkSpecialNodes(left, right))
2611 : return res > 0;
2612 :
2613 3304955 : return BURRSort(left, right, SPQ);
2614 97969 : }
2615 :
2616 : // Source order, otherwise bottom up.
2617 : bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2618 : if (int res = checkSpecialNodes(left, right))
2619 3209234 : return res > 0;
2620 3103414 :
2621 192596 : unsigned LOrder = SPQ->getNodeOrdering(left);
2622 : unsigned ROrder = SPQ->getNodeOrdering(right);
2623 3103773 :
2624 3103773 : // Prefer an ordering where the lower the non-zero order number, the higher
2625 3103773 : // the preference.
2626 : if ((LOrder || ROrder) && LOrder != ROrder)
2627 : return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2628 :
2629 672031 : return BURRSort(left, right, SPQ);
2630 : }
2631 2566767 :
2632 2509 : // If the time between now and when the instruction will be ready can cover
2633 : // the spill code, then avoid adding it to the ready queue. This gives long
2634 : // stalls highest priority and allows hoisting across calls. It should also
2635 : // speed up processing the available queue.
2636 : bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2637 10097556 : static const unsigned ReadyDelay = 3;
2638 :
2639 : if (SPQ->MayReduceRegPressure(SU)) return true;
2640 :
2641 : if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2642 10097556 :
2643 10097556 : if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2644 10097556 : != ScheduleHazardRecognizer::NoHazard)
2645 10097556 : return false;
2646 :
2647 : return true;
2648 : }
2649 :
2650 : // Return true if right should be scheduled with higher priority than left.
2651 : bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2652 : if (int res = checkSpecialNodes(left, right))
2653 45734 : return res > 0;
2654 :
2655 : if (left->isCall || right->isCall)
2656 : // No way to compute latency of calls.
2657 : return BURRSort(left, right, SPQ);
2658 10051822 :
2659 10051822 : bool LHigh = SPQ->HighRegPressure(left);
2660 : bool RHigh = SPQ->HighRegPressure(right);
2661 : // Avoid causing spills. If register pressure is high, schedule for
2662 : // register pressure reduction.
2663 10051822 : if (LHigh && !RHigh) {
2664 727 : LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2665 727 : << right->NodeNum << ")\n");
2666 : return true;
2667 10051822 : }
2668 800 : else if (!LHigh && RHigh) {
2669 800 : LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2670 : << left->NodeNum << ")\n");
2671 : return false;
2672 10051822 : }
2673 1805916 : if (!LHigh && !RHigh) {
2674 : int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2675 : if (result != 0)
2676 : return result > 0;
2677 8245906 : }
2678 : return BURRSort(left, right, SPQ);
2679 : }
2680 :
2681 : // Schedule as many instructions in each cycle as possible. So don't make an
2682 : // instruction available unless it is ready in the current cycle.
2683 6078 : bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2684 2212 : if (SU->getHeight() > CurCycle) return false;
2685 :
2686 : if (SPQ->getHazardRec()->getHazardType(SU, 0)
2687 : != ScheduleHazardRecognizer::NoHazard)
2688 : return false;
2689 :
2690 : return true;
2691 : }
2692 :
2693 : static bool canEnableCoalescing(SUnit *SU) {
2694 : unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2695 : if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2696 : // CopyToReg should be close to its uses to facilitate coalescing and
2697 : // avoid spilling.
2698 : return true;
2699 :
2700 : if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2701 : Opc == TargetOpcode::SUBREG_TO_REG ||
2702 : Opc == TargetOpcode::INSERT_SUBREG)
2703 : // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2704 8244326 : // close to their uses to facilitate coalescing.
2705 8244326 : return true;
2706 8244326 :
2707 4807380 : if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2708 : // If SU does not have a register def, schedule it close to its uses
2709 : // because it does not lengthen any live ranges.
2710 : return true;
2711 :
2712 3436946 : return false;
2713 242936 : }
2714 :
2715 : // list-ilp is currently an experimental scheduler that allows various
2716 : // heuristics to be enabled prior to the normal register reduction logic.
2717 3194010 : bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2718 401 : if (int res = checkSpecialNodes(left, right))
2719 : return res > 0;
2720 :
2721 3193609 : if (left->isCall || right->isCall)
2722 3193602 : // No way to compute latency of calls.
2723 3193580 : return BURRSort(left, right, SPQ);
2724 3193580 :
2725 636806 : unsigned LLiveUses = 0, RLiveUses = 0;
2726 : int LPDiff = 0, RPDiff = 0;
2727 : if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2728 29 : LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2729 0 : RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2730 : }
2731 29 : if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2732 8 : LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2733 : << "): " << LPDiff << " != SU(" << right->NodeNum
2734 : << "): " << RPDiff << "\n");
2735 : return LPDiff > RPDiff;
2736 : }
2737 2556795 :
2738 : if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2739 : bool LReduce = canEnableCoalescing(left);
2740 : bool RReduce = canEnableCoalescing(right);
2741 0 : if (LReduce && !RReduce) return false;
2742 : if (RReduce && !LReduce) return true;
2743 352 : }
2744 :
2745 108202 : if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2746 : LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2747 : << " != SU(" << right->NodeNum << "): " << RLiveUses
2748 : << "\n");
2749 0 : return LLiveUses < RLiveUses;
2750 : }
2751 0 :
2752 : if (!DisableSchedStalls) {
2753 0 : bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2754 : bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2755 : if (LStall != RStall)
2756 : return left->getHeight() > right->getHeight();
2757 : }
2758 0 :
2759 0 : if (!DisableSchedCriticalPath) {
2760 : int spread = (int)left->getDepth() - (int)right->getDepth();
2761 0 : if (std::abs(spread) > MaxReorderWindow) {
2762 : LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2763 : << left->getDepth() << " != SU(" << right->NodeNum
2764 : << "): " << right->getDepth() << "\n");
2765 : return left->getDepth() < right->getDepth();
2766 : }
2767 : }
2768 :
2769 : if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2770 : int spread = (int)left->getHeight() - (int)right->getHeight();
2771 : if (std::abs(spread) > MaxReorderWindow)
2772 : return left->getHeight() > right->getHeight();
2773 : }
2774 :
2775 : return BURRSort(left, right, SPQ);
2776 : }
2777 :
2778 : void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2779 : SUnits = &sunits;
2780 : // Add pseudo dependency edges for two-address nodes.
2781 : if (!Disable2AddrHack)
2782 : AddPseudoTwoAddrDeps();
2783 0 : // Reroute edges to nodes with multiple uses.
2784 : if (!TracksRegPressure && !SrcOrder)
2785 0 : PrescheduleNodesWithMultipleUses();
2786 : // Calculate node priorities.
2787 0 : CalculateSethiUllmanNumbers();
2788 :
2789 0 : // For single block loops, mark nodes that look like canonical IV increments.
2790 : if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2791 0 : for (SUnit &SU : sunits)
2792 0 : initVRegCycle(&SU);
2793 : }
2794 :
2795 0 : //===----------------------------------------------------------------------===//
2796 : // Preschedule for Register Pressure
2797 : //===----------------------------------------------------------------------===//
2798 0 :
2799 : bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2800 0 : if (SU->isTwoAddress) {
2801 : unsigned Opc = SU->getNode()->getMachineOpcode();
2802 : const MCInstrDesc &MCID = TII->get(Opc);
2803 0 : unsigned NumRes = MCID.getNumDefs();
2804 : unsigned NumOps = MCID.getNumOperands() - NumRes;
2805 0 : for (unsigned i = 0; i != NumOps; ++i) {
2806 0 : if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2807 0 : SDNode *DU = SU->getNode()->getOperand(i).getNode();
2808 0 : if (DU->getNodeId() != -1 &&
2809 : Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2810 0 : return true;
2811 : }
2812 : }
2813 : }
2814 : return false;
2815 : }
2816 :
2817 : /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2818 : /// successor's explicit physregs whose definition can reach DepSU.
2819 : /// i.e. DepSU should not be scheduled above SU.
2820 : static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2821 : ScheduleDAGRRList *scheduleDAG,
2822 : const TargetInstrInfo *TII,
2823 : const TargetRegisterInfo *TRI) {
2824 : const MCPhysReg *ImpDefs
2825 33420 : = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2826 33420 : const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2827 33420 : if(!ImpDefs && !RegMask)
2828 : return false;
2829 :
2830 : for (const SDep &Succ : SU->Succs) {
2831 : SUnit *SuccSU = Succ.getSUnit();
2832 52208 : for (const SDep &SuccPred : SuccSU->Preds) {
2833 26104 : if (!SuccPred.isAssignedRegDep())
2834 : continue;
2835 :
2836 : if (RegMask &&
2837 : MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2838 : scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2839 26104 : return true;
2840 :
2841 : if (ImpDefs)
2842 0 : for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2843 : // Return true if SU clobbers this physical register use and the
2844 : // definition of the register can reach DepSU. IsReachable queries
2845 : // a topological forward sort of the DAG (following the successors).
2846 : if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2847 : scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2848 : return true;
2849 0 : }
2850 : }
2851 0 : return false;
2852 : }
2853 0 :
2854 : /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2855 0 : /// physical register defs.
2856 : static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2857 0 : const TargetInstrInfo *TII,
2858 : const TargetRegisterInfo *TRI) {
2859 0 : SDNode *N = SuccSU->getNode();
2860 0 : unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2861 0 : const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2862 : assert(ImpDefs && "Caller should check hasPhysRegDefs");
2863 0 : for (const SDNode *SUNode = SU->getNode(); SUNode;
2864 : SUNode = SUNode->getGluedNode()) {
2865 : if (!SUNode->isMachineOpcode())
2866 : continue;
2867 0 : const MCPhysReg *SUImpDefs =
2868 : TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2869 : const uint32_t *SURegMask = getNodeRegMask(SUNode);
2870 0 : if (!SUImpDefs && !SURegMask)
2871 0 : continue;
2872 0 : for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2873 0 : MVT VT = N->getSimpleValueType(i);
2874 0 : if (VT == MVT::Glue || VT == MVT::Other)
2875 : continue;
2876 : if (!N->hasAnyUseOfValue(i))
2877 0 : continue;
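     : // Result values beyond the explicit defs correspond, in order, to the
     : // implicit physical-register defs of SuccSU's node; Reg is the physreg
     : // produced by value i.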
2878 : unsigned Reg = ImpDefs[i - NumDefs];
2879 : if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2880 : return true;
2881 0 : if (!SUImpDefs)
2882 : continue;
2883 : for (;*SUImpDefs; ++SUImpDefs) {
2884 0 : unsigned SUReg = *SUImpDefs;
2885 0 : if (TRI->regsOverlap(Reg, SUReg))
2886 0 : return true;
2887 0 : }
2888 0 : }
2889 : }
2890 : return false;
2891 0 : }
2892 0 :
2893 0 : /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2894 : /// are not handled well by the general register pressure reduction
2895 : /// heuristics. When presented with code like this:
2896 : ///
2897 0 : /// N
2898 : /// / |
2899 : /// / |
2900 : /// U store
2901 0 : /// |
2902 0 : /// ...
2903 0 : ///
2904 0 : /// the heuristics tend to push the store up, but since the
2905 : /// operand of the store has another use (U), this would increase
2906 : /// the length of that other use (the U->N edge).
2907 0 : ///
2908 : /// This function transforms code like the above to route U's
2909 : /// dependence through the store when possible, like this:
2910 1269002 : ///
2911 1269002 : /// N
2912 : /// ||
2913 1269002 : /// ||
2914 0 : /// store
2915 : /// |
2916 1269002 : /// U
2917 9613 : /// |
2918 : /// ...
2919 1269002 : ///
2920 : /// This results in the store being scheduled immediately
2921 : /// after N, which shortens the U->N live range, reducing
2922 1269002 : /// register pressure.
2923 88428 : void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2924 82971 : // Visit all the nodes in topological order, working top-down.
2925 1269001 : for (SUnit &SU : *SUnits) {
2926 : // For now, only look at nodes with no data successors, such as stores.
2927 : // These are especially important, due to the heuristics in
2928 : // getNodePriority for nodes with no data successors.
2929 : if (SU.NumSuccs != 0)
2930 : continue;
2931 0 : // For now, only look at nodes with exactly one data predecessor.
2932 0 : if (SU.NumPreds != 1)
2933 0 : continue;
2934 0 : // Avoid prescheduling copies to virtual registers, which don't behave
2935 0 : // like other nodes from the perspective of scheduling heuristics.
2936 0 : if (SDNode *N = SU.getNode())
2937 0 : if (N->getOpcode() == ISD::CopyToReg &&
2938 0 : TargetRegisterInfo::isVirtualRegister
2939 0 : (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2940 0 : continue;
2941 0 :
2942 0 : // Locate the single data predecessor.
2943 : SUnit *PredSU = nullptr;
2944 : for (const SDep &Pred : SU.Preds)
2945 : if (!Pred.isCtrl()) {
2946 : PredSU = Pred.getSUnit();
2947 : break;
2948 : }
2949 : assert(PredSU);
2950 :
2951 : // Don't rewrite edges that carry physregs, because that requires additional
2952 0 : // support infrastructure.
2953 : if (PredSU->hasPhysRegDefs)
2954 : continue;
2955 : // Short-circuit the case where SU is PredSU's only data successor.
2956 : if (PredSU->NumSuccs == 1)
2957 0 : continue;
2958 : // Avoid prescheduling to copies from virtual registers, which don't behave
2959 0 : // like other nodes from the perspective of scheduling heuristics.
2960 : if (SDNode *N = SU.getNode())
2961 : if (N->getOpcode() == ISD::CopyFromReg &&
2962 0 : TargetRegisterInfo::isVirtualRegister
2963 : (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2964 0 : continue;
2965 :
2966 : // Perform checks on the successors of PredSU.
2967 : for (const SDep &PredSucc : PredSU->Succs) {
2968 0 : SUnit *PredSuccSU = PredSucc.getSUnit();
2969 0 : if (PredSuccSU == &SU) continue;
2970 : // If PredSU has another successor with no data successors, for
2971 : // now don't attempt to choose either over the other.
2972 : if (PredSuccSU->NumSuccs == 0)
2973 0 : goto outer_loop_continue;
2974 0 : // Don't break physical register dependencies.
2975 : if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2976 : if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
2977 : goto outer_loop_continue;
2978 0 : // Don't introduce graph cycles.
2979 : if (scheduleDAG->IsReachable(&SU, PredSuccSU))
2980 : goto outer_loop_continue;
2981 : }
2982 :
2983 : // Ok, the transformation is safe and the heuristics suggest it is
2984 : // profitable. Update the graph.
2985 : LLVM_DEBUG(
2986 : dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
2987 : << PredSU->NodeNum
2988 4 : << " to guide scheduling in the presence of multiple uses\n");
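     : // Reroute every other successor of PredSU through SU: each PredSU->SuccSU
     : // edge is replaced by PredSU->SU and SU->SuccSU edges, so those users now
     : // wait for SU. The reachability check above guarantees this cannot
     : // introduce a cycle.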
2989 : for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2990 : SDep Edge = PredSU->Succs[i];
2991 4 : assert(!Edge.isAssignedRegDep());
2992 12 : SUnit *SuccSU = Edge.getSUnit();
2993 4 : if (SuccSU != &SU) {
2994 : Edge.setSUnit(PredSU);
2995 8 : scheduleDAG->RemovePred(SuccSU, Edge);
2996 : scheduleDAG->AddPred(&SU, Edge);
2997 8 : Edge.setSUnit(&SU);
2998 : scheduleDAG->AddPred(SuccSU, Edge);
2999 : --i;
3000 16 : }
3001 : }
3002 8 : outer_loop_continue:;
3003 : }
3004 12 : }
3005 :
3006 8 : /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3007 : /// it as a def&use operand, add a pseudo control edge from it to the other
3008 8 : /// node (if it won't create a cycle) so the two-address one will be scheduled
3009 : /// first (lower in the schedule). If both nodes are two-address, favor the
3010 8 : /// one that has a CopyToReg use (more likely to be a loop induction update).
3011 8 : /// If both are two-address, but one is commutable while the other is not
3012 : /// commutable, favor the one that's not commutable.
3013 4 : void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3014 : for (SUnit &SU : *SUnits) {
3015 8 : if (!SU.isTwoAddress)
3016 4 : continue;
3017 4 :
3018 : SDNode *Node = SU.getNode();
3019 : if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3020 : continue;
3021 :
3022 : bool isLiveOut = hasOnlyLiveOutUses(&SU);
3023 : unsigned Opc = Node->getMachineOpcode();
3024 : const MCInstrDesc &MCID = TII->get(Opc);
3025 : unsigned NumRes = MCID.getNumDefs();
3026 : unsigned NumOps = MCID.getNumOperands() - NumRes;
3027 : for (unsigned j = 0; j != NumOps; ++j) {
3028 : if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3029 : continue;
3030 : SDNode *DU = SU.getNode()->getOperand(j).getNode();
3031 : if (DU->getNodeId() == -1)
3032 : continue;
3033 : const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3034 : if (!DUSU)
3035 : continue;
3036 : for (const SDep &Succ : DUSU->Succs) {
3037 : if (Succ.isCtrl())
3038 : continue;
3039 : SUnit *SuccSU = Succ.getSUnit();
3040 : if (SuccSU == &SU)
3041 : continue;
3042 : // Be conservative. Ignore if nodes aren't at roughly the same
3043 : // depth and height.
3044 : if (SuccSU->getHeight() < SU.getHeight() &&
3045 : (SU.getHeight() - SuccSU->getHeight()) > 1)
3046 : continue;
3047 : // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3048 : // constrains whatever is using the copy, instead of the copy
3049 : // itself. In the case that the copy is coalesced, this
3050 : // preserves the intent of the pseudo two-address heuristics.
3051 : while (SuccSU->Succs.size() == 1 &&
3052 : SuccSU->getNode()->isMachineOpcode() &&
3053 : SuccSU->getNode()->getMachineOpcode() ==
3054 : TargetOpcode::COPY_TO_REGCLASS)
3055 9613 : SuccSU = SuccSU->Succs.front().getSUnit();
3056 : // Don't constrain non-instruction nodes.
3057 80032 : if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3058 : continue;
3059 : // Don't constrain nodes with physical register defs if the
3060 : // predecessor can clobber them.
3061 70419 : if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3062 : if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3063 : continue;
3064 28030 : }
3065 : // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3066 : // these may be coalesced away. We want them close to their uses.
3067 : unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3068 9857 : if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3069 9857 : SuccOpc == TargetOpcode::INSERT_SUBREG ||
3070 : SuccOpc == TargetOpcode::SUBREG_TO_REG)
3071 5930 : continue;
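     : // Only add the artificial edge when it cannot create a cycle and SU would
     : // not clobber a physreg use whose definition can reach SuccSU; the
     : // canClobber / live-out / commutability tests encode the tie-breaking
     : // rules from the function comment above.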
3072 : if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3073 : (!canClobber(SuccSU, DUSU) ||
3074 : (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3075 : (!SU.isCommutable && SuccSU->isCommutable)) &&
3076 7900 : !scheduleDAG->IsReachable(SuccSU, &SU)) {
3077 7900 : LLVM_DEBUG(dbgs()
3078 : << " Adding a pseudo-two-addr edge from SU #"
3079 6955 : << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3080 : scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
3081 : }
3082 : }
3083 : }
3084 : }
3085 6955 : }
3086 :
3087 : //===----------------------------------------------------------------------===//
3088 6942 : // Public Constructor Functions
3089 : //===----------------------------------------------------------------------===//
3090 :
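     : // Each factory below pairs a priority-queue flavor with a ScheduleDAGRRList;
     : // the queue is created first and then linked back to the scheduler via
     : // setScheduleDAG, since the two objects reference each other.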
3091 : ScheduleDAGSDNodes *
3092 679 : llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3093 679 : CodeGenOpt::Level OptLevel) {
3094 : const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3095 14 : const TargetInstrInfo *TII = STI.getInstrInfo();
3096 : const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3097 :
3098 : BURegReductionPriorityQueue *PQ =
3099 1188 : new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3100 : ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3101 1121 : PQ->setScheduleDAG(SD);
3102 : return SD;
3103 : }
3104 706 :
3105 : ScheduleDAGSDNodes *
3106 : llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3107 124 : CodeGenOpt::Level OptLevel) {
3108 4 : const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3109 : const TargetInstrInfo *TII = STI.getInstrInfo();
3110 : const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3111 120 :
3112 : SrcRegReductionPriorityQueue *PQ =
3113 : new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3114 : ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3115 : PQ->setScheduleDAG(SD);
3116 : return SD;
3117 : }
3118 :
3119 : ScheduleDAGSDNodes *
3120 : llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3121 213 : CodeGenOpt::Level OptLevel) {
3122 146 : const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3123 : const TargetInstrInfo *TII = STI.getInstrInfo();
3124 : const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3125 146 : const TargetLowering *TLI = IS->TLI;
3126 :
3127 79 : HybridBURRPriorityQueue *PQ =
3128 79 : new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3129 :
3130 79 : ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3131 79 : PQ->setScheduleDAG(SD);
3132 : return SD;
3133 : }
3134 70419 :
3135 : ScheduleDAGSDNodes *
3136 9613 : llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3137 : CodeGenOpt::Level OptLevel) {
3138 : const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3139 : const TargetInstrInfo *TII = STI.getInstrInfo();
3140 : const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3141 : const TargetLowering *TLI = IS->TLI;
3142 :
3143 : ILPBURRPriorityQueue *PQ =
3144 : new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3145 0 : ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3146 0 : PQ->setScheduleDAG(SD);
3147 0 : return SD;
3148 : }