LLVM 20.0.0git
ScheduleDAGRRList.cpp
Go to the documentation of this file.
1//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements bottom-up and top-down register pressure reduction list
10// schedulers, using standard algorithms. The basic approach uses a priority
11// queue of available nodes to schedule. One at a time, nodes are taken from
12// the priority queue (thus in priority order), checked for legality to
13// schedule, and emitted if legal.
14//
15//===----------------------------------------------------------------------===//
16
17#include "ScheduleDAGSDNodes.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
39#include "llvm/Config/llvm-config.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <cstdlib>
54#include <iterator>
55#include <limits>
56#include <memory>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "pre-RA-sched"
63
64STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65STATISTIC(NumUnfolds, "Number of nodes unfolded");
66STATISTIC(NumDups, "Number of duplicated nodes");
67STATISTIC(NumPRCopies, "Number of physical register copies");
68
71 "Bottom-up register reduction list scheduling",
73
76 "Similar to list-burr but schedules in source "
77 "order when possible",
79
82 "Bottom-up register pressure aware list scheduling "
83 "which tries to balance latency and register pressure",
85
88 "Bottom-up register pressure aware list scheduling "
89 "which tries to balance ILP and register pressure",
91
93 "disable-sched-cycles", cl::Hidden, cl::init(false),
94 cl::desc("Disable cycle-level precision during preRA scheduling"));
95
96// Temporary sched=list-ilp flags until the heuristics are robust.
97// Some options are also available under sched=list-hybrid.
99 "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100 cl::desc("Disable regpressure priority in sched=list-ilp"));
102 "disable-sched-live-uses", cl::Hidden, cl::init(true),
103 cl::desc("Disable live use priority in sched=list-ilp"));
105 "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106 cl::desc("Disable virtual register cycle interference checks"));
108 "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109 cl::desc("Disable physreg def-use affinity"));
111 "disable-sched-stalls", cl::Hidden, cl::init(true),
112 cl::desc("Disable no-stall priority in sched=list-ilp"));
114 "disable-sched-critical-path", cl::Hidden, cl::init(false),
115 cl::desc("Disable critical path priority in sched=list-ilp"));
117 "disable-sched-height", cl::Hidden, cl::init(false),
118 cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 "disable-2addr-hack", cl::Hidden, cl::init(true),
121 cl::desc("Disable scheduler's two-address hack"));
122
124 "max-sched-reorder", cl::Hidden, cl::init(6),
125 cl::desc("Number of instructions to allow ahead of the critical path "
126 "in sched=list-ilp"));
127
129 "sched-avg-ipc", cl::Hidden, cl::init(1),
130 cl::desc("Average inst/cycle whan no target itinerary exists."));
131
132namespace {
133
134//===----------------------------------------------------------------------===//
135/// ScheduleDAGRRList - The actual register reduction list scheduler
136/// implementation. This supports both top-down and bottom-up scheduling.
137///
138class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139private:
140 /// NeedLatency - True if the scheduler will make use of latency information.
141 bool NeedLatency;
142
143 /// AvailableQueue - The priority queue to use for the available SUnits.
144 SchedulingPriorityQueue *AvailableQueue;
145
146 /// PendingQueue - This contains all of the instructions whose operands have
147 /// been issued, but their results are not ready yet (due to the latency of
148 /// the operation). Once the operands becomes available, the instruction is
149 /// added to the AvailableQueue.
150 std::vector<SUnit *> PendingQueue;
151
152 /// HazardRec - The hazard recognizer to use.
153 ScheduleHazardRecognizer *HazardRec;
154
155 /// CurCycle - The current scheduler state corresponds to this cycle.
156 unsigned CurCycle = 0;
157
158 /// MinAvailableCycle - Cycle of the soonest available instruction.
159 unsigned MinAvailableCycle = ~0u;
160
161 /// IssueCount - Count instructions issued in this cycle
162 /// Currently valid only for bottom-up scheduling.
163 unsigned IssueCount = 0u;
164
165 /// LiveRegDefs - A set of physical registers and their definition
166 /// that are "live". These nodes must be scheduled before any other nodes that
167 /// modifies the registers can be scheduled.
168 unsigned NumLiveRegs = 0u;
169 std::unique_ptr<SUnit*[]> LiveRegDefs;
170 std::unique_ptr<SUnit*[]> LiveRegGens;
171
172 // Collect interferences between physical register use/defs.
173 // Each interference is an SUnit and set of physical registers.
174 SmallVector<SUnit*, 4> Interferences;
175
177
178 LRegsMapT LRegsMap;
179
180 /// Topo - A topological ordering for SUnits which permits fast IsReachable
181 /// and similar queries.
183
184 // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185 // DAG crawling.
186 SmallDenseMap<SUnit *, SUnit *, 16> CallSeqEndForStart;
187
188public:
189 ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190 SchedulingPriorityQueue *availqueue,
191 CodeGenOptLevel OptLevel)
192 : ScheduleDAGSDNodes(mf), NeedLatency(needlatency),
193 AvailableQueue(availqueue), Topo(SUnits, nullptr) {
194 const TargetSubtargetInfo &STI = mf.getSubtarget();
195 if (DisableSchedCycles || !NeedLatency)
196 HazardRec = new ScheduleHazardRecognizer();
197 else
198 HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
199 }
200
201 ~ScheduleDAGRRList() override {
202 delete HazardRec;
203 delete AvailableQueue;
204 }
205
206 void Schedule() override;
207
208 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
209
210 /// IsReachable - Checks if SU is reachable from TargetSU.
211 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
212 return Topo.IsReachable(SU, TargetSU);
213 }
214
215 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
216 /// create a cycle.
217 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
218 return Topo.WillCreateCycle(SU, TargetSU);
219 }
220
221 /// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU.
222 /// This returns true if this is a new predecessor.
223 /// Does *NOT* update the topological ordering! It just queues an update.
224 void AddPredQueued(SUnit *SU, const SDep &D) {
225 Topo.AddPredQueued(SU, D.getSUnit());
226 SU->addPred(D);
227 }
228
229 /// AddPred - adds a predecessor edge to SUnit SU.
230 /// This returns true if this is a new predecessor.
231 /// Updates the topological ordering if required.
232 void AddPred(SUnit *SU, const SDep &D) {
233 Topo.AddPred(SU, D.getSUnit());
234 SU->addPred(D);
235 }
236
237 /// RemovePred - removes a predecessor edge from SUnit SU.
238 /// This returns true if an edge was removed.
239 /// Updates the topological ordering if required.
240 void RemovePred(SUnit *SU, const SDep &D) {
241 Topo.RemovePred(SU, D.getSUnit());
242 SU->removePred(D);
243 }
244
245private:
246 bool isReady(SUnit *SU) {
247 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
248 AvailableQueue->isReady(SU);
249 }
250
251 void ReleasePred(SUnit *SU, const SDep *PredEdge);
252 void ReleasePredecessors(SUnit *SU);
253 void ReleasePending();
254 void AdvanceToCycle(unsigned NextCycle);
255 void AdvancePastStalls(SUnit *SU);
256 void EmitNode(SUnit *SU);
257 void ScheduleNodeBottomUp(SUnit*);
258 void CapturePred(SDep *PredEdge);
259 void UnscheduleNodeBottomUp(SUnit*);
260 void RestoreHazardCheckerBottomUp();
261 void BacktrackBottomUp(SUnit*, SUnit*);
262 SUnit *TryUnfoldSU(SUnit *);
263 SUnit *CopyAndMoveSuccessors(SUnit*);
264 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
265 const TargetRegisterClass*,
266 const TargetRegisterClass*,
268 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
269
270 void releaseInterferences(unsigned Reg = 0);
271
272 SUnit *PickNodeToScheduleBottomUp();
273 void ListScheduleBottomUp();
274
275 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
276 SUnit *CreateNewSUnit(SDNode *N) {
277 unsigned NumSUnits = SUnits.size();
278 SUnit *NewNode = newSUnit(N);
279 // Update the topological ordering.
280 if (NewNode->NodeNum >= NumSUnits)
281 Topo.AddSUnitWithoutPredecessors(NewNode);
282 return NewNode;
283 }
284
285 /// CreateClone - Creates a new SUnit from an existing one.
286 SUnit *CreateClone(SUnit *N) {
287 unsigned NumSUnits = SUnits.size();
288 SUnit *NewNode = Clone(N);
289 // Update the topological ordering.
290 if (NewNode->NodeNum >= NumSUnits)
291 Topo.AddSUnitWithoutPredecessors(NewNode);
292 return NewNode;
293 }
294
295 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
296 /// need actual latency information but the hybrid scheduler does.
297 bool forceUnitLatencies() const override {
298 return !NeedLatency;
299 }
300};
301
302} // end anonymous namespace
303
304static constexpr unsigned RegSequenceCost = 1;
305
306/// GetCostForDef - Looks up the register class and cost for a given definition.
307/// Typically this just means looking up the representative register class,
308/// but for untyped values (MVT::Untyped) it means inspecting the node's
309/// opcode to determine what register class is being generated.
311 const TargetLowering *TLI,
312 const TargetInstrInfo *TII,
313 const TargetRegisterInfo *TRI,
314 unsigned &RegClass, unsigned &Cost,
315 const MachineFunction &MF) {
316 MVT VT = RegDefPos.GetValue();
317
318 // Special handling for untyped values. These values can only come from
319 // the expansion of custom DAG-to-DAG patterns.
320 if (VT == MVT::Untyped) {
321 const SDNode *Node = RegDefPos.GetNode();
322
323 // Special handling for CopyFromReg of untyped values.
324 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
325 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
326 const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
327 RegClass = RC->getID();
328 Cost = 1;
329 return;
330 }
331
332 unsigned Opcode = Node->getMachineOpcode();
333 if (Opcode == TargetOpcode::REG_SEQUENCE) {
334 unsigned DstRCIdx = Node->getConstantOperandVal(0);
335 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
336 RegClass = RC->getID();
338 return;
339 }
340
341 unsigned Idx = RegDefPos.GetIdx();
342 const MCInstrDesc &Desc = TII->get(Opcode);
343 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
344 assert(RC && "Not a valid register class");
345 RegClass = RC->getID();
346 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
347 // better way to determine it.
348 Cost = 1;
349 } else {
350 RegClass = TLI->getRepRegClassFor(VT)->getID();
351 Cost = TLI->getRepRegClassCostFor(VT);
352 }
353}
354
355/// Schedule - Schedule the DAG using list scheduling.
356void ScheduleDAGRRList::Schedule() {
357 LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
358 << " '" << BB->getName() << "' **********\n");
359
360 CurCycle = 0;
361 IssueCount = 0;
362 MinAvailableCycle =
363 DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
364 NumLiveRegs = 0;
365 // Allocate slots for each physical register, plus one for a special register
366 // to track the virtual resource of a calling sequence.
367 LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
368 LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
369 CallSeqEndForStart.clear();
370 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
371
372 // Build the scheduling graph.
373 BuildSchedGraph(nullptr);
374
375 LLVM_DEBUG(dump());
376 Topo.MarkDirty();
377
378 AvailableQueue->initNodes(SUnits);
379
380 HazardRec->Reset();
381
382 // Execute the actual scheduling loop.
383 ListScheduleBottomUp();
384
385 AvailableQueue->releaseState();
386
387 LLVM_DEBUG({
388 dbgs() << "*** Final schedule ***\n";
389 dumpSchedule();
390 dbgs() << '\n';
391 });
392}
393
394//===----------------------------------------------------------------------===//
395// Bottom-Up Scheduling
396//===----------------------------------------------------------------------===//
397
398/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
399/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
400void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
401 SUnit *PredSU = PredEdge->getSUnit();
402
403#ifndef NDEBUG
404 if (PredSU->NumSuccsLeft == 0) {
405 dbgs() << "*** Scheduling failed! ***\n";
406 dumpNode(*PredSU);
407 dbgs() << " has been released too many times!\n";
408 llvm_unreachable(nullptr);
409 }
410#endif
411 --PredSU->NumSuccsLeft;
412
413 if (!forceUnitLatencies()) {
414 // Updating predecessor's height. This is now the cycle when the
415 // predecessor can be scheduled without causing a pipeline stall.
416 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
417 }
418
419 // If all the node's successors are scheduled, this node is ready
420 // to be scheduled. Ignore the special EntrySU node.
421 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
422 PredSU->isAvailable = true;
423
424 unsigned Height = PredSU->getHeight();
425 if (Height < MinAvailableCycle)
426 MinAvailableCycle = Height;
427
428 if (isReady(PredSU)) {
429 AvailableQueue->push(PredSU);
430 }
431 // CapturePred and others may have left the node in the pending queue, avoid
432 // adding it twice.
433 else if (!PredSU->isPending) {
434 PredSU->isPending = true;
435 PendingQueue.push_back(PredSU);
436 }
437 }
438}
439
440/// IsChainDependent - Test if Outer is reachable from Inner through
441/// chain dependencies.
442static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
443 unsigned NestLevel,
444 const TargetInstrInfo *TII) {
445 SDNode *N = Outer;
446 while (true) {
447 if (N == Inner)
448 return true;
449 // For a TokenFactor, examine each operand. There may be multiple ways
450 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
451 // most nesting in order to ensure that we find the corresponding match.
452 if (N->getOpcode() == ISD::TokenFactor) {
453 for (const SDValue &Op : N->op_values())
454 if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
455 return true;
456 return false;
457 }
458 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
459 if (N->isMachineOpcode()) {
460 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
461 ++NestLevel;
462 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
463 if (NestLevel == 0)
464 return false;
465 --NestLevel;
466 }
467 }
468 // Otherwise, find the chain and continue climbing.
469 for (const SDValue &Op : N->op_values())
470 if (Op.getValueType() == MVT::Other) {
471 N = Op.getNode();
472 goto found_chain_operand;
473 }
474 return false;
475 found_chain_operand:;
476 if (N->getOpcode() == ISD::EntryToken)
477 return false;
478 }
479}
480
481/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
482/// the corresponding (lowered) CALLSEQ_BEGIN node.
483///
484/// NestLevel and MaxNested are used in recursion to indcate the current level
485/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
486/// level seen so far.
487///
488/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
489/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
490static SDNode *
491FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
492 const TargetInstrInfo *TII) {
493 while (true) {
494 // For a TokenFactor, examine each operand. There may be multiple ways
495 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
496 // most nesting in order to ensure that we find the corresponding match.
497 if (N->getOpcode() == ISD::TokenFactor) {
498 SDNode *Best = nullptr;
499 unsigned BestMaxNest = MaxNest;
500 for (const SDValue &Op : N->op_values()) {
501 unsigned MyNestLevel = NestLevel;
502 unsigned MyMaxNest = MaxNest;
503 if (SDNode *New = FindCallSeqStart(Op.getNode(),
504 MyNestLevel, MyMaxNest, TII))
505 if (!Best || (MyMaxNest > BestMaxNest)) {
506 Best = New;
507 BestMaxNest = MyMaxNest;
508 }
509 }
510 assert(Best);
511 MaxNest = BestMaxNest;
512 return Best;
513 }
514 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
515 if (N->isMachineOpcode()) {
516 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
517 ++NestLevel;
518 MaxNest = std::max(MaxNest, NestLevel);
519 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
520 assert(NestLevel != 0);
521 --NestLevel;
522 if (NestLevel == 0)
523 return N;
524 }
525 }
526 // Otherwise, find the chain and continue climbing.
527 for (const SDValue &Op : N->op_values())
528 if (Op.getValueType() == MVT::Other) {
529 N = Op.getNode();
530 goto found_chain_operand;
531 }
532 return nullptr;
533 found_chain_operand:;
534 if (N->getOpcode() == ISD::EntryToken)
535 return nullptr;
536 }
537}
538
539/// Call ReleasePred for each predecessor, then update register live def/gen.
540/// Always update LiveRegDefs for a register dependence even if the current SU
541/// also defines the register. This effectively create one large live range
542/// across a sequence of two-address node. This is important because the
543/// entire chain must be scheduled together. Example:
544///
545/// flags = (3) add
546/// flags = (2) addc flags
547/// flags = (1) addc flags
548///
549/// results in
550///
551/// LiveRegDefs[flags] = 3
552/// LiveRegGens[flags] = 1
553///
554/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
555/// interference on flags.
556void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
557 // Bottom up: release predecessors
558 for (SDep &Pred : SU->Preds) {
559 ReleasePred(SU, &Pred);
560 if (Pred.isAssignedRegDep()) {
561 // This is a physical register dependency and it's impossible or
562 // expensive to copy the register. Make sure nothing that can
563 // clobber the register is scheduled between the predecessor and
564 // this node.
565 SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
566 assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
567 "interference on register dependence");
568 LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
569 if (!LiveRegGens[Pred.getReg()]) {
570 ++NumLiveRegs;
571 LiveRegGens[Pred.getReg()] = SU;
572 }
573 }
574 }
575
576 // If we're scheduling a lowered CALLSEQ_END, find the corresponding
577 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
578 // these nodes, to prevent other calls from being interscheduled with them.
579 unsigned CallResource = TRI->getNumRegs();
580 if (!LiveRegDefs[CallResource])
581 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
582 if (Node->isMachineOpcode() &&
583 Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
584 unsigned NestLevel = 0;
585 unsigned MaxNest = 0;
586 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
587 assert(N && "Must find call sequence start");
588
589 SUnit *Def = &SUnits[N->getNodeId()];
590 CallSeqEndForStart[Def] = SU;
591
592 ++NumLiveRegs;
593 LiveRegDefs[CallResource] = Def;
594 LiveRegGens[CallResource] = SU;
595 break;
596 }
597}
598
599/// Check to see if any of the pending instructions are ready to issue. If
600/// so, add them to the available queue.
601void ScheduleDAGRRList::ReleasePending() {
602 if (DisableSchedCycles) {
603 assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
604 return;
605 }
606
607 // If the available queue is empty, it is safe to reset MinAvailableCycle.
608 if (AvailableQueue->empty())
609 MinAvailableCycle = std::numeric_limits<unsigned>::max();
610
611 // Check to see if any of the pending instructions are ready to issue. If
612 // so, add them to the available queue.
613 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
614 unsigned ReadyCycle = PendingQueue[i]->getHeight();
615 if (ReadyCycle < MinAvailableCycle)
616 MinAvailableCycle = ReadyCycle;
617
618 if (PendingQueue[i]->isAvailable) {
619 if (!isReady(PendingQueue[i]))
620 continue;
621 AvailableQueue->push(PendingQueue[i]);
622 }
623 PendingQueue[i]->isPending = false;
624 PendingQueue[i] = PendingQueue.back();
625 PendingQueue.pop_back();
626 --i; --e;
627 }
628}
629
630/// Move the scheduler state forward by the specified number of Cycles.
631void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
632 if (NextCycle <= CurCycle)
633 return;
634
635 IssueCount = 0;
636 AvailableQueue->setCurCycle(NextCycle);
637 if (!HazardRec->isEnabled()) {
638 // Bypass lots of virtual calls in case of long latency.
639 CurCycle = NextCycle;
640 }
641 else {
642 for (; CurCycle != NextCycle; ++CurCycle) {
643 HazardRec->RecedeCycle();
644 }
645 }
646 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
647 // available Q to release pending nodes at least once before popping.
648 ReleasePending();
649}
650
651/// Move the scheduler state forward until the specified node's dependents are
652/// ready and can be scheduled with no resource conflicts.
653void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
655 return;
656
657 // FIXME: Nodes such as CopyFromReg probably should not advance the current
658 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
659 // has predecessors the cycle will be advanced when they are scheduled.
660 // But given the crude nature of modeling latency though such nodes, we
661 // currently need to treat these nodes like real instructions.
662 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
663
664 unsigned ReadyCycle = SU->getHeight();
665
666 // Bump CurCycle to account for latency. We assume the latency of other
667 // available instructions may be hidden by the stall (not a full pipe stall).
668 // This updates the hazard recognizer's cycle before reserving resources for
669 // this instruction.
670 AdvanceToCycle(ReadyCycle);
671
672 // Calls are scheduled in their preceding cycle, so don't conflict with
673 // hazards from instructions after the call. EmitNode will reset the
674 // scoreboard state before emitting the call.
675 if (SU->isCall)
676 return;
677
678 // FIXME: For resource conflicts in very long non-pipelined stages, we
679 // should probably skip ahead here to avoid useless scoreboard checks.
680 int Stalls = 0;
681 while (true) {
683 HazardRec->getHazardType(SU, -Stalls);
684
686 break;
687
688 ++Stalls;
689 }
690 AdvanceToCycle(CurCycle + Stalls);
691}
692
693/// Record this SUnit in the HazardRecognizer.
694/// Does not update CurCycle.
695void ScheduleDAGRRList::EmitNode(SUnit *SU) {
696 if (!HazardRec->isEnabled())
697 return;
698
699 // Check for phys reg copy.
700 if (!SU->getNode())
701 return;
702
703 switch (SU->getNode()->getOpcode()) {
704 default:
705 assert(SU->getNode()->isMachineOpcode() &&
706 "This target-independent node should not be scheduled.");
707 break;
709 case ISD::TokenFactor:
712 case ISD::CopyToReg:
713 case ISD::CopyFromReg:
714 case ISD::EH_LABEL:
715 // Noops don't affect the scoreboard state. Copies are likely to be
716 // removed.
717 return;
718 case ISD::INLINEASM:
720 // For inline asm, clear the pipeline state.
721 HazardRec->Reset();
722 return;
723 }
724 if (SU->isCall) {
725 // Calls are scheduled with their preceding instructions. For bottom-up
726 // scheduling, clear the pipeline state before emitting.
727 HazardRec->Reset();
728 }
729
730 HazardRec->EmitInstruction(SU);
731}
732
733static void resetVRegCycle(SUnit *SU);
734
735/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
736/// count of its predecessors. If a predecessor pending count is zero, add it to
737/// the Available queue.
738void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
739 LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
740 LLVM_DEBUG(dumpNode(*SU));
741
742#ifndef NDEBUG
743 if (CurCycle < SU->getHeight())
744 LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
745 << "] pipeline stall!\n");
746#endif
747
748 // FIXME: Do not modify node height. It may interfere with
749 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
750 // node its ready cycle can aid heuristics, and after scheduling it can
751 // indicate the scheduled cycle.
752 SU->setHeightToAtLeast(CurCycle);
753
754 // Reserve resources for the scheduled instruction.
755 EmitNode(SU);
756
757 Sequence.push_back(SU);
758
759 AvailableQueue->scheduledNode(SU);
760
761 // If HazardRec is disabled, and each inst counts as one cycle, then
762 // advance CurCycle before ReleasePredecessors to avoid useless pushes to
763 // PendingQueue for schedulers that implement HasReadyFilter.
764 if (!HazardRec->isEnabled() && AvgIPC < 2)
765 AdvanceToCycle(CurCycle + 1);
766
767 // Update liveness of predecessors before successors to avoid treating a
768 // two-address node as a live range def.
769 ReleasePredecessors(SU);
770
771 // Release all the implicit physical register defs that are live.
772 for (SDep &Succ : SU->Succs) {
773 // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
774 if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
775 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
776 --NumLiveRegs;
777 LiveRegDefs[Succ.getReg()] = nullptr;
778 LiveRegGens[Succ.getReg()] = nullptr;
779 releaseInterferences(Succ.getReg());
780 }
781 }
782 // Release the special call resource dependence, if this is the beginning
783 // of a call.
784 unsigned CallResource = TRI->getNumRegs();
785 if (LiveRegDefs[CallResource] == SU)
786 for (const SDNode *SUNode = SU->getNode(); SUNode;
787 SUNode = SUNode->getGluedNode()) {
788 if (SUNode->isMachineOpcode() &&
789 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
790 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
791 --NumLiveRegs;
792 LiveRegDefs[CallResource] = nullptr;
793 LiveRegGens[CallResource] = nullptr;
794 releaseInterferences(CallResource);
795 }
796 }
797
798 resetVRegCycle(SU);
799
800 SU->isScheduled = true;
801
802 // Conditions under which the scheduler should eagerly advance the cycle:
803 // (1) No available instructions
804 // (2) All pipelines full, so available instructions must have hazards.
805 //
806 // If HazardRec is disabled, the cycle was pre-advanced before calling
807 // ReleasePredecessors. In that case, IssueCount should remain 0.
808 //
809 // Check AvailableQueue after ReleasePredecessors in case of zero latency.
810 if (HazardRec->isEnabled() || AvgIPC > 1) {
811 if (SU->getNode() && SU->getNode()->isMachineOpcode())
812 ++IssueCount;
813 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
814 || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
815 AdvanceToCycle(CurCycle + 1);
816 }
817}
818
819/// CapturePred - This does the opposite of ReleasePred. Since SU is being
820/// unscheduled, increase the succ left count of its predecessors. Remove
821/// them from AvailableQueue if necessary.
822void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
823 SUnit *PredSU = PredEdge->getSUnit();
824 if (PredSU->isAvailable) {
825 PredSU->isAvailable = false;
826 if (!PredSU->isPending)
827 AvailableQueue->remove(PredSU);
828 }
829
830 assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
831 "NumSuccsLeft will overflow!");
832 ++PredSU->NumSuccsLeft;
833}
834
835/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
836/// its predecessor states to reflect the change.
837void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
838 LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
839 LLVM_DEBUG(dumpNode(*SU));
840
841 for (SDep &Pred : SU->Preds) {
842 CapturePred(&Pred);
843 if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
844 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
845 assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
846 "Physical register dependency violated?");
847 --NumLiveRegs;
848 LiveRegDefs[Pred.getReg()] = nullptr;
849 LiveRegGens[Pred.getReg()] = nullptr;
850 releaseInterferences(Pred.getReg());
851 }
852 }
853
854 // Reclaim the special call resource dependence, if this is the beginning
855 // of a call.
856 unsigned CallResource = TRI->getNumRegs();
857 for (const SDNode *SUNode = SU->getNode(); SUNode;
858 SUNode = SUNode->getGluedNode()) {
859 if (SUNode->isMachineOpcode() &&
860 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
861 SUnit *SeqEnd = CallSeqEndForStart[SU];
862 assert(SeqEnd && "Call sequence start/end must be known");
863 assert(!LiveRegDefs[CallResource]);
864 assert(!LiveRegGens[CallResource]);
865 ++NumLiveRegs;
866 LiveRegDefs[CallResource] = SU;
867 LiveRegGens[CallResource] = SeqEnd;
868 }
869 }
870
871 // Release the special call resource dependence, if this is the end
872 // of a call.
873 if (LiveRegGens[CallResource] == SU)
874 for (const SDNode *SUNode = SU->getNode(); SUNode;
875 SUNode = SUNode->getGluedNode()) {
876 if (SUNode->isMachineOpcode() &&
877 SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
878 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
879 assert(LiveRegDefs[CallResource]);
880 assert(LiveRegGens[CallResource]);
881 --NumLiveRegs;
882 LiveRegDefs[CallResource] = nullptr;
883 LiveRegGens[CallResource] = nullptr;
884 releaseInterferences(CallResource);
885 }
886 }
887
888 for (auto &Succ : SU->Succs) {
889 if (Succ.isAssignedRegDep()) {
890 auto Reg = Succ.getReg();
891 if (!LiveRegDefs[Reg])
892 ++NumLiveRegs;
893 // This becomes the nearest def. Note that an earlier def may still be
894 // pending if this is a two-address node.
895 LiveRegDefs[Reg] = SU;
896
897 // Update LiveRegGen only if was empty before this unscheduling.
898 // This is to avoid incorrect updating LiveRegGen set in previous run.
899 if (!LiveRegGens[Reg]) {
900 // Find the successor with the lowest height.
901 LiveRegGens[Reg] = Succ.getSUnit();
902 for (auto &Succ2 : SU->Succs) {
903 if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
904 Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
905 LiveRegGens[Reg] = Succ2.getSUnit();
906 }
907 }
908 }
909 }
910 if (SU->getHeight() < MinAvailableCycle)
911 MinAvailableCycle = SU->getHeight();
912
913 SU->setHeightDirty();
914 SU->isScheduled = false;
915 SU->isAvailable = true;
916 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
917 // Don't make available until backtracking is complete.
918 SU->isPending = true;
919 PendingQueue.push_back(SU);
920 }
921 else {
922 AvailableQueue->push(SU);
923 }
924 AvailableQueue->unscheduledNode(SU);
925}
926
927/// After backtracking, the hazard checker needs to be restored to a state
928/// corresponding the current cycle.
929void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
930 HazardRec->Reset();
931
932 unsigned LookAhead = std::min((unsigned)Sequence.size(),
933 HazardRec->getMaxLookAhead());
934 if (LookAhead == 0)
935 return;
936
937 std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
938 unsigned HazardCycle = (*I)->getHeight();
939 for (auto E = Sequence.end(); I != E; ++I) {
940 SUnit *SU = *I;
941 for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
942 HazardRec->RecedeCycle();
943 }
944 EmitNode(SU);
945 }
946}
947
948/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
949/// BTCycle in order to schedule a specific node.
950void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
951 SUnit *OldSU = Sequence.back();
952 while (true) {
953 Sequence.pop_back();
954 // FIXME: use ready cycle instead of height
955 CurCycle = OldSU->getHeight();
956 UnscheduleNodeBottomUp(OldSU);
957 AvailableQueue->setCurCycle(CurCycle);
958 if (OldSU == BtSU)
959 break;
960 OldSU = Sequence.back();
961 }
962
963 assert(!SU->isSucc(OldSU) && "Something is wrong!");
964
965 RestoreHazardCheckerBottomUp();
966
967 ReleasePending();
968
969 ++NumBacktracks;
970}
971
972static bool isOperandOf(const SUnit *SU, SDNode *N) {
973 for (const SDNode *SUNode = SU->getNode(); SUNode;
974 SUNode = SUNode->getGluedNode()) {
975 if (SUNode->isOperandOf(N))
976 return true;
977 }
978 return false;
979}
980
981/// TryUnfold - Attempt to unfold
982SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
983 SDNode *N = SU->getNode();
984 // Use while over if to ease fall through.
986 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
987 return nullptr;
988
989 assert(NewNodes.size() == 2 && "Expected a load folding node!");
990
991 N = NewNodes[1];
992 SDNode *LoadNode = NewNodes[0];
993 unsigned NumVals = N->getNumValues();
994 unsigned OldNumVals = SU->getNode()->getNumValues();
995
996 // LoadNode may already exist. This can happen when there is another
997 // load from the same location and producing the same type of value
998 // but it has different alignment or volatileness.
999 bool isNewLoad = true;
1000 SUnit *LoadSU;
1001 if (LoadNode->getNodeId() != -1) {
1002 LoadSU = &SUnits[LoadNode->getNodeId()];
1003 // If LoadSU has already been scheduled, we should clone it but
1004 // this would negate the benefit to unfolding so just return SU.
1005 if (LoadSU->isScheduled)
1006 return SU;
1007 isNewLoad = false;
1008 } else {
1009 LoadSU = CreateNewSUnit(LoadNode);
1010 LoadNode->setNodeId(LoadSU->NodeNum);
1011
1012 InitNumRegDefsLeft(LoadSU);
1013 computeLatency(LoadSU);
1014 }
1015
1016 bool isNewN = true;
1017 SUnit *NewSU;
1018 // This can only happen when isNewLoad is false.
1019 if (N->getNodeId() != -1) {
1020 NewSU = &SUnits[N->getNodeId()];
1021 // If NewSU has already been scheduled, we need to clone it, but this
1022 // negates the benefit to unfolding so just return SU.
1023 if (NewSU->isScheduled) {
1024 return SU;
1025 }
1026 isNewN = false;
1027 } else {
1028 NewSU = CreateNewSUnit(N);
1029 N->setNodeId(NewSU->NodeNum);
1030
1031 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1032 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1033 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1034 NewSU->isTwoAddress = true;
1035 break;
1036 }
1037 }
1038 if (MCID.isCommutable())
1039 NewSU->isCommutable = true;
1040
1041 InitNumRegDefsLeft(NewSU);
1042 computeLatency(NewSU);
1043 }
1044
1045 LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1046
1047 // Now that we are committed to unfolding replace DAG Uses.
1048 for (unsigned i = 0; i != NumVals; ++i)
1049 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1050 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1051 SDValue(LoadNode, 1));
1052
1053 // Record all the edges to and from the old SU, by category.
1054 SmallVector<SDep, 4> ChainPreds;
1055 SmallVector<SDep, 4> ChainSuccs;
1056 SmallVector<SDep, 4> LoadPreds;
1057 SmallVector<SDep, 4> NodePreds;
1058 SmallVector<SDep, 4> NodeSuccs;
1059 for (SDep &Pred : SU->Preds) {
1060 if (Pred.isCtrl())
1061 ChainPreds.push_back(Pred);
1062 else if (isOperandOf(Pred.getSUnit(), LoadNode))
1063 LoadPreds.push_back(Pred);
1064 else
1065 NodePreds.push_back(Pred);
1066 }
1067 for (SDep &Succ : SU->Succs) {
1068 if (Succ.isCtrl())
1069 ChainSuccs.push_back(Succ);
1070 else
1071 NodeSuccs.push_back(Succ);
1072 }
1073
1074 // Now assign edges to the newly-created nodes.
1075 for (const SDep &Pred : ChainPreds) {
1076 RemovePred(SU, Pred);
1077 if (isNewLoad)
1078 AddPredQueued(LoadSU, Pred);
1079 }
1080 for (const SDep &Pred : LoadPreds) {
1081 RemovePred(SU, Pred);
1082 if (isNewLoad)
1083 AddPredQueued(LoadSU, Pred);
1084 }
1085 for (const SDep &Pred : NodePreds) {
1086 RemovePred(SU, Pred);
1087 AddPredQueued(NewSU, Pred);
1088 }
1089 for (SDep &D : NodeSuccs) {
1090 SUnit *SuccDep = D.getSUnit();
1091 D.setSUnit(SU);
1092 RemovePred(SuccDep, D);
1093 D.setSUnit(NewSU);
1094 AddPredQueued(SuccDep, D);
1095 // Balance register pressure.
1096 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1097 !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1098 --NewSU->NumRegDefsLeft;
1099 }
1100 for (SDep &D : ChainSuccs) {
1101 SUnit *SuccDep = D.getSUnit();
1102 D.setSUnit(SU);
1103 RemovePred(SuccDep, D);
1104 if (isNewLoad) {
1105 D.setSUnit(LoadSU);
1106 AddPredQueued(SuccDep, D);
1107 }
1108 }
1109
1110 // Add a data dependency to reflect that NewSU reads the value defined
1111 // by LoadSU.
1112 SDep D(LoadSU, SDep::Data, 0);
1113 D.setLatency(LoadSU->Latency);
1114 AddPredQueued(NewSU, D);
1115
1116 if (isNewLoad)
1117 AvailableQueue->addNode(LoadSU);
1118 if (isNewN)
1119 AvailableQueue->addNode(NewSU);
1120
1121 ++NumUnfolds;
1122
1123 if (NewSU->NumSuccsLeft == 0)
1124 NewSU->isAvailable = true;
1125
1126 return NewSU;
1127}
1128
1129/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1130/// successors to the newly created node.
1131SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1132 SDNode *N = SU->getNode();
1133 if (!N)
1134 return nullptr;
1135
1136 LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1137 LLVM_DEBUG(dumpNode(*SU));
1138
1139 if (N->getGluedNode() &&
1140 !TII->canCopyGluedNodeDuringSchedule(N)) {
1141 LLVM_DEBUG(
1142 dbgs()
1143 << "Giving up because it has incoming glue and the target does not "
1144 "want to copy it\n");
1145 return nullptr;
1146 }
1147
1148 SUnit *NewSU;
1149 bool TryUnfold = false;
1150 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1151 MVT VT = N->getSimpleValueType(i);
1152 if (VT == MVT::Glue) {
1153 LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1154 return nullptr;
1155 } else if (VT == MVT::Other)
1156 TryUnfold = true;
1157 }
1158 for (const SDValue &Op : N->op_values()) {
1159 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1160 if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1161 LLVM_DEBUG(
1162 dbgs() << "Giving up because it one of the operands is glue and "
1163 "the target does not want to copy it\n");
1164 return nullptr;
1165 }
1166 }
1167
1168 // If possible unfold instruction.
1169 if (TryUnfold) {
1170 SUnit *UnfoldSU = TryUnfoldSU(SU);
1171 if (!UnfoldSU)
1172 return nullptr;
1173 SU = UnfoldSU;
1174 N = SU->getNode();
1175 // If this can be scheduled don't bother duplicating and just return
1176 if (SU->NumSuccsLeft == 0)
1177 return SU;
1178 }
1179
1180 LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1181 NewSU = CreateClone(SU);
1182
1183 // New SUnit has the exact same predecessors.
1184 for (SDep &Pred : SU->Preds)
1185 if (!Pred.isArtificial())
1186 AddPredQueued(NewSU, Pred);
1187
1188 // Make sure the clone comes after the original. (InstrEmitter assumes
1189 // this ordering.)
1190 AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
1191
1192 // Only copy scheduled successors. Cut them from old node's successor
1193 // list and move them over.
1195 for (SDep &Succ : SU->Succs) {
1196 if (Succ.isArtificial())
1197 continue;
1198 SUnit *SuccSU = Succ.getSUnit();
1199 if (SuccSU->isScheduled) {
1200 SDep D = Succ;
1201 D.setSUnit(NewSU);
1202 AddPredQueued(SuccSU, D);
1203 D.setSUnit(SU);
1204 DelDeps.emplace_back(SuccSU, D);
1205 }
1206 }
1207 for (const auto &[DelSU, DelD] : DelDeps)
1208 RemovePred(DelSU, DelD);
1209
1210 AvailableQueue->updateNode(SU);
1211 AvailableQueue->addNode(NewSU);
1212
1213 ++NumDups;
1214 return NewSU;
1215}
1216
1217/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1218/// scheduled successors of the given SUnit to the last copy.
1219void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1220 const TargetRegisterClass *DestRC,
1221 const TargetRegisterClass *SrcRC,
1223 SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1224 CopyFromSU->CopySrcRC = SrcRC;
1225 CopyFromSU->CopyDstRC = DestRC;
1226
1227 SUnit *CopyToSU = CreateNewSUnit(nullptr);
1228 CopyToSU->CopySrcRC = DestRC;
1229 CopyToSU->CopyDstRC = SrcRC;
1230
1231 // Only copy scheduled successors. Cut them from old node's successor
1232 // list and move them over.
1234 for (SDep &Succ : SU->Succs) {
1235 if (Succ.isArtificial())
1236 continue;
1237 SUnit *SuccSU = Succ.getSUnit();
1238 if (SuccSU->isScheduled) {
1239 SDep D = Succ;
1240 D.setSUnit(CopyToSU);
1241 AddPredQueued(SuccSU, D);
1242 DelDeps.emplace_back(SuccSU, Succ);
1243 }
1244 else {
1245 // Avoid scheduling the def-side copy before other successors. Otherwise,
1246 // we could introduce another physreg interference on the copy and
1247 // continue inserting copies indefinitely.
1248 AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1249 }
1250 }
1251 for (const auto &[DelSU, DelD] : DelDeps)
1252 RemovePred(DelSU, DelD);
1253
1254 SDep FromDep(SU, SDep::Data, Reg);
1255 FromDep.setLatency(SU->Latency);
1256 AddPredQueued(CopyFromSU, FromDep);
1257 SDep ToDep(CopyFromSU, SDep::Data, 0);
1258 ToDep.setLatency(CopyFromSU->Latency);
1259 AddPredQueued(CopyToSU, ToDep);
1260
1261 AvailableQueue->updateNode(SU);
1262 AvailableQueue->addNode(CopyFromSU);
1263 AvailableQueue->addNode(CopyToSU);
1264 Copies.push_back(CopyFromSU);
1265 Copies.push_back(CopyToSU);
1266
1267 ++NumPRCopies;
1268}
1269
1270/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1271/// definition of the specified node.
1272/// FIXME: Move to SelectionDAG?
1273static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1274 const TargetInstrInfo *TII) {
1275 unsigned NumRes;
1276 if (N->getOpcode() == ISD::CopyFromReg) {
1277 // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1278 NumRes = 1;
1279 } else {
1280 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1281 assert(!MCID.implicit_defs().empty() &&
1282 "Physical reg def must be in implicit def list!");
1283 NumRes = MCID.getNumDefs();
1284 for (MCPhysReg ImpDef : MCID.implicit_defs()) {
1285 if (Reg == ImpDef)
1286 break;
1287 ++NumRes;
1288 }
1289 }
1290 return N->getSimpleValueType(NumRes);
1291}
1292
1293/// CheckForLiveRegDef - Return true and update live register vector if the
1294/// specified register def of the specified SUnit clobbers any "live" registers.
1295static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
1296 SmallSet<unsigned, 4> &RegAdded,
1298 const TargetRegisterInfo *TRI,
1299 const SDNode *Node = nullptr) {
1300 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1301
1302 // Check if Ref is live.
1303 if (!LiveRegDefs[*AliasI]) continue;
1304
1305 // Allow multiple uses of the same def.
1306 if (LiveRegDefs[*AliasI] == SU) continue;
1307
1308 // Allow multiple uses of same def
1309 if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
1310 continue;
1311
1312 // Add Reg to the set of interfering live regs.
1313 if (RegAdded.insert(*AliasI).second) {
1314 LRegs.push_back(*AliasI);
1315 }
1316 }
1317}
1318
1319/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1320/// by RegMask, and add them to LRegs.
1321static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1322 ArrayRef<SUnit*> LiveRegDefs,
1323 SmallSet<unsigned, 4> &RegAdded,
1325 // Look at all live registers. Skip Reg0 and the special CallResource.
1326 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1327 if (!LiveRegDefs[i]) continue;
1328 if (LiveRegDefs[i] == SU) continue;
1329 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1330 if (RegAdded.insert(i).second)
1331 LRegs.push_back(i);
1332 }
1333}
1334
1335/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1336static const uint32_t *getNodeRegMask(const SDNode *N) {
1337 for (const SDValue &Op : N->op_values())
1338 if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1339 return RegOp->getRegMask();
1340 return nullptr;
1341}
1342
1343/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1344/// scheduling of the given node to satisfy live physical register dependencies.
1345/// If the specific node is the last one that's available to schedule, do
1346/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1347bool ScheduleDAGRRList::
1348DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1349 if (NumLiveRegs == 0)
1350 return false;
1351
1352 SmallSet<unsigned, 4> RegAdded;
1353 // If this node would clobber any "live" register, then it's not ready.
1354 //
1355 // If SU is the currently live definition of the same register that it uses,
1356 // then we are free to schedule it.
1357 for (SDep &Pred : SU->Preds) {
1358 if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1359 CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1360 RegAdded, LRegs, TRI);
1361 }
1362
1363 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1364 if (Node->getOpcode() == ISD::INLINEASM ||
1365 Node->getOpcode() == ISD::INLINEASM_BR) {
1366 // Inline asm can clobber physical defs.
1367 unsigned NumOps = Node->getNumOperands();
1368 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1369 --NumOps; // Ignore the glue operand.
1370
1371 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1372 unsigned Flags = Node->getConstantOperandVal(i);
1373 const InlineAsm::Flag F(Flags);
1374 unsigned NumVals = F.getNumOperandRegisters();
1375
1376 ++i; // Skip the ID value.
1377 if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
1378 F.isClobberKind()) {
1379 // Check for def of register or earlyclobber register.
1380 for (; NumVals; --NumVals, ++i) {
1381 Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1382 if (Reg.isPhysical())
1383 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1384 }
1385 } else
1386 i += NumVals;
1387 }
1388 continue;
1389 }
1390
1391 if (Node->getOpcode() == ISD::CopyToReg) {
1392 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
1393 if (Reg.isPhysical()) {
1394 SDNode *SrcNode = Node->getOperand(2).getNode();
1395 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
1396 SrcNode);
1397 }
1398 }
1399
1400 if (!Node->isMachineOpcode())
1401 continue;
1402 // If we're in the middle of scheduling a call, don't begin scheduling
1403 // another call. Also, don't allow any physical registers to be live across
1404 // the call.
1405 if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1406 // Check the special calling-sequence resource.
1407 unsigned CallResource = TRI->getNumRegs();
1408 if (LiveRegDefs[CallResource]) {
1409 SDNode *Gen = LiveRegGens[CallResource]->getNode();
1410 while (SDNode *Glued = Gen->getGluedNode())
1411 Gen = Glued;
1412 if (!IsChainDependent(Gen, Node, 0, TII) &&
1413 RegAdded.insert(CallResource).second)
1414 LRegs.push_back(CallResource);
1415 }
1416 }
1417 if (const uint32_t *RegMask = getNodeRegMask(Node))
1418 CheckForLiveRegDefMasked(SU, RegMask,
1419 ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1420 RegAdded, LRegs);
1421
1422 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1423 if (MCID.hasOptionalDef()) {
1424 // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1425 // This operand can be either a def of CPSR, if the S bit is set; or a use
1426 // of %noreg. When the OptionalDef is set to a valid register, we need to
1427 // handle it in the same way as an ImplicitDef.
1428 for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1429 if (MCID.operands()[i].isOptionalDef()) {
1430 const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1431 Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1432 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1433 }
1434 }
1435 for (MCPhysReg Reg : MCID.implicit_defs())
1436 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1437 }
1438
1439 return !LRegs.empty();
1440}
1441
1442void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1443 // Add the nodes that aren't ready back onto the available list.
1444 for (unsigned i = Interferences.size(); i > 0; --i) {
1445 SUnit *SU = Interferences[i-1];
1446 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1447 if (Reg) {
1448 SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1449 if (!is_contained(LRegs, Reg))
1450 continue;
1451 }
1452 SU->isPending = false;
1453 // The interfering node may no longer be available due to backtracking.
1454 // Furthermore, it may have been made available again, in which case it is
1455 // now already in the AvailableQueue.
1456 if (SU->isAvailable && !SU->NodeQueueId) {
1457 LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1458 AvailableQueue->push(SU);
1459 }
1460 if (i < Interferences.size())
1461 Interferences[i-1] = Interferences.back();
1462 Interferences.pop_back();
1463 LRegsMap.erase(LRegsPos);
1464 }
1465}
1466
1467/// Return a node that can be scheduled in this cycle. Requirements:
1468/// (1) Ready: latency has been satisfied
1469/// (2) No Hazards: resources are available
1470/// (3) No Interferences: may unschedule to break register interferences.
1471SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1472 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1473 auto FindAvailableNode = [&]() {
1474 while (CurSU) {
1476 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1477 break;
1478 LLVM_DEBUG(dbgs() << " Interfering reg ";
1479 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1480 else dbgs() << printReg(LRegs[0], TRI);
1481 dbgs() << " SU #" << CurSU->NodeNum << '\n');
1482 auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
1483 if (LRegsInserted) {
1484 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1485 Interferences.push_back(CurSU);
1486 }
1487 else {
1488 assert(CurSU->isPending && "Interferences are pending");
1489 // Update the interference with current live regs.
1490 LRegsIter->second = LRegs;
1491 }
1492 CurSU = AvailableQueue->pop();
1493 }
1494 };
1495 FindAvailableNode();
1496 if (CurSU)
1497 return CurSU;
1498
1499 // We query the topological order in the loop body, so make sure outstanding
1500 // updates are applied before entering it (we only enter the loop if there
1501 // are some interferences). If we make changes to the ordering, we exit
1502 // the loop.
1503
1504 // All candidates are delayed due to live physical reg dependencies.
1505 // Try backtracking, code duplication, or inserting cross class copies
1506 // to resolve it.
1507 for (SUnit *TrySU : Interferences) {
1508 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1509
1510 // Try unscheduling up to the point where it's safe to schedule
1511 // this node.
1512 SUnit *BtSU = nullptr;
1513 unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1514 for (unsigned Reg : LRegs) {
1515 if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1516 BtSU = LiveRegGens[Reg];
1517 LiveCycle = BtSU->getHeight();
1518 }
1519 }
1520 if (!WillCreateCycle(TrySU, BtSU)) {
1521 // BacktrackBottomUp mutates Interferences!
1522 BacktrackBottomUp(TrySU, BtSU);
1523
1524 // Force the current node to be scheduled before the node that
1525 // requires the physical reg dep.
1526 if (BtSU->isAvailable) {
1527 BtSU->isAvailable = false;
1528 if (!BtSU->isPending)
1529 AvailableQueue->remove(BtSU);
1530 }
1531 LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1532 << ") to SU(" << TrySU->NodeNum << ")\n");
1533 AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1534
1535 // If one or more successors has been unscheduled, then the current
1536 // node is no longer available.
1537 if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1538 LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1539 CurSU = AvailableQueue->pop();
1540 } else {
1541 LLVM_DEBUG(dbgs() << "TrySU available\n");
1542 // Available and in AvailableQueue
1543 AvailableQueue->remove(TrySU);
1544 CurSU = TrySU;
1545 }
1546 FindAvailableNode();
1547 // Interferences has been mutated. We must break.
1548 break;
1549 }
1550 }
1551
1552 if (!CurSU) {
1553 // Can't backtrack. If it's too expensive to copy the value, then try
1554 // duplicate the nodes that produces these "too expensive to copy"
1555 // values to break the dependency. In case even that doesn't work,
1556 // insert cross class copies.
1557 // If it's not too expensive, i.e. cost != -1, issue copies.
1558 SUnit *TrySU = Interferences[0];
1559 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1560 assert(LRegs.size() == 1 && "Can't handle this yet!");
1561 unsigned Reg = LRegs[0];
1562 SUnit *LRDef = LiveRegDefs[Reg];
1563 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1564 const TargetRegisterClass *RC =
1565 TRI->getMinimalPhysRegClass(Reg, VT);
1566 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1567
1568 // If cross copy register class is the same as RC, then it must be possible
1569 // copy the value directly. Do not try duplicate the def.
1570 // If cross copy register class is not the same as RC, then it's possible to
1571 // copy the value but it require cross register class copies and it is
1572 // expensive.
1573 // If cross copy register class is null, then it's not possible to copy
1574 // the value at all.
1575 SUnit *NewDef = nullptr;
1576 if (DestRC != RC) {
1577 NewDef = CopyAndMoveSuccessors(LRDef);
1578 if (!DestRC && !NewDef)
1579 report_fatal_error("Can't handle live physical register dependency!");
1580 }
1581 if (!NewDef) {
1582 // Issue copies, these can be expensive cross register class copies.
1584 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1585 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1586 << " to SU #" << Copies.front()->NodeNum << "\n");
1587 AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1588 NewDef = Copies.back();
1589 }
1590
1591 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1592 << " to SU #" << TrySU->NodeNum << "\n");
1593 LiveRegDefs[Reg] = NewDef;
1594 AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1595 TrySU->isAvailable = false;
1596 CurSU = NewDef;
1597 }
1598 assert(CurSU && "Unable to resolve live physical register dependencies!");
1599 return CurSU;
1600}
1601
1602/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1603/// schedulers.
1604void ScheduleDAGRRList::ListScheduleBottomUp() {
1605 // Release any predecessors of the special Exit node.
1606 ReleasePredecessors(&ExitSU);
1607
1608 // Add root to Available queue.
1609 if (!SUnits.empty()) {
1610 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1611 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1612 RootSU->isAvailable = true;
1613 AvailableQueue->push(RootSU);
1614 }
1615
1616 // While Available queue is not empty, grab the node with the highest
1617 // priority. If it is not ready put it back. Schedule the node.
1618 Sequence.reserve(SUnits.size());
1619 while (!AvailableQueue->empty() || !Interferences.empty()) {
1620 LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1621 AvailableQueue->dump(this));
1622
1623 // Pick the best node to schedule taking all constraints into
1624 // consideration.
1625 SUnit *SU = PickNodeToScheduleBottomUp();
1626
1627 AdvancePastStalls(SU);
1628
1629 ScheduleNodeBottomUp(SU);
1630
1631 while (AvailableQueue->empty() && !PendingQueue.empty()) {
1632 // Advance the cycle to free resources. Skip ahead to the next ready SU.
1633 assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1634 "MinAvailableCycle uninitialized");
1635 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1636 }
1637 }
1638
1639 // Reverse the order if it is bottom up.
1640 std::reverse(Sequence.begin(), Sequence.end());
1641
1642#ifndef NDEBUG
1643 VerifyScheduledSequence(/*isBottomUp=*/true);
1644#endif
1645}
1646
1647namespace {
1648
1649class RegReductionPQBase;
1650
1651struct queue_sort {
1652 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1653};
1654
1655#ifndef NDEBUG
1656template<class SF>
1657struct reverse_sort : public queue_sort {
1658 SF &SortFunc;
1659
1660 reverse_sort(SF &sf) : SortFunc(sf) {}
1661
1662 bool operator()(SUnit* left, SUnit* right) const {
1663 // reverse left/right rather than simply !SortFunc(left, right)
1664 // to expose different paths in the comparison logic.
1665 return SortFunc(right, left);
1666 }
1667};
1668#endif // NDEBUG
1669
1670/// bu_ls_rr_sort - Priority function for bottom up register pressure
1671// reduction scheduler.
1672struct bu_ls_rr_sort : public queue_sort {
1673 enum {
1674 IsBottomUp = true,
1675 HasReadyFilter = false
1676 };
1677
1678 RegReductionPQBase *SPQ;
1679
1680 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1681
1682 bool operator()(SUnit* left, SUnit* right) const;
1683};
1684
1685// src_ls_rr_sort - Priority function for source order scheduler.
1686struct src_ls_rr_sort : public queue_sort {
1687 enum {
1688 IsBottomUp = true,
1689 HasReadyFilter = false
1690 };
1691
1692 RegReductionPQBase *SPQ;
1693
1694 src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1695
1696 bool operator()(SUnit* left, SUnit* right) const;
1697};
1698
1699// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1700struct hybrid_ls_rr_sort : public queue_sort {
1701 enum {
1702 IsBottomUp = true,
1703 HasReadyFilter = false
1704 };
1705
1706 RegReductionPQBase *SPQ;
1707
1708 hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1709
1710 bool isReady(SUnit *SU, unsigned CurCycle) const;
1711
1712 bool operator()(SUnit* left, SUnit* right) const;
1713};
1714
1715// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1716// scheduler.
1717struct ilp_ls_rr_sort : public queue_sort {
1718 enum {
1719 IsBottomUp = true,
1720 HasReadyFilter = false
1721 };
1722
1723 RegReductionPQBase *SPQ;
1724
1725 ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1726
1727 bool isReady(SUnit *SU, unsigned CurCycle) const;
1728
1729 bool operator()(SUnit* left, SUnit* right) const;
1730};
1731
1732class RegReductionPQBase : public SchedulingPriorityQueue {
1733protected:
1734 std::vector<SUnit *> Queue;
1735 unsigned CurQueueId = 0;
1736 bool TracksRegPressure;
1737 bool SrcOrder;
1738
1739 // SUnits - The SUnits for the current graph.
1740 std::vector<SUnit> *SUnits = nullptr;
1741
1742 MachineFunction &MF;
1743 const TargetInstrInfo *TII = nullptr;
1744 const TargetRegisterInfo *TRI = nullptr;
1745 const TargetLowering *TLI = nullptr;
1746 ScheduleDAGRRList *scheduleDAG = nullptr;
1747
1748 // SethiUllmanNumbers - The SethiUllman number for each node.
1749 std::vector<unsigned> SethiUllmanNumbers;
1750
1751 /// RegPressure - Tracking current reg pressure per register class.
1752 std::vector<unsigned> RegPressure;
1753
1754 /// RegLimit - Tracking the number of allocatable registers per register
1755 /// class.
1756 std::vector<unsigned> RegLimit;
1757
1758public:
1759 RegReductionPQBase(MachineFunction &mf,
1760 bool hasReadyFilter,
1761 bool tracksrp,
1762 bool srcorder,
1763 const TargetInstrInfo *tii,
1764 const TargetRegisterInfo *tri,
1765 const TargetLowering *tli)
1766 : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1767 SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1768 if (TracksRegPressure) {
1769 unsigned NumRC = TRI->getNumRegClasses();
1770 RegLimit.resize(NumRC);
1771 RegPressure.resize(NumRC);
1772 std::fill(RegLimit.begin(), RegLimit.end(), 0);
1773 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1774 for (const TargetRegisterClass *RC : TRI->regclasses())
1775 RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1776 }
1777 }
1778
1779 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1780 scheduleDAG = scheduleDag;
1781 }
1782
1783 ScheduleHazardRecognizer* getHazardRec() {
1784 return scheduleDAG->getHazardRec();
1785 }
1786
1787 void initNodes(std::vector<SUnit> &sunits) override;
1788
1789 void addNode(const SUnit *SU) override;
1790
1791 void updateNode(const SUnit *SU) override;
1792
1793 void releaseState() override {
1794 SUnits = nullptr;
1795 SethiUllmanNumbers.clear();
1796 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1797 }
1798
1799 unsigned getNodePriority(const SUnit *SU) const;
1800
1801 unsigned getNodeOrdering(const SUnit *SU) const {
1802 if (!SU->getNode()) return 0;
1803
1804 return SU->getNode()->getIROrder();
1805 }
1806
1807 bool empty() const override { return Queue.empty(); }
1808
1809 void push(SUnit *U) override {
1810 assert(!U->NodeQueueId && "Node in the queue already");
1811 U->NodeQueueId = ++CurQueueId;
1812 Queue.push_back(U);
1813 }
1814
1815 void remove(SUnit *SU) override {
1816 assert(!Queue.empty() && "Queue is empty!");
1817 assert(SU->NodeQueueId != 0 && "Not in queue!");
1818 std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1819 if (I != std::prev(Queue.end()))
1820 std::swap(*I, Queue.back());
1821 Queue.pop_back();
1822 SU->NodeQueueId = 0;
1823 }
1824
1825 bool tracksRegPressure() const override { return TracksRegPressure; }
1826
1827 void dumpRegPressure() const;
1828
1829 bool HighRegPressure(const SUnit *SU) const;
1830
1831 bool MayReduceRegPressure(SUnit *SU) const;
1832
1833 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1834
1835 void scheduledNode(SUnit *SU) override;
1836
1837 void unscheduledNode(SUnit *SU) override;
1838
1839protected:
1840 bool canClobber(const SUnit *SU, const SUnit *Op);
1841 void AddPseudoTwoAddrDeps();
1842 void PrescheduleNodesWithMultipleUses();
1843 void CalculateSethiUllmanNumbers();
1844};
1845
1846template<class SF>
1847static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1848 unsigned BestIdx = 0;
1849 // Only compute the cost for the first 1000 items in the queue, to avoid
1850 // excessive compile-times for very large queues.
1851 for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
1852 I++)
1853 if (Picker(Q[BestIdx], Q[I]))
1854 BestIdx = I;
1855 SUnit *V = Q[BestIdx];
1856 if (BestIdx + 1 != Q.size())
1857 std::swap(Q[BestIdx], Q.back());
1858 Q.pop_back();
1859 return V;
1860}
1861
1862template<class SF>
1863SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1864#ifndef NDEBUG
1865 if (DAG->StressSched) {
1866 reverse_sort<SF> RPicker(Picker);
1867 return popFromQueueImpl(Q, RPicker);
1868 }
1869#endif
1870 (void)DAG;
1871 return popFromQueueImpl(Q, Picker);
1872}
1873
1874//===----------------------------------------------------------------------===//
1875// RegReductionPriorityQueue Definition
1876//===----------------------------------------------------------------------===//
1877//
1878// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1879// to reduce register pressure.
1880//
1881template<class SF>
1882class RegReductionPriorityQueue : public RegReductionPQBase {
1883 SF Picker;
1884
1885public:
1886 RegReductionPriorityQueue(MachineFunction &mf,
1887 bool tracksrp,
1888 bool srcorder,
1889 const TargetInstrInfo *tii,
1890 const TargetRegisterInfo *tri,
1891 const TargetLowering *tli)
1892 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1893 tii, tri, tli),
1894 Picker(this) {}
1895
1896 bool isBottomUp() const override { return SF::IsBottomUp; }
1897
1898 bool isReady(SUnit *U) const override {
1899 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1900 }
1901
1902 SUnit *pop() override {
1903 if (Queue.empty()) return nullptr;
1904
1905 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1906 V->NodeQueueId = 0;
1907 return V;
1908 }
1909
1910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1911 LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1912 // Emulate pop() without clobbering NodeQueueIds.
1913 std::vector<SUnit *> DumpQueue = Queue;
1914 SF DumpPicker = Picker;
1915 while (!DumpQueue.empty()) {
1916 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1917 dbgs() << "Height " << SU->getHeight() << ": ";
1918 DAG->dumpNode(*SU);
1919 }
1920 }
1921#endif
1922};
1923
1924using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1925using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1926using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1927using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1928
1929} // end anonymous namespace
1930
1931//===----------------------------------------------------------------------===//
1932// Static Node Priority for Register Pressure Reduction
1933//===----------------------------------------------------------------------===//
1934
1935// Check for special nodes that bypass scheduling heuristics.
1936// Currently this pushes TokenFactor nodes down, but may be used for other
1937// pseudo-ops as well.
1938//
1939// Return -1 to schedule right above left, 1 for left above right.
1940// Return 0 if no bias exists.
1941static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1942 bool LSchedLow = left->isScheduleLow;
1943 bool RSchedLow = right->isScheduleLow;
1944 if (LSchedLow != RSchedLow)
1945 return LSchedLow < RSchedLow ? 1 : -1;
1946 return 0;
1947}
1948
1949/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1950/// Smaller number is the higher priority.
1951static unsigned
1952CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1953 if (SUNumbers[SU->NodeNum] != 0)
1954 return SUNumbers[SU->NodeNum];
1955
1956 // Use WorkList to avoid stack overflow on excessively large IRs.
1957 struct WorkState {
1958 WorkState(const SUnit *SU) : SU(SU) {}
1959 const SUnit *SU;
1960 unsigned PredsProcessed = 0;
1961 };
1962
1964 WorkList.push_back(SU);
1965 while (!WorkList.empty()) {
1966 auto &Temp = WorkList.back();
1967 auto *TempSU = Temp.SU;
1968 bool AllPredsKnown = true;
1969 // Try to find a non-evaluated pred and push it into the processing stack.
1970 for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1971 auto &Pred = TempSU->Preds[P];
1972 if (Pred.isCtrl()) continue; // ignore chain preds
1973 SUnit *PredSU = Pred.getSUnit();
1974 if (SUNumbers[PredSU->NodeNum] == 0) {
1975#ifndef NDEBUG
1976 // In debug mode, check that we don't have such element in the stack.
1977 for (auto It : WorkList)
1978 assert(It.SU != PredSU && "Trying to push an element twice?");
1979#endif
1980 // Next time start processing this one starting from the next pred.
1981 Temp.PredsProcessed = P + 1;
1982 WorkList.push_back(PredSU);
1983 AllPredsKnown = false;
1984 break;
1985 }
1986 }
1987
1988 if (!AllPredsKnown)
1989 continue;
1990
1991 // Once all preds are known, we can calculate the answer for this one.
1992 unsigned SethiUllmanNumber = 0;
1993 unsigned Extra = 0;
1994 for (const SDep &Pred : TempSU->Preds) {
1995 if (Pred.isCtrl()) continue; // ignore chain preds
1996 SUnit *PredSU = Pred.getSUnit();
1997 unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1998 assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1999 if (PredSethiUllman > SethiUllmanNumber) {
2000 SethiUllmanNumber = PredSethiUllman;
2001 Extra = 0;
2002 } else if (PredSethiUllman == SethiUllmanNumber)
2003 ++Extra;
2004 }
2005
2006 SethiUllmanNumber += Extra;
2007 if (SethiUllmanNumber == 0)
2008 SethiUllmanNumber = 1;
2009 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2010 WorkList.pop_back();
2011 }
2012
2013 assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2014 return SUNumbers[SU->NodeNum];
2015}
2016
2017/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2018/// scheduling units.
2019void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2020 SethiUllmanNumbers.assign(SUnits->size(), 0);
2021
2022 for (const SUnit &SU : *SUnits)
2023 CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2024}
2025
2026void RegReductionPQBase::addNode(const SUnit *SU) {
2027 unsigned SUSize = SethiUllmanNumbers.size();
2028 if (SUnits->size() > SUSize)
2029 SethiUllmanNumbers.resize(SUSize*2, 0);
2030 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2031}
2032
2033void RegReductionPQBase::updateNode(const SUnit *SU) {
2034 SethiUllmanNumbers[SU->NodeNum] = 0;
2035 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2036}
2037
2038// Lower priority means schedule further down. For bottom-up scheduling, lower
2039// priority SUs are scheduled before higher priority SUs.
2040unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2041 assert(SU->NodeNum < SethiUllmanNumbers.size());
2042 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2043 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2044 // CopyToReg should be close to its uses to facilitate coalescing and
2045 // avoid spilling.
2046 return 0;
2047 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2048 Opc == TargetOpcode::SUBREG_TO_REG ||
2049 Opc == TargetOpcode::INSERT_SUBREG)
2050 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2051 // close to their uses to facilitate coalescing.
2052 return 0;
2053 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2054 // If SU does not have a register use, i.e. it doesn't produce a value
2055 // that would be consumed (e.g. store), then it terminates a chain of
2056 // computation. Give it a large SethiUllman number so it will be
2057 // scheduled right before its predecessors that it doesn't lengthen
2058 // their live ranges.
2059 return 0xffff;
2060 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2061 // If SU does not have a register def, schedule it close to its uses
2062 // because it does not lengthen any live ranges.
2063 return 0;
2064#if 1
2065 return SethiUllmanNumbers[SU->NodeNum];
2066#else
2067 unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2068 if (SU->isCallOp) {
2069 // FIXME: This assumes all of the defs are used as call operands.
2070 int NP = (int)Priority - SU->getNode()->getNumValues();
2071 return (NP > 0) ? NP : 0;
2072 }
2073 return Priority;
2074#endif
2075}
2076
2077//===----------------------------------------------------------------------===//
2078// Register Pressure Tracking
2079//===----------------------------------------------------------------------===//
2080
2081#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2082LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2083 for (const TargetRegisterClass *RC : TRI->regclasses()) {
2084 unsigned Id = RC->getID();
2085 unsigned RP = RegPressure[Id];
2086 if (!RP) continue;
2087 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2088 << RegLimit[Id] << '\n');
2089 }
2090}
2091#endif
2092
2093bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2094 if (!TLI)
2095 return false;
2096
2097 for (const SDep &Pred : SU->Preds) {
2098 if (Pred.isCtrl())
2099 continue;
2100 SUnit *PredSU = Pred.getSUnit();
2101 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2102 // to cover the number of registers defined (they are all live).
2103 if (PredSU->NumRegDefsLeft == 0) {
2104 continue;
2105 }
2106 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2107 RegDefPos.IsValid(); RegDefPos.Advance()) {
2108 unsigned RCId, Cost;
2109 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2110
2111 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2112 return true;
2113 }
2114 }
2115 return false;
2116}
2117
2118bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2119 const SDNode *N = SU->getNode();
2120
2121 if (!N->isMachineOpcode() || !SU->NumSuccs)
2122 return false;
2123
2124 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2125 for (unsigned i = 0; i != NumDefs; ++i) {
2126 MVT VT = N->getSimpleValueType(i);
2127 if (!N->hasAnyUseOfValue(i))
2128 continue;
2129 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2130 if (RegPressure[RCId] >= RegLimit[RCId])
2131 return true;
2132 }
2133 return false;
2134}
2135
2136// Compute the register pressure contribution by this instruction by count up
2137// for uses that are not live and down for defs. Only count register classes
2138// that are already under high pressure. As a side effect, compute the number of
2139// uses of registers that are already live.
2140//
2141// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2142// so could probably be factored.
2143int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2144 LiveUses = 0;
2145 int PDiff = 0;
2146 for (const SDep &Pred : SU->Preds) {
2147 if (Pred.isCtrl())
2148 continue;
2149 SUnit *PredSU = Pred.getSUnit();
2150 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2151 // to cover the number of registers defined (they are all live).
2152 if (PredSU->NumRegDefsLeft == 0) {
2153 if (PredSU->getNode()->isMachineOpcode())
2154 ++LiveUses;
2155 continue;
2156 }
2157 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2158 RegDefPos.IsValid(); RegDefPos.Advance()) {
2159 MVT VT = RegDefPos.GetValue();
2160 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2161 if (RegPressure[RCId] >= RegLimit[RCId])
2162 ++PDiff;
2163 }
2164 }
2165 const SDNode *N = SU->getNode();
2166
2167 if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2168 return PDiff;
2169
2170 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2171 for (unsigned i = 0; i != NumDefs; ++i) {
2172 MVT VT = N->getSimpleValueType(i);
2173 if (!N->hasAnyUseOfValue(i))
2174 continue;
2175 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2176 if (RegPressure[RCId] >= RegLimit[RCId])
2177 --PDiff;
2178 }
2179 return PDiff;
2180}
2181
2182void RegReductionPQBase::scheduledNode(SUnit *SU) {
2183 if (!TracksRegPressure)
2184 return;
2185
2186 if (!SU->getNode())
2187 return;
2188
2189 for (const SDep &Pred : SU->Preds) {
2190 if (Pred.isCtrl())
2191 continue;
2192 SUnit *PredSU = Pred.getSUnit();
2193 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2194 // to cover the number of registers defined (they are all live).
2195 if (PredSU->NumRegDefsLeft == 0) {
2196 continue;
2197 }
2198 // FIXME: The ScheduleDAG currently loses information about which of a
2199 // node's values is consumed by each dependence. Consequently, if the node
2200 // defines multiple register classes, we don't know which to pressurize
2201 // here. Instead the following loop consumes the register defs in an
2202 // arbitrary order. At least it handles the common case of clustered loads
2203 // to the same class. For precise liveness, each SDep needs to indicate the
2204 // result number. But that tightly couples the ScheduleDAG with the
2205 // SelectionDAG making updates tricky. A simpler hack would be to attach a
2206 // value type or register class to SDep.
2207 //
2208 // The most important aspect of register tracking is balancing the increase
2209 // here with the reduction further below. Note that this SU may use multiple
2210 // defs in PredSU. The can't be determined here, but we've already
2211 // compensated by reducing NumRegDefsLeft in PredSU during
2212 // ScheduleDAGSDNodes::AddSchedEdges.
2213 --PredSU->NumRegDefsLeft;
2214 unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2215 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2216 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2217 if (SkipRegDefs)
2218 continue;
2219
2220 unsigned RCId, Cost;
2221 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2222 RegPressure[RCId] += Cost;
2223 break;
2224 }
2225 }
2226
2227 // We should have this assert, but there may be dead SDNodes that never
2228 // materialize as SUnits, so they don't appear to generate liveness.
2229 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2230 int SkipRegDefs = (int)SU->NumRegDefsLeft;
2231 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2232 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2233 if (SkipRegDefs > 0)
2234 continue;
2235 unsigned RCId, Cost;
2236 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2237 if (RegPressure[RCId] < Cost) {
2238 // Register pressure tracking is imprecise. This can happen. But we try
2239 // hard not to let it happen because it likely results in poor scheduling.
2240 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2241 << ") has too many regdefs\n");
2242 RegPressure[RCId] = 0;
2243 }
2244 else {
2245 RegPressure[RCId] -= Cost;
2246 }
2247 }
2248 LLVM_DEBUG(dumpRegPressure());
2249}
2250
2251void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2252 if (!TracksRegPressure)
2253 return;
2254
2255 const SDNode *N = SU->getNode();
2256 if (!N) return;
2257
2258 if (!N->isMachineOpcode()) {
2259 if (N->getOpcode() != ISD::CopyToReg)
2260 return;
2261 } else {
2262 unsigned Opc = N->getMachineOpcode();
2263 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2264 Opc == TargetOpcode::INSERT_SUBREG ||
2265 Opc == TargetOpcode::SUBREG_TO_REG ||
2266 Opc == TargetOpcode::REG_SEQUENCE ||
2267 Opc == TargetOpcode::IMPLICIT_DEF)
2268 return;
2269 }
2270
2271 for (const SDep &Pred : SU->Preds) {
2272 if (Pred.isCtrl())
2273 continue;
2274 SUnit *PredSU = Pred.getSUnit();
2275 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2276 // counts data deps.
2277 if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2278 continue;
2279 const SDNode *PN = PredSU->getNode();
2280 if (!PN->isMachineOpcode()) {
2281 if (PN->getOpcode() == ISD::CopyFromReg) {
2282 MVT VT = PN->getSimpleValueType(0);
2283 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2284 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2285 }
2286 continue;
2287 }
2288 unsigned POpc = PN->getMachineOpcode();
2289 if (POpc == TargetOpcode::IMPLICIT_DEF)
2290 continue;
2291 if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2292 POpc == TargetOpcode::INSERT_SUBREG ||
2293 POpc == TargetOpcode::SUBREG_TO_REG) {
2294 MVT VT = PN->getSimpleValueType(0);
2295 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2296 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2297 continue;
2298 }
2299 if (POpc == TargetOpcode::REG_SEQUENCE) {
2300 unsigned DstRCIdx = PN->getConstantOperandVal(0);
2301 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
2302 unsigned RCId = RC->getID();
2303 // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
2304 // here. Instead use the same constant as in GetCostForDef.
2306 continue;
2307 }
2308 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2309 for (unsigned i = 0; i != NumDefs; ++i) {
2310 MVT VT = PN->getSimpleValueType(i);
2311 if (!PN->hasAnyUseOfValue(i))
2312 continue;
2313 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2314 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2315 // Register pressure tracking is imprecise. This can happen.
2316 RegPressure[RCId] = 0;
2317 else
2318 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2319 }
2320 }
2321
2322 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2323 // may transfer data dependencies to CopyToReg.
2324 if (SU->NumSuccs && N->isMachineOpcode()) {
2325 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2326 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2327 MVT VT = N->getSimpleValueType(i);
2328 if (VT == MVT::Glue || VT == MVT::Other)
2329 continue;
2330 if (!N->hasAnyUseOfValue(i))
2331 continue;
2332 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2333 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2334 }
2335 }
2336
2337 LLVM_DEBUG(dumpRegPressure());
2338}
2339
2340//===----------------------------------------------------------------------===//
2341// Dynamic Node Priority for Register Pressure Reduction
2342//===----------------------------------------------------------------------===//
2343
2344/// closestSucc - Returns the scheduled cycle of the successor which is
2345/// closest to the current cycle.
2346static unsigned closestSucc(const SUnit *SU) {
2347 unsigned MaxHeight = 0;
2348 for (const SDep &Succ : SU->Succs) {
2349 if (Succ.isCtrl()) continue; // ignore chain succs
2350 unsigned Height = Succ.getSUnit()->getHeight();
2351 // If there are bunch of CopyToRegs stacked up, they should be considered
2352 // to be at the same position.
2353 if (Succ.getSUnit()->getNode() &&
2354 Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2355 Height = closestSucc(Succ.getSUnit())+1;
2356 if (Height > MaxHeight)
2357 MaxHeight = Height;
2358 }
2359 return MaxHeight;
2360}
2361
2362/// calcMaxScratches - Returns an cost estimate of the worse case requirement
2363/// for scratch registers, i.e. number of data dependencies.
2364static unsigned calcMaxScratches(const SUnit *SU) {
2365 unsigned Scratches = 0;
2366 for (const SDep &Pred : SU->Preds) {
2367 if (Pred.isCtrl()) continue; // ignore chain preds
2368 Scratches++;
2369 }
2370 return Scratches;
2371}
2372
2373/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2374/// CopyFromReg from a virtual register.
2375static bool hasOnlyLiveInOpers(const SUnit *SU) {
2376 bool RetVal = false;
2377 for (const SDep &Pred : SU->Preds) {
2378 if (Pred.isCtrl()) continue;
2379 const SUnit *PredSU = Pred.getSUnit();
2380 if (PredSU->getNode() &&
2381 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2382 Register Reg =
2383 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2384 if (Reg.isVirtual()) {
2385 RetVal = true;
2386 continue;
2387 }
2388 }
2389 return false;
2390 }
2391 return RetVal;
2392}
2393
2394/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2395/// CopyToReg to a virtual register. This SU def is probably a liveout and
2396/// it has no other use. It should be scheduled closer to the terminator.
2397static bool hasOnlyLiveOutUses(const SUnit *SU) {
2398 bool RetVal = false;
2399 for (const SDep &Succ : SU->Succs) {
2400 if (Succ.isCtrl()) continue;
2401 const SUnit *SuccSU = Succ.getSUnit();
2402 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2403 Register Reg =
2404 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2405 if (Reg.isVirtual()) {
2406 RetVal = true;
2407 continue;
2408 }
2409 }
2410 return false;
2411 }
2412 return RetVal;
2413}
2414
2415// Set isVRegCycle for a node with only live in opers and live out uses. Also
2416// set isVRegCycle for its CopyFromReg operands.
2417//
2418// This is only relevant for single-block loops, in which case the VRegCycle
2419// node is likely an induction variable in which the operand and target virtual
2420// registers should be coalesced (e.g. pre/post increment values). Setting the
2421// isVRegCycle flag helps the scheduler prioritize other uses of the same
2422// CopyFromReg so that this node becomes the virtual register "kill". This
2423// avoids interference between the values live in and out of the block and
2424// eliminates a copy inside the loop.
2425static void initVRegCycle(SUnit *SU) {
2427 return;
2428
2429 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2430 return;
2431
2432 LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2433
2434 SU->isVRegCycle = true;
2435
2436 for (const SDep &Pred : SU->Preds) {
2437 if (Pred.isCtrl()) continue;
2438 Pred.getSUnit()->isVRegCycle = true;
2439 }
2440}
2441
2442// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2443// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2444static void resetVRegCycle(SUnit *SU) {
2445 if (!SU->isVRegCycle)
2446 return;
2447
2448 for (const SDep &Pred : SU->Preds) {
2449 if (Pred.isCtrl()) continue; // ignore chain preds
2450 SUnit *PredSU = Pred.getSUnit();
2451 if (PredSU->isVRegCycle) {
2452 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2453 "VRegCycle def must be CopyFromReg");
2454 Pred.getSUnit()->isVRegCycle = false;
2455 }
2456 }
2457}
2458
2459// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2460// means a node that defines the VRegCycle has not been scheduled yet.
2461static bool hasVRegCycleUse(const SUnit *SU) {
2462 // If this SU also defines the VReg, don't hoist it as a "use".
2463 if (SU->isVRegCycle)
2464 return false;
2465
2466 for (const SDep &Pred : SU->Preds) {
2467 if (Pred.isCtrl()) continue; // ignore chain preds
2468 if (Pred.getSUnit()->isVRegCycle &&
2469 Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2470 LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2471 return true;
2472 }
2473 }
2474 return false;
2475}
2476
2477// Check for either a dependence (latency) or resource (hazard) stall.
2478//
2479// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2480static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2481 if ((int)SPQ->getCurCycle() < Height) return true;
2482 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2484 return true;
2485 return false;
2486}
2487
2488// Return -1 if left has higher priority, 1 if right has higher priority.
2489// Return 0 if latency-based priority is equivalent.
2490static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2491 RegReductionPQBase *SPQ) {
2492 // Scheduling an instruction that uses a VReg whose postincrement has not yet
2493 // been scheduled will induce a copy. Model this as an extra cycle of latency.
2494 int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2495 int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2496 int LHeight = (int)left->getHeight() + LPenalty;
2497 int RHeight = (int)right->getHeight() + RPenalty;
2498
2499 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2500 BUHasStall(left, LHeight, SPQ);
2501 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2502 BUHasStall(right, RHeight, SPQ);
2503
2504 // If scheduling one of the node will cause a pipeline stall, delay it.
2505 // If scheduling either one of the node will cause a pipeline stall, sort
2506 // them according to their height.
2507 if (LStall) {
2508 if (!RStall)
2509 return 1;
2510 if (LHeight != RHeight)
2511 return LHeight > RHeight ? 1 : -1;
2512 } else if (RStall)
2513 return -1;
2514
2515 // If either node is scheduling for latency, sort them by height/depth
2516 // and latency.
2517 if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2518 right->SchedulingPref == Sched::ILP)) {
2519 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2520 // is enabled, grouping instructions by cycle, then its height is already
2521 // covered so only its depth matters. We also reach this point if both stall
2522 // but have the same height.
2523 if (!SPQ->getHazardRec()->isEnabled()) {
2524 if (LHeight != RHeight)
2525 return LHeight > RHeight ? 1 : -1;
2526 }
2527 int LDepth = left->getDepth() - LPenalty;
2528 int RDepth = right->getDepth() - RPenalty;
2529 if (LDepth != RDepth) {
2530 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2531 << ") depth " << LDepth << " vs SU (" << right->NodeNum
2532 << ") depth " << RDepth << "\n");
2533 return LDepth < RDepth ? 1 : -1;
2534 }
2535 if (left->Latency != right->Latency)
2536 return left->Latency > right->Latency ? 1 : -1;
2537 }
2538 return 0;
2539}
2540
2541static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2542 // Schedule physical register definitions close to their use. This is
2543 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2544 // long as shortening physreg live ranges is generally good, we can defer
2545 // creating a subtarget hook.
2547 bool LHasPhysReg = left->hasPhysRegDefs;
2548 bool RHasPhysReg = right->hasPhysRegDefs;
2549 if (LHasPhysReg != RHasPhysReg) {
2550 #ifndef NDEBUG
2551 static const char *const PhysRegMsg[] = { " has no physreg",
2552 " defines a physreg" };
2553 #endif
2554 LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2555 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2556 << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2557 return LHasPhysReg < RHasPhysReg;
2558 }
2559 }
2560
2561 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
2562 unsigned LPriority = SPQ->getNodePriority(left);
2563 unsigned RPriority = SPQ->getNodePriority(right);
2564
2565 // Be really careful about hoisting call operands above previous calls.
2566 // Only allows it if it would reduce register pressure.
2567 if (left->isCall && right->isCallOp) {
2568 unsigned RNumVals = right->getNode()->getNumValues();
2569 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2570 }
2571 if (right->isCall && left->isCallOp) {
2572 unsigned LNumVals = left->getNode()->getNumValues();
2573 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2574 }
2575
2576 if (LPriority != RPriority)
2577 return LPriority > RPriority;
2578
2579 // One or both of the nodes are calls and their sethi-ullman numbers are the
2580 // same, then keep source order.
2581 if (left->isCall || right->isCall) {
2582 unsigned LOrder = SPQ->getNodeOrdering(left);
2583 unsigned ROrder = SPQ->getNodeOrdering(right);
2584
2585 // Prefer an ordering where the lower the non-zero order number, the higher
2586 // the preference.
2587 if ((LOrder || ROrder) && LOrder != ROrder)
2588 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2589 }
2590
2591 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2592 // e.g.
2593 // t1 = op t2, c1
2594 // t3 = op t4, c2
2595 //
2596 // and the following instructions are both ready.
2597 // t2 = op c3
2598 // t4 = op c4
2599 //
2600 // Then schedule t2 = op first.
2601 // i.e.
2602 // t4 = op c4
2603 // t2 = op c3
2604 // t1 = op t2, c1
2605 // t3 = op t4, c2
2606 //
2607 // This creates more short live intervals.
2608 unsigned LDist = closestSucc(left);
2609 unsigned RDist = closestSucc(right);
2610 if (LDist != RDist)
2611 return LDist < RDist;
2612
2613 // How many registers becomes live when the node is scheduled.
2614 unsigned LScratch = calcMaxScratches(left);
2615 unsigned RScratch = calcMaxScratches(right);
2616 if (LScratch != RScratch)
2617 return LScratch > RScratch;
2618
2619 // Comparing latency against a call makes little sense unless the node
2620 // is register pressure-neutral.
2621 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2622 return (left->NodeQueueId > right->NodeQueueId);
2623
2624 // Do not compare latencies when one or both of the nodes are calls.
2625 if (!DisableSchedCycles &&
2626 !(left->isCall || right->isCall)) {
2627 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2628 if (result != 0)
2629 return result > 0;
2630 }
2631 else {
2632 if (left->getHeight() != right->getHeight())
2633 return left->getHeight() > right->getHeight();
2634
2635 if (left->getDepth() != right->getDepth())
2636 return left->getDepth() < right->getDepth();
2637 }
2638
2639 assert(left->NodeQueueId && right->NodeQueueId &&
2640 "NodeQueueId cannot be zero");
2641 return (left->NodeQueueId > right->NodeQueueId);
2642}
2643
2644// Bottom up
2645bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2646 if (int res = checkSpecialNodes(left, right))
2647 return res > 0;
2648
2649 return BURRSort(left, right, SPQ);
2650}
2651
2652// Source order, otherwise bottom up.
2653bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2654 if (int res = checkSpecialNodes(left, right))
2655 return res > 0;
2656
2657 unsigned LOrder = SPQ->getNodeOrdering(left);
2658 unsigned ROrder = SPQ->getNodeOrdering(right);
2659
2660 // Prefer an ordering where the lower the non-zero order number, the higher
2661 // the preference.
2662 if ((LOrder || ROrder) && LOrder != ROrder)
2663 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2664
2665 return BURRSort(left, right, SPQ);
2666}
2667
2668// If the time between now and when the instruction will be ready can cover
2669// the spill code, then avoid adding it to the ready queue. This gives long
2670// stalls highest priority and allows hoisting across calls. It should also
2671// speed up processing the available queue.
2672bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2673 static const unsigned ReadyDelay = 3;
2674
2675 if (SPQ->MayReduceRegPressure(SU)) return true;
2676
2677 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2678
2679 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2681 return false;
2682
2683 return true;
2684}
2685
2686// Return true if right should be scheduled with higher priority than left.
2687bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2688 if (int res = checkSpecialNodes(left, right))
2689 return res > 0;
2690
2691 if (left->isCall || right->isCall)
2692 // No way to compute latency of calls.
2693 return BURRSort(left, right, SPQ);
2694
2695 bool LHigh = SPQ->HighRegPressure(left);
2696 bool RHigh = SPQ->HighRegPressure(right);
2697 // Avoid causing spills. If register pressure is high, schedule for
2698 // register pressure reduction.
2699 if (LHigh && !RHigh) {
2700 LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2701 << right->NodeNum << ")\n");
2702 return true;
2703 }
2704 else if (!LHigh && RHigh) {
2705 LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2706 << left->NodeNum << ")\n");
2707 return false;
2708 }
2709 if (!LHigh && !RHigh) {
2710 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2711 if (result != 0)
2712 return result > 0;
2713 }
2714 return BURRSort(left, right, SPQ);
2715}
2716
2717// Schedule as many instructions in each cycle as possible. So don't make an
2718// instruction available unless it is ready in the current cycle.
2719bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2720 if (SU->getHeight() > CurCycle) return false;
2721
2722 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2724 return false;
2725
2726 return true;
2727}
2728
2729static bool canEnableCoalescing(SUnit *SU) {
2730 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2731 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2732 // CopyToReg should be close to its uses to facilitate coalescing and
2733 // avoid spilling.
2734 return true;
2735
2736 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2737 Opc == TargetOpcode::SUBREG_TO_REG ||
2738 Opc == TargetOpcode::INSERT_SUBREG)
2739 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2740 // close to their uses to facilitate coalescing.
2741 return true;
2742
2743 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2744 // If SU does not have a register def, schedule it close to its uses
2745 // because it does not lengthen any live ranges.
2746 return true;
2747
2748 return false;
2749}
2750
2751// list-ilp is currently an experimental scheduler that allows various
2752// heuristics to be enabled prior to the normal register reduction logic.
2753bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2754 if (int res = checkSpecialNodes(left, right))
2755 return res > 0;
2756
2757 if (left->isCall || right->isCall)
2758 // No way to compute latency of calls.
2759 return BURRSort(left, right, SPQ);
2760
2761 unsigned LLiveUses = 0, RLiveUses = 0;
2762 int LPDiff = 0, RPDiff = 0;
2764 LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2765 RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2766 }
2767 if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2768 LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2769 << "): " << LPDiff << " != SU(" << right->NodeNum
2770 << "): " << RPDiff << "\n");
2771 return LPDiff > RPDiff;
2772 }
2773
2774 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2775 bool LReduce = canEnableCoalescing(left);
2776 bool RReduce = canEnableCoalescing(right);
2777 if (LReduce && !RReduce) return false;
2778 if (RReduce && !LReduce) return true;
2779 }
2780
2781 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2782 LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2783 << " != SU(" << right->NodeNum << "): " << RLiveUses
2784 << "\n");
2785 return LLiveUses < RLiveUses;
2786 }
2787
2788 if (!DisableSchedStalls) {
2789 bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2790 bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2791 if (LStall != RStall)
2792 return left->getHeight() > right->getHeight();
2793 }
2794
2796 int spread = (int)left->getDepth() - (int)right->getDepth();
2797 if (std::abs(spread) > MaxReorderWindow) {
2798 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2799 << left->getDepth() << " != SU(" << right->NodeNum
2800 << "): " << right->getDepth() << "\n");
2801 return left->getDepth() < right->getDepth();
2802 }
2803 }
2804
2805 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2806 int spread = (int)left->getHeight() - (int)right->getHeight();
2807 if (std::abs(spread) > MaxReorderWindow)
2808 return left->getHeight() > right->getHeight();
2809 }
2810
2811 return BURRSort(left, right, SPQ);
2812}
2813
2814void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2815 SUnits = &sunits;
2816 // Add pseudo dependency edges for two-address nodes.
2817 if (!Disable2AddrHack)
2818 AddPseudoTwoAddrDeps();
2819 // Reroute edges to nodes with multiple uses.
2820 if (!TracksRegPressure && !SrcOrder)
2821 PrescheduleNodesWithMultipleUses();
2822 // Calculate node priorities.
2823 CalculateSethiUllmanNumbers();
2824
2825 // For single block loops, mark nodes that look like canonical IV increments.
2826 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2827 for (SUnit &SU : sunits)
2828 initVRegCycle(&SU);
2829}
2830
2831//===----------------------------------------------------------------------===//
2832// Preschedule for Register Pressure
2833//===----------------------------------------------------------------------===//
2834
2835bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2836 if (SU->isTwoAddress) {
2837 unsigned Opc = SU->getNode()->getMachineOpcode();
2838 const MCInstrDesc &MCID = TII->get(Opc);
2839 unsigned NumRes = MCID.getNumDefs();
2840 unsigned NumOps = MCID.getNumOperands() - NumRes;
2841 for (unsigned i = 0; i != NumOps; ++i) {
2842 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2843 SDNode *DU = SU->getNode()->getOperand(i).getNode();
2844 if (DU->getNodeId() != -1 &&
2845 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2846 return true;
2847 }
2848 }
2849 }
2850 return false;
2851}
2852
2853/// canClobberReachingPhysRegUse - True if SU would clobber one of it's
2854/// successor's explicit physregs whose definition can reach DepSU.
2855/// i.e. DepSU should not be scheduled above SU.
2856static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2857 ScheduleDAGRRList *scheduleDAG,
2858 const TargetInstrInfo *TII,
2859 const TargetRegisterInfo *TRI) {
2860 ArrayRef<MCPhysReg> ImpDefs =
2861 TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
2862 const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2863 if (ImpDefs.empty() && !RegMask)
2864 return false;
2865
2866 for (const SDep &Succ : SU->Succs) {
2867 SUnit *SuccSU = Succ.getSUnit();
2868 for (const SDep &SuccPred : SuccSU->Preds) {
2869 if (!SuccPred.isAssignedRegDep())
2870 continue;
2871
2872 if (RegMask &&
2873 MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2874 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2875 return true;
2876
2877 for (MCPhysReg ImpDef : ImpDefs) {
2878 // Return true if SU clobbers this physical register use and the
2879 // definition of the register reaches from DepSU. IsReachable queries
2880 // a topological forward sort of the DAG (following the successors).
2881 if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
2882 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2883 return true;
2884 }
2885 }
2886 }
2887 return false;
2888}
2889
2890/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2891/// physical register defs.
2892static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2893 const TargetInstrInfo *TII,
2894 const TargetRegisterInfo *TRI) {
2895 SDNode *N = SuccSU->getNode();
2896 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2897 ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
2898 assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
2899 for (const SDNode *SUNode = SU->getNode(); SUNode;
2900 SUNode = SUNode->getGluedNode()) {
2901 if (!SUNode->isMachineOpcode())
2902 continue;
2903 ArrayRef<MCPhysReg> SUImpDefs =
2904 TII->get(SUNode->getMachineOpcode()).implicit_defs();
2905 const uint32_t *SURegMask = getNodeRegMask(SUNode);
2906 if (SUImpDefs.empty() && !SURegMask)
2907 continue;
2908 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2909 MVT VT = N->getSimpleValueType(i);
2910 if (VT == MVT::Glue || VT == MVT::Other)
2911 continue;
2912 if (!N->hasAnyUseOfValue(i))
2913 continue;
2914 MCPhysReg Reg = ImpDefs[i - NumDefs];
2915 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2916 return true;
2917 for (MCPhysReg SUReg : SUImpDefs) {
2918 if (TRI->regsOverlap(Reg, SUReg))
2919 return true;
2920 }
2921 }
2922 }
2923 return false;
2924}
2925
2926/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2927/// are not handled well by the general register pressure reduction
2928/// heuristics. When presented with code like this:
2929///
2930/// N
2931/// / |
2932/// / |
2933/// U store
2934/// |
2935/// ...
2936///
2937/// the heuristics tend to push the store up, but since the
2938/// operand of the store has another use (U), this would increase
2939/// the length of that other use (the U->N edge).
2940///
2941/// This function transforms code like the above to route U's
2942/// dependence through the store when possible, like this:
2943///
2944/// N
2945/// ||
2946/// ||
2947/// store
2948/// |
2949/// U
2950/// |
2951/// ...
2952///
2953/// This results in the store being scheduled immediately
2954/// after N, which shortens the U->N live range, reducing
2955/// register pressure.
2956void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2957 // Visit all the nodes in topological order, working top-down.
2958 for (SUnit &SU : *SUnits) {
2959 // For now, only look at nodes with no data successors, such as stores.
2960 // These are especially important, due to the heuristics in
2961 // getNodePriority for nodes with no data successors.
2962 if (SU.NumSuccs != 0)
2963 continue;
2964 // For now, only look at nodes with exactly one data predecessor.
2965 if (SU.NumPreds != 1)
2966 continue;
2967 // Avoid prescheduling copies to virtual registers, which don't behave
2968 // like other nodes from the perspective of scheduling heuristics.
2969 if (SDNode *N = SU.getNode())
2970 if (N->getOpcode() == ISD::CopyToReg &&
2971 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
2972 continue;
2973
2974 SDNode *PredFrameSetup = nullptr;
2975 for (const SDep &Pred : SU.Preds)
2976 if (Pred.isCtrl() && Pred.getSUnit()) {
2977 // Find the predecessor which is not data dependence.
2978 SDNode *PredND = Pred.getSUnit()->getNode();
2979
2980 // If PredND is FrameSetup, we should not pre-scheduled the node,
2981 // or else, when bottom up scheduling, ADJCALLSTACKDOWN and
2982 // ADJCALLSTACKUP may hold CallResource too long and make other
2983 // calls can't be scheduled. If there's no other available node
2984 // to schedule, the schedular will try to rename the register by
2985 // creating copy to avoid the conflict which will fail because
2986 // CallResource is not a real physical register.
2987 if (PredND && PredND->isMachineOpcode() &&
2988 (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2989 PredFrameSetup = PredND;
2990 break;
2991 }
2992 }
2993 // Skip the node has FrameSetup parent.
2994 if (PredFrameSetup != nullptr)
2995 continue;
2996
2997 // Locate the single data predecessor.
2998 SUnit *PredSU = nullptr;
2999 for (const SDep &Pred : SU.Preds)
3000 if (!Pred.isCtrl()) {
3001 PredSU = Pred.getSUnit();
3002 break;
3003 }
3004 assert(PredSU);
3005
3006 // Don't rewrite edges that carry physregs, because that requires additional
3007 // support infrastructure.
3008 if (PredSU->hasPhysRegDefs)
3009 continue;
3010 // Short-circuit the case where SU is PredSU's only data successor.
3011 if (PredSU->NumSuccs == 1)
3012 continue;
3013 // Avoid prescheduling to copies from virtual registers, which don't behave
3014 // like other nodes from the perspective of scheduling heuristics.
3015 if (SDNode *N = SU.getNode())
3016 if (N->getOpcode() == ISD::CopyFromReg &&
3017 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
3018 continue;
3019
3020 // Perform checks on the successors of PredSU.
3021 for (const SDep &PredSucc : PredSU->Succs) {
3022 SUnit *PredSuccSU = PredSucc.getSUnit();
3023 if (PredSuccSU == &SU) continue;
3024 // If PredSU has another successor with no data successors, for
3025 // now don't attempt to choose either over the other.
3026 if (PredSuccSU->NumSuccs == 0)
3027 goto outer_loop_continue;
3028 // Don't break physical register dependencies.
3029 if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3030 if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3031 goto outer_loop_continue;
3032 // Don't introduce graph cycles.
3033 if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3034 goto outer_loop_continue;
3035 }
3036
3037 // Ok, the transformation is safe and the heuristics suggest it is
3038 // profitable. Update the graph.
3039 LLVM_DEBUG(
3040 dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3041 << PredSU->NodeNum
3042 << " to guide scheduling in the presence of multiple uses\n");
3043 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3044 SDep Edge = PredSU->Succs[i];
3045 assert(!Edge.isAssignedRegDep());
3046 SUnit *SuccSU = Edge.getSUnit();
3047 if (SuccSU != &SU) {
3048 Edge.setSUnit(PredSU);
3049 scheduleDAG->RemovePred(SuccSU, Edge);
3050 scheduleDAG->AddPredQueued(&SU, Edge);
3051 Edge.setSUnit(&SU);
3052 scheduleDAG->AddPredQueued(SuccSU, Edge);
3053 --i;
3054 }
3055 }
3056 outer_loop_continue:;
3057 }
3058}
3059
3060/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3061/// it as a def&use operand. Add a pseudo control edge from it to the other
3062/// node (if it won't create a cycle) so the two-address one will be scheduled
3063/// first (lower in the schedule). If both nodes are two-address, favor the
3064/// one that has a CopyToReg use (more likely to be a loop induction update).
3065/// If both are two-address, but one is commutable while the other is not
3066/// commutable, favor the one that's not commutable.
3067void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3068 for (SUnit &SU : *SUnits) {
3069 if (!SU.isTwoAddress)
3070 continue;
3071
3072 SDNode *Node = SU.getNode();
3073 if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3074 continue;
3075
3076 bool isLiveOut = hasOnlyLiveOutUses(&SU);
3077 unsigned Opc = Node->getMachineOpcode();
3078 const MCInstrDesc &MCID = TII->get(Opc);
3079 unsigned NumRes = MCID.getNumDefs();
3080 unsigned NumOps = MCID.getNumOperands() - NumRes;
3081 for (unsigned j = 0; j != NumOps; ++j) {
3082 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3083 continue;
3084 SDNode *DU = SU.getNode()->getOperand(j).getNode();
3085 if (DU->getNodeId() == -1)
3086 continue;
3087 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3088 if (!DUSU)
3089 continue;
3090 for (const SDep &Succ : DUSU->Succs) {
3091 if (Succ.isCtrl())
3092 continue;
3093 SUnit *SuccSU = Succ.getSUnit();
3094 if (SuccSU == &SU)
3095 continue;
3096 // Be conservative. Ignore if nodes aren't at roughly the same
3097 // depth and height.
3098 if (SuccSU->getHeight() < SU.getHeight() &&
3099 (SU.getHeight() - SuccSU->getHeight()) > 1)
3100 continue;
3101 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3102 // constrains whatever is using the copy, instead of the copy
3103 // itself. In the case that the copy is coalesced, this
3104 // preserves the intent of the pseudo two-address heurietics.
3105 while (SuccSU->Succs.size() == 1 &&
3106 SuccSU->getNode()->isMachineOpcode() &&
3107 SuccSU->getNode()->getMachineOpcode() ==
3108 TargetOpcode::COPY_TO_REGCLASS)
3109 SuccSU = SuccSU->Succs.front().getSUnit();
3110 // Don't constrain non-instruction nodes.
3111 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3112 continue;
3113 // Don't constrain nodes with physical register defs if the
3114 // predecessor can clobber them.
3115 if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3116 if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3117 continue;
3118 }
3119 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3120 // these may be coalesced away. We want them close to their uses.
3121 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3122 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3123 SuccOpc == TargetOpcode::INSERT_SUBREG ||
3124 SuccOpc == TargetOpcode::SUBREG_TO_REG)
3125 continue;
3126 if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3127 (!canClobber(SuccSU, DUSU) ||
3128 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3129 (!SU.isCommutable && SuccSU->isCommutable)) &&
3130 !scheduleDAG->IsReachable(SuccSU, &SU)) {
3132 << " Adding a pseudo-two-addr edge from SU #"
3133 << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3134 scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3135 }
3136 }
3137 }
3138 }
3139}
3140
3141//===----------------------------------------------------------------------===//
3142// Public Constructor Functions
3143//===----------------------------------------------------------------------===//
3144
3146 CodeGenOptLevel OptLevel) {
3147 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3148 const TargetInstrInfo *TII = STI.getInstrInfo();
3149 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3150
3151 BURegReductionPriorityQueue *PQ =
3152 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3153 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3154 PQ->setScheduleDAG(SD);
3155 return SD;
3156}
3157
3160 CodeGenOptLevel OptLevel) {
3161 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3162 const TargetInstrInfo *TII = STI.getInstrInfo();
3163 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3164
3165 SrcRegReductionPriorityQueue *PQ =
3166 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3167 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3168 PQ->setScheduleDAG(SD);
3169 return SD;
3170}
3171
3174 CodeGenOptLevel OptLevel) {
3175 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3176 const TargetInstrInfo *TII = STI.getInstrInfo();
3177 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3178 const TargetLowering *TLI = IS->TLI;
3179
3180 HybridBURRPriorityQueue *PQ =
3181 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3182
3183 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3184 PQ->setScheduleDAG(SD);
3185 return SD;
3186}
3187
3189 CodeGenOptLevel OptLevel) {
3190 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3191 const TargetInstrInfo *TII = STI.getInstrInfo();
3192 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3193 const TargetLowering *TLI = IS->TLI;
3194
3195 ILPBURRPriorityQueue *PQ =
3196 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3197 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3198 PQ->setScheduleDAG(SD);
3199 return SD;
3200}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
This file contains some templates that are useful if you are working with the STL at all.
static bool canEnableCoalescing(SUnit *SU)
static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", createSourceListDAGScheduler)
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
static cl::opt< bool > DisableSchedStalls("disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp"))
static bool hasOnlyLiveInOpers(const SUnit *SU)
hasOnlyLiveInOpers - Return true if SU has only value predecessors that are CopyFromReg from a virtua...
static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII)
IsChainDependent - Test if Outer is reachable from Inner through chain dependencies.
static bool hasOnlyLiveOutUses(const SUnit *SU)
hasOnlyLiveOutUses - Return true if SU has only value successors that are CopyToReg to a virtual regi...
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII)
getPhysicalRegisterVT - Returns the ValueType of the physical register definition of the specified no...
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
static cl::opt< bool > Disable2AddrHack("disable-2addr-hack", cl::Hidden, cl::init(true), cl::desc("Disable scheduler's two-address hack"))
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs, const TargetRegisterInfo *TRI, const SDNode *Node=nullptr)
CheckForLiveRegDef - Return true and update live register vector if the specified register def of the...
static RegisterScheduler ILPListDAGScheduler("list-ilp", "Bottom-up register pressure aware list scheduling " "which tries to balance ILP and register pressure", createILPListDAGScheduler)
static void resetVRegCycle(SUnit *SU)
static RegisterScheduler hybridListDAGScheduler("list-hybrid", "Bottom-up register pressure aware list scheduling " "which tries to balance latency and register pressure", createHybridListDAGScheduler)
static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberReachingPhysRegUse - True if SU would clobber one of it's successor's explicit physregs who...
static cl::opt< bool > DisableSchedPhysRegJoin("disable-sched-physreg-join", cl::Hidden, cl::init(false), cl::desc("Disable physreg def-use affinity"))
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberPhysRegDefs - True if SU would clobber one of SuccSU's physical register defs.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, unsigned &RegClass, unsigned &Cost, const MachineFunction &MF)
GetCostForDef - Looks up the register class and cost for a given definition.
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
static cl::opt< bool > DisableSchedRegPressure("disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp"))
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
static void initVRegCycle(SUnit *SU)
static constexpr unsigned RegSequenceCost
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
static bool isOperandOf(const SUnit *SU, SDNode *N)
static cl::opt< bool > DisableSchedVRegCycle("disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks"))
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle.
static cl::opt< unsigned > AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists."))
static bool hasVRegCycleUse(const SUnit *SU)
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers,...
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ)
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit * > LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask,...
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
This class represents an Operation in the Expression.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:265
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:481
MCRegAliasIterator enumerates all registers aliasing Reg.
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getIROrder() const
Return the node ordering.
void setNodeId(int Id)
Set unique node id.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
Definition: ScheduleDAG.h:211
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
void setSUnit(SUnit *SU)
Definition: ScheduleDAG.h:501
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:287
void setHeightToAtLeast(unsigned NewHeight)
If NewHeight is greater than this node's height value, set it to be the new height value.
unsigned NumSuccs
Definition: ScheduleDAG.h:273
unsigned NumPreds
Definition: ScheduleDAG.h:272
unsigned NodeQueueId
Queue id of node.
Definition: ScheduleDAG.h:271
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:270
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:275
bool hasPhysRegClobbers
Has any physreg defs, used or not.
Definition: ScheduleDAG.h:293
bool isCallOp
Is a function call operand.
Definition: ScheduleDAG.h:288
const TargetRegisterClass * CopyDstRC
Is a special copy node if != nullptr.
Definition: ScheduleDAG.h:258
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:424
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:457
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:303
unsigned short NumRegDefsLeft
Definition: ScheduleDAG.h:302
bool isPending
True once pending.
Definition: ScheduleDAG.h:294
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:416
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:296
bool isAvailable
True once available.
Definition: ScheduleDAG.h:295
bool isScheduleLow
True if preferable to schedule low.
Definition: ScheduleDAG.h:298
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:292
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:263
Sched::Preference SchedulingPref
Scheduling preference.
Definition: ScheduleDAG.h:312
const TargetRegisterClass * CopySrcRC
Definition: ScheduleDAG.h:260
SDNode * getNode() const
Returns the representative SDNode for this SUnit.
Definition: ScheduleDAG.h:370
bool isTwoAddress
Is a two-address instruction.
Definition: ScheduleDAG.h:289
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:290
bool isVRegCycle
May use and def the same vreg.
Definition: ScheduleDAG.h:286
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
RegDefIter - In place iteration over the values defined by an SUnit.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
SUnit * newSUnit(SDNode *N)
NewSUnit - Creates a new SUnit and return a ptr to it.
virtual void Schedule()=0
Schedule - Order nodes according to selected style, filling in the Sequence member.
virtual bool forceUnitLatencies() const
ForceUnitLatencies - Return true if all scheduling edges should be given a latency value of one.
SUnit * Clone(SUnit *Old)
Clone - Creates a clone of the specified SUnit.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:720
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:794
void AddSUnitWithoutPredecessors(const SUnit *SU)
Add a SUnit without predecessors to the end of the topological order.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit...
virtual void dumpNode(const SUnit &SU) const =0
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
This interface is used to plug different priorities computation algorithms into the list scheduler.
Definition: ScheduleDAG.h:514
void setCurCycle(unsigned Cycle)
Definition: ScheduleDAG.h:563
virtual void remove(SUnit *SU)=0
virtual void releaseState()=0
virtual SUnit * pop()=0
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
Definition: ScheduleDAG.h:559
virtual bool isReady(SUnit *) const
Definition: ScheduleDAG.h:538
virtual bool tracksRegPressure() const
Definition: ScheduleDAG.h:536
virtual void dump(ScheduleDAG *) const
Definition: ScheduleDAG.h:554
virtual void initNodes(std::vector< SUnit > &SUnits)=0
virtual bool empty() const =0
virtual void unscheduledNode(SUnit *)
Definition: ScheduleDAG.h:561
virtual void addNode(const SUnit *SU)=0
virtual void updateNode(const SUnit *SU)=0
virtual void push(SUnit *U)=0
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetLowering * TLI
MachineFunction * MF
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the 'representative' register class for the specified value type.
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
Return the register pressure "high water mark" for the specific register class.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ EH_LABEL
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
Definition: ISDOpcodes.h:1173
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1377
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1168
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1165
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition: PtrState.h:41
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler.
ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createSourceListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source...
ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Description of the encoding of one expression Op.