LLVM 23.0.0git
ScheduleDAGRRList.cpp
Go to the documentation of this file.
1//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements bottom-up and top-down register pressure reduction list
10// schedulers, using standard algorithms. The basic approach uses a priority
11// queue of available nodes to schedule. One at a time, nodes are taken from
12// the priority queue (thus in priority order), checked for legality to
13// schedule, and emitted if legal.
14//
15//===----------------------------------------------------------------------===//
16
17#include "ScheduleDAGSDNodes.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
39#include "llvm/Config/llvm-config.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <cstdlib>
54#include <iterator>
55#include <limits>
56#include <memory>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "pre-RA-sched"
63
64STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65STATISTIC(NumUnfolds, "Number of nodes unfolded");
66STATISTIC(NumDups, "Number of duplicated nodes");
67STATISTIC(NumPRCopies, "Number of physical register copies");
68
71 "Bottom-up register reduction list scheduling",
73
76 "Similar to list-burr but schedules in source "
77 "order when possible",
79
82 "Bottom-up register pressure aware list scheduling "
83 "which tries to balance latency and register pressure",
85
88 "Bottom-up register pressure aware list scheduling "
89 "which tries to balance ILP and register pressure",
91
93 "disable-sched-cycles", cl::Hidden, cl::init(false),
94 cl::desc("Disable cycle-level precision during preRA scheduling"));
95
96// Temporary sched=list-ilp flags until the heuristics are robust.
97// Some options are also available under sched=list-hybrid.
99 "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100 cl::desc("Disable regpressure priority in sched=list-ilp"));
102 "disable-sched-live-uses", cl::Hidden, cl::init(true),
103 cl::desc("Disable live use priority in sched=list-ilp"));
105 "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106 cl::desc("Disable virtual register cycle interference checks"));
108 "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109 cl::desc("Disable physreg def-use affinity"));
111 "disable-sched-stalls", cl::Hidden, cl::init(true),
112 cl::desc("Disable no-stall priority in sched=list-ilp"));
114 "disable-sched-critical-path", cl::Hidden, cl::init(false),
115 cl::desc("Disable critical path priority in sched=list-ilp"));
117 "disable-sched-height", cl::Hidden, cl::init(false),
118 cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 "disable-2addr-hack", cl::Hidden, cl::init(true),
121 cl::desc("Disable scheduler's two-address hack"));
122
124 "max-sched-reorder", cl::Hidden, cl::init(6),
125 cl::desc("Number of instructions to allow ahead of the critical path "
126 "in sched=list-ilp"));
127
129 AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1),
130 cl::desc("Average inst/cycle when no target itinerary exists."));
131
132namespace {
133
134//===----------------------------------------------------------------------===//
135/// ScheduleDAGRRList - The actual register reduction list scheduler
136/// implementation. This supports both top-down and bottom-up scheduling.
137///
138class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139private:
140 /// NeedLatency - True if the scheduler will make use of latency information.
141 bool NeedLatency;
142
143 /// AvailableQueue - The priority queue to use for the available SUnits.
144 SchedulingPriorityQueue *AvailableQueue;
145
146 /// PendingQueue - This contains all of the instructions whose operands have
147 /// been issued, but their results are not ready yet (due to the latency of
148 /// the operation). Once the operands becomes available, the instruction is
149 /// added to the AvailableQueue.
150 std::vector<SUnit *> PendingQueue;
151
152 /// HazardRec - The hazard recognizer to use.
153 ScheduleHazardRecognizer *HazardRec;
154
155 /// CurCycle - The current scheduler state corresponds to this cycle.
156 unsigned CurCycle = 0;
157
158 /// MinAvailableCycle - Cycle of the soonest available instruction.
159 unsigned MinAvailableCycle = ~0u;
160
161 /// IssueCount - Count instructions issued in this cycle
162 /// Currently valid only for bottom-up scheduling.
163 unsigned IssueCount = 0u;
164
165 /// LiveRegDefs - A set of physical registers and their definition
166 /// that are "live". These nodes must be scheduled before any other nodes that
167 /// modifies the registers can be scheduled.
168 unsigned NumLiveRegs = 0u;
169 std::unique_ptr<SUnit*[]> LiveRegDefs;
170 std::unique_ptr<SUnit*[]> LiveRegGens;
171
172 // Collect interferences between physical register use/defs.
173 // Each interference is an SUnit and set of physical registers.
174 SmallVector<SUnit*, 4> Interferences;
175
177
178 LRegsMapT LRegsMap;
179
180 /// Topo - A topological ordering for SUnits which permits fast IsReachable
181 /// and similar queries.
183
184 // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185 // DAG crawling.
186 SmallDenseMap<SUnit *, SUnit *, 16> CallSeqEndForStart;
187
188public:
189 ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190 SchedulingPriorityQueue *availqueue,
191 CodeGenOptLevel OptLevel)
192 : ScheduleDAGSDNodes(mf), NeedLatency(needlatency),
193 AvailableQueue(availqueue), Topo(SUnits, nullptr) {
194 const TargetSubtargetInfo &STI = mf.getSubtarget();
195 if (DisableSchedCycles || !NeedLatency)
196 HazardRec = new ScheduleHazardRecognizer();
197 else
198 HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
199 }
200
201 ~ScheduleDAGRRList() override {
202 delete HazardRec;
203 delete AvailableQueue;
204 }
205
206 void Schedule() override;
207
208 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
209
210 /// IsReachable - Checks if SU is reachable from TargetSU.
211 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
212 return Topo.IsReachable(SU, TargetSU);
213 }
214
215 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
216 /// create a cycle.
217 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
218 return Topo.WillCreateCycle(SU, TargetSU);
219 }
220
221 /// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU.
222 /// This returns true if this is a new predecessor.
223 /// Does *NOT* update the topological ordering! It just queues an update.
224 void AddPredQueued(SUnit *SU, const SDep &D) {
225 Topo.AddPredQueued(SU, D.getSUnit());
226 SU->addPred(D);
227 }
228
229 /// AddPred - adds a predecessor edge to SUnit SU.
230 /// This returns true if this is a new predecessor.
231 /// Updates the topological ordering if required.
232 void AddPred(SUnit *SU, const SDep &D) {
233 Topo.AddPred(SU, D.getSUnit());
234 SU->addPred(D);
235 }
236
237 /// RemovePred - removes a predecessor edge from SUnit SU.
238 /// This returns true if an edge was removed.
239 /// Updates the topological ordering if required.
240 void RemovePred(SUnit *SU, const SDep &D) {
241 Topo.RemovePred(SU, D.getSUnit());
242 SU->removePred(D);
243 }
244
245private:
246 bool isReady(SUnit *SU) {
247 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
248 AvailableQueue->isReady(SU);
249 }
250
251 void ReleasePred(SUnit *SU, const SDep *PredEdge);
252 void ReleasePredecessors(SUnit *SU);
253 void ReleasePending();
254 void AdvanceToCycle(unsigned NextCycle);
255 void AdvancePastStalls(SUnit *SU);
256 void EmitNode(SUnit *SU);
257 void ScheduleNodeBottomUp(SUnit*);
258 void CapturePred(SDep *PredEdge);
259 void UnscheduleNodeBottomUp(SUnit*);
260 void RestoreHazardCheckerBottomUp();
261 void BacktrackBottomUp(SUnit*, SUnit*);
262 SUnit *TryUnfoldSU(SUnit *);
263 SUnit *CopyAndMoveSuccessors(SUnit*);
264 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
265 const TargetRegisterClass*,
266 const TargetRegisterClass*,
267 SmallVectorImpl<SUnit*>&);
268 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
269
270 void releaseInterferences(unsigned Reg = 0);
271
272 SUnit *PickNodeToScheduleBottomUp();
273 void ListScheduleBottomUp();
274
275 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
276 SUnit *CreateNewSUnit(SDNode *N) {
277 unsigned NumSUnits = SUnits.size();
278 SUnit *NewNode = newSUnit(N);
279 // Update the topological ordering.
280 if (NewNode->NodeNum >= NumSUnits)
281 Topo.AddSUnitWithoutPredecessors(NewNode);
282 return NewNode;
283 }
284
285 /// CreateClone - Creates a new SUnit from an existing one.
286 SUnit *CreateClone(SUnit *N) {
287 unsigned NumSUnits = SUnits.size();
288 SUnit *NewNode = Clone(N);
289 // Update the topological ordering.
290 if (NewNode->NodeNum >= NumSUnits)
291 Topo.AddSUnitWithoutPredecessors(NewNode);
292 return NewNode;
293 }
294
295 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
296 /// need actual latency information but the hybrid scheduler does.
297 bool forceUnitLatencies() const override {
298 return !NeedLatency;
299 }
300};
301
302} // end anonymous namespace
303
304static constexpr unsigned RegSequenceCost = 1;
305
306/// GetCostForDef - Looks up the register class and cost for a given definition.
307/// Typically this just means looking up the representative register class,
308/// but for untyped values (MVT::Untyped) it means inspecting the node's
309/// opcode to determine what register class is being generated.
311 const TargetLowering *TLI,
312 const TargetInstrInfo *TII,
313 const TargetRegisterInfo *TRI,
314 unsigned &RegClass, unsigned &Cost,
315 const MachineFunction &MF) {
316 MVT VT = RegDefPos.GetValue();
317
318 // Special handling for untyped values. These values can only come from
319 // the expansion of custom DAG-to-DAG patterns.
320 if (VT == MVT::Untyped) {
321 const SDNode *Node = RegDefPos.GetNode();
322
323 // Special handling for CopyFromReg of untyped values.
324 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
325 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
327 RegClass = RC->getID();
328 Cost = 1;
329 return;
330 }
331
332 unsigned Opcode = Node->getMachineOpcode();
333 if (Opcode == TargetOpcode::REG_SEQUENCE) {
334 unsigned DstRCIdx = Node->getConstantOperandVal(0);
335 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
336 RegClass = RC->getID();
337 Cost = RegSequenceCost;
338 return;
339 }
340
341 unsigned Idx = RegDefPos.GetIdx();
342 const MCInstrDesc &Desc = TII->get(Opcode);
343 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx);
344 assert(RC && "Not a valid register class");
345 RegClass = RC->getID();
346 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
347 // better way to determine it.
348 Cost = 1;
349 } else {
350 RegClass = TLI->getRepRegClassFor(VT)->getID();
351 Cost = TLI->getRepRegClassCostFor(VT);
352 }
353}
354
355/// Schedule - Schedule the DAG using list scheduling.
356void ScheduleDAGRRList::Schedule() {
357 LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
358 << " '" << BB->getName() << "' **********\n");
359
360 CurCycle = 0;
361 IssueCount = 0;
362 MinAvailableCycle =
363 DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
364 NumLiveRegs = 0;
365 // Allocate slots for each physical register, plus one for a special register
366 // to track the virtual resource of a calling sequence.
367 LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
368 LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
369 CallSeqEndForStart.clear();
370 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
371
372 // Build the scheduling graph.
373 BuildSchedGraph();
374
375 LLVM_DEBUG(dump());
376 Topo.MarkDirty();
377
378 AvailableQueue->initNodes(SUnits);
379
380 HazardRec->Reset();
381
382 // Execute the actual scheduling loop.
383 ListScheduleBottomUp();
384
385 AvailableQueue->releaseState();
386
387 LLVM_DEBUG({
388 dbgs() << "*** Final schedule ***\n";
389 dumpSchedule();
390 dbgs() << '\n';
391 });
392}
393
394//===----------------------------------------------------------------------===//
395// Bottom-Up Scheduling
396//===----------------------------------------------------------------------===//
397
398/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
399/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
400void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
401 SUnit *PredSU = PredEdge->getSUnit();
402
403#ifndef NDEBUG
404 if (PredSU->NumSuccsLeft == 0) {
405 dbgs() << "*** Scheduling failed! ***\n";
406 dumpNode(*PredSU);
407 dbgs() << " has been released too many times!\n";
408 llvm_unreachable(nullptr);
409 }
410#endif
411 --PredSU->NumSuccsLeft;
412
413 if (!forceUnitLatencies()) {
414 // Updating predecessor's height. This is now the cycle when the
415 // predecessor can be scheduled without causing a pipeline stall.
416 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
417 }
418
419 // If all the node's successors are scheduled, this node is ready
420 // to be scheduled. Ignore the special EntrySU node.
421 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
422 PredSU->isAvailable = true;
423
424 unsigned Height = PredSU->getHeight();
425 if (Height < MinAvailableCycle)
426 MinAvailableCycle = Height;
427
428 if (isReady(PredSU)) {
429 AvailableQueue->push(PredSU);
430 }
431 // CapturePred and others may have left the node in the pending queue, avoid
432 // adding it twice.
433 else if (!PredSU->isPending) {
434 PredSU->isPending = true;
435 PendingQueue.push_back(PredSU);
436 }
437 }
438}
439
440/// IsChainDependent - Test if Outer is reachable from Inner through
441/// chain dependencies.
442static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
443 unsigned NestLevel,
444 const TargetInstrInfo *TII) {
445 SDNode *N = Outer;
446 while (true) {
447 if (N == Inner)
448 return true;
449 // For a TokenFactor, examine each operand. There may be multiple ways
450 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
451 // most nesting in order to ensure that we find the corresponding match.
452 if (N->getOpcode() == ISD::TokenFactor) {
453 for (const SDValue &Op : N->op_values())
454 if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
455 return true;
456 return false;
457 }
458 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
459 if (N->isMachineOpcode()) {
460 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
461 ++NestLevel;
462 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
463 if (NestLevel == 0)
464 return false;
465 --NestLevel;
466 }
467 }
468 // Otherwise, find the chain and continue climbing.
469 for (const SDValue &Op : N->op_values())
470 if (Op.getValueType() == MVT::Other) {
471 N = Op.getNode();
472 goto found_chain_operand;
473 }
474 return false;
475 found_chain_operand:;
476 if (N->getOpcode() == ISD::EntryToken)
477 return false;
478 }
479}
480
481/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
482/// the corresponding (lowered) CALLSEQ_BEGIN node.
483///
/// NestLevel and MaxNest are used in recursion to indicate the current level
485/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
486/// level seen so far.
487///
488/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
489/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
490static SDNode *
491FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
492 const TargetInstrInfo *TII) {
493 while (true) {
494 // For a TokenFactor, examine each operand. There may be multiple ways
495 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
496 // most nesting in order to ensure that we find the corresponding match.
497 if (N->getOpcode() == ISD::TokenFactor) {
498 SDNode *Best = nullptr;
499 unsigned BestMaxNest = MaxNest;
500 for (const SDValue &Op : N->op_values()) {
501 unsigned MyNestLevel = NestLevel;
502 unsigned MyMaxNest = MaxNest;
503 if (SDNode *New = FindCallSeqStart(Op.getNode(),
504 MyNestLevel, MyMaxNest, TII))
505 if (!Best || (MyMaxNest > BestMaxNest)) {
506 Best = New;
507 BestMaxNest = MyMaxNest;
508 }
509 }
510 assert(Best);
511 MaxNest = BestMaxNest;
512 return Best;
513 }
514 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
515 if (N->isMachineOpcode()) {
516 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
517 ++NestLevel;
518 MaxNest = std::max(MaxNest, NestLevel);
519 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
520 assert(NestLevel != 0);
521 --NestLevel;
522 if (NestLevel == 0)
523 return N;
524 }
525 }
526 // Otherwise, find the chain and continue climbing.
527 for (const SDValue &Op : N->op_values())
528 if (Op.getValueType() == MVT::Other) {
529 N = Op.getNode();
530 goto found_chain_operand;
531 }
532 return nullptr;
533 found_chain_operand:;
534 if (N->getOpcode() == ISD::EntryToken)
535 return nullptr;
536 }
537}
538
539/// Call ReleasePred for each predecessor, then update register live def/gen.
540/// Always update LiveRegDefs for a register dependence even if the current SU
541/// also defines the register. This effectively create one large live range
542/// across a sequence of two-address node. This is important because the
543/// entire chain must be scheduled together. Example:
544///
545/// flags = (3) add
546/// flags = (2) addc flags
547/// flags = (1) addc flags
548///
549/// results in
550///
551/// LiveRegDefs[flags] = 3
552/// LiveRegGens[flags] = 1
553///
554/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
555/// interference on flags.
556void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
557 // Bottom up: release predecessors
558 for (SDep &Pred : SU->Preds) {
559 ReleasePred(SU, &Pred);
560 if (Pred.isAssignedRegDep()) {
561 // This is a physical register dependency and it's impossible or
562 // expensive to copy the register. Make sure nothing that can
563 // clobber the register is scheduled between the predecessor and
564 // this node.
565 SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
566 assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
567 "interference on register dependence");
568 LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
569 if (!LiveRegGens[Pred.getReg()]) {
570 ++NumLiveRegs;
571 LiveRegGens[Pred.getReg()] = SU;
572 }
573 }
574 }
575
576 // If we're scheduling a lowered CALLSEQ_END, find the corresponding
577 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
578 // these nodes, to prevent other calls from being interscheduled with them.
579 unsigned CallResource = TRI->getNumRegs();
580 if (!LiveRegDefs[CallResource])
581 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
582 if (Node->isMachineOpcode() &&
583 Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
584 unsigned NestLevel = 0;
585 unsigned MaxNest = 0;
586 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
587 assert(N && "Must find call sequence start");
588
589 SUnit *Def = &SUnits[N->getNodeId()];
590 CallSeqEndForStart[Def] = SU;
591
592 ++NumLiveRegs;
593 LiveRegDefs[CallResource] = Def;
594 LiveRegGens[CallResource] = SU;
595 break;
596 }
597}
598
599/// Check to see if any of the pending instructions are ready to issue. If
600/// so, add them to the available queue.
601void ScheduleDAGRRList::ReleasePending() {
602 if (DisableSchedCycles) {
603 assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
604 return;
605 }
606
607 // If the available queue is empty, it is safe to reset MinAvailableCycle.
608 if (AvailableQueue->empty())
609 MinAvailableCycle = std::numeric_limits<unsigned>::max();
610
611 // Check to see if any of the pending instructions are ready to issue. If
612 // so, add them to the available queue.
613 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
614 unsigned ReadyCycle = PendingQueue[i]->getHeight();
615 if (ReadyCycle < MinAvailableCycle)
616 MinAvailableCycle = ReadyCycle;
617
618 if (PendingQueue[i]->isAvailable) {
619 if (!isReady(PendingQueue[i]))
620 continue;
621 AvailableQueue->push(PendingQueue[i]);
622 }
623 PendingQueue[i]->isPending = false;
624 PendingQueue[i] = PendingQueue.back();
625 PendingQueue.pop_back();
626 --i; --e;
627 }
628}
629
630/// Move the scheduler state forward by the specified number of Cycles.
631void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
632 if (NextCycle <= CurCycle)
633 return;
634
635 IssueCount = 0;
636 AvailableQueue->setCurCycle(NextCycle);
637 if (!HazardRec->isEnabled()) {
638 // Bypass lots of virtual calls in case of long latency.
639 CurCycle = NextCycle;
640 }
641 else {
642 for (; CurCycle != NextCycle; ++CurCycle) {
643 HazardRec->RecedeCycle();
644 }
645 }
646 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
647 // available Q to release pending nodes at least once before popping.
648 ReleasePending();
649}
650
651/// Move the scheduler state forward until the specified node's dependents are
652/// ready and can be scheduled with no resource conflicts.
653void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
655 return;
656
657 // FIXME: Nodes such as CopyFromReg probably should not advance the current
658 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
659 // has predecessors the cycle will be advanced when they are scheduled.
660 // But given the crude nature of modeling latency though such nodes, we
661 // currently need to treat these nodes like real instructions.
662 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
663
664 unsigned ReadyCycle = SU->getHeight();
665
666 // Bump CurCycle to account for latency. We assume the latency of other
667 // available instructions may be hidden by the stall (not a full pipe stall).
668 // This updates the hazard recognizer's cycle before reserving resources for
669 // this instruction.
670 AdvanceToCycle(ReadyCycle);
671
672 // Calls are scheduled in their preceding cycle, so don't conflict with
673 // hazards from instructions after the call. EmitNode will reset the
674 // scoreboard state before emitting the call.
675 if (SU->isCall)
676 return;
677
678 // FIXME: For resource conflicts in very long non-pipelined stages, we
679 // should probably skip ahead here to avoid useless scoreboard checks.
680 int Stalls = 0;
681 while (true) {
683 HazardRec->getHazardType(SU, -Stalls);
684
686 break;
687
688 ++Stalls;
689 }
690 AdvanceToCycle(CurCycle + Stalls);
691}
692
693/// Record this SUnit in the HazardRecognizer.
694/// Does not update CurCycle.
695void ScheduleDAGRRList::EmitNode(SUnit *SU) {
696 if (!HazardRec->isEnabled())
697 return;
698
699 // Check for phys reg copy.
700 if (!SU->getNode())
701 return;
702
703 switch (SU->getNode()->getOpcode()) {
704 default:
705 assert(SU->getNode()->isMachineOpcode() &&
706 "This target-independent node should not be scheduled.");
707 break;
709 case ISD::TokenFactor:
712 case ISD::CopyToReg:
713 case ISD::CopyFromReg:
714 case ISD::EH_LABEL:
716 // Noops don't affect the scoreboard state. Copies are likely to be
717 // removed.
718 return;
719 case ISD::INLINEASM:
721 // For inline asm, clear the pipeline state.
722 HazardRec->Reset();
723 return;
724 }
725 if (SU->isCall) {
726 // Calls are scheduled with their preceding instructions. For bottom-up
727 // scheduling, clear the pipeline state before emitting.
728 HazardRec->Reset();
729 }
730
731 HazardRec->EmitInstruction(SU);
732}
733
734static void resetVRegCycle(SUnit *SU);
735
736/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
737/// count of its predecessors. If a predecessor pending count is zero, add it to
738/// the Available queue.
739void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
740 LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
741 LLVM_DEBUG(dumpNode(*SU));
742
743#ifndef NDEBUG
744 if (CurCycle < SU->getHeight())
745 LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
746 << "] pipeline stall!\n");
747#endif
748
749 // FIXME: Do not modify node height. It may interfere with
750 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
751 // node its ready cycle can aid heuristics, and after scheduling it can
752 // indicate the scheduled cycle.
753 SU->setHeightToAtLeast(CurCycle);
754
755 // Reserve resources for the scheduled instruction.
756 EmitNode(SU);
757
758 Sequence.push_back(SU);
759
760 AvailableQueue->scheduledNode(SU);
761
762 // If HazardRec is disabled, and each inst counts as one cycle, then
763 // advance CurCycle before ReleasePredecessors to avoid useless pushes to
764 // PendingQueue for schedulers that implement HasReadyFilter.
765 if (!HazardRec->isEnabled() && AvgIPC < 2)
766 AdvanceToCycle(CurCycle + 1);
767
768 // Update liveness of predecessors before successors to avoid treating a
769 // two-address node as a live range def.
770 ReleasePredecessors(SU);
771
772 // Release all the implicit physical register defs that are live.
773 for (SDep &Succ : SU->Succs) {
774 // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
775 if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
776 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
777 --NumLiveRegs;
778 LiveRegDefs[Succ.getReg()] = nullptr;
779 LiveRegGens[Succ.getReg()] = nullptr;
780 releaseInterferences(Succ.getReg());
781 }
782 }
783 // Release the special call resource dependence, if this is the beginning
784 // of a call.
785 unsigned CallResource = TRI->getNumRegs();
786 if (LiveRegDefs[CallResource] == SU)
787 for (const SDNode *SUNode = SU->getNode(); SUNode;
788 SUNode = SUNode->getGluedNode()) {
789 if (SUNode->isMachineOpcode() &&
790 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
791 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
792 --NumLiveRegs;
793 LiveRegDefs[CallResource] = nullptr;
794 LiveRegGens[CallResource] = nullptr;
795 releaseInterferences(CallResource);
796 }
797 }
798
799 resetVRegCycle(SU);
800
801 SU->isScheduled = true;
802
803 // Conditions under which the scheduler should eagerly advance the cycle:
804 // (1) No available instructions
805 // (2) All pipelines full, so available instructions must have hazards.
806 //
807 // If HazardRec is disabled, the cycle was pre-advanced before calling
808 // ReleasePredecessors. In that case, IssueCount should remain 0.
809 //
810 // Check AvailableQueue after ReleasePredecessors in case of zero latency.
811 if (HazardRec->isEnabled() || AvgIPC > 1) {
812 if (SU->getNode() && SU->getNode()->isMachineOpcode())
813 ++IssueCount;
814 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
815 || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
816 AdvanceToCycle(CurCycle + 1);
817 }
818}
819
820/// CapturePred - This does the opposite of ReleasePred. Since SU is being
821/// unscheduled, increase the succ left count of its predecessors. Remove
822/// them from AvailableQueue if necessary.
823void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
824 SUnit *PredSU = PredEdge->getSUnit();
825 if (PredSU->isAvailable) {
826 PredSU->isAvailable = false;
827 if (!PredSU->isPending)
828 AvailableQueue->remove(PredSU);
829 }
830
831 assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
832 "NumSuccsLeft will overflow!");
833 ++PredSU->NumSuccsLeft;
834}
835
836/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
837/// its predecessor states to reflect the change.
838void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
839 LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
840 LLVM_DEBUG(dumpNode(*SU));
841
842 for (SDep &Pred : SU->Preds) {
843 CapturePred(&Pred);
844 if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
845 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
846 assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
847 "Physical register dependency violated?");
848 --NumLiveRegs;
849 LiveRegDefs[Pred.getReg()] = nullptr;
850 LiveRegGens[Pred.getReg()] = nullptr;
851 releaseInterferences(Pred.getReg());
852 }
853 }
854
855 // Reclaim the special call resource dependence, if this is the beginning
856 // of a call.
857 unsigned CallResource = TRI->getNumRegs();
858 for (const SDNode *SUNode = SU->getNode(); SUNode;
859 SUNode = SUNode->getGluedNode()) {
860 if (SUNode->isMachineOpcode() &&
861 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
862 SUnit *SeqEnd = CallSeqEndForStart[SU];
863 assert(SeqEnd && "Call sequence start/end must be known");
864 assert(!LiveRegDefs[CallResource]);
865 assert(!LiveRegGens[CallResource]);
866 ++NumLiveRegs;
867 LiveRegDefs[CallResource] = SU;
868 LiveRegGens[CallResource] = SeqEnd;
869 }
870 }
871
872 // Release the special call resource dependence, if this is the end
873 // of a call.
874 if (LiveRegGens[CallResource] == SU)
875 for (const SDNode *SUNode = SU->getNode(); SUNode;
876 SUNode = SUNode->getGluedNode()) {
877 if (SUNode->isMachineOpcode() &&
878 SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
879 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
880 assert(LiveRegDefs[CallResource]);
881 assert(LiveRegGens[CallResource]);
882 --NumLiveRegs;
883 LiveRegDefs[CallResource] = nullptr;
884 LiveRegGens[CallResource] = nullptr;
885 releaseInterferences(CallResource);
886 }
887 }
888
889 for (auto &Succ : SU->Succs) {
890 if (Succ.isAssignedRegDep()) {
891 auto Reg = Succ.getReg();
892 if (!LiveRegDefs[Reg])
893 ++NumLiveRegs;
894 // This becomes the nearest def. Note that an earlier def may still be
895 // pending if this is a two-address node.
896 LiveRegDefs[Reg] = SU;
897
898 // Update LiveRegGen only if was empty before this unscheduling.
899 // This is to avoid incorrect updating LiveRegGen set in previous run.
900 if (!LiveRegGens[Reg]) {
901 // Find the successor with the lowest height.
902 LiveRegGens[Reg] = Succ.getSUnit();
903 for (auto &Succ2 : SU->Succs) {
904 if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
905 Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
906 LiveRegGens[Reg] = Succ2.getSUnit();
907 }
908 }
909 }
910 }
911 if (SU->getHeight() < MinAvailableCycle)
912 MinAvailableCycle = SU->getHeight();
913
914 SU->setHeightDirty();
915 SU->isScheduled = false;
916 SU->isAvailable = true;
917 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
918 // Don't make available until backtracking is complete.
919 SU->isPending = true;
920 PendingQueue.push_back(SU);
921 }
922 else {
923 AvailableQueue->push(SU);
924 }
925 AvailableQueue->unscheduledNode(SU);
926}
927
928/// After backtracking, the hazard checker needs to be restored to a state
929/// corresponding the current cycle.
930void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
931 HazardRec->Reset();
932
933 unsigned LookAhead = std::min((unsigned)Sequence.size(),
934 HazardRec->getMaxLookAhead());
935 if (LookAhead == 0)
936 return;
937
938 std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
939 unsigned HazardCycle = (*I)->getHeight();
940 for (auto E = Sequence.end(); I != E; ++I) {
941 SUnit *SU = *I;
942 for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
943 HazardRec->RecedeCycle();
944 }
945 EmitNode(SU);
946 }
947}
948
949/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
950/// BTCycle in order to schedule a specific node.
951void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
952 SUnit *OldSU = Sequence.back();
953 while (true) {
954 Sequence.pop_back();
955 // FIXME: use ready cycle instead of height
956 CurCycle = OldSU->getHeight();
957 UnscheduleNodeBottomUp(OldSU);
958 AvailableQueue->setCurCycle(CurCycle);
959 if (OldSU == BtSU)
960 break;
961 OldSU = Sequence.back();
962 }
963
964 assert(!SU->isSucc(OldSU) && "Something is wrong!");
965
966 RestoreHazardCheckerBottomUp();
967
968 ReleasePending();
969
970 ++NumBacktracks;
971}
972
973static bool isOperandOf(const SUnit *SU, SDNode *N) {
974 for (const SDNode *SUNode = SU->getNode(); SUNode;
975 SUNode = SUNode->getGluedNode()) {
976 if (SUNode->isOperandOf(N))
977 return true;
978 }
979 return false;
980}
981
982/// TryUnfold - Attempt to unfold
983SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
984 SDNode *N = SU->getNode();
985 // Use while over if to ease fall through.
987 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
988 return nullptr;
989
990 assert(NewNodes.size() == 2 && "Expected a load folding node!");
991
992 N = NewNodes[1];
993 SDNode *LoadNode = NewNodes[0];
994 unsigned NumVals = N->getNumValues();
995 unsigned OldNumVals = SU->getNode()->getNumValues();
996
997 // LoadNode may already exist. This can happen when there is another
998 // load from the same location and producing the same type of value
999 // but it has different alignment or volatileness.
1000 bool isNewLoad = true;
1001 SUnit *LoadSU;
1002 if (LoadNode->getNodeId() != -1) {
1003 LoadSU = &SUnits[LoadNode->getNodeId()];
1004 // If LoadSU has already been scheduled, we should clone it but
1005 // this would negate the benefit to unfolding so just return SU.
1006 if (LoadSU->isScheduled)
1007 return SU;
1008 isNewLoad = false;
1009 } else {
1010 LoadSU = CreateNewSUnit(LoadNode);
1011 LoadNode->setNodeId(LoadSU->NodeNum);
1012
1013 InitNumRegDefsLeft(LoadSU);
1014 computeLatency(LoadSU);
1015 }
1016
1017 bool isNewN = true;
1018 SUnit *NewSU;
1019 // This can only happen when isNewLoad is false.
1020 if (N->getNodeId() != -1) {
1021 NewSU = &SUnits[N->getNodeId()];
1022 // If NewSU has already been scheduled, we need to clone it, but this
1023 // negates the benefit to unfolding so just return SU.
1024 if (NewSU->isScheduled) {
1025 return SU;
1026 }
1027 isNewN = false;
1028 } else {
1029 NewSU = CreateNewSUnit(N);
1030 N->setNodeId(NewSU->NodeNum);
1031
1032 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1033 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1034 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1035 NewSU->isTwoAddress = true;
1036 break;
1037 }
1038 }
1039 if (MCID.isCommutable())
1040 NewSU->isCommutable = true;
1041
1042 InitNumRegDefsLeft(NewSU);
1043 computeLatency(NewSU);
1044 }
1045
1046 LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1047
1048 // Now that we are committed to unfolding replace DAG Uses.
1049 for (unsigned i = 0; i != NumVals; ++i)
1050 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1051 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1052 SDValue(LoadNode, 1));
1053
1054 // Record all the edges to and from the old SU, by category.
1055 SmallVector<SDep, 4> ChainPreds;
1056 SmallVector<SDep, 4> ChainSuccs;
1057 SmallVector<SDep, 4> LoadPreds;
1058 SmallVector<SDep, 4> NodePreds;
1059 SmallVector<SDep, 4> NodeSuccs;
1060 for (SDep &Pred : SU->Preds) {
1061 if (Pred.isCtrl())
1062 ChainPreds.push_back(Pred);
1063 else if (isOperandOf(Pred.getSUnit(), LoadNode))
1064 LoadPreds.push_back(Pred);
1065 else
1066 NodePreds.push_back(Pred);
1067 }
1068 for (SDep &Succ : SU->Succs) {
1069 if (Succ.isCtrl())
1070 ChainSuccs.push_back(Succ);
1071 else
1072 NodeSuccs.push_back(Succ);
1073 }
1074
1075 // Now assign edges to the newly-created nodes.
1076 for (const SDep &Pred : ChainPreds) {
1077 RemovePred(SU, Pred);
1078 if (isNewLoad)
1079 AddPredQueued(LoadSU, Pred);
1080 }
1081 for (const SDep &Pred : LoadPreds) {
1082 RemovePred(SU, Pred);
1083 if (isNewLoad)
1084 AddPredQueued(LoadSU, Pred);
1085 }
1086 for (const SDep &Pred : NodePreds) {
1087 RemovePred(SU, Pred);
1088 AddPredQueued(NewSU, Pred);
1089 }
1090 for (SDep &D : NodeSuccs) {
1091 SUnit *SuccDep = D.getSUnit();
1092 D.setSUnit(SU);
1093 RemovePred(SuccDep, D);
1094 D.setSUnit(NewSU);
1095 AddPredQueued(SuccDep, D);
1096 // Balance register pressure.
1097 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1098 !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1099 --NewSU->NumRegDefsLeft;
1100 }
1101 for (SDep &D : ChainSuccs) {
1102 SUnit *SuccDep = D.getSUnit();
1103 D.setSUnit(SU);
1104 RemovePred(SuccDep, D);
1105 if (isNewLoad) {
1106 D.setSUnit(LoadSU);
1107 AddPredQueued(SuccDep, D);
1108 }
1109 }
1110
1111 // Add a data dependency to reflect that NewSU reads the value defined
1112 // by LoadSU.
1113 SDep D(LoadSU, SDep::Data, 0);
1114 D.setLatency(LoadSU->Latency);
1115 AddPredQueued(NewSU, D);
1116
1117 if (isNewLoad)
1118 AvailableQueue->addNode(LoadSU);
1119 if (isNewN)
1120 AvailableQueue->addNode(NewSU);
1121
1122 ++NumUnfolds;
1123
1124 if (NewSU->NumSuccsLeft == 0)
1125 NewSU->isAvailable = true;
1126
1127 return NewSU;
1128}
1129
1130/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1131/// successors to the newly created node.
1132SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1133 SDNode *N = SU->getNode();
1134 if (!N)
1135 return nullptr;
1136
1137 LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1138 LLVM_DEBUG(dumpNode(*SU));
1139
1140 if (N->getGluedNode() &&
1141 !TII->canCopyGluedNodeDuringSchedule(N)) {
1142 LLVM_DEBUG(
1143 dbgs()
1144 << "Giving up because it has incoming glue and the target does not "
1145 "want to copy it\n");
1146 return nullptr;
1147 }
1148
1149 SUnit *NewSU;
1150 bool TryUnfold = false;
1151 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1152 MVT VT = N->getSimpleValueType(i);
1153 if (VT == MVT::Glue) {
1154 LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1155 return nullptr;
1156 } else if (VT == MVT::Other)
1157 TryUnfold = true;
1158 }
1159 for (const SDValue &Op : N->op_values()) {
1160 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1161 if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1162 LLVM_DEBUG(
1163 dbgs() << "Giving up because it one of the operands is glue and "
1164 "the target does not want to copy it\n");
1165 return nullptr;
1166 }
1167 }
1168
1169 // If possible unfold instruction.
1170 if (TryUnfold) {
1171 SUnit *UnfoldSU = TryUnfoldSU(SU);
1172 if (!UnfoldSU)
1173 return nullptr;
1174 SU = UnfoldSU;
1175 N = SU->getNode();
1176 // If this can be scheduled don't bother duplicating and just return
1177 if (SU->NumSuccsLeft == 0)
1178 return SU;
1179 }
1180
1181 LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1182 NewSU = CreateClone(SU);
1183
1184 // New SUnit has the exact same predecessors.
1185 for (SDep &Pred : SU->Preds)
1186 if (!Pred.isArtificial())
1187 AddPredQueued(NewSU, Pred);
1188
1189 // Make sure the clone comes after the original. (InstrEmitter assumes
1190 // this ordering.)
1191 AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
1192
1193 // Only copy scheduled successors. Cut them from old node's successor
1194 // list and move them over.
1196 for (SDep &Succ : SU->Succs) {
1197 if (Succ.isArtificial())
1198 continue;
1199 SUnit *SuccSU = Succ.getSUnit();
1200 if (SuccSU->isScheduled) {
1201 SDep D = Succ;
1202 D.setSUnit(NewSU);
1203 AddPredQueued(SuccSU, D);
1204 D.setSUnit(SU);
1205 DelDeps.emplace_back(SuccSU, D);
1206 }
1207 }
1208 for (const auto &[DelSU, DelD] : DelDeps)
1209 RemovePred(DelSU, DelD);
1210
1211 AvailableQueue->updateNode(SU);
1212 AvailableQueue->addNode(NewSU);
1213
1214 ++NumDups;
1215 return NewSU;
1216}
1217
1218/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1219/// scheduled successors of the given SUnit to the last copy.
1220void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1221 const TargetRegisterClass *DestRC,
1222 const TargetRegisterClass *SrcRC,
1223 SmallVectorImpl<SUnit*> &Copies) {
1224 SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1225 CopyFromSU->CopySrcRC = SrcRC;
1226 CopyFromSU->CopyDstRC = DestRC;
1227
1228 SUnit *CopyToSU = CreateNewSUnit(nullptr);
1229 CopyToSU->CopySrcRC = DestRC;
1230 CopyToSU->CopyDstRC = SrcRC;
1231
1232 // Only copy scheduled successors. Cut them from old node's successor
1233 // list and move them over.
1235 for (SDep &Succ : SU->Succs) {
1236 if (Succ.isArtificial())
1237 continue;
1238 SUnit *SuccSU = Succ.getSUnit();
1239 if (SuccSU->isScheduled) {
1240 SDep D = Succ;
1241 D.setSUnit(CopyToSU);
1242 AddPredQueued(SuccSU, D);
1243 DelDeps.emplace_back(SuccSU, Succ);
1244 }
1245 else {
1246 // Avoid scheduling the def-side copy before other successors. Otherwise,
1247 // we could introduce another physreg interference on the copy and
1248 // continue inserting copies indefinitely.
1249 AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1250 }
1251 }
1252 for (const auto &[DelSU, DelD] : DelDeps)
1253 RemovePred(DelSU, DelD);
1254
1255 SDep FromDep(SU, SDep::Data, Reg);
1256 FromDep.setLatency(SU->Latency);
1257 AddPredQueued(CopyFromSU, FromDep);
1258 SDep ToDep(CopyFromSU, SDep::Data, 0);
1259 ToDep.setLatency(CopyFromSU->Latency);
1260 AddPredQueued(CopyToSU, ToDep);
1261
1262 AvailableQueue->updateNode(SU);
1263 AvailableQueue->addNode(CopyFromSU);
1264 AvailableQueue->addNode(CopyToSU);
1265 Copies.push_back(CopyFromSU);
1266 Copies.push_back(CopyToSU);
1267
1268 ++NumPRCopies;
1269}
1270
1271/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1272/// definition of the specified node.
1273/// FIXME: Move to SelectionDAG?
1275 const TargetInstrInfo *TII) {
1276 unsigned NumRes;
1277 if (N->getOpcode() == ISD::CopyFromReg) {
1278 // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1279 NumRes = 1;
1280 } else {
1281 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1282 assert(!MCID.implicit_defs().empty() &&
1283 "Physical reg def must be in implicit def list!");
1284 NumRes = MCID.getNumDefs();
1285 for (MCPhysReg ImpDef : MCID.implicit_defs()) {
1286 if (Reg == ImpDef)
1287 break;
1288 ++NumRes;
1289 }
1290 }
1291 return N->getSimpleValueType(NumRes);
1292}
1293
1294/// CheckForLiveRegDef - Return true and update live register vector if the
1295/// specified register def of the specified SUnit clobbers any "live" registers.
1296static void CheckForLiveRegDef(SUnit *SU, MCRegister Reg, SUnit **LiveRegDefs,
1297 SmallSet<unsigned, 4> &RegAdded,
1299 const TargetRegisterInfo *TRI,
1300 const SDNode *Node = nullptr) {
1301 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1302
1303 // Check if Ref is live.
1304 if (!LiveRegDefs[*AliasI]) continue;
1305
1306 // Allow multiple uses of the same def.
1307 if (LiveRegDefs[*AliasI] == SU) continue;
1308
1309 // Allow multiple uses of same def
1310 if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
1311 continue;
1312
1313 // Add Reg to the set of interfering live regs.
1314 if (RegAdded.insert(*AliasI).second) {
1315 LRegs.push_back(*AliasI);
1316 }
1317 }
1318}
1319
1320/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1321/// by RegMask, and add them to LRegs.
1322static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1323 ArrayRef<SUnit*> LiveRegDefs,
1324 SmallSet<unsigned, 4> &RegAdded,
1326 // Look at all live registers. Skip Reg0 and the special CallResource.
1327 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1328 if (!LiveRegDefs[i]) continue;
1329 if (LiveRegDefs[i] == SU) continue;
1330 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1331 if (RegAdded.insert(i).second)
1332 LRegs.push_back(i);
1333 }
1334}
1335
1336/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1337static const uint32_t *getNodeRegMask(const SDNode *N) {
1338 for (const SDValue &Op : N->op_values())
1339 if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1340 return RegOp->getRegMask();
1341 return nullptr;
1342}
1343
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
/// On return, LRegs holds the set of interfering live registers (TRI's
/// register count is used as the pseudo-register for the call resource).
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  // Fast path: with no live physregs there can be no interference.
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
                         RegAdded, LRegs, TRI);
  }

  // Walk SU's node and everything glued to it.
  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM ||
        Node->getOpcode() == ISD::INLINEASM_BR) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps; // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags = Node->getConstantOperandVal(i);
        const InlineAsm::Flag F(Flags);
        unsigned NumVals = F.getNumOperandRegisters();

        ++i; // Skip the ID value.
        if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
            F.isClobberKind()) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (Reg.isPhysical())
              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (Node->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      if (Reg.isPhysical()) {
        // Passing SrcNode permits multiple uses of the same def (see
        // CheckForLiveRegDef).
        SDNode *SrcNode = Node->getOperand(2).getNode();
        CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
                           SrcNode);
      }
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) &&
            RegAdded.insert(CallResource).second)
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask,
                               ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
                               RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (MCID.hasOptionalDef()) {
      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
      // This operand can be either a def of CPSR, if the S bit is set; or a use
      // of %noreg. When the OptionalDef is set to a valid register, we need to
      // handle it in the same way as an ImplicitDef.
      for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
        if (MCID.operands()[i].isOptionalDef()) {
          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
          Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
        }
    }
    // Ordinary machine nodes: every implicit def may clobber a live reg.
    for (MCPhysReg Reg : MCID.implicit_defs())
      CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}
1442
/// Release nodes recorded as interfering on register Reg (or all recorded
/// interferences when Reg == 0), pushing any that are still available back
/// onto the AvailableQueue.
void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  // Iterate backwards so swap-with-back removal does not skip entries.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      // Only release nodes that were blocked on this particular register.
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (!is_contained(LRegs, Reg))
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    // Swap-with-back removal of entry i-1 from Interferences.
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}
1467
1468/// Return a node that can be scheduled in this cycle. Requirements:
1469/// (1) Ready: latency has been satisfied
1470/// (2) No Hazards: resources are available
1471/// (3) No Interferences: may unschedule to break register interferences.
1472SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1473 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1474 auto FindAvailableNode = [&]() {
1475 while (CurSU) {
1476 SmallVector<unsigned, 4> LRegs;
1477 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1478 break;
1479 LLVM_DEBUG(dbgs() << " Interfering reg ";
1480 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1481 else dbgs() << printReg(LRegs[0], TRI);
1482 dbgs() << " SU #" << CurSU->NodeNum << '\n');
1483 auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
1484 if (LRegsInserted) {
1485 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1486 Interferences.push_back(CurSU);
1487 }
1488 else {
1489 assert(CurSU->isPending && "Interferences are pending");
1490 // Update the interference with current live regs.
1491 LRegsIter->second = LRegs;
1492 }
1493 CurSU = AvailableQueue->pop();
1494 }
1495 };
1496 FindAvailableNode();
1497 if (CurSU)
1498 return CurSU;
1499
1500 // We query the topological order in the loop body, so make sure outstanding
1501 // updates are applied before entering it (we only enter the loop if there
1502 // are some interferences). If we make changes to the ordering, we exit
1503 // the loop.
1504
1505 // All candidates are delayed due to live physical reg dependencies.
1506 // Try backtracking, code duplication, or inserting cross class copies
1507 // to resolve it.
1508 for (SUnit *TrySU : Interferences) {
1509 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1510
1511 // Try unscheduling up to the point where it's safe to schedule
1512 // this node.
1513 SUnit *BtSU = nullptr;
1514 unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1515 for (unsigned Reg : LRegs) {
1516 if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1517 BtSU = LiveRegGens[Reg];
1518 LiveCycle = BtSU->getHeight();
1519 }
1520 }
1521 if (!WillCreateCycle(TrySU, BtSU)) {
1522 // BacktrackBottomUp mutates Interferences!
1523 BacktrackBottomUp(TrySU, BtSU);
1524
1525 // Force the current node to be scheduled before the node that
1526 // requires the physical reg dep.
1527 if (BtSU->isAvailable) {
1528 BtSU->isAvailable = false;
1529 if (!BtSU->isPending)
1530 AvailableQueue->remove(BtSU);
1531 }
1532 LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1533 << ") to SU(" << TrySU->NodeNum << ")\n");
1534 AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1535
1536 // If one or more successors has been unscheduled, then the current
1537 // node is no longer available.
1538 if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1539 LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1540 CurSU = AvailableQueue->pop();
1541 } else {
1542 LLVM_DEBUG(dbgs() << "TrySU available\n");
1543 // Available and in AvailableQueue
1544 AvailableQueue->remove(TrySU);
1545 CurSU = TrySU;
1546 }
1547 FindAvailableNode();
1548 // Interferences has been mutated. We must break.
1549 break;
1550 }
1551 }
1552
1553 if (!CurSU) {
1554 // Can't backtrack. If it's too expensive to copy the value, then try
1555 // duplicate the nodes that produces these "too expensive to copy"
1556 // values to break the dependency. In case even that doesn't work,
1557 // insert cross class copies.
1558 // If it's not too expensive, i.e. cost != -1, issue copies.
1559 SUnit *TrySU = Interferences[0];
1560 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1561 assert(LRegs.size() == 1 && "Can't handle this yet!");
1562 unsigned Reg = LRegs[0];
1563 SUnit *LRDef = LiveRegDefs[Reg];
1564 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1565 const TargetRegisterClass *RC =
1566 TRI->getMinimalPhysRegClass(Reg, VT);
1567 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1568
1569 // If cross copy register class is the same as RC, then it must be possible
1570 // copy the value directly. Do not try duplicate the def.
1571 // If cross copy register class is not the same as RC, then it's possible to
1572 // copy the value but it require cross register class copies and it is
1573 // expensive.
1574 // If cross copy register class is null, then it's not possible to copy
1575 // the value at all.
1576 SUnit *NewDef = nullptr;
1577 if (DestRC != RC) {
1578 NewDef = CopyAndMoveSuccessors(LRDef);
1579 if (!DestRC && !NewDef)
1580 report_fatal_error("Can't handle live physical register dependency!");
1581 }
1582 if (!NewDef) {
1583 // Issue copies, these can be expensive cross register class copies.
1585 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1586 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1587 << " to SU #" << Copies.front()->NodeNum << "\n");
1588 AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1589 NewDef = Copies.back();
1590 }
1591
1592 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1593 << " to SU #" << TrySU->NodeNum << "\n");
1594 LiveRegDefs[Reg] = NewDef;
1595 AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1596 TrySU->isAvailable = false;
1597 CurSU = NewDef;
1598 }
1599 assert(CurSU && "Unable to resolve live physical register dependencies!");
1600 return CurSU;
1601}
1602
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty() || !Interferences.empty()) {
    LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
               AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    // Skip cycles for stalls (latency / hazards) before issuing the node.
    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
             "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
1647
1648namespace {
1649
1650class RegReductionPQBase;
1651
/// queue_sort - Common base for the priority comparators below. The default
/// isReady accepts every node; comparators that declare HasReadyFilter
/// provide their own cycle-aware version.
struct queue_sort {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};
1655
#ifndef NDEBUG
/// reverse_sort - Wraps another comparator and swaps its operands; used under
/// stress-scheduling (see popFromQueue) to exercise both comparison paths.
template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;  // Underlying comparator whose operand order is reversed.

  reverse_sort(SF &sf) : SortFunc(sf) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
#endif // NDEBUG
1670
/// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,      // Queried via RegReductionPriorityQueue::isBottomUp.
    HasReadyFilter = false  // No cycle-based ready filter.
  };

  RegReductionPQBase *SPQ;  // Back-pointer to the owning priority queue.

  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  // Comparator; when it returns true, popFromQueueImpl prefers 'right'.
  bool operator()(SUnit* left, SUnit* right) const;
};
1685
// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,      // Queried via RegReductionPriorityQueue::isBottomUp.
    HasReadyFilter = false  // No cycle-based ready filter.
  };

  RegReductionPQBase *SPQ;  // Back-pointer to the owning priority queue.

  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  // Comparator; when it returns true, popFromQueueImpl prefers 'right'.
  bool operator()(SUnit* left, SUnit* right) const;
};
1699
// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,      // Queried via RegReductionPriorityQueue::isBottomUp.
    HasReadyFilter = false  // isReady declared but not used as a queue filter.
  };

  RegReductionPQBase *SPQ;  // Back-pointer to the owning priority queue.

  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  // Cycle-aware readiness check (defined out of line).
  bool isReady(SUnit *SU, unsigned CurCycle) const;

  // Comparator; when it returns true, popFromQueueImpl prefers 'right'.
  bool operator()(SUnit* left, SUnit* right) const;
};
1715
// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,      // Queried via RegReductionPriorityQueue::isBottomUp.
    HasReadyFilter = false  // isReady declared but not used as a queue filter.
  };

  RegReductionPQBase *SPQ;  // Back-pointer to the owning priority queue.

  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  // Cycle-aware readiness check (defined out of line).
  bool isReady(SUnit *SU, unsigned CurCycle) const;

  // Comparator; when it returns true, popFromQueueImpl prefers 'right'.
  bool operator()(SUnit* left, SUnit* right) const;
};
1732
/// RegReductionPQBase - Shared state and queue plumbing for the
/// register-pressure-reducing priority queues: an unordered vector of ready
/// SUnits, Sethi-Ullman numbers, and optional per-class pressure tracking.
class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit *> Queue;  // Unordered; pop() does a linear best-pick.
  unsigned CurQueueId = 0;     // Monotonic id stamped on pushed nodes.
  bool TracksRegPressure;
  bool SrcOrder;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits = nullptr;

  MachineFunction &MF;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  const TargetLowering *TLI = nullptr;
  ScheduleDAGRRList *scheduleDAG = nullptr;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     bool srcorder,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
      SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
    if (TracksRegPressure) {
      // Initialize per-register-class pressure limits from the target.
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      llvm::fill(RegLimit, 0);
      llvm::fill(RegPressure, 0);
      for (const TargetRegisterClass *RC : TRI->regclasses())
        RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits) override;

  void addNode(const SUnit *SU) override;

  void updateNode(const SUnit *SU) override;

  void releaseState() override {
    SUnits = nullptr;
    SethiUllmanNumbers.clear();
    llvm::fill(RegPressure, 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  // IR order of the node's instruction; 0 when the SUnit has no node.
  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return SU->getNode()->getIROrder();
  }

  bool empty() const override { return Queue.empty(); }

  void push(SUnit *U) override {
    assert(!U->NodeQueueId && "Node in the queue already");
    // A non-zero NodeQueueId marks the node as queued (see remove/pop).
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) override {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    // Swap-with-back removal; the queue is unordered so this is safe.
    std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
    if (I != std::prev(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const override { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void scheduledNode(SUnit *SU) override;

  void unscheduledNode(SUnit *SU) override;

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};
1846
/// Remove and return the best node from an unordered queue according to
/// Picker (Picker(best, cand) == true means cand is preferred), using
/// linear scan plus swap-with-back removal.
template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
  unsigned BestIdx = 0;
  // Only compute the cost for the first 1000 items in the queue, to avoid
  // excessive compile-times for very large queues.
  for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
       I++)
    if (Picker(Q[BestIdx], Q[I]))
      BestIdx = I;
  SUnit *V = Q[BestIdx];
  // Swap the winner with the back and pop; order is irrelevant.
  if (BestIdx + 1 != Q.size())
    std::swap(Q[BestIdx], Q.back());
  Q.pop_back();
  return V;
}
1862
/// Pop the best node, optionally inverting the comparator under
/// -stress-sched (debug builds only) to exercise both comparison paths.
template<class SF>
SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }
#endif
  (void)DAG;  // Only used in debug builds.
  return popFromQueueImpl(Q, Picker);
}
1874
1875//===----------------------------------------------------------------------===//
1876// RegReductionPriorityQueue Definition
1877//===----------------------------------------------------------------------===//
1878//
1879// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1880// to reduce register pressure.
1881//
/// RegReductionPriorityQueue - Concrete priority queue parameterized by one
/// of the comparator structs above; SF also supplies the IsBottomUp and
/// HasReadyFilter configuration enums.
template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
  SF Picker;  // Comparator instance, bound back to this queue.

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            bool srcorder,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
                         tii, tri, tli),
      Picker(this) {}

  bool isBottomUp() const override { return SF::IsBottomUp; }

  bool isReady(SUnit *U) const override {
    // Only meaningful when the comparator declares a ready filter.
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  SUnit *pop() override {
    if (Queue.empty()) return nullptr;

    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    V->NodeQueueId = 0;  // Mark as no longer queued.
    return V;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit *> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
      dbgs() << "Height " << SU->getHeight() << ": ";
      DAG->dumpNode(*SU);
    }
  }
#endif
};
1924
// Convenience aliases: one concrete priority-queue type per sort strategy.
using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1929
1930} // end anonymous namespace
1931
1932//===----------------------------------------------------------------------===//
1933// Static Node Priority for Register Pressure Reduction
1934//===----------------------------------------------------------------------===//
1935
1936// Check for special nodes that bypass scheduling heuristics.
1937// Currently this pushes TokenFactor nodes down, but may be used for other
1938// pseudo-ops as well.
1939//
1940// Return -1 to schedule right above left, 1 for left above right.
1941// Return 0 if no bias exists.
1942static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1943 bool LSchedLow = left->isScheduleLow;
1944 bool RSchedLow = right->isScheduleLow;
1945 if (LSchedLow != RSchedLow)
1946 return LSchedLow < RSchedLow ? 1 : -1;
1947 return 0;
1948}
1949
1950/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1951/// Smaller number is the higher priority.
1952static unsigned
1953CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1954 if (SUNumbers[SU->NodeNum] != 0)
1955 return SUNumbers[SU->NodeNum];
1956
1957 // Use WorkList to avoid stack overflow on excessively large IRs.
1958 struct WorkState {
1959 WorkState(const SUnit *SU) : SU(SU) {}
1960 const SUnit *SU;
1961 unsigned PredsProcessed = 0;
1962 };
1963
1965 WorkList.push_back(SU);
1966 while (!WorkList.empty()) {
1967 auto &Temp = WorkList.back();
1968 auto *TempSU = Temp.SU;
1969 bool AllPredsKnown = true;
1970 // Try to find a non-evaluated pred and push it into the processing stack.
1971 for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1972 auto &Pred = TempSU->Preds[P];
1973 if (Pred.isCtrl()) continue; // ignore chain preds
1974 SUnit *PredSU = Pred.getSUnit();
1975 if (SUNumbers[PredSU->NodeNum] == 0) {
1976#ifndef NDEBUG
1977 // In debug mode, check that we don't have such element in the stack.
1978 for (auto It : WorkList)
1979 assert(It.SU != PredSU && "Trying to push an element twice?");
1980#endif
1981 // Next time start processing this one starting from the next pred.
1982 Temp.PredsProcessed = P + 1;
1983 WorkList.push_back(PredSU);
1984 AllPredsKnown = false;
1985 break;
1986 }
1987 }
1988
1989 if (!AllPredsKnown)
1990 continue;
1991
1992 // Once all preds are known, we can calculate the answer for this one.
1993 unsigned SethiUllmanNumber = 0;
1994 unsigned Extra = 0;
1995 for (const SDep &Pred : TempSU->Preds) {
1996 if (Pred.isCtrl()) continue; // ignore chain preds
1997 SUnit *PredSU = Pred.getSUnit();
1998 unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1999 assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
2000 if (PredSethiUllman > SethiUllmanNumber) {
2001 SethiUllmanNumber = PredSethiUllman;
2002 Extra = 0;
2003 } else if (PredSethiUllman == SethiUllmanNumber)
2004 ++Extra;
2005 }
2006
2007 SethiUllmanNumber += Extra;
2008 if (SethiUllmanNumber == 0)
2009 SethiUllmanNumber = 1;
2010 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2011 WorkList.pop_back();
2012 }
2013
2014 assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2015 return SUNumbers[SU->NodeNum];
2016}
2017
2018/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2019/// scheduling units.
2020void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2021 SethiUllmanNumbers.assign(SUnits->size(), 0);
2022
2023 for (const SUnit &SU : *SUnits)
2024 CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2025}
2026
2027void RegReductionPQBase::addNode(const SUnit *SU) {
2028 unsigned SUSize = SethiUllmanNumbers.size();
2029 if (SUnits->size() > SUSize)
2030 SethiUllmanNumbers.resize(SUSize*2, 0);
2031 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2032}
2033
/// Recompute SU's Sethi-Ullman number after the node changed (e.g. was
/// unfolded). Writing 0 first invalidates the cached value, since
/// CalcNodeSethiUllmanNumber treats 0 as "not yet computed".
void RegReductionPQBase::updateNode(const SUnit *SU) {
  SethiUllmanNumbers[SU->NodeNum] = 0;
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}
2038
2039// Lower priority means schedule further down. For bottom-up scheduling, lower
2040// priority SUs are scheduled before higher priority SUs.
2041unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2042 assert(SU->NodeNum < SethiUllmanNumbers.size());
2043 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2045 // CopyToReg should be close to its uses to facilitate coalescing and
2046 // avoid spilling.
2047 return 0;
2048 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2049 Opc == TargetOpcode::SUBREG_TO_REG ||
2050 Opc == TargetOpcode::INSERT_SUBREG)
2051 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2052 // close to their uses to facilitate coalescing.
2053 return 0;
2054 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2055 // If SU does not have a register use, i.e. it doesn't produce a value
2056 // that would be consumed (e.g. store), then it terminates a chain of
2057 // computation. Give it a large SethiUllman number so it will be
2058 // scheduled right before its predecessors that it doesn't lengthen
2059 // their live ranges.
2060 return 0xffff;
2061 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2062 // If SU does not have a register def, schedule it close to its uses
2063 // because it does not lengthen any live ranges.
2064 return 0;
2065#if 1
2066 return SethiUllmanNumbers[SU->NodeNum];
2067#else
2068 unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2069 if (SU->isCallOp) {
2070 // FIXME: This assumes all of the defs are used as call operands.
2071 int NP = (int)Priority - SU->getNode()->getNumValues();
2072 return (NP > 0) ? NP : 0;
2073 }
2074 return Priority;
2075#endif
2076}
2077
2078//===----------------------------------------------------------------------===//
2079// Register Pressure Tracking
2080//===----------------------------------------------------------------------===//
2081
2082#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2083LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2084 for (const TargetRegisterClass *RC : TRI->regclasses()) {
2085 unsigned Id = RC->getID();
2086 unsigned RP = RegPressure[Id];
2087 if (!RP) continue;
2088 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2089 << RegLimit[Id] << '\n');
2090 }
2091}
2092#endif
2093
2094bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2095 if (!TLI)
2096 return false;
2097
2098 for (const SDep &Pred : SU->Preds) {
2099 if (Pred.isCtrl())
2100 continue;
2101 SUnit *PredSU = Pred.getSUnit();
2102 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2103 // to cover the number of registers defined (they are all live).
2104 if (PredSU->NumRegDefsLeft == 0) {
2105 continue;
2106 }
2107 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2108 RegDefPos.IsValid(); RegDefPos.Advance()) {
2109 unsigned RCId, Cost;
2110 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2111
2112 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2113 return true;
2114 }
2115 }
2116 return false;
2117}
2118
2119bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2120 const SDNode *N = SU->getNode();
2121
2122 if (!N->isMachineOpcode() || !SU->NumSuccs)
2123 return false;
2124
2125 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2126 for (unsigned i = 0; i != NumDefs; ++i) {
2127 MVT VT = N->getSimpleValueType(i);
2128 if (!N->hasAnyUseOfValue(i))
2129 continue;
2130 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2131 if (RegPressure[RCId] >= RegLimit[RCId])
2132 return true;
2133 }
2134 return false;
2135}
2136
2137// Compute the register pressure contribution by this instruction by count up
2138// for uses that are not live and down for defs. Only count register classes
2139// that are already under high pressure. As a side effect, compute the number of
2140// uses of registers that are already live.
2141//
2142// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2143// so could probably be factored.
int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
  LiveUses = 0;
  int PDiff = 0;
  // Count up for each operand value that is not yet live (it would begin a
  // live range when SU is scheduled) — but only in classes already at their
  // limit.
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      if (PredSU->getNode()->isMachineOpcode())
        ++LiveUses; // Side effect: count uses of already-live registers.
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      MVT VT = RegDefPos.GetValue();
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] >= RegLimit[RCId])
        ++PDiff;
    }
  }
  const SDNode *N = SU->getNode();

  // Only machine nodes with data successors contribute defs to the balance.
  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    return PDiff;

  // Count down for each used value SU defines in a class under pressure.
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      --PDiff;
  }
  return PDiff;
}
2182
// Update tracked register pressure after SU is scheduled (bottom-up): each
// operand's defining value becomes live (pressure up); values SU defines
// whose last use was just covered come off the pressure (pressure down).
void RegReductionPQBase::scheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  // Nodes without an SDNode carry no pressure information.
  if (!SU->getNode())
    return;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    // FIXME: The ScheduleDAG currently loses information about which of a
    // node's values is consumed by each dependence. Consequently, if the node
    // defines multiple register classes, we don't know which to pressurize
    // here. Instead the following loop consumes the register defs in an
    // arbitrary order. At least it handles the common case of clustered loads
    // to the same class. For precise liveness, each SDep needs to indicate the
    // result number. But that tightly couples the ScheduleDAG with the
    // SelectionDAG making updates tricky. A simpler hack would be to attach a
    // value type or register class to SDep.
    //
    // The most important aspect of register tracking is balancing the increase
    // here with the reduction further below. Note that this SU may use multiple
    // defs in PredSU. The can't be determined here, but we've already
    // compensated by reducing NumRegDefsLeft in PredSU during
    // ScheduleDAGSDNodes::AddSchedEdges.
    --PredSU->NumRegDefsLeft;
    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    // Skip the defs already consumed by other uses; charge exactly one def.
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
      if (SkipRegDefs)
        continue;

      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
      RegPressure[RCId] += Cost;
      break;
    }
  }

  // We should have this assert, but there may be dead SDNodes that never
  // materialize as SUnits, so they don't appear to generate liveness.
  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
  int SkipRegDefs = (int)SU->NumRegDefsLeft;
  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    if (SkipRegDefs > 0)
      continue;
    unsigned RCId, Cost;
    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    if (RegPressure[RCId] < Cost) {
      // Register pressure tracking is imprecise. This can happen. But we try
      // hard not to let it happen because it likely results in poor scheduling.
      LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
                        << ") has too many regdefs\n");
      RegPressure[RCId] = 0;
    }
    else {
      RegPressure[RCId] -= Cost;
    }
  }
  LLVM_DEBUG(dumpRegPressure());
}
2251
2252void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2253 if (!TracksRegPressure)
2254 return;
2255
2256 const SDNode *N = SU->getNode();
2257 if (!N) return;
2258
2259 if (!N->isMachineOpcode()) {
2260 if (N->getOpcode() != ISD::CopyToReg)
2261 return;
2262 } else {
2263 unsigned Opc = N->getMachineOpcode();
2264 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2265 Opc == TargetOpcode::INSERT_SUBREG ||
2266 Opc == TargetOpcode::SUBREG_TO_REG ||
2267 Opc == TargetOpcode::REG_SEQUENCE ||
2268 Opc == TargetOpcode::IMPLICIT_DEF)
2269 return;
2270 }
2271
2272 for (const SDep &Pred : SU->Preds) {
2273 if (Pred.isCtrl())
2274 continue;
2275 SUnit *PredSU = Pred.getSUnit();
2276 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2277 // counts data deps.
2278 if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2279 continue;
2280 const SDNode *PN = PredSU->getNode();
2281 if (!PN->isMachineOpcode()) {
2282 if (PN->getOpcode() == ISD::CopyFromReg) {
2283 MVT VT = PN->getSimpleValueType(0);
2284 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2285 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2286 }
2287 continue;
2288 }
2289 unsigned POpc = PN->getMachineOpcode();
2290 if (POpc == TargetOpcode::IMPLICIT_DEF)
2291 continue;
2292 if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2293 POpc == TargetOpcode::INSERT_SUBREG ||
2294 POpc == TargetOpcode::SUBREG_TO_REG) {
2295 MVT VT = PN->getSimpleValueType(0);
2296 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2297 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2298 continue;
2299 }
2300 if (POpc == TargetOpcode::REG_SEQUENCE) {
2301 unsigned DstRCIdx = PN->getConstantOperandVal(0);
2302 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
2303 unsigned RCId = RC->getID();
2304 // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
2305 // here. Instead use the same constant as in GetCostForDef.
2307 continue;
2308 }
2309 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2310 for (unsigned i = 0; i != NumDefs; ++i) {
2311 MVT VT = PN->getSimpleValueType(i);
2312 if (!PN->hasAnyUseOfValue(i))
2313 continue;
2314 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2315 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2316 // Register pressure tracking is imprecise. This can happen.
2317 RegPressure[RCId] = 0;
2318 else
2319 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2320 }
2321 }
2322
2323 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2324 // may transfer data dependencies to CopyToReg.
2325 if (SU->NumSuccs && N->isMachineOpcode()) {
2326 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2327 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2328 MVT VT = N->getSimpleValueType(i);
2329 if (VT == MVT::Glue || VT == MVT::Other)
2330 continue;
2331 if (!N->hasAnyUseOfValue(i))
2332 continue;
2333 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2334 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2335 }
2336 }
2337
2338 LLVM_DEBUG(dumpRegPressure());
2339}
2340
2341//===----------------------------------------------------------------------===//
2342// Dynamic Node Priority for Register Pressure Reduction
2343//===----------------------------------------------------------------------===//
2344
2345/// closestSucc - Returns the scheduled cycle of the successor which is
2346/// closest to the current cycle.
2347static unsigned closestSucc(const SUnit *SU) {
2348 unsigned MaxHeight = 0;
2349 for (const SDep &Succ : SU->Succs) {
2350 if (Succ.isCtrl()) continue; // ignore chain succs
2351 unsigned Height = Succ.getSUnit()->getHeight();
2352 // If there are bunch of CopyToRegs stacked up, they should be considered
2353 // to be at the same position.
2354 if (Succ.getSUnit()->getNode() &&
2355 Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2356 Height = closestSucc(Succ.getSUnit())+1;
2357 if (Height > MaxHeight)
2358 MaxHeight = Height;
2359 }
2360 return MaxHeight;
2361}
2362
2363/// calcMaxScratches - Returns an cost estimate of the worse case requirement
2364/// for scratch registers, i.e. number of data dependencies.
2365static unsigned calcMaxScratches(const SUnit *SU) {
2366 unsigned Scratches = 0;
2367 for (const SDep &Pred : SU->Preds) {
2368 if (Pred.isCtrl()) continue; // ignore chain preds
2369 Scratches++;
2370 }
2371 return Scratches;
2372}
2373
2374/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2375/// CopyFromReg from a virtual register.
2376static bool hasOnlyLiveInOpers(const SUnit *SU) {
2377 bool RetVal = false;
2378 for (const SDep &Pred : SU->Preds) {
2379 if (Pred.isCtrl()) continue;
2380 const SUnit *PredSU = Pred.getSUnit();
2381 if (PredSU->getNode() &&
2382 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2383 Register Reg =
2384 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2385 if (Reg.isVirtual()) {
2386 RetVal = true;
2387 continue;
2388 }
2389 }
2390 return false;
2391 }
2392 return RetVal;
2393}
2394
2395/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2396/// CopyToReg to a virtual register. This SU def is probably a liveout and
2397/// it has no other use. It should be scheduled closer to the terminator.
2398static bool hasOnlyLiveOutUses(const SUnit *SU) {
2399 bool RetVal = false;
2400 for (const SDep &Succ : SU->Succs) {
2401 if (Succ.isCtrl()) continue;
2402 const SUnit *SuccSU = Succ.getSUnit();
2403 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2404 Register Reg =
2405 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2406 if (Reg.isVirtual()) {
2407 RetVal = true;
2408 continue;
2409 }
2410 }
2411 return false;
2412 }
2413 return RetVal;
2414}
2415
2416// Set isVRegCycle for a node with only live in opers and live out uses. Also
2417// set isVRegCycle for its CopyFromReg operands.
2418//
2419// This is only relevant for single-block loops, in which case the VRegCycle
2420// node is likely an induction variable in which the operand and target virtual
2421// registers should be coalesced (e.g. pre/post increment values). Setting the
2422// isVRegCycle flag helps the scheduler prioritize other uses of the same
2423// CopyFromReg so that this node becomes the virtual register "kill". This
2424// avoids interference between the values live in and out of the block and
2425// eliminates a copy inside the loop.
2426static void initVRegCycle(SUnit *SU) {
2428 return;
2429
2430 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2431 return;
2432
2433 LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2434
2435 SU->isVRegCycle = true;
2436
2437 for (const SDep &Pred : SU->Preds) {
2438 if (Pred.isCtrl()) continue;
2439 Pred.getSUnit()->isVRegCycle = true;
2440 }
2441}
2442
2443// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2444// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2445static void resetVRegCycle(SUnit *SU) {
2446 if (!SU->isVRegCycle)
2447 return;
2448
2449 for (const SDep &Pred : SU->Preds) {
2450 if (Pred.isCtrl()) continue; // ignore chain preds
2451 SUnit *PredSU = Pred.getSUnit();
2452 if (PredSU->isVRegCycle) {
2453 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2454 "VRegCycle def must be CopyFromReg");
2455 Pred.getSUnit()->isVRegCycle = false;
2456 }
2457 }
2458}
2459
2460// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2461// means a node that defines the VRegCycle has not been scheduled yet.
2462static bool hasVRegCycleUse(const SUnit *SU) {
2463 // If this SU also defines the VReg, don't hoist it as a "use".
2464 if (SU->isVRegCycle)
2465 return false;
2466
2467 for (const SDep &Pred : SU->Preds) {
2468 if (Pred.isCtrl()) continue; // ignore chain preds
2469 if (Pred.getSUnit()->isVRegCycle &&
2470 Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2471 LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2472 return true;
2473 }
2474 }
2475 return false;
2476}
2477
2478// Check for either a dependence (latency) or resource (hazard) stall.
2479//
2480// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2481static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2482 if ((int)SPQ->getCurCycle() < Height) return true;
2483 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2485 return true;
2486 return false;
2487}
2488
2489// Return -1 if left has higher priority, 1 if right has higher priority.
2490// Return 0 if latency-based priority is equivalent.
// Compare latency-based priority of two nodes for bottom-up scheduling.
// Return -1 if left has higher priority, 1 if right has higher priority,
// 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                            RegReductionPQBase *SPQ) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
  int LHeight = (int)left->getHeight() + LPenalty;
  int RHeight = (int)right->getHeight() + RPenalty;

  // With checkPref set, only nodes that explicitly prefer ILP scheduling are
  // subjected to the stall checks below.
  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
    BUHasStall(left, LHeight, SPQ);
  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
    BUHasStall(right, RHeight, SPQ);

  // If scheduling one of the node will cause a pipeline stall, delay it.
  // If scheduling either one of the node will cause a pipeline stall, sort
  // them according to their height.
  if (LStall) {
    if (!RStall)
      return 1;
    if (LHeight != RHeight)
      return LHeight > RHeight ? 1 : -1;
  } else if (RStall)
    return -1;

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.
  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
                     right->SchedulingPref == Sched::ILP)) {
    // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
    // is enabled, grouping instructions by cycle, then its height is already
    // covered so only its depth matters. We also reach this point if both stall
    // but have the same height.
    if (!SPQ->getHazardRec()->isEnabled()) {
      if (LHeight != RHeight)
        return LHeight > RHeight ? 1 : -1;
    }
    // Taller height wins above; here the deeper node loses (penalties make a
    // node effectively shallower).
    int LDepth = left->getDepth() - LPenalty;
    int RDepth = right->getDepth() - RPenalty;
    if (LDepth != RDepth) {
      LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
                        << ") depth " << LDepth << " vs SU (" << right->NodeNum
                        << ") depth " << RDepth << "\n");
      return LDepth < RDepth ? 1 : -1;
    }
    // Final tie-break: prefer the node with the larger latency.
    if (left->Latency != right->Latency)
      return left->Latency > right->Latency ? 1 : -1;
  }
  return 0;
}
2541
2542static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2543 // Schedule physical register definitions close to their use. This is
2544 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2545 // long as shortening physreg live ranges is generally good, we can defer
2546 // creating a subtarget hook.
2548 bool LHasPhysReg = left->hasPhysRegDefs;
2549 bool RHasPhysReg = right->hasPhysRegDefs;
2550 if (LHasPhysReg != RHasPhysReg) {
2551 #ifndef NDEBUG
2552 static const char *const PhysRegMsg[] = { " has no physreg",
2553 " defines a physreg" };
2554 #endif
2555 LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2556 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2557 << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2558 return LHasPhysReg < RHasPhysReg;
2559 }
2560 }
2561
2562 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
2563 unsigned LPriority = SPQ->getNodePriority(left);
2564 unsigned RPriority = SPQ->getNodePriority(right);
2565
2566 // Be really careful about hoisting call operands above previous calls.
2567 // Only allows it if it would reduce register pressure.
2568 if (left->isCall && right->isCallOp) {
2569 unsigned RNumVals = right->getNode()->getNumValues();
2570 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2571 }
2572 if (right->isCall && left->isCallOp) {
2573 unsigned LNumVals = left->getNode()->getNumValues();
2574 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2575 }
2576
2577 if (LPriority != RPriority)
2578 return LPriority > RPriority;
2579
2580 // One or both of the nodes are calls and their sethi-ullman numbers are the
2581 // same, then keep source order.
2582 if (left->isCall || right->isCall) {
2583 unsigned LOrder = SPQ->getNodeOrdering(left);
2584 unsigned ROrder = SPQ->getNodeOrdering(right);
2585
2586 // Prefer an ordering where the lower the non-zero order number, the higher
2587 // the preference.
2588 if ((LOrder || ROrder) && LOrder != ROrder)
2589 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2590 }
2591
2592 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2593 // e.g.
2594 // t1 = op t2, c1
2595 // t3 = op t4, c2
2596 //
2597 // and the following instructions are both ready.
2598 // t2 = op c3
2599 // t4 = op c4
2600 //
2601 // Then schedule t2 = op first.
2602 // i.e.
2603 // t4 = op c4
2604 // t2 = op c3
2605 // t1 = op t2, c1
2606 // t3 = op t4, c2
2607 //
2608 // This creates more short live intervals.
2609 unsigned LDist = closestSucc(left);
2610 unsigned RDist = closestSucc(right);
2611 if (LDist != RDist)
2612 return LDist < RDist;
2613
2614 // How many registers becomes live when the node is scheduled.
2615 unsigned LScratch = calcMaxScratches(left);
2616 unsigned RScratch = calcMaxScratches(right);
2617 if (LScratch != RScratch)
2618 return LScratch > RScratch;
2619
2620 // Comparing latency against a call makes little sense unless the node
2621 // is register pressure-neutral.
2622 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2623 return (left->NodeQueueId > right->NodeQueueId);
2624
2625 // Do not compare latencies when one or both of the nodes are calls.
2626 if (!DisableSchedCycles &&
2627 !(left->isCall || right->isCall)) {
2628 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2629 if (result != 0)
2630 return result > 0;
2631 }
2632 else {
2633 if (left->getHeight() != right->getHeight())
2634 return left->getHeight() > right->getHeight();
2635
2636 if (left->getDepth() != right->getDepth())
2637 return left->getDepth() < right->getDepth();
2638 }
2639
2640 assert(left->NodeQueueId && right->NodeQueueId &&
2641 "NodeQueueId cannot be zero");
2642 return (left->NodeQueueId > right->NodeQueueId);
2643}
2644
2645// Bottom up
2646bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2647 if (int res = checkSpecialNodes(left, right))
2648 return res > 0;
2649
2650 return BURRSort(left, right, SPQ);
2651}
2652
2653// Source order, otherwise bottom up.
2654bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2655 if (int res = checkSpecialNodes(left, right))
2656 return res > 0;
2657
2658 unsigned LOrder = SPQ->getNodeOrdering(left);
2659 unsigned ROrder = SPQ->getNodeOrdering(right);
2660
2661 // Prefer an ordering where the lower the non-zero order number, the higher
2662 // the preference.
2663 if ((LOrder || ROrder) && LOrder != ROrder)
2664 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2665
2666 return BURRSort(left, right, SPQ);
2667}
2668
2669// If the time between now and when the instruction will be ready can cover
2670// the spill code, then avoid adding it to the ready queue. This gives long
2671// stalls highest priority and allows hoisting across calls. It should also
2672// speed up processing the available queue.
2673bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2674 static const unsigned ReadyDelay = 3;
2675
2676 if (SPQ->MayReduceRegPressure(SU)) return true;
2677
2678 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2679
2680 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2682 return false;
2683
2684 return true;
2685}
2686
2687// Return true if right should be scheduled with higher priority than left.
2688bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2689 if (int res = checkSpecialNodes(left, right))
2690 return res > 0;
2691
2692 if (left->isCall || right->isCall)
2693 // No way to compute latency of calls.
2694 return BURRSort(left, right, SPQ);
2695
2696 bool LHigh = SPQ->HighRegPressure(left);
2697 bool RHigh = SPQ->HighRegPressure(right);
2698 // Avoid causing spills. If register pressure is high, schedule for
2699 // register pressure reduction.
2700 if (LHigh && !RHigh) {
2701 LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2702 << right->NodeNum << ")\n");
2703 return true;
2704 }
2705 else if (!LHigh && RHigh) {
2706 LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2707 << left->NodeNum << ")\n");
2708 return false;
2709 }
2710 if (!LHigh && !RHigh) {
2711 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2712 if (result != 0)
2713 return result > 0;
2714 }
2715 return BURRSort(left, right, SPQ);
2716}
2717
2718// Schedule as many instructions in each cycle as possible. So don't make an
2719// instruction available unless it is ready in the current cycle.
2720bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2721 if (SU->getHeight() > CurCycle) return false;
2722
2723 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2725 return false;
2726
2727 return true;
2728}
2729
2730static bool canEnableCoalescing(SUnit *SU) {
2731 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2733 // CopyToReg should be close to its uses to facilitate coalescing and
2734 // avoid spilling.
2735 return true;
2736
2737 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2738 Opc == TargetOpcode::SUBREG_TO_REG ||
2739 Opc == TargetOpcode::INSERT_SUBREG)
2740 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2741 // close to their uses to facilitate coalescing.
2742 return true;
2743
2744 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2745 // If SU does not have a register def, schedule it close to its uses
2746 // because it does not lengthen any live ranges.
2747 return true;
2748
2749 return false;
2750}
2751
2752// list-ilp is currently an experimental scheduler that allows various
2753// heuristics to be enabled prior to the normal register reduction logic.
2754bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2755 if (int res = checkSpecialNodes(left, right))
2756 return res > 0;
2757
2758 if (left->isCall || right->isCall)
2759 // No way to compute latency of calls.
2760 return BURRSort(left, right, SPQ);
2761
2762 unsigned LLiveUses = 0, RLiveUses = 0;
2763 int LPDiff = 0, RPDiff = 0;
2765 LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2766 RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2767 }
2768 if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2769 LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2770 << "): " << LPDiff << " != SU(" << right->NodeNum
2771 << "): " << RPDiff << "\n");
2772 return LPDiff > RPDiff;
2773 }
2774
2775 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2776 bool LReduce = canEnableCoalescing(left);
2777 bool RReduce = canEnableCoalescing(right);
2778 if (LReduce && !RReduce) return false;
2779 if (RReduce && !LReduce) return true;
2780 }
2781
2782 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2783 LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2784 << " != SU(" << right->NodeNum << "): " << RLiveUses
2785 << "\n");
2786 return LLiveUses < RLiveUses;
2787 }
2788
2789 if (!DisableSchedStalls) {
2790 bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2791 bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2792 if (LStall != RStall)
2793 return left->getHeight() > right->getHeight();
2794 }
2795
2797 int spread = (int)left->getDepth() - (int)right->getDepth();
2798 if (std::abs(spread) > MaxReorderWindow) {
2799 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2800 << left->getDepth() << " != SU(" << right->NodeNum
2801 << "): " << right->getDepth() << "\n");
2802 return left->getDepth() < right->getDepth();
2803 }
2804 }
2805
2806 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2807 int spread = (int)left->getHeight() - (int)right->getHeight();
2808 if (std::abs(spread) > MaxReorderWindow)
2809 return left->getHeight() > right->getHeight();
2810 }
2811
2812 return BURRSort(left, right, SPQ);
2813}
2814
2815void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2816 SUnits = &sunits;
2817 // Add pseudo dependency edges for two-address nodes.
2818 if (!Disable2AddrHack)
2819 AddPseudoTwoAddrDeps();
2820 // Reroute edges to nodes with multiple uses.
2821 if (!TracksRegPressure && !SrcOrder)
2822 PrescheduleNodesWithMultipleUses();
2823 // Calculate node priorities.
2824 CalculateSethiUllmanNumbers();
2825
2826 // For single block loops, mark nodes that look like canonical IV increments.
2827 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2828 for (SUnit &SU : sunits)
2829 initVRegCycle(&SU);
2830}
2831
2832//===----------------------------------------------------------------------===//
2833// Preschedule for Register Pressure
2834//===----------------------------------------------------------------------===//
2835
2836bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2837 if (SU->isTwoAddress) {
2838 unsigned Opc = SU->getNode()->getMachineOpcode();
2839 const MCInstrDesc &MCID = TII->get(Opc);
2840 unsigned NumRes = MCID.getNumDefs();
2841 unsigned NumOps = MCID.getNumOperands() - NumRes;
2842 for (unsigned i = 0; i != NumOps; ++i) {
2843 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2844 SDNode *DU = SU->getNode()->getOperand(i).getNode();
2845 if (DU->getNodeId() != -1 &&
2846 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2847 return true;
2848 }
2849 }
2850 }
2851 return false;
2852}
2853
2854/// canClobberReachingPhysRegUse - True if SU would clobber one of it's
2855/// successor's explicit physregs whose definition can reach DepSU.
2856/// i.e. DepSU should not be scheduled above SU.
2857static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2858 ScheduleDAGRRList *scheduleDAG,
2859 const TargetInstrInfo *TII,
2860 const TargetRegisterInfo *TRI) {
2861 ArrayRef<MCPhysReg> ImpDefs =
2862 TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
2863 const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2864 if (ImpDefs.empty() && !RegMask)
2865 return false;
2866
2867 for (const SDep &Succ : SU->Succs) {
2868 SUnit *SuccSU = Succ.getSUnit();
2869 for (const SDep &SuccPred : SuccSU->Preds) {
2870 if (!SuccPred.isAssignedRegDep())
2871 continue;
2872
2873 if (RegMask &&
2874 MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2875 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2876 return true;
2877
2878 for (MCPhysReg ImpDef : ImpDefs) {
2879 // Return true if SU clobbers this physical register use and the
2880 // definition of the register reaches from DepSU. IsReachable queries
2881 // a topological forward sort of the DAG (following the successors).
2882 if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
2883 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2884 return true;
2885 }
2886 }
2887 }
2888 return false;
2889}
2890
2891/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2892/// physical register defs.
2893static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2894 const TargetInstrInfo *TII,
2895 const TargetRegisterInfo *TRI) {
2896 SDNode *N = SuccSU->getNode();
2897 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2898 ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
2899 assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
2900 for (const SDNode *SUNode = SU->getNode(); SUNode;
2901 SUNode = SUNode->getGluedNode()) {
2902 if (!SUNode->isMachineOpcode())
2903 continue;
2904 ArrayRef<MCPhysReg> SUImpDefs =
2905 TII->get(SUNode->getMachineOpcode()).implicit_defs();
2906 const uint32_t *SURegMask = getNodeRegMask(SUNode);
2907 if (SUImpDefs.empty() && !SURegMask)
2908 continue;
2909 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2910 MVT VT = N->getSimpleValueType(i);
2911 if (VT == MVT::Glue || VT == MVT::Other)
2912 continue;
2913 if (!N->hasAnyUseOfValue(i))
2914 continue;
2915 MCPhysReg Reg = ImpDefs[i - NumDefs];
2916 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2917 return true;
2918 for (MCPhysReg SUReg : SUImpDefs) {
2919 if (TRI->regsOverlap(Reg, SUReg))
2920 return true;
2921 }
2922 }
2923 }
2924 return false;
2925}
2926
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (SUnit &SU : *SUnits) {
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU.NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU.NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU.getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
        continue;

    // Look for a control predecessor that is a call-frame-setup node.
    SDNode *PredFrameSetup = nullptr;
    for (const SDep &Pred : SU.Preds)
      if (Pred.isCtrl() && Pred.getSUnit()) {
        // Find the predecessor which is not data dependence.
        SDNode *PredND = Pred.getSUnit()->getNode();

        // If PredND is FrameSetup, we should not preschedule the node,
        // or else, when bottom up scheduling, ADJCALLSTACKDOWN and
        // ADJCALLSTACKUP may hold CallResource too long and make other
        // calls can't be scheduled. If there's no other available node
        // to schedule, the scheduler will try to rename the register by
        // creating copy to avoid the conflict which will fail because
        // CallResource is not a real physical register.
        if (PredND && PredND->isMachineOpcode() &&
            (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
          PredFrameSetup = PredND;
          break;
        }
      }
    // Skip nodes that have a FrameSetup parent.
    if (PredFrameSetup != nullptr)
      continue;

    // Locate the single data predecessor.
    SUnit *PredSU = nullptr;
    for (const SDep &Pred : SU.Preds)
      if (!Pred.isCtrl()) {
        PredSU = Pred.getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU.getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
        continue;

    // Perform checks on the successors of PredSU.
    for (const SDep &PredSucc : PredSU->Succs) {
      SUnit *PredSuccSU = PredSucc.getSUnit();
      if (PredSuccSU == &SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(&SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    LLVM_DEBUG(
        dbgs() << "    Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
               << PredSU->NodeNum
               << " to guide scheduling in the presence of multiple uses\n");
    // Reroute every other successor edge of PredSU through SU. Note the
    // index loop: edges are removed and re-added while iterating, so the
    // counter is decremented (--i) whenever an edge is moved.
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != &SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPredQueued(&SU, Edge);
        Edge.setSUnit(&SU);
        scheduleDAG->AddPredQueued(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}
3060
3061/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3062/// it as a def&use operand. Add a pseudo control edge from it to the other
3063/// node (if it won't create a cycle) so the two-address one will be scheduled
3064/// first (lower in the schedule). If both nodes are two-address, favor the
3065/// one that has a CopyToReg use (more likely to be a loop induction update).
3066/// If both are two-address, but one is commutable while the other is not
3067/// commutable, favor the one that's not commutable.
3068void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3069 for (SUnit &SU : *SUnits) {
3070 if (!SU.isTwoAddress)
3071 continue;
3072
3073 SDNode *Node = SU.getNode();
3074 if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3075 continue;
3076
3077 bool isLiveOut = hasOnlyLiveOutUses(&SU);
3078 unsigned Opc = Node->getMachineOpcode();
3079 const MCInstrDesc &MCID = TII->get(Opc);
3080 unsigned NumRes = MCID.getNumDefs();
3081 unsigned NumOps = MCID.getNumOperands() - NumRes;
3082 for (unsigned j = 0; j != NumOps; ++j) {
3083 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3084 continue;
3085 SDNode *DU = SU.getNode()->getOperand(j).getNode();
3086 if (DU->getNodeId() == -1)
3087 continue;
3088 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3089 if (!DUSU)
3090 continue;
3091 for (const SDep &Succ : DUSU->Succs) {
3092 if (Succ.isCtrl())
3093 continue;
3094 SUnit *SuccSU = Succ.getSUnit();
3095 if (SuccSU == &SU)
3096 continue;
3097 // Be conservative. Ignore if nodes aren't at roughly the same
3098 // depth and height.
3099 if (SuccSU->getHeight() < SU.getHeight() &&
3100 (SU.getHeight() - SuccSU->getHeight()) > 1)
3101 continue;
3102 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3103 // constrains whatever is using the copy, instead of the copy
3104 // itself. In the case that the copy is coalesced, this
3105 // preserves the intent of the pseudo two-address heurietics.
3106 while (SuccSU->Succs.size() == 1 &&
3107 SuccSU->getNode()->isMachineOpcode() &&
3108 SuccSU->getNode()->getMachineOpcode() ==
3109 TargetOpcode::COPY_TO_REGCLASS)
3110 SuccSU = SuccSU->Succs.front().getSUnit();
3111 // Don't constrain non-instruction nodes.
3112 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3113 continue;
3114 // Don't constrain nodes with physical register defs if the
3115 // predecessor can clobber them.
3116 if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3117 if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3118 continue;
3119 }
3120 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3121 // these may be coalesced away. We want them close to their uses.
3122 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3123 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3124 SuccOpc == TargetOpcode::INSERT_SUBREG ||
3125 SuccOpc == TargetOpcode::SUBREG_TO_REG)
3126 continue;
3127 if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3128 (!canClobber(SuccSU, DUSU) ||
3129 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3130 (!SU.isCommutable && SuccSU->isCommutable)) &&
3131 !scheduleDAG->IsReachable(SuccSU, &SU)) {
3133 << " Adding a pseudo-two-addr edge from SU #"
3134 << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3135 scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3136 }
3137 }
3138 }
3139 }
3140}
3141
3142//===----------------------------------------------------------------------===//
3143// Public Constructor Functions
3144//===----------------------------------------------------------------------===//
3145
3147 CodeGenOptLevel OptLevel) {
3148 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3149 const TargetInstrInfo *TII = STI.getInstrInfo();
3150 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3151
3152 BURegReductionPriorityQueue *PQ =
3153 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3154 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3155 PQ->setScheduleDAG(SD);
3156 return SD;
3157}
3158
3161 CodeGenOptLevel OptLevel) {
3162 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3163 const TargetInstrInfo *TII = STI.getInstrInfo();
3164 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3165
3166 SrcRegReductionPriorityQueue *PQ =
3167 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3168 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3169 PQ->setScheduleDAG(SD);
3170 return SD;
3171}
3172
3175 CodeGenOptLevel OptLevel) {
3176 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3177 const TargetInstrInfo *TII = STI.getInstrInfo();
3178 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3179 const TargetLowering *TLI = IS->TLI;
3180
3181 HybridBURRPriorityQueue *PQ =
3182 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3183
3184 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3185 PQ->setScheduleDAG(SD);
3186 return SD;
3187}
3188
3190 CodeGenOptLevel OptLevel) {
3191 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3192 const TargetInstrInfo *TII = STI.getInstrInfo();
3193 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3194 const TargetLowering *TLI = IS->TLI;
3195
3196 ILPBURRPriorityQueue *PQ =
3197 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3198 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3199 PQ->setScheduleDAG(SD);
3200 return SD;
3201}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file defines the DenseMap class.
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define P(N)
SI Lower i1 Copies
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII)
getPhysicalRegisterVT - Returns the ValueType of the physical register definition of the specified no...
static bool CheckForLiveRegDef(SUnit *SU, MCRegister Reg, std::vector< SUnit * > &LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs, const TargetRegisterInfo *TRI, const SDNode *Node=nullptr)
CheckForLiveRegDef - Return true and update live register vector if the specified register def of the...
static bool canEnableCoalescing(SUnit *SU)
static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", createSourceListDAGScheduler)
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
static cl::opt< bool > DisableSchedStalls("disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp"))
static bool hasOnlyLiveInOpers(const SUnit *SU)
hasOnlyLiveInOpers - Return true if SU has only value predecessors that are CopyFromReg from a virtua...
static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII)
IsChainDependent - Test if Outer is reachable from Inner through chain dependencies.
static bool hasOnlyLiveOutUses(const SUnit *SU)
hasOnlyLiveOutUses - Return true if SU has only value successors that are CopyToReg to a virtual regi...
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
static cl::opt< bool > Disable2AddrHack("disable-2addr-hack", cl::Hidden, cl::init(true), cl::desc("Disable scheduler's two-address hack"))
static RegisterScheduler ILPListDAGScheduler("list-ilp", "Bottom-up register pressure aware list scheduling " "which tries to balance ILP and register pressure", createILPListDAGScheduler)
static void resetVRegCycle(SUnit *SU)
static RegisterScheduler hybridListDAGScheduler("list-hybrid", "Bottom-up register pressure aware list scheduling " "which tries to balance latency and register pressure", createHybridListDAGScheduler)
static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberReachingPhysRegUse - True if SU would clobber one of it's successor's explicit physregs who...
static cl::opt< bool > DisableSchedPhysRegJoin("disable-sched-physreg-join", cl::Hidden, cl::init(false), cl::desc("Disable physreg def-use affinity"))
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
canClobberPhysRegDefs - True if SU would clobber one of SuccSU's physical register defs.
static cl::opt< unsigned > AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle when no target itinerary exists."))
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, unsigned &RegClass, unsigned &Cost, const MachineFunction &MF)
GetCostForDef - Looks up the register class and cost for a given definition.
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
static cl::opt< bool > DisableSchedRegPressure("disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp"))
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
static void initVRegCycle(SUnit *SU)
static constexpr unsigned RegSequenceCost
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
static bool isOperandOf(const SUnit *SU, SDNode *N)
static cl::opt< bool > DisableSchedVRegCycle("disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks"))
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle.
static bool hasVRegCycleUse(const SUnit *SU)
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers,...
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ)
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit * > LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask,...
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getIROrder() const
Return the node ordering.
void setNodeId(int Id)
Set unique node id.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition ScheduleDAG.h:74
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
void setSUnit(SUnit *SU)
Register getReg() const
Returns the register associated with this edge.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
LLVM_ABI void setHeightToAtLeast(unsigned NewHeight)
If NewHeight is greater than this node's height value, set it to be the new height value.
unsigned NumSuccs
unsigned NumPreds
unsigned NodeQueueId
Queue id of node.
unsigned NodeNum
Entry # of node in the node vector.
unsigned NumSuccsLeft
bool hasPhysRegClobbers
Has any physreg defs, used or not.
bool isCallOp
Is a function call operand.
const TargetRegisterClass * CopyDstRC
Is a special copy node if != nullptr.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
LLVM_ABI void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned short Latency
Node latency.
unsigned short NumRegDefsLeft
bool isPending
True once pending.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
bool isAvailable
True once available.
bool isScheduleLow
True if preferable to schedule low.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
Sched::Preference SchedulingPref
Scheduling preference.
const TargetRegisterClass * CopySrcRC
SDNode * getNode() const
Returns the representative SDNode for this SUnit.
bool isTwoAddress
Is a two-address instruction.
bool isCommutable
Is a commutable instruction.
bool isVRegCycle
May use and def the same vreg.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
RegDefIter - In place iteration over the values defined by an SUnit.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
virtual void dumpNode(const SUnit &SU) const =0
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
This interface is used to plug different priorities computation algorithms into the list scheduler.
void setCurCycle(unsigned Cycle)
virtual void remove(SUnit *SU)=0
virtual void releaseState()=0
virtual SUnit * pop()=0
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
virtual bool tracksRegPressure() const
virtual void dump(ScheduleDAG *) const
virtual void initNodes(std::vector< SUnit > &SUnits)=0
virtual bool empty() const =0
virtual void unscheduledNode(SUnit *)
virtual void addNode(const SUnit *SU)=0
virtual void updateNode(const SUnit *SU)=0
virtual void push(SUnit *U)=0
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetLowering * TLI
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the 'representative' register class for the specified value type.
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
Return the register pressure "high water mark" for the specific register class.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ EH_LABEL
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
@ ANNOTATION_LABEL
ANNOTATION_LABEL - Represents a mid basic block label used by annotations.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ LIFETIME_START
This corresponds to the llvm.lifetime.
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INLINEASM
INLINEASM - Represents an inline asm block.
initializer< Ty > init(const Ty &Val)
constexpr double e
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
bool empty() const
Definition BasicBlock.h:101
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1759
LLVM_ABI ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler.
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
Op::Description Desc
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createSourceListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N