1//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements bottom-up and top-down register pressure reduction list
10// schedulers, using standard algorithms. The basic approach uses a priority
11// queue of available nodes to schedule. One at a time, nodes are taken from
12// the priority queue (thus in priority order), checked for legality to
13// schedule, and emitted if legal.
14//
15//===----------------------------------------------------------------------===//
16
17#include "ScheduleDAGSDNodes.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
39#include "llvm/Config/llvm-config.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <cstdlib>
54#include <iterator>
55#include <limits>
56#include <memory>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "pre-RA-sched"
63
64STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65STATISTIC(NumUnfolds, "Number of nodes unfolded");
66STATISTIC(NumDups, "Number of duplicated nodes");
67STATISTIC(NumPRCopies, "Number of physical register copies");
68
69 static RegisterScheduler
70 burrListDAGScheduler("list-burr",
71 "Bottom-up register reduction list scheduling",
72 createBURRListDAGScheduler);
73
74 static RegisterScheduler
75 sourceListDAGScheduler("source",
76 "Similar to list-burr but schedules in source "
77 "order when possible",
78 createSourceListDAGScheduler);
79
80 static RegisterScheduler
81 hybridListDAGScheduler("list-hybrid",
82 "Bottom-up register pressure aware list scheduling "
83 "which tries to balance latency and register pressure",
84 createHybridListDAGScheduler);
85
86 static RegisterScheduler
87 ILPListDAGScheduler("list-ilp",
88 "Bottom-up register pressure aware list scheduling "
89 "which tries to balance ILP and register pressure",
90 createILPListDAGScheduler);
91
93 "disable-sched-cycles", cl::Hidden, cl::init(false),
94 cl::desc("Disable cycle-level precision during preRA scheduling"));
95
96// Temporary sched=list-ilp flags until the heuristics are robust.
97// Some options are also available under sched=list-hybrid.
99 "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100 cl::desc("Disable regpressure priority in sched=list-ilp"));
102 "disable-sched-live-uses", cl::Hidden, cl::init(true),
103 cl::desc("Disable live use priority in sched=list-ilp"));
105 "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106 cl::desc("Disable virtual register cycle interference checks"));
108 "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109 cl::desc("Disable physreg def-use affinity"));
111 "disable-sched-stalls", cl::Hidden, cl::init(true),
112 cl::desc("Disable no-stall priority in sched=list-ilp"));
114 "disable-sched-critical-path", cl::Hidden, cl::init(false),
115 cl::desc("Disable critical path priority in sched=list-ilp"));
117 "disable-sched-height", cl::Hidden, cl::init(false),
118 cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 "disable-2addr-hack", cl::Hidden, cl::init(true),
121 cl::desc("Disable scheduler's two-address hack"));
122
124 "max-sched-reorder", cl::Hidden, cl::init(6),
125 cl::desc("Number of instructions to allow ahead of the critical path "
126 "in sched=list-ilp"));
127
129 "sched-avg-ipc", cl::Hidden, cl::init(1),
130 cl::desc("Average inst/cycle whan no target itinerary exists."));
131
132namespace {
133
134//===----------------------------------------------------------------------===//
135/// ScheduleDAGRRList - The actual register reduction list scheduler
136/// implementation. This supports both top-down and bottom-up scheduling.
137///
138class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139private:
140 /// NeedLatency - True if the scheduler will make use of latency information.
141 bool NeedLatency;
142
143 /// AvailableQueue - The priority queue to use for the available SUnits.
144 SchedulingPriorityQueue *AvailableQueue;
145
146 /// PendingQueue - This contains all of the instructions whose operands have
147 /// been issued, but their results are not ready yet (due to the latency of
148 /// the operation). Once the operands become available, the instruction is
149 /// added to the AvailableQueue.
150 std::vector<SUnit *> PendingQueue;
151
152 /// HazardRec - The hazard recognizer to use.
153 ScheduleHazardRecognizer *HazardRec;
154
155 /// CurCycle - The current scheduler state corresponds to this cycle.
156 unsigned CurCycle = 0;
157
158 /// MinAvailableCycle - Cycle of the soonest available instruction.
159 unsigned MinAvailableCycle = ~0u;
160
161 /// IssueCount - Count instructions issued in this cycle
162 /// Currently valid only for bottom-up scheduling.
163 unsigned IssueCount = 0u;
164
165 /// LiveRegDefs - A set of physical registers and their definitions
166 /// that are "live". These nodes must be scheduled before any other nodes that
167 /// modify the registers can be scheduled.
168 unsigned NumLiveRegs = 0u;
169 std::unique_ptr<SUnit*[]> LiveRegDefs;
170 std::unique_ptr<SUnit*[]> LiveRegGens;
171
172 // Collect interferences between physical register use/defs.
173 // Each interference is an SUnit and set of physical registers.
174 SmallVector<SUnit*, 4> Interferences;
175
176 using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
177
178 LRegsMapT LRegsMap;
179
180 /// Topo - A topological ordering for SUnits which permits fast IsReachable
181 /// and similar queries.
182 ScheduleDAGTopologicalSort Topo;
183
184 // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185 // DAG crawling.
186 DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
187
188public:
189 ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190 SchedulingPriorityQueue *availqueue,
191 CodeGenOptLevel OptLevel)
192 : ScheduleDAGSDNodes(mf), NeedLatency(needlatency),
193 AvailableQueue(availqueue), Topo(SUnits, nullptr) {
194 const TargetSubtargetInfo &STI = mf.getSubtarget();
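 // When cycle-level precision is disabled or latency is not needed, a
 // default-constructed ScheduleHazardRecognizer (which never reports
 // hazards) is sufficient; otherwise ask the target for its recognizer.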
195 if (DisableSchedCycles || !NeedLatency)
196 HazardRec = new ScheduleHazardRecognizer();
197 else
198 HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
199 }
200
201 ~ScheduleDAGRRList() override {
202 delete HazardRec;
203 delete AvailableQueue;
204 }
205
206 void Schedule() override;
207
208 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
209
210 /// IsReachable - Checks if SU is reachable from TargetSU.
211 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
212 return Topo.IsReachable(SU, TargetSU);
213 }
214
215 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
216 /// create a cycle.
217 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
218 return Topo.WillCreateCycle(SU, TargetSU);
219 }
220
221 /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
222 /// This returns true if this is a new predecessor.
223 /// Does *NOT* update the topological ordering! It just queues an update.
224 void AddPredQueued(SUnit *SU, const SDep &D) {
225 Topo.AddPredQueued(SU, D.getSUnit());
226 SU->addPred(D);
227 }
228
229 /// AddPred - adds a predecessor edge to SUnit SU.
230 /// This returns true if this is a new predecessor.
231 /// Updates the topological ordering if required.
232 void AddPred(SUnit *SU, const SDep &D) {
233 Topo.AddPred(SU, D.getSUnit());
234 SU->addPred(D);
235 }
236
237 /// RemovePred - removes a predecessor edge from SUnit SU.
238 /// This returns true if an edge was removed.
239 /// Updates the topological ordering if required.
240 void RemovePred(SUnit *SU, const SDep &D) {
241 Topo.RemovePred(SU, D.getSUnit());
242 SU->removePred(D);
243 }
244
245private:
246 bool isReady(SUnit *SU) {
247 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
248 AvailableQueue->isReady(SU);
249 }
250
251 void ReleasePred(SUnit *SU, const SDep *PredEdge);
252 void ReleasePredecessors(SUnit *SU);
253 void ReleasePending();
254 void AdvanceToCycle(unsigned NextCycle);
255 void AdvancePastStalls(SUnit *SU);
256 void EmitNode(SUnit *SU);
257 void ScheduleNodeBottomUp(SUnit*);
258 void CapturePred(SDep *PredEdge);
259 void UnscheduleNodeBottomUp(SUnit*);
260 void RestoreHazardCheckerBottomUp();
261 void BacktrackBottomUp(SUnit*, SUnit*);
262 SUnit *TryUnfoldSU(SUnit *);
263 SUnit *CopyAndMoveSuccessors(SUnit*);
264 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
265 const TargetRegisterClass*,
266 const TargetRegisterClass*,
267 SmallVectorImpl<SUnit*>&);
268 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
269
270 void releaseInterferences(unsigned Reg = 0);
271
272 SUnit *PickNodeToScheduleBottomUp();
273 void ListScheduleBottomUp();
274
275 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
276 SUnit *CreateNewSUnit(SDNode *N) {
277 unsigned NumSUnits = SUnits.size();
278 SUnit *NewNode = newSUnit(N);
279 // Update the topological ordering.
280 if (NewNode->NodeNum >= NumSUnits)
281 Topo.AddSUnitWithoutPredecessors(NewNode);
282 return NewNode;
283 }
284
285 /// CreateClone - Creates a new SUnit from an existing one.
286 SUnit *CreateClone(SUnit *N) {
287 unsigned NumSUnits = SUnits.size();
288 SUnit *NewNode = Clone(N);
289 // Update the topological ordering.
290 if (NewNode->NodeNum >= NumSUnits)
291 Topo.AddSUnitWithoutPredecessors(NewNode);
292 return NewNode;
293 }
294
295 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
296 /// need actual latency information but the hybrid scheduler does.
297 bool forceUnitLatencies() const override {
298 return !NeedLatency;
299 }
300};
301
302} // end anonymous namespace
303
304static constexpr unsigned RegSequenceCost = 1;
305
306/// GetCostForDef - Looks up the register class and cost for a given definition.
307/// Typically this just means looking up the representative register class,
308/// but for untyped values (MVT::Untyped) it means inspecting the node's
309/// opcode to determine what register class is being generated.
310 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
311 const TargetLowering *TLI,
312 const TargetInstrInfo *TII,
313 const TargetRegisterInfo *TRI,
314 unsigned &RegClass, unsigned &Cost,
315 const MachineFunction &MF) {
316 MVT VT = RegDefPos.GetValue();
317
318 // Special handling for untyped values. These values can only come from
319 // the expansion of custom DAG-to-DAG patterns.
320 if (VT == MVT::Untyped) {
321 const SDNode *Node = RegDefPos.GetNode();
322
323 // Special handling for CopyFromReg of untyped values.
324 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
325 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
326 const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
327 RegClass = RC->getID();
328 Cost = 1;
329 return;
330 }
331
332 unsigned Opcode = Node->getMachineOpcode();
333 if (Opcode == TargetOpcode::REG_SEQUENCE) {
334 unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
335 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
336 RegClass = RC->getID();
337 Cost = RegSequenceCost;
338 return;
339 }
340
341 unsigned Idx = RegDefPos.GetIdx();
342 const MCInstrDesc &Desc = TII->get(Opcode);
343 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
344 assert(RC && "Not a valid register class");
345 RegClass = RC->getID();
346 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
347 // better way to determine it.
348 Cost = 1;
349 } else {
350 RegClass = TLI->getRepRegClassFor(VT)->getID();
351 Cost = TLI->getRepRegClassCostFor(VT);
352 }
353}
354
355/// Schedule - Schedule the DAG using list scheduling.
356void ScheduleDAGRRList::Schedule() {
357 LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
358 << " '" << BB->getName() << "' **********\n");
359
360 CurCycle = 0;
361 IssueCount = 0;
362 MinAvailableCycle =
363 DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
364 NumLiveRegs = 0;
365 // Allocate slots for each physical register, plus one for a special register
366 // to track the virtual resource of a calling sequence.
367 LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
368 LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
369 CallSeqEndForStart.clear();
370 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
371
372 // Build the scheduling graph.
373 BuildSchedGraph(nullptr);
374
375 LLVM_DEBUG(dump());
376 Topo.MarkDirty();
377
378 AvailableQueue->initNodes(SUnits);
379
380 HazardRec->Reset();
381
382 // Execute the actual scheduling loop.
383 ListScheduleBottomUp();
384
385 AvailableQueue->releaseState();
386
387 LLVM_DEBUG({
388 dbgs() << "*** Final schedule ***\n";
389 dumpSchedule();
390 dbgs() << '\n';
391 });
392}
393
394//===----------------------------------------------------------------------===//
395// Bottom-Up Scheduling
396//===----------------------------------------------------------------------===//
397
398/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
399/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
400void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
401 SUnit *PredSU = PredEdge->getSUnit();
402
403#ifndef NDEBUG
404 if (PredSU->NumSuccsLeft == 0) {
405 dbgs() << "*** Scheduling failed! ***\n";
406 dumpNode(*PredSU);
407 dbgs() << " has been released too many times!\n";
408 llvm_unreachable(nullptr);
409 }
410#endif
411 --PredSU->NumSuccsLeft;
412
413 if (!forceUnitLatencies()) {
414 // Updating predecessor's height. This is now the cycle when the
415 // predecessor can be scheduled without causing a pipeline stall.
416 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
417 }
418
419 // If all the node's successors are scheduled, this node is ready
420 // to be scheduled. Ignore the special EntrySU node.
421 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
422 PredSU->isAvailable = true;
423
424 unsigned Height = PredSU->getHeight();
425 if (Height < MinAvailableCycle)
426 MinAvailableCycle = Height;
427
428 if (isReady(PredSU)) {
429 AvailableQueue->push(PredSU);
430 }
431 // CapturePred and others may have left the node in the pending queue, avoid
432 // adding it twice.
433 else if (!PredSU->isPending) {
434 PredSU->isPending = true;
435 PendingQueue.push_back(PredSU);
436 }
437 }
438}
439
440/// IsChainDependent - Test if Outer is reachable from Inner through
441/// chain dependencies.
442static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
443 unsigned NestLevel,
444 const TargetInstrInfo *TII) {
445 SDNode *N = Outer;
446 while (true) {
447 if (N == Inner)
448 return true;
449 // For a TokenFactor, examine each operand. There may be multiple ways
450 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
451 // most nesting in order to ensure that we find the corresponding match.
452 if (N->getOpcode() == ISD::TokenFactor) {
453 for (const SDValue &Op : N->op_values())
454 if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
455 return true;
456 return false;
457 }
458 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
459 if (N->isMachineOpcode()) {
460 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
461 ++NestLevel;
462 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
463 if (NestLevel == 0)
464 return false;
465 --NestLevel;
466 }
467 }
468 // Otherwise, find the chain and continue climbing.
469 for (const SDValue &Op : N->op_values())
470 if (Op.getValueType() == MVT::Other) {
471 N = Op.getNode();
472 goto found_chain_operand;
473 }
474 return false;
475 found_chain_operand:;
476 if (N->getOpcode() == ISD::EntryToken)
477 return false;
478 }
479}
480
481/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
482/// the corresponding (lowered) CALLSEQ_BEGIN node.
483///
484 /// NestLevel and MaxNest are used in recursion to indicate the current level
485/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
486/// level seen so far.
487///
488/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
489/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
490static SDNode *
491FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
492 const TargetInstrInfo *TII) {
493 while (true) {
494 // For a TokenFactor, examine each operand. There may be multiple ways
495 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
496 // most nesting in order to ensure that we find the corresponding match.
497 if (N->getOpcode() == ISD::TokenFactor) {
498 SDNode *Best = nullptr;
499 unsigned BestMaxNest = MaxNest;
500 for (const SDValue &Op : N->op_values()) {
501 unsigned MyNestLevel = NestLevel;
502 unsigned MyMaxNest = MaxNest;
503 if (SDNode *New = FindCallSeqStart(Op.getNode(),
504 MyNestLevel, MyMaxNest, TII))
505 if (!Best || (MyMaxNest > BestMaxNest)) {
506 Best = New;
507 BestMaxNest = MyMaxNest;
508 }
509 }
510 assert(Best);
511 MaxNest = BestMaxNest;
512 return Best;
513 }
514 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
515 if (N->isMachineOpcode()) {
516 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
517 ++NestLevel;
518 MaxNest = std::max(MaxNest, NestLevel);
519 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
520 assert(NestLevel != 0);
521 --NestLevel;
522 if (NestLevel == 0)
523 return N;
524 }
525 }
526 // Otherwise, find the chain and continue climbing.
527 for (const SDValue &Op : N->op_values())
528 if (Op.getValueType() == MVT::Other) {
529 N = Op.getNode();
530 goto found_chain_operand;
531 }
532 return nullptr;
533 found_chain_operand:;
534 if (N->getOpcode() == ISD::EntryToken)
535 return nullptr;
536 }
537}
538
539/// Call ReleasePred for each predecessor, then update register live def/gen.
540/// Always update LiveRegDefs for a register dependence even if the current SU
541 /// also defines the register. This effectively creates one large live range
542 /// across a sequence of two-address nodes. This is important because the
543/// entire chain must be scheduled together. Example:
544///
545/// flags = (3) add
546/// flags = (2) addc flags
547/// flags = (1) addc flags
548///
549/// results in
550///
551/// LiveRegDefs[flags] = 3
552/// LiveRegGens[flags] = 1
553///
554/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
555/// interference on flags.
556void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
557 // Bottom up: release predecessors
558 for (SDep &Pred : SU->Preds) {
559 ReleasePred(SU, &Pred);
560 if (Pred.isAssignedRegDep()) {
561 // This is a physical register dependency and it's impossible or
562 // expensive to copy the register. Make sure nothing that can
563 // clobber the register is scheduled between the predecessor and
564 // this node.
565 SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
566 assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
567 "interference on register dependence");
568 LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
569 if (!LiveRegGens[Pred.getReg()]) {
570 ++NumLiveRegs;
571 LiveRegGens[Pred.getReg()] = SU;
572 }
573 }
574 }
575
576 // If we're scheduling a lowered CALLSEQ_END, find the corresponding
577 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
578 // these nodes, to prevent other calls from being interscheduled with them.
579 unsigned CallResource = TRI->getNumRegs();
580 if (!LiveRegDefs[CallResource])
581 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
582 if (Node->isMachineOpcode() &&
583 Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
584 unsigned NestLevel = 0;
585 unsigned MaxNest = 0;
586 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
587 assert(N && "Must find call sequence start");
588
589 SUnit *Def = &SUnits[N->getNodeId()];
590 CallSeqEndForStart[Def] = SU;
591
592 ++NumLiveRegs;
593 LiveRegDefs[CallResource] = Def;
594 LiveRegGens[CallResource] = SU;
595 break;
596 }
597}
598
599/// Check to see if any of the pending instructions are ready to issue. If
600/// so, add them to the available queue.
601void ScheduleDAGRRList::ReleasePending() {
602 if (DisableSchedCycles) {
603 assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
604 return;
605 }
606
607 // If the available queue is empty, it is safe to reset MinAvailableCycle.
608 if (AvailableQueue->empty())
609 MinAvailableCycle = std::numeric_limits<unsigned>::max();
610
611 // Check to see if any of the pending instructions are ready to issue. If
612 // so, add them to the available queue.
613 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
614 unsigned ReadyCycle = PendingQueue[i]->getHeight();
615 if (ReadyCycle < MinAvailableCycle)
616 MinAvailableCycle = ReadyCycle;
617
618 if (PendingQueue[i]->isAvailable) {
619 if (!isReady(PendingQueue[i]))
620 continue;
621 AvailableQueue->push(PendingQueue[i]);
622 }
623 PendingQueue[i]->isPending = false;
624 PendingQueue[i] = PendingQueue.back();
625 PendingQueue.pop_back();
626 --i; --e;
627 }
628}
629
630/// Move the scheduler state forward by the specified number of Cycles.
631void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
632 if (NextCycle <= CurCycle)
633 return;
634
635 IssueCount = 0;
636 AvailableQueue->setCurCycle(NextCycle);
637 if (!HazardRec->isEnabled()) {
638 // Bypass lots of virtual calls in case of long latency.
639 CurCycle = NextCycle;
640 }
641 else {
642 for (; CurCycle != NextCycle; ++CurCycle) {
643 HazardRec->RecedeCycle();
644 }
645 }
646 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
647 // available Q to release pending nodes at least once before popping.
648 ReleasePending();
649}
650
651/// Move the scheduler state forward until the specified node's dependents are
652/// ready and can be scheduled with no resource conflicts.
653void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
654 if (DisableSchedCycles)
655 return;
656
657 // FIXME: Nodes such as CopyFromReg probably should not advance the current
658 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
659 // has predecessors the cycle will be advanced when they are scheduled.
660 // But given the crude nature of modeling latency through such nodes, we
661 // currently need to treat these nodes like real instructions.
662 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
663
664 unsigned ReadyCycle = SU->getHeight();
665
666 // Bump CurCycle to account for latency. We assume the latency of other
667 // available instructions may be hidden by the stall (not a full pipe stall).
668 // This updates the hazard recognizer's cycle before reserving resources for
669 // this instruction.
670 AdvanceToCycle(ReadyCycle);
671
672 // Calls are scheduled in their preceding cycle, so don't conflict with
673 // hazards from instructions after the call. EmitNode will reset the
674 // scoreboard state before emitting the call.
675 if (SU->isCall)
676 return;
677
678 // FIXME: For resource conflicts in very long non-pipelined stages, we
679 // should probably skip ahead here to avoid useless scoreboard checks.
680 int Stalls = 0;
681 while (true) {
682 ScheduleHazardRecognizer::HazardType HT =
683 HazardRec->getHazardType(SU, -Stalls);
684
685 if (HT == ScheduleHazardRecognizer::NoHazard)
686 break;
687
688 ++Stalls;
689 }
690 AdvanceToCycle(CurCycle + Stalls);
691}
692
693/// Record this SUnit in the HazardRecognizer.
694/// Does not update CurCycle.
695void ScheduleDAGRRList::EmitNode(SUnit *SU) {
696 if (!HazardRec->isEnabled())
697 return;
698
699 // Check for phys reg copy.
700 if (!SU->getNode())
701 return;
702
703 switch (SU->getNode()->getOpcode()) {
704 default:
705 assert(SU->getNode()->isMachineOpcode() &&
706 "This target-independent node should not be scheduled.");
707 break;
708 case ISD::MERGE_VALUES:
709 case ISD::TokenFactor:
710 case ISD::LIFETIME_START:
711 case ISD::LIFETIME_END:
712 case ISD::CopyToReg:
713 case ISD::CopyFromReg:
714 case ISD::EH_LABEL:
715 // Noops don't affect the scoreboard state. Copies are likely to be
716 // removed.
717 return;
718 case ISD::INLINEASM:
719 case ISD::INLINEASM_BR:
720 // For inline asm, clear the pipeline state.
721 HazardRec->Reset();
722 return;
723 }
724 if (SU->isCall) {
725 // Calls are scheduled with their preceding instructions. For bottom-up
726 // scheduling, clear the pipeline state before emitting.
727 HazardRec->Reset();
728 }
729
730 HazardRec->EmitInstruction(SU);
731}
732
733static void resetVRegCycle(SUnit *SU);
734
735/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
736/// count of its predecessors. If a predecessor pending count is zero, add it to
737/// the Available queue.
738void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
739 LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
740 LLVM_DEBUG(dumpNode(*SU));
741
742#ifndef NDEBUG
743 if (CurCycle < SU->getHeight())
744 LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
745 << "] pipeline stall!\n");
746#endif
747
748 // FIXME: Do not modify node height. It may interfere with
749 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
750 // node its ready cycle can aid heuristics, and after scheduling it can
751 // indicate the scheduled cycle.
752 SU->setHeightToAtLeast(CurCycle);
753
754 // Reserve resources for the scheduled instruction.
755 EmitNode(SU);
756
757 Sequence.push_back(SU);
758
759 AvailableQueue->scheduledNode(SU);
760
761 // If HazardRec is disabled, and each inst counts as one cycle, then
762 // advance CurCycle before ReleasePredecessors to avoid useless pushes to
763 // PendingQueue for schedulers that implement HasReadyFilter.
764 if (!HazardRec->isEnabled() && AvgIPC < 2)
765 AdvanceToCycle(CurCycle + 1);
766
767 // Update liveness of predecessors before successors to avoid treating a
768 // two-address node as a live range def.
769 ReleasePredecessors(SU);
770
771 // Release all the implicit physical register defs that are live.
772 for (SDep &Succ : SU->Succs) {
773 // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
774 if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
775 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
776 --NumLiveRegs;
777 LiveRegDefs[Succ.getReg()] = nullptr;
778 LiveRegGens[Succ.getReg()] = nullptr;
779 releaseInterferences(Succ.getReg());
780 }
781 }
782 // Release the special call resource dependence, if this is the beginning
783 // of a call.
784 unsigned CallResource = TRI->getNumRegs();
785 if (LiveRegDefs[CallResource] == SU)
786 for (const SDNode *SUNode = SU->getNode(); SUNode;
787 SUNode = SUNode->getGluedNode()) {
788 if (SUNode->isMachineOpcode() &&
789 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
790 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
791 --NumLiveRegs;
792 LiveRegDefs[CallResource] = nullptr;
793 LiveRegGens[CallResource] = nullptr;
794 releaseInterferences(CallResource);
795 }
796 }
797
798 resetVRegCycle(SU);
799
800 SU->isScheduled = true;
801
802 // Conditions under which the scheduler should eagerly advance the cycle:
803 // (1) No available instructions
804 // (2) All pipelines full, so available instructions must have hazards.
805 //
806 // If HazardRec is disabled, the cycle was pre-advanced before calling
807 // ReleasePredecessors. In that case, IssueCount should remain 0.
808 //
809 // Check AvailableQueue after ReleasePredecessors in case of zero latency.
810 if (HazardRec->isEnabled() || AvgIPC > 1) {
811 if (SU->getNode() && SU->getNode()->isMachineOpcode())
812 ++IssueCount;
813 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
814 || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
815 AdvanceToCycle(CurCycle + 1);
816 }
817}
818
819/// CapturePred - This does the opposite of ReleasePred. Since SU is being
820/// unscheduled, increase the succ left count of its predecessors. Remove
821/// them from AvailableQueue if necessary.
822void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
823 SUnit *PredSU = PredEdge->getSUnit();
824 if (PredSU->isAvailable) {
825 PredSU->isAvailable = false;
826 if (!PredSU->isPending)
827 AvailableQueue->remove(PredSU);
828 }
829
830 assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
831 "NumSuccsLeft will overflow!");
832 ++PredSU->NumSuccsLeft;
833}
834
835 /// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
836 /// state and its predecessors' states to reflect the change.
837void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
838 LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
839 LLVM_DEBUG(dumpNode(*SU));
840
841 for (SDep &Pred : SU->Preds) {
842 CapturePred(&Pred);
843 if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
844 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
845 assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
846 "Physical register dependency violated?");
847 --NumLiveRegs;
848 LiveRegDefs[Pred.getReg()] = nullptr;
849 LiveRegGens[Pred.getReg()] = nullptr;
850 releaseInterferences(Pred.getReg());
851 }
852 }
853
854 // Reclaim the special call resource dependence, if this is the beginning
855 // of a call.
856 unsigned CallResource = TRI->getNumRegs();
857 for (const SDNode *SUNode = SU->getNode(); SUNode;
858 SUNode = SUNode->getGluedNode()) {
859 if (SUNode->isMachineOpcode() &&
860 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
861 SUnit *SeqEnd = CallSeqEndForStart[SU];
862 assert(SeqEnd && "Call sequence start/end must be known");
863 assert(!LiveRegDefs[CallResource]);
864 assert(!LiveRegGens[CallResource]);
865 ++NumLiveRegs;
866 LiveRegDefs[CallResource] = SU;
867 LiveRegGens[CallResource] = SeqEnd;
868 }
869 }
870
871 // Release the special call resource dependence, if this is the end
872 // of a call.
873 if (LiveRegGens[CallResource] == SU)
874 for (const SDNode *SUNode = SU->getNode(); SUNode;
875 SUNode = SUNode->getGluedNode()) {
876 if (SUNode->isMachineOpcode() &&
877 SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
878 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
879 assert(LiveRegDefs[CallResource]);
880 assert(LiveRegGens[CallResource]);
881 --NumLiveRegs;
882 LiveRegDefs[CallResource] = nullptr;
883 LiveRegGens[CallResource] = nullptr;
884 releaseInterferences(CallResource);
885 }
886 }
887
888 for (auto &Succ : SU->Succs) {
889 if (Succ.isAssignedRegDep()) {
890 auto Reg = Succ.getReg();
891 if (!LiveRegDefs[Reg])
892 ++NumLiveRegs;
893 // This becomes the nearest def. Note that an earlier def may still be
894 // pending if this is a two-address node.
895 LiveRegDefs[Reg] = SU;
896
897 // Update LiveRegGens only if it was empty before this unscheduling.
898 // This avoids incorrectly overwriting an entry set by a previous run.
899 if (!LiveRegGens[Reg]) {
900 // Find the successor with the lowest height.
901 LiveRegGens[Reg] = Succ.getSUnit();
902 for (auto &Succ2 : SU->Succs) {
903 if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
904 Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
905 LiveRegGens[Reg] = Succ2.getSUnit();
906 }
907 }
908 }
909 }
910 if (SU->getHeight() < MinAvailableCycle)
911 MinAvailableCycle = SU->getHeight();
912
913 SU->setHeightDirty();
914 SU->isScheduled = false;
915 SU->isAvailable = true;
916 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
917 // Don't make available until backtracking is complete.
918 SU->isPending = true;
919 PendingQueue.push_back(SU);
920 }
921 else {
922 AvailableQueue->push(SU);
923 }
924 AvailableQueue->unscheduledNode(SU);
925}
926
927/// After backtracking, the hazard checker needs to be restored to a state
928 /// corresponding to the current cycle.
929void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
930 HazardRec->Reset();
931
932 unsigned LookAhead = std::min((unsigned)Sequence.size(),
933 HazardRec->getMaxLookAhead());
934 if (LookAhead == 0)
935 return;
936
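 // Re-emit the last LookAhead nodes of the current schedule into the freshly
 // reset hazard recognizer, receding one cycle at a time so its state again
 // corresponds to the current cycle.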
937 std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
938 unsigned HazardCycle = (*I)->getHeight();
939 for (auto E = Sequence.end(); I != E; ++I) {
940 SUnit *SU = *I;
941 for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
942 HazardRec->RecedeCycle();
943 }
944 EmitNode(SU);
945 }
946}
947
948/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
949/// BTCycle in order to schedule a specific node.
950void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
951 SUnit *OldSU = Sequence.back();
952 while (true) {
953 Sequence.pop_back();
954 // FIXME: use ready cycle instead of height
955 CurCycle = OldSU->getHeight();
956 UnscheduleNodeBottomUp(OldSU);
957 AvailableQueue->setCurCycle(CurCycle);
958 if (OldSU == BtSU)
959 break;
960 OldSU = Sequence.back();
961 }
962
963 assert(!SU->isSucc(OldSU) && "Something is wrong!");
964
965 RestoreHazardCheckerBottomUp();
966
967 ReleasePending();
968
969 ++NumBacktracks;
970}
971
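/// isOperandOf - Return true if SU's node, or any node glued to it, is an
/// operand of N.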
972static bool isOperandOf(const SUnit *SU, SDNode *N) {
973 for (const SDNode *SUNode = SU->getNode(); SUNode;
974 SUNode = SUNode->getGluedNode()) {
975 if (SUNode->isOperandOf(N))
976 return true;
977 }
978 return false;
979}
980
981 /// TryUnfoldSU - Attempt to unfold SU's node, splitting its folded memory
982 /// operand out into a separate load.
982SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
983 SDNode *N = SU->getNode();
984 // Use while over if to ease fall through.
985 SmallVector<SDNode*, 2> NewNodes;
986 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
987 return nullptr;
988
989 assert(NewNodes.size() == 2 && "Expected a load folding node!");
990
991 N = NewNodes[1];
992 SDNode *LoadNode = NewNodes[0];
993 unsigned NumVals = N->getNumValues();
994 unsigned OldNumVals = SU->getNode()->getNumValues();
995
996 // LoadNode may already exist. This can happen when there is another
997 // load from the same location that produces the same type of value
998 // but has different alignment or volatility.
999 bool isNewLoad = true;
1000 SUnit *LoadSU;
1001 if (LoadNode->getNodeId() != -1) {
1002 LoadSU = &SUnits[LoadNode->getNodeId()];
1003 // If LoadSU has already been scheduled, we should clone it but
1004 // this would negate the benefit to unfolding so just return SU.
1005 if (LoadSU->isScheduled)
1006 return SU;
1007 isNewLoad = false;
1008 } else {
1009 LoadSU = CreateNewSUnit(LoadNode);
1010 LoadNode->setNodeId(LoadSU->NodeNum);
1011
1012 InitNumRegDefsLeft(LoadSU);
1013 computeLatency(LoadSU);
1014 }
1015
1016 bool isNewN = true;
1017 SUnit *NewSU;
1018 // This can only happen when isNewLoad is false.
1019 if (N->getNodeId() != -1) {
1020 NewSU = &SUnits[N->getNodeId()];
1021 // If NewSU has already been scheduled, we need to clone it, but this
1022 // negates the benefit to unfolding so just return SU.
1023 if (NewSU->isScheduled) {
1024 return SU;
1025 }
1026 isNewN = false;
1027 } else {
1028 NewSU = CreateNewSUnit(N);
1029 N->setNodeId(NewSU->NodeNum);
1030
1031 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1032 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1033 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1034 NewSU->isTwoAddress = true;
1035 break;
1036 }
1037 }
1038 if (MCID.isCommutable())
1039 NewSU->isCommutable = true;
1040
1041 InitNumRegDefsLeft(NewSU);
1042 computeLatency(NewSU);
1043 }
1044
1045 LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1046
1047 // Now that we are committed to unfolding replace DAG Uses.
1048 for (unsigned i = 0; i != NumVals; ++i)
1049 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
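 // Uses of the old node's final result are redirected to the unfolded
 // load's chain output (value 1 of LoadNode).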
1050 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1051 SDValue(LoadNode, 1));
1052
1053 // Record all the edges to and from the old SU, by category.
1054 SmallVector<SDep, 4> ChainPreds;
1055 SmallVector<SDep, 4> ChainSuccs;
1056 SmallVector<SDep, 4> LoadPreds;
1057 SmallVector<SDep, 4> NodePreds;
1058 SmallVector<SDep, 4> NodeSuccs;
1059 for (SDep &Pred : SU->Preds) {
1060 if (Pred.isCtrl())
1061 ChainPreds.push_back(Pred);
1062 else if (isOperandOf(Pred.getSUnit(), LoadNode))
1063 LoadPreds.push_back(Pred);
1064 else
1065 NodePreds.push_back(Pred);
1066 }
1067 for (SDep &Succ : SU->Succs) {
1068 if (Succ.isCtrl())
1069 ChainSuccs.push_back(Succ);
1070 else
1071 NodeSuccs.push_back(Succ);
1072 }
1073
1074 // Now assign edges to the newly-created nodes.
1075 for (const SDep &Pred : ChainPreds) {
1076 RemovePred(SU, Pred);
1077 if (isNewLoad)
1078 AddPredQueued(LoadSU, Pred);
1079 }
1080 for (const SDep &Pred : LoadPreds) {
1081 RemovePred(SU, Pred);
1082 if (isNewLoad)
1083 AddPredQueued(LoadSU, Pred);
1084 }
1085 for (const SDep &Pred : NodePreds) {
1086 RemovePred(SU, Pred);
1087 AddPredQueued(NewSU, Pred);
1088 }
1089 for (SDep &D : NodeSuccs) {
1090 SUnit *SuccDep = D.getSUnit();
1091 D.setSUnit(SU);
1092 RemovePred(SuccDep, D);
1093 D.setSUnit(NewSU);
1094 AddPredQueued(SuccDep, D);
1095 // Balance register pressure.
1096 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1097 !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1098 --NewSU->NumRegDefsLeft;
1099 }
1100 for (SDep &D : ChainSuccs) {
1101 SUnit *SuccDep = D.getSUnit();
1102 D.setSUnit(SU);
1103 RemovePred(SuccDep, D);
1104 if (isNewLoad) {
1105 D.setSUnit(LoadSU);
1106 AddPredQueued(SuccDep, D);
1107 }
1108 }
1109
1110 // Add a data dependency to reflect that NewSU reads the value defined
1111 // by LoadSU.
1112 SDep D(LoadSU, SDep::Data, 0);
1113 D.setLatency(LoadSU->Latency);
1114 AddPredQueued(NewSU, D);
1115
1116 if (isNewLoad)
1117 AvailableQueue->addNode(LoadSU);
1118 if (isNewN)
1119 AvailableQueue->addNode(NewSU);
1120
1121 ++NumUnfolds;
1122
1123 if (NewSU->NumSuccsLeft == 0)
1124 NewSU->isAvailable = true;
1125
1126 return NewSU;
1127}
1128
1129/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1130/// successors to the newly created node.
1131SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1132 SDNode *N = SU->getNode();
1133 if (!N)
1134 return nullptr;
1135
1136 LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1137 LLVM_DEBUG(dumpNode(*SU));
1138
1139 if (N->getGluedNode() &&
1140 !TII->canCopyGluedNodeDuringSchedule(N)) {
1141 LLVM_DEBUG(
1142 dbgs()
1143 << "Giving up because it has incoming glue and the target does not "
1144 "want to copy it\n");
1145 return nullptr;
1146 }
1147
1148 SUnit *NewSU;
1149 bool TryUnfold = false;
1150 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1151 MVT VT = N->getSimpleValueType(i);
1152 if (VT == MVT::Glue) {
1153 LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1154 return nullptr;
1155 } else if (VT == MVT::Other)
1156 TryUnfold = true;
1157 }
1158 for (const SDValue &Op : N->op_values()) {
1159 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1160 if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1161 LLVM_DEBUG(
1162 dbgs() << "Giving up because one of the operands is glue and "
1163 "the target does not want to copy it\n");
1164 return nullptr;
1165 }
1166 }
1167
1168 // If possible unfold instruction.
1169 if (TryUnfold) {
1170 SUnit *UnfoldSU = TryUnfoldSU(SU);
1171 if (!UnfoldSU)
1172 return nullptr;
1173 SU = UnfoldSU;
1174 N = SU->getNode();
1175 // If this can be scheduled don't bother duplicating and just return
1176 if (SU->NumSuccsLeft == 0)
1177 return SU;
1178 }
1179
1180 LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1181 NewSU = CreateClone(SU);
1182
1183 // New SUnit has the exact same predecessors.
1184 for (SDep &Pred : SU->Preds)
1185 if (!Pred.isArtificial())
1186 AddPredQueued(NewSU, Pred);
1187
1188 // Make sure the clone comes after the original. (InstrEmitter assumes
1189 // this ordering.)
1190 AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
1191
1192 // Only copy scheduled successors. Cut them from old node's successor
1193 // list and move them over.
1194 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1195 for (SDep &Succ : SU->Succs) {
1196 if (Succ.isArtificial())
1197 continue;
1198 SUnit *SuccSU = Succ.getSUnit();
1199 if (SuccSU->isScheduled) {
1200 SDep D = Succ;
1201 D.setSUnit(NewSU);
1202 AddPredQueued(SuccSU, D);
1203 D.setSUnit(SU);
1204 DelDeps.emplace_back(SuccSU, D);
1205 }
1206 }
1207 for (const auto &[DelSU, DelD] : DelDeps)
1208 RemovePred(DelSU, DelD);
1209
1210 AvailableQueue->updateNode(SU);
1211 AvailableQueue->addNode(NewSU);
1212
1213 ++NumDups;
1214 return NewSU;
1215}
1216
1217/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1218/// scheduled successors of the given SUnit to the last copy.
1219void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1220 const TargetRegisterClass *DestRC,
1221 const TargetRegisterClass *SrcRC,
1222 SmallVectorImpl<SUnit*> &Copies) {
1223 SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1224 CopyFromSU->CopySrcRC = SrcRC;
1225 CopyFromSU->CopyDstRC = DestRC;
1226
1227 SUnit *CopyToSU = CreateNewSUnit(nullptr);
1228 CopyToSU->CopySrcRC = DestRC;
1229 CopyToSU->CopyDstRC = SrcRC;
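 // CopyFromSU moves the value out of the problematic class (SrcRC -> DestRC)
 // and CopyToSU moves it back (DestRC -> SrcRC); already-scheduled successors
 // are rewired below to read the value from CopyToSU.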
1230
1231 // Only copy scheduled successors. Cut them from old node's successor
1232 // list and move them over.
1233 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1234 for (SDep &Succ : SU->Succs) {
1235 if (Succ.isArtificial())
1236 continue;
1237 SUnit *SuccSU = Succ.getSUnit();
1238 if (SuccSU->isScheduled) {
1239 SDep D = Succ;
1240 D.setSUnit(CopyToSU);
1241 AddPredQueued(SuccSU, D);
1242 DelDeps.emplace_back(SuccSU, Succ);
1243 }
1244 else {
1245 // Avoid scheduling the def-side copy before other successors. Otherwise,
1246 // we could introduce another physreg interference on the copy and
1247 // continue inserting copies indefinitely.
1248 AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1249 }
1250 }
1251 for (const auto &[DelSU, DelD] : DelDeps)
1252 RemovePred(DelSU, DelD);
1253
1254 SDep FromDep(SU, SDep::Data, Reg);
1255 FromDep.setLatency(SU->Latency);
1256 AddPredQueued(CopyFromSU, FromDep);
1257 SDep ToDep(CopyFromSU, SDep::Data, 0);
1258 ToDep.setLatency(CopyFromSU->Latency);
1259 AddPredQueued(CopyToSU, ToDep);
1260
1261 AvailableQueue->updateNode(SU);
1262 AvailableQueue->addNode(CopyFromSU);
1263 AvailableQueue->addNode(CopyToSU);
1264 Copies.push_back(CopyFromSU);
1265 Copies.push_back(CopyToSU);
1266
1267 ++NumPRCopies;
1268}
1269
1270/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1271/// definition of the specified node.
1272/// FIXME: Move to SelectionDAG?
1273static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1274 const TargetInstrInfo *TII) {
1275 unsigned NumRes;
1276 if (N->getOpcode() == ISD::CopyFromReg) {
1277 // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1278 NumRes = 1;
1279 } else {
1280 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1281 assert(!MCID.implicit_defs().empty() &&
1282 "Physical reg def must be in implicit def list!");
1283 NumRes = MCID.getNumDefs();
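 // Count past the explicit defs, then advance through the implicit defs
 // until Reg is found; the matching result index gives Reg's value type.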
1284 for (MCPhysReg ImpDef : MCID.implicit_defs()) {
1285 if (Reg == ImpDef)
1286 break;
1287 ++NumRes;
1288 }
1289 }
1290 return N->getSimpleValueType(NumRes);
1291}
1292
1293 /// CheckForLiveRegDef - Update the live register vector (LRegs) if the
1294 /// specified register def of the specified SUnit clobbers any "live" registers.
1295static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
1296 SmallSet<unsigned, 4> &RegAdded,
1297 SmallVectorImpl<unsigned> &LRegs,
1298 const TargetRegisterInfo *TRI,
1299 const SDNode *Node = nullptr) {
1300 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1301
1302 // Check if Ref is live.
1303 if (!LiveRegDefs[*AliasI]) continue;
1304
1305 // Allow multiple uses of the same def.
1306 if (LiveRegDefs[*AliasI] == SU) continue;
1307
1308 // Also allow the use when the live def is produced by the given Node
1309 // (e.g. the source of a CopyToReg).
1309 if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
1310 continue;
1311
1312 // Add Reg to the set of interfering live regs.
1313 if (RegAdded.insert(*AliasI).second) {
1314 LRegs.push_back(*AliasI);
1315 }
1316 }
1317}
1318
1319/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1320/// by RegMask, and add them to LRegs.
1321static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1322 ArrayRef<SUnit*> LiveRegDefs,
1323 SmallSet<unsigned, 4> &RegAdded,
1324 SmallVectorImpl<unsigned> &LRegs) {
1325 // Look at all live registers. Skip Reg0 and the special CallResource.
1326 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1327 if (!LiveRegDefs[i]) continue;
1328 if (LiveRegDefs[i] == SU) continue;
1329 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1330 if (RegAdded.insert(i).second)
1331 LRegs.push_back(i);
1332 }
1333}
1334
1335/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1336static const uint32_t *getNodeRegMask(const SDNode *N) {
1337 for (const SDValue &Op : N->op_values())
1338 if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1339 return RegOp->getRegMask();
1340 return nullptr;
1341}
1342
1343/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1344/// scheduling of the given node to satisfy live physical register dependencies.
1345/// If the specific node is the last one that's available to schedule, do
1346/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1347bool ScheduleDAGRRList::
1348DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1349 if (NumLiveRegs == 0)
1350 return false;
1351
1352 SmallSet<unsigned, 4> RegAdded;
1353 // If this node would clobber any "live" register, then it's not ready.
1354 //
1355 // If SU is the currently live definition of the same register that it uses,
1356 // then we are free to schedule it.
1357 for (SDep &Pred : SU->Preds) {
1358 if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1359 CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1360 RegAdded, LRegs, TRI);
1361 }
1362
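 // Walk the node and everything glued to it, looking for operations that
 // clobber live physical registers: inline asm defs and clobbers, CopyToReg
 // of a physical register, call-sequence markers, register masks, optional
 // defs, and implicit defs.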
1363 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1364 if (Node->getOpcode() == ISD::INLINEASM ||
1365 Node->getOpcode() == ISD::INLINEASM_BR) {
1366 // Inline asm can clobber physical defs.
1367 unsigned NumOps = Node->getNumOperands();
1368 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1369 --NumOps; // Ignore the glue operand.
1370
1371 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1372 unsigned Flags =
1373 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1374 const InlineAsm::Flag F(Flags);
1375 unsigned NumVals = F.getNumOperandRegisters();
1376
1377 ++i; // Skip the ID value.
1378 if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
1379 F.isClobberKind()) {
1380 // Check for def of register or earlyclobber register.
1381 for (; NumVals; --NumVals, ++i) {
1382 Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1383 if (Reg.isPhysical())
1384 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1385 }
1386 } else
1387 i += NumVals;
1388 }
1389 continue;
1390 }
1391
1392 if (Node->getOpcode() == ISD::CopyToReg) {
1393 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
1394 if (Reg.isPhysical()) {
1395 SDNode *SrcNode = Node->getOperand(2).getNode();
1396 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
1397 SrcNode);
1398 }
1399 }
1400
1401 if (!Node->isMachineOpcode())
1402 continue;
1403 // If we're in the middle of scheduling a call, don't begin scheduling
1404 // another call. Also, don't allow any physical registers to be live across
1405 // the call.
1406 if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1407 // Check the special calling-sequence resource.
1408 unsigned CallResource = TRI->getNumRegs();
1409 if (LiveRegDefs[CallResource]) {
1410 SDNode *Gen = LiveRegGens[CallResource]->getNode();
1411 while (SDNode *Glued = Gen->getGluedNode())
1412 Gen = Glued;
1413 if (!IsChainDependent(Gen, Node, 0, TII) &&
1414 RegAdded.insert(CallResource).second)
1415 LRegs.push_back(CallResource);
1416 }
1417 }
1418 if (const uint32_t *RegMask = getNodeRegMask(Node))
1419 CheckForLiveRegDefMasked(SU, RegMask,
1420 ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1421 RegAdded, LRegs);
1422
1423 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1424 if (MCID.hasOptionalDef()) {
1425 // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1426 // This operand can be either a def of CPSR, if the S bit is set; or a use
1427 // of %noreg. When the OptionalDef is set to a valid register, we need to
1428 // handle it in the same way as an ImplicitDef.
1429 for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1430 if (MCID.operands()[i].isOptionalDef()) {
1431 const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1432 Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1433 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1434 }
1435 }
1436 for (MCPhysReg Reg : MCID.implicit_defs())
1437 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1438 }
1439
1440 return !LRegs.empty();
1441}
1442
1443void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1444 // Add the nodes that aren't ready back onto the available list.
1445 for (unsigned i = Interferences.size(); i > 0; --i) {
1446 SUnit *SU = Interferences[i-1];
1447 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1448 if (Reg) {
1449 SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1450 if (!is_contained(LRegs, Reg))
1451 continue;
1452 }
1453 SU->isPending = false;
1454 // The interfering node may no longer be available due to backtracking.
1455 // Furthermore, it may have been made available again, in which case it is
1456 // now already in the AvailableQueue.
1457 if (SU->isAvailable && !SU->NodeQueueId) {
1458 LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1459 AvailableQueue->push(SU);
1460 }
1461 if (i < Interferences.size())
1462 Interferences[i-1] = Interferences.back();
1463 Interferences.pop_back();
1464 LRegsMap.erase(LRegsPos);
1465 }
1466}
1467
1468/// Return a node that can be scheduled in this cycle. Requirements:
1469/// (1) Ready: latency has been satisfied
1470/// (2) No Hazards: resources are available
1471/// (3) No Interferences: may unschedule to break register interferences.
1472SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1473 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
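 // Pop nodes until one has no live physical register interference; nodes
 // that do interfere are parked in Interferences (with their conflicting
 // registers recorded in LRegsMap) until the conflict is released.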
1474 auto FindAvailableNode = [&]() {
1475 while (CurSU) {
1476 SmallVector<unsigned, 4> LRegs;
1477 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1478 break;
1479 LLVM_DEBUG(dbgs() << " Interfering reg ";
1480 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1481 else dbgs() << printReg(LRegs[0], TRI);
1482 dbgs() << " SU #" << CurSU->NodeNum << '\n');
1483 auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
1484 if (LRegsInserted) {
1485 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1486 Interferences.push_back(CurSU);
1487 }
1488 else {
1489 assert(CurSU->isPending && "Interferences are pending");
1490 // Update the interference with current live regs.
1491 LRegsIter->second = LRegs;
1492 }
1493 CurSU = AvailableQueue->pop();
1494 }
1495 };
1496 FindAvailableNode();
1497 if (CurSU)
1498 return CurSU;
1499
1500 // We query the topological order in the loop body, so make sure outstanding
1501 // updates are applied before entering it (we only enter the loop if there
1502 // are some interferences). If we make changes to the ordering, we exit
1503 // the loop.
1504
1505 // All candidates are delayed due to live physical reg dependencies.
1506 // Try backtracking, code duplication, or inserting cross class copies
1507 // to resolve it.
1508 for (SUnit *TrySU : Interferences) {
1509 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1510
1511 // Try unscheduling up to the point where it's safe to schedule
1512 // this node.
1513 SUnit *BtSU = nullptr;
1514 unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1515 for (unsigned Reg : LRegs) {
1516 if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1517 BtSU = LiveRegGens[Reg];
1518 LiveCycle = BtSU->getHeight();
1519 }
1520 }
1521 if (!WillCreateCycle(TrySU, BtSU)) {
1522 // BacktrackBottomUp mutates Interferences!
1523 BacktrackBottomUp(TrySU, BtSU);
1524
1525 // Force the current node to be scheduled before the node that
1526 // requires the physical reg dep.
1527 if (BtSU->isAvailable) {
1528 BtSU->isAvailable = false;
1529 if (!BtSU->isPending)
1530 AvailableQueue->remove(BtSU);
1531 }
1532 LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1533 << ") to SU(" << TrySU->NodeNum << ")\n");
1534 AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1535
1536 // If one or more successors has been unscheduled, then the current
1537 // node is no longer available.
1538 if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1539 LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1540 CurSU = AvailableQueue->pop();
1541 } else {
1542 LLVM_DEBUG(dbgs() << "TrySU available\n");
1543 // Available and in AvailableQueue
1544 AvailableQueue->remove(TrySU);
1545 CurSU = TrySU;
1546 }
1547 FindAvailableNode();
1548 // Interferences has been mutated. We must break.
1549 break;
1550 }
1551 }
1552
1553 if (!CurSU) {
1554 // Can't backtrack. If it's too expensive to copy the value, then try to
1555 // duplicate the nodes that produce these "too expensive to copy"
1556 // values to break the dependency. In case even that doesn't work,
1557 // insert cross class copies.
1558 // If it's not too expensive, i.e. cost != -1, issue copies.
1559 SUnit *TrySU = Interferences[0];
1560 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1561 assert(LRegs.size() == 1 && "Can't handle this yet!");
1562 unsigned Reg = LRegs[0];
1563 SUnit *LRDef = LiveRegDefs[Reg];
1564 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1565 const TargetRegisterClass *RC =
1566 TRI->getMinimalPhysRegClass(Reg, VT);
1567 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1568
1569 // If the cross copy register class is the same as RC, then it must be
1570 // possible to copy the value directly. Do not try to duplicate the def.
1571 // If the cross copy register class is not the same as RC, then it's possible
1572 // to copy the value but it requires cross register class copies and it is
1573 // expensive.
1574 // If cross copy register class is null, then it's not possible to copy
1575 // the value at all.
1576 SUnit *NewDef = nullptr;
1577 if (DestRC != RC) {
1578 NewDef = CopyAndMoveSuccessors(LRDef);
1579 if (!DestRC && !NewDef)
1580 report_fatal_error("Can't handle live physical register dependency!");
1581 }
1582 if (!NewDef) {
1583 // Issue copies, these can be expensive cross register class copies.
1584 SmallVector<SUnit*, 2> Copies;
1585 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1586 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1587 << " to SU #" << Copies.front()->NodeNum << "\n");
1588 AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1589 NewDef = Copies.back();
1590 }
1591
1592 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1593 << " to SU #" << TrySU->NodeNum << "\n");
1594 LiveRegDefs[Reg] = NewDef;
1595 AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1596 TrySU->isAvailable = false;
1597 CurSU = NewDef;
1598 }
1599 assert(CurSU && "Unable to resolve live physical register dependencies!");
1600 return CurSU;
1601}
1602
1603/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1604/// schedulers.
1605void ScheduleDAGRRList::ListScheduleBottomUp() {
1606 // Release any predecessors of the special Exit node.
1607 ReleasePredecessors(&ExitSU);
1608
1609 // Add root to Available queue.
1610 if (!SUnits.empty()) {
1611 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1612 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1613 RootSU->isAvailable = true;
1614 AvailableQueue->push(RootSU);
1615 }
1616
1617 // While Available queue is not empty, grab the node with the highest
1618 // priority. If it is not ready put it back. Schedule the node.
1619 Sequence.reserve(SUnits.size());
1620 while (!AvailableQueue->empty() || !Interferences.empty()) {
1621 LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1622 AvailableQueue->dump(this));
1623
1624 // Pick the best node to schedule taking all constraints into
1625 // consideration.
1626 SUnit *SU = PickNodeToScheduleBottomUp();
1627
1628 AdvancePastStalls(SU);
1629
1630 ScheduleNodeBottomUp(SU);
1631
1632 while (AvailableQueue->empty() && !PendingQueue.empty()) {
1633 // Advance the cycle to free resources. Skip ahead to the next ready SU.
1634 assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1635 "MinAvailableCycle uninitialized");
1636 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1637 }
1638 }
1639
1640 // Reverse the order if it is bottom up.
1641 std::reverse(Sequence.begin(), Sequence.end());
1642
1643#ifndef NDEBUG
1644 VerifyScheduledSequence(/*isBottomUp=*/true);
1645#endif
1646}
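// The loop above is an instance of generic bottom-up list scheduling. A
// minimal sketch of the same idea, with hypothetical Node/Ready names and
// none of the cycle, hazard, or live-physreg handling done here:
//
//   while (!Ready.empty()) {
//     Node *N = Ready.popBest();            // highest-priority available node
//     Sequence.push_back(N);                // emit in bottom-up order
//     for (Node *Pred : N->preds())
//       if (--Pred->NumSuccsLeft == 0)      // all of Pred's uses are scheduled
//         Ready.push(Pred);                 // Pred becomes available
//   }
//   std::reverse(Sequence.begin(), Sequence.end()); // convert to issue order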
1647
1648namespace {
1649
1650class RegReductionPQBase;
1651
1652struct queue_sort {
1653 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1654};
1655
1656#ifndef NDEBUG
1657template<class SF>
1658struct reverse_sort : public queue_sort {
1659 SF &SortFunc;
1660
1661 reverse_sort(SF &sf) : SortFunc(sf) {}
1662
1663 bool operator()(SUnit* left, SUnit* right) const {
1664 // reverse left/right rather than simply !SortFunc(left, right)
1665 // to expose different paths in the comparison logic.
1666 return SortFunc(right, left);
1667 }
1668};
1669#endif // NDEBUG
1670
1671/// bu_ls_rr_sort - Priority function for bottom up register pressure
1672 /// reduction scheduler.
1673struct bu_ls_rr_sort : public queue_sort {
1674 enum {
1675 IsBottomUp = true,
1676 HasReadyFilter = false
1677 };
1678
1679 RegReductionPQBase *SPQ;
1680
1681 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1682
1683 bool operator()(SUnit* left, SUnit* right) const;
1684};
1685
1686// src_ls_rr_sort - Priority function for source order scheduler.
1687struct src_ls_rr_sort : public queue_sort {
1688 enum {
1689 IsBottomUp = true,
1690 HasReadyFilter = false
1691 };
1692
1693 RegReductionPQBase *SPQ;
1694
1695 src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1696
1697 bool operator()(SUnit* left, SUnit* right) const;
1698};
1699
1700// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1701struct hybrid_ls_rr_sort : public queue_sort {
1702 enum {
1703 IsBottomUp = true,
1704 HasReadyFilter = false
1705 };
1706
1707 RegReductionPQBase *SPQ;
1708
1709 hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1710
1711 bool isReady(SUnit *SU, unsigned CurCycle) const;
1712
1713 bool operator()(SUnit* left, SUnit* right) const;
1714};
1715
1716// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1717// scheduler.
1718struct ilp_ls_rr_sort : public queue_sort {
1719 enum {
1720 IsBottomUp = true,
1721 HasReadyFilter = false
1722 };
1723
1724 RegReductionPQBase *SPQ;
1725
1726 ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1727
1728 bool isReady(SUnit *SU, unsigned CurCycle) const;
1729
1730 bool operator()(SUnit* left, SUnit* right) const;
1731};
1732
1733class RegReductionPQBase : public SchedulingPriorityQueue {
1734protected:
1735 std::vector<SUnit *> Queue;
1736 unsigned CurQueueId = 0;
1737 bool TracksRegPressure;
1738 bool SrcOrder;
1739
1740 // SUnits - The SUnits for the current graph.
1741 std::vector<SUnit> *SUnits = nullptr;
1742
1743 MachineFunction &MF;
1744 const TargetInstrInfo *TII = nullptr;
1745 const TargetRegisterInfo *TRI = nullptr;
1746 const TargetLowering *TLI = nullptr;
1747 ScheduleDAGRRList *scheduleDAG = nullptr;
1748
1749 // SethiUllmanNumbers - The SethiUllman number for each node.
1750 std::vector<unsigned> SethiUllmanNumbers;
1751
1752 /// RegPressure - Tracking current reg pressure per register class.
1753 std::vector<unsigned> RegPressure;
1754
1755 /// RegLimit - Tracking the number of allocatable registers per register
1756 /// class.
1757 std::vector<unsigned> RegLimit;
1758
1759public:
1760 RegReductionPQBase(MachineFunction &mf,
1761 bool hasReadyFilter,
1762 bool tracksrp,
1763 bool srcorder,
1764 const TargetInstrInfo *tii,
1765 const TargetRegisterInfo *tri,
1766 const TargetLowering *tli)
1767 : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1768 SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1769 if (TracksRegPressure) {
1770 unsigned NumRC = TRI->getNumRegClasses();
1771 RegLimit.resize(NumRC);
1772 RegPressure.resize(NumRC);
1773 std::fill(RegLimit.begin(), RegLimit.end(), 0);
1774 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1775 for (const TargetRegisterClass *RC : TRI->regclasses())
1776 RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1777 }
1778 }
1779
1780 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1781 scheduleDAG = scheduleDag;
1782 }
1783
1784 ScheduleHazardRecognizer* getHazardRec() {
1785 return scheduleDAG->getHazardRec();
1786 }
1787
1788 void initNodes(std::vector<SUnit> &sunits) override;
1789
1790 void addNode(const SUnit *SU) override;
1791
1792 void updateNode(const SUnit *SU) override;
1793
1794 void releaseState() override {
1795 SUnits = nullptr;
1796 SethiUllmanNumbers.clear();
1797 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1798 }
1799
1800 unsigned getNodePriority(const SUnit *SU) const;
1801
1802 unsigned getNodeOrdering(const SUnit *SU) const {
1803 if (!SU->getNode()) return 0;
1804
1805 return SU->getNode()->getIROrder();
1806 }
1807
1808 bool empty() const override { return Queue.empty(); }
1809
1810 void push(SUnit *U) override {
1811 assert(!U->NodeQueueId && "Node in the queue already");
1812 U->NodeQueueId = ++CurQueueId;
1813 Queue.push_back(U);
1814 }
1815
1816 void remove(SUnit *SU) override {
1817 assert(!Queue.empty() && "Queue is empty!");
1818 assert(SU->NodeQueueId != 0 && "Not in queue!");
1819 std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1820 if (I != std::prev(Queue.end()))
1821 std::swap(*I, Queue.back());
1822 Queue.pop_back();
1823 SU->NodeQueueId = 0;
1824 }
1825
1826 bool tracksRegPressure() const override { return TracksRegPressure; }
1827
1828 void dumpRegPressure() const;
1829
1830 bool HighRegPressure(const SUnit *SU) const;
1831
1832 bool MayReduceRegPressure(SUnit *SU) const;
1833
1834 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1835
1836 void scheduledNode(SUnit *SU) override;
1837
1838 void unscheduledNode(SUnit *SU) override;
1839
1840protected:
1841 bool canClobber(const SUnit *SU, const SUnit *Op);
1842 void AddPseudoTwoAddrDeps();
1843 void PrescheduleNodesWithMultipleUses();
1844 void CalculateSethiUllmanNumbers();
1845};
1846
1847template<class SF>
1848static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1849 unsigned BestIdx = 0;
1850 // Only compute the cost for the first 1000 items in the queue, to avoid
1851 // excessive compile-times for very large queues.
1852 for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
1853 I++)
1854 if (Picker(Q[BestIdx], Q[I]))
1855 BestIdx = I;
1856 SUnit *V = Q[BestIdx];
1857 if (BestIdx + 1 != Q.size())
1858 std::swap(Q[BestIdx], Q.back());
1859 Q.pop_back();
1860 return V;
1861}
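// Illustrative sketch (hypothetical helper, assuming only <algorithm>,
// <cstddef>, <utility> and <vector>): the same bounded-scan, swap-to-back,
// pop_back idiom on a plain vector, where the "picker" returning true means
// the second element is the better choice. Precondition: Q is not empty.
//
//   static int popLargest(std::vector<int> &Q) {
//     std::size_t BestIdx = 0;
//     std::size_t Limit = std::min<std::size_t>(Q.size(), 1000); // cap the scan
//     for (std::size_t I = 1; I != Limit; ++I)
//       if (Q[BestIdx] < Q[I])             // picker(best, candidate) == true
//         BestIdx = I;
//     int Best = Q[BestIdx];
//     if (BestIdx + 1 != Q.size())
//       std::swap(Q[BestIdx], Q.back());   // O(1) removal; order not preserved
//     Q.pop_back();
//     return Best;
//   }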
1862
1863template<class SF>
1864SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1865#ifndef NDEBUG
1866 if (DAG->StressSched) {
1867 reverse_sort<SF> RPicker(Picker);
1868 return popFromQueueImpl(Q, RPicker);
1869 }
1870#endif
1871 (void)DAG;
1872 return popFromQueueImpl(Q, Picker);
1873}
1874
1875//===----------------------------------------------------------------------===//
1876// RegReductionPriorityQueue Definition
1877//===----------------------------------------------------------------------===//
1878//
1879// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1880// to reduce register pressure.
1881//
1882template<class SF>
1883class RegReductionPriorityQueue : public RegReductionPQBase {
1884 SF Picker;
1885
1886public:
1887 RegReductionPriorityQueue(MachineFunction &mf,
1888 bool tracksrp,
1889 bool srcorder,
1890 const TargetInstrInfo *tii,
1891 const TargetRegisterInfo *tri,
1892 const TargetLowering *tli)
1893 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1894 tii, tri, tli),
1895 Picker(this) {}
1896
1897 bool isBottomUp() const override { return SF::IsBottomUp; }
1898
1899 bool isReady(SUnit *U) const override {
1900 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1901 }
1902
1903 SUnit *pop() override {
1904 if (Queue.empty()) return nullptr;
1905
1906 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1907 V->NodeQueueId = 0;
1908 return V;
1909 }
1910
1911#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1912 LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1913 // Emulate pop() without clobbering NodeQueueIds.
1914 std::vector<SUnit *> DumpQueue = Queue;
1915 SF DumpPicker = Picker;
1916 while (!DumpQueue.empty()) {
1917 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1918 dbgs() << "Height " << SU->getHeight() << ": ";
1919 DAG->dumpNode(*SU);
1920 }
1921 }
1922#endif
1923};
1924
1925using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1926using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1927using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1928using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1929
1930} // end anonymous namespace
1931
1932//===----------------------------------------------------------------------===//
1933// Static Node Priority for Register Pressure Reduction
1934//===----------------------------------------------------------------------===//
1935
1936// Check for special nodes that bypass scheduling heuristics.
1937// Currently this pushes TokenFactor nodes down, but may be used for other
1938// pseudo-ops as well.
1939//
1940// Return -1 to schedule right above left, 1 for left above right.
1941// Return 0 if no bias exists.
1942static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1943 bool LSchedLow = left->isScheduleLow;
1944 bool RSchedLow = right->isScheduleLow;
1945 if (LSchedLow != RSchedLow)
1946 return LSchedLow < RSchedLow ? 1 : -1;
1947 return 0;
1948}
1949
1950/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1951/// Smaller number is the higher priority.
1952static unsigned
1953CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1954 if (SUNumbers[SU->NodeNum] != 0)
1955 return SUNumbers[SU->NodeNum];
1956
1957 // Use WorkList to avoid stack overflow on excessively large IRs.
1958 struct WorkState {
1959 WorkState(const SUnit *SU) : SU(SU) {}
1960 const SUnit *SU;
1961 unsigned PredsProcessed = 0;
1962 };
1963
1964 SmallVector<WorkState, 16> WorkList;
1965 WorkList.push_back(SU);
1966 while (!WorkList.empty()) {
1967 auto &Temp = WorkList.back();
1968 auto *TempSU = Temp.SU;
1969 bool AllPredsKnown = true;
1970 // Try to find a non-evaluated pred and push it into the processing stack.
1971 for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1972 auto &Pred = TempSU->Preds[P];
1973 if (Pred.isCtrl()) continue; // ignore chain preds
1974 SUnit *PredSU = Pred.getSUnit();
1975 if (SUNumbers[PredSU->NodeNum] == 0) {
1976#ifndef NDEBUG
1977 // In debug mode, check that we don't have such element in the stack.
1978 for (auto It : WorkList)
1979 assert(It.SU != PredSU && "Trying to push an element twice?");
1980#endif
1981 // Next time start processing this one starting from the next pred.
1982 Temp.PredsProcessed = P + 1;
1983 WorkList.push_back(PredSU);
1984 AllPredsKnown = false;
1985 break;
1986 }
1987 }
1988
1989 if (!AllPredsKnown)
1990 continue;
1991
1992 // Once all preds are known, we can calculate the answer for this one.
1993 unsigned SethiUllmanNumber = 0;
1994 unsigned Extra = 0;
1995 for (const SDep &Pred : TempSU->Preds) {
1996 if (Pred.isCtrl()) continue; // ignore chain preds
1997 SUnit *PredSU = Pred.getSUnit();
1998 unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1999 assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
2000 if (PredSethiUllman > SethiUllmanNumber) {
2001 SethiUllmanNumber = PredSethiUllman;
2002 Extra = 0;
2003 } else if (PredSethiUllman == SethiUllmanNumber)
2004 ++Extra;
2005 }
2006
2007 SethiUllmanNumber += Extra;
2008 if (SethiUllmanNumber == 0)
2009 SethiUllmanNumber = 1;
2010 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2011 WorkList.pop_back();
2012 }
2013
2014 assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2015 return SUNumbers[SU->NodeNum];
2016}
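// A worked illustration: for a pure binary expression tree the computation
// above reduces to the classic Sethi-Ullman recurrence. A minimal recursive
// sketch (hypothetical Expr type, assuming <algorithm> for std::max):
//
//   struct Expr { const Expr *L = nullptr, *R = nullptr; };
//
//   static unsigned sethiUllman(const Expr *E) {
//     if (!E->L && !E->R)
//       return 1;                               // a leaf occupies one register
//     unsigned A = E->L ? sethiUllman(E->L) : 0;
//     unsigned B = E->R ? sethiUllman(E->R) : 0;
//     return A == B ? A + 1 : std::max(A, B);   // a tie costs one extra register
//   }
//
// For (a + b) * (c + d) the leaves get 1, each add gets 2, and the multiply
// gets 3. The work-list form above generalizes this to nodes with any number
// of non-chain operands and avoids deep recursion on very large graphs.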
2017
2018/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2019/// scheduling units.
2020void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2021 SethiUllmanNumbers.assign(SUnits->size(), 0);
2022
2023 for (const SUnit &SU : *SUnits)
2024 CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2025}
2026
2027void RegReductionPQBase::addNode(const SUnit *SU) {
2028 unsigned SUSize = SethiUllmanNumbers.size();
2029 if (SUnits->size() > SUSize)
2030 SethiUllmanNumbers.resize(SUSize*2, 0);
2031 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2032}
2033
2034void RegReductionPQBase::updateNode(const SUnit *SU) {
2035 SethiUllmanNumbers[SU->NodeNum] = 0;
2036 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2037}
2038
2039// Lower priority means schedule further down. For bottom-up scheduling, lower
2040// priority SUs are scheduled before higher priority SUs.
2041unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2042 assert(SU->NodeNum < SethiUllmanNumbers.size());
2043 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2044 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2045 // CopyToReg should be close to its uses to facilitate coalescing and
2046 // avoid spilling.
2047 return 0;
2048 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2049 Opc == TargetOpcode::SUBREG_TO_REG ||
2050 Opc == TargetOpcode::INSERT_SUBREG)
2051 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2052 // close to their uses to facilitate coalescing.
2053 return 0;
2054 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2055 // If SU does not have a register use, i.e. it doesn't produce a value
2056 // that would be consumed (e.g. store), then it terminates a chain of
2057 // computation. Give it a large SethiUllman number so it will be
2058 // scheduled right before its predecessors that it doesn't lengthen
2059 // their live ranges.
2060 return 0xffff;
2061 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2062 // If SU does not have a register def, schedule it close to its uses
2063 // because it does not lengthen any live ranges.
2064 return 0;
2065#if 1
2066 return SethiUllmanNumbers[SU->NodeNum];
2067#else
2068 unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2069 if (SU->isCallOp) {
2070 // FIXME: This assumes all of the defs are used as call operands.
2071 int NP = (int)Priority - SU->getNode()->getNumValues();
2072 return (NP > 0) ? NP : 0;
2073 }
2074 return Priority;
2075#endif
2076}
2077
2078//===----------------------------------------------------------------------===//
2079// Register Pressure Tracking
2080//===----------------------------------------------------------------------===//
2081
2082#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2083LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2084 for (const TargetRegisterClass *RC : TRI->regclasses()) {
2085 unsigned Id = RC->getID();
2086 unsigned RP = RegPressure[Id];
2087 if (!RP) continue;
2088 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2089 << RegLimit[Id] << '\n');
2090 }
2091}
2092#endif
2093
2094bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2095 if (!TLI)
2096 return false;
2097
2098 for (const SDep &Pred : SU->Preds) {
2099 if (Pred.isCtrl())
2100 continue;
2101 SUnit *PredSU = Pred.getSUnit();
2102 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2103 // to cover the number of registers defined (they are all live).
2104 if (PredSU->NumRegDefsLeft == 0) {
2105 continue;
2106 }
2107 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2108 RegDefPos.IsValid(); RegDefPos.Advance()) {
2109 unsigned RCId, Cost;
2110 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2111
2112 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2113 return true;
2114 }
2115 }
2116 return false;
2117}
2118
2119bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2120 const SDNode *N = SU->getNode();
2121
2122 if (!N->isMachineOpcode() || !SU->NumSuccs)
2123 return false;
2124
2125 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2126 for (unsigned i = 0; i != NumDefs; ++i) {
2127 MVT VT = N->getSimpleValueType(i);
2128 if (!N->hasAnyUseOfValue(i))
2129 continue;
2130 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2131 if (RegPressure[RCId] >= RegLimit[RCId])
2132 return true;
2133 }
2134 return false;
2135}
2136
2137 // Compute the register pressure contribution of this instruction by counting
2138 // up for uses that are not yet live and down for defs. Only count register
2139 // classes that are already under high pressure. As a side effect, compute the
2140 // number of uses of registers that are already live.
2141//
2142// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2143// so could probably be factored.
2144int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2145 LiveUses = 0;
2146 int PDiff = 0;
2147 for (const SDep &Pred : SU->Preds) {
2148 if (Pred.isCtrl())
2149 continue;
2150 SUnit *PredSU = Pred.getSUnit();
2151 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2152 // to cover the number of registers defined (they are all live).
2153 if (PredSU->NumRegDefsLeft == 0) {
2154 if (PredSU->getNode()->isMachineOpcode())
2155 ++LiveUses;
2156 continue;
2157 }
2158 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2159 RegDefPos.IsValid(); RegDefPos.Advance()) {
2160 MVT VT = RegDefPos.GetValue();
2161 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2162 if (RegPressure[RCId] >= RegLimit[RCId])
2163 ++PDiff;
2164 }
2165 }
2166 const SDNode *N = SU->getNode();
2167
2168 if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2169 return PDiff;
2170
2171 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2172 for (unsigned i = 0; i != NumDefs; ++i) {
2173 MVT VT = N->getSimpleValueType(i);
2174 if (!N->hasAnyUseOfValue(i))
2175 continue;
2176 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2177 if (RegPressure[RCId] >= RegLimit[RCId])
2178 --PDiff;
2179 }
2180 return PDiff;
2181}
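// Worked illustration of the convention above (hypothetical numbers): if all
// register classes involved are already at or over their limit, an SU whose
// scheduling would make three not-yet-live operand defs live while retiring
// one of its own defs gets PDiff = 3 - 1 = 2. A positive diff therefore
// suggests that scheduling the node now tends to raise pressure; a negative
// diff suggests it tends to relieve it.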
2182
2183void RegReductionPQBase::scheduledNode(SUnit *SU) {
2184 if (!TracksRegPressure)
2185 return;
2186
2187 if (!SU->getNode())
2188 return;
2189
2190 for (const SDep &Pred : SU->Preds) {
2191 if (Pred.isCtrl())
2192 continue;
2193 SUnit *PredSU = Pred.getSUnit();
2194 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2195 // to cover the number of registers defined (they are all live).
2196 if (PredSU->NumRegDefsLeft == 0) {
2197 continue;
2198 }
2199 // FIXME: The ScheduleDAG currently loses information about which of a
2200 // node's values is consumed by each dependence. Consequently, if the node
2201 // defines multiple register classes, we don't know which to pressurize
2202 // here. Instead the following loop consumes the register defs in an
2203 // arbitrary order. At least it handles the common case of clustered loads
2204 // to the same class. For precise liveness, each SDep needs to indicate the
2205 // result number. But that tightly couples the ScheduleDAG with the
2206 // SelectionDAG making updates tricky. A simpler hack would be to attach a
2207 // value type or register class to SDep.
2208 //
2209 // The most important aspect of register tracking is balancing the increase
2210 // here with the reduction further below. Note that this SU may use multiple
2211 // defs in PredSU. They can't be determined here, but we've already
2212 // compensated by reducing NumRegDefsLeft in PredSU during
2213 // ScheduleDAGSDNodes::AddSchedEdges.
2214 --PredSU->NumRegDefsLeft;
2215 unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2216 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2217 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2218 if (SkipRegDefs)
2219 continue;
2220
2221 unsigned RCId, Cost;
2222 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2223 RegPressure[RCId] += Cost;
2224 break;
2225 }
2226 }
2227
2228 // We should have this assert, but there may be dead SDNodes that never
2229 // materialize as SUnits, so they don't appear to generate liveness.
2230 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2231 int SkipRegDefs = (int)SU->NumRegDefsLeft;
2232 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2233 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2234 if (SkipRegDefs > 0)
2235 continue;
2236 unsigned RCId, Cost;
2237 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2238 if (RegPressure[RCId] < Cost) {
2239 // Register pressure tracking is imprecise. This can happen. But we try
2240 // hard not to let it happen because it likely results in poor scheduling.
2241 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2242 << ") has too many regdefs\n");
2243 RegPressure[RCId] = 0;
2244 }
2245 else {
2246 RegPressure[RCId] -= Cost;
2247 }
2248 }
2249 LLVM_DEBUG(dumpRegPressure());
2250}
2251
2252void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2253 if (!TracksRegPressure)
2254 return;
2255
2256 const SDNode *N = SU->getNode();
2257 if (!N) return;
2258
2259 if (!N->isMachineOpcode()) {
2260 if (N->getOpcode() != ISD::CopyToReg)
2261 return;
2262 } else {
2263 unsigned Opc = N->getMachineOpcode();
2264 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2265 Opc == TargetOpcode::INSERT_SUBREG ||
2266 Opc == TargetOpcode::SUBREG_TO_REG ||
2267 Opc == TargetOpcode::REG_SEQUENCE ||
2268 Opc == TargetOpcode::IMPLICIT_DEF)
2269 return;
2270 }
2271
2272 for (const SDep &Pred : SU->Preds) {
2273 if (Pred.isCtrl())
2274 continue;
2275 SUnit *PredSU = Pred.getSUnit();
2276 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2277 // counts data deps.
2278 if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2279 continue;
2280 const SDNode *PN = PredSU->getNode();
2281 if (!PN->isMachineOpcode()) {
2282 if (PN->getOpcode() == ISD::CopyFromReg) {
2283 MVT VT = PN->getSimpleValueType(0);
2284 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2285 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2286 }
2287 continue;
2288 }
2289 unsigned POpc = PN->getMachineOpcode();
2290 if (POpc == TargetOpcode::IMPLICIT_DEF)
2291 continue;
2292 if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2293 POpc == TargetOpcode::INSERT_SUBREG ||
2294 POpc == TargetOpcode::SUBREG_TO_REG) {
2295 MVT VT = PN->getSimpleValueType(0);
2296 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2297 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2298 continue;
2299 }
2300 if (POpc == TargetOpcode::REG_SEQUENCE) {
2301 unsigned DstRCIdx =
2302 cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue();
2303 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
2304 unsigned RCId = RC->getID();
2305 // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
2306 // here. Instead use the same constant as in GetCostForDef.
2307 RegPressure[RCId] += RegSequenceCost;
2308 continue;
2309 }
2310 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2311 for (unsigned i = 0; i != NumDefs; ++i) {
2312 MVT VT = PN->getSimpleValueType(i);
2313 if (!PN->hasAnyUseOfValue(i))
2314 continue;
2315 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2316 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2317 // Register pressure tracking is imprecise. This can happen.
2318 RegPressure[RCId] = 0;
2319 else
2320 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2321 }
2322 }
2323
2324 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2325 // may transfer data dependencies to CopyToReg.
2326 if (SU->NumSuccs && N->isMachineOpcode()) {
2327 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2328 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2329 MVT VT = N->getSimpleValueType(i);
2330 if (VT == MVT::Glue || VT == MVT::Other)
2331 continue;
2332 if (!N->hasAnyUseOfValue(i))
2333 continue;
2334 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2335 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2336 }
2337 }
2338
2339 LLVM_DEBUG(dumpRegPressure());
2340}
2341
2342//===----------------------------------------------------------------------===//
2343// Dynamic Node Priority for Register Pressure Reduction
2344//===----------------------------------------------------------------------===//
2345
2346/// closestSucc - Returns the scheduled cycle of the successor which is
2347/// closest to the current cycle.
2348static unsigned closestSucc(const SUnit *SU) {
2349 unsigned MaxHeight = 0;
2350 for (const SDep &Succ : SU->Succs) {
2351 if (Succ.isCtrl()) continue; // ignore chain succs
2352 unsigned Height = Succ.getSUnit()->getHeight();
2353 // If there are bunch of CopyToRegs stacked up, they should be considered
2354 // to be at the same position.
2355 if (Succ.getSUnit()->getNode() &&
2356 Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2357 Height = closestSucc(Succ.getSUnit())+1;
2358 if (Height > MaxHeight)
2359 MaxHeight = Height;
2360 }
2361 return MaxHeight;
2362}
2363
2364 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2365/// for scratch registers, i.e. number of data dependencies.
2366static unsigned calcMaxScratches(const SUnit *SU) {
2367 unsigned Scratches = 0;
2368 for (const SDep &Pred : SU->Preds) {
2369 if (Pred.isCtrl()) continue; // ignore chain preds
2370 Scratches++;
2371 }
2372 return Scratches;
2373}
2374
2375/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2376/// CopyFromReg from a virtual register.
2377static bool hasOnlyLiveInOpers(const SUnit *SU) {
2378 bool RetVal = false;
2379 for (const SDep &Pred : SU->Preds) {
2380 if (Pred.isCtrl()) continue;
2381 const SUnit *PredSU = Pred.getSUnit();
2382 if (PredSU->getNode() &&
2383 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2384 Register Reg =
2385 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2386 if (Reg.isVirtual()) {
2387 RetVal = true;
2388 continue;
2389 }
2390 }
2391 return false;
2392 }
2393 return RetVal;
2394}
2395
2396/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2397/// CopyToReg to a virtual register. This SU def is probably a liveout and
2398/// it has no other use. It should be scheduled closer to the terminator.
2399static bool hasOnlyLiveOutUses(const SUnit *SU) {
2400 bool RetVal = false;
2401 for (const SDep &Succ : SU->Succs) {
2402 if (Succ.isCtrl()) continue;
2403 const SUnit *SuccSU = Succ.getSUnit();
2404 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2405 Register Reg =
2406 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2407 if (Reg.isVirtual()) {
2408 RetVal = true;
2409 continue;
2410 }
2411 }
2412 return false;
2413 }
2414 return RetVal;
2415}
2416
2417// Set isVRegCycle for a node with only live in opers and live out uses. Also
2418// set isVRegCycle for its CopyFromReg operands.
2419//
2420// This is only relevant for single-block loops, in which case the VRegCycle
2421// node is likely an induction variable in which the operand and target virtual
2422// registers should be coalesced (e.g. pre/post increment values). Setting the
2423// isVRegCycle flag helps the scheduler prioritize other uses of the same
2424// CopyFromReg so that this node becomes the virtual register "kill". This
2425// avoids interference between the values live in and out of the block and
2426// eliminates a copy inside the loop.
2427static void initVRegCycle(SUnit *SU) {
2428 if (DisableSchedVRcycle)
2429 return;
2430
2431 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2432 return;
2433
2434 LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2435
2436 SU->isVRegCycle = true;
2437
2438 for (const SDep &Pred : SU->Preds) {
2439 if (Pred.isCtrl()) continue;
2440 Pred.getSUnit()->isVRegCycle = true;
2441 }
2442}
2443
2444// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2445// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2446static void resetVRegCycle(SUnit *SU) {
2447 if (!SU->isVRegCycle)
2448 return;
2449
2450 for (const SDep &Pred : SU->Preds) {
2451 if (Pred.isCtrl()) continue; // ignore chain preds
2452 SUnit *PredSU = Pred.getSUnit();
2453 if (PredSU->isVRegCycle) {
2454 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2455 "VRegCycle def must be CopyFromReg");
2456 Pred.getSUnit()->isVRegCycle = false;
2457 }
2458 }
2459}
2460
2461// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2462// means a node that defines the VRegCycle has not been scheduled yet.
2463static bool hasVRegCycleUse(const SUnit *SU) {
2464 // If this SU also defines the VReg, don't hoist it as a "use".
2465 if (SU->isVRegCycle)
2466 return false;
2467
2468 for (const SDep &Pred : SU->Preds) {
2469 if (Pred.isCtrl()) continue; // ignore chain preds
2470 if (Pred.getSUnit()->isVRegCycle &&
2471 Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2472 LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2473 return true;
2474 }
2475 }
2476 return false;
2477}
2478
2479// Check for either a dependence (latency) or resource (hazard) stall.
2480//
2481// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2482static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2483 if ((int)SPQ->getCurCycle() < Height) return true;
2484 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2485 != ScheduleHazardRecognizer::NoHazard)
2486 return true;
2487 return false;
2488}
2489
2490// Return -1 if left has higher priority, 1 if right has higher priority.
2491// Return 0 if latency-based priority is equivalent.
2492static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2493 RegReductionPQBase *SPQ) {
2494 // Scheduling an instruction that uses a VReg whose postincrement has not yet
2495 // been scheduled will induce a copy. Model this as an extra cycle of latency.
2496 int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2497 int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2498 int LHeight = (int)left->getHeight() + LPenalty;
2499 int RHeight = (int)right->getHeight() + RPenalty;
2500
2501 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2502 BUHasStall(left, LHeight, SPQ);
2503 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2504 BUHasStall(right, RHeight, SPQ);
2505
2506 // If scheduling one of the nodes will cause a pipeline stall, delay it.
2507 // If scheduling both of the nodes will cause pipeline stalls, sort them
2508 // according to their height.
2509 if (LStall) {
2510 if (!RStall)
2511 return 1;
2512 if (LHeight != RHeight)
2513 return LHeight > RHeight ? 1 : -1;
2514 } else if (RStall)
2515 return -1;
2516
2517 // If either node is scheduling for latency, sort them by height/depth
2518 // and latency.
2519 if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2520 right->SchedulingPref == Sched::ILP)) {
2521 // If neither instruction stalls (!LStall && !RStall) and the HazardRecognizer
2522 // is enabled (grouping instructions by cycle), then height is already
2523 // covered, so only depth matters. We also reach this point if both stall
2524 // but have the same height.
2525 if (!SPQ->getHazardRec()->isEnabled()) {
2526 if (LHeight != RHeight)
2527 return LHeight > RHeight ? 1 : -1;
2528 }
2529 int LDepth = left->getDepth() - LPenalty;
2530 int RDepth = right->getDepth() - RPenalty;
2531 if (LDepth != RDepth) {
2532 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2533 << ") depth " << LDepth << " vs SU (" << right->NodeNum
2534 << ") depth " << RDepth << "\n");
2535 return LDepth < RDepth ? 1 : -1;
2536 }
2537 if (left->Latency != right->Latency)
2538 return left->Latency > right->Latency ? 1 : -1;
2539 }
2540 return 0;
2541}
2542
2543static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2544 // Schedule physical register definitions close to their use. This is
2545 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2546 // long as shortening physreg live ranges is generally good, we can defer
2547 // creating a subtarget hook.
2548 if (!DisableSchedPhysRegJoin) {
2549 bool LHasPhysReg = left->hasPhysRegDefs;
2550 bool RHasPhysReg = right->hasPhysRegDefs;
2551 if (LHasPhysReg != RHasPhysReg) {
2552 #ifndef NDEBUG
2553 static const char *const PhysRegMsg[] = { " has no physreg",
2554 " defines a physreg" };
2555 #endif
2556 LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2557 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2558 << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2559 return LHasPhysReg < RHasPhysReg;
2560 }
2561 }
2562
2563 // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2564 unsigned LPriority = SPQ->getNodePriority(left);
2565 unsigned RPriority = SPQ->getNodePriority(right);
2566
2567 // Be really careful about hoisting call operands above previous calls.
2568 // Only allow it if it would reduce register pressure.
2569 if (left->isCall && right->isCallOp) {
2570 unsigned RNumVals = right->getNode()->getNumValues();
2571 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2572 }
2573 if (right->isCall && left->isCallOp) {
2574 unsigned LNumVals = left->getNode()->getNumValues();
2575 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2576 }
2577
2578 if (LPriority != RPriority)
2579 return LPriority > RPriority;
2580
2581 // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2582 // the same, then keep source order.
2583 if (left->isCall || right->isCall) {
2584 unsigned LOrder = SPQ->getNodeOrdering(left);
2585 unsigned ROrder = SPQ->getNodeOrdering(right);
2586
2587 // Prefer an ordering where the lower the non-zero order number, the higher
2588 // the preference.
2589 if ((LOrder || ROrder) && LOrder != ROrder)
2590 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2591 }
2592
2593 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2594 // e.g.
2595 // t1 = op t2, c1
2596 // t3 = op t4, c2
2597 //
2598 // and the following instructions are both ready.
2599 // t2 = op c3
2600 // t4 = op c4
2601 //
2602 // Then schedule t2 = op first.
2603 // i.e.
2604 // t4 = op c4
2605 // t2 = op c3
2606 // t1 = op t2, c1
2607 // t3 = op t4, c2
2608 //
2609 // This creates more short live intervals.
2610 unsigned LDist = closestSucc(left);
2611 unsigned RDist = closestSucc(right);
2612 if (LDist != RDist)
2613 return LDist < RDist;
2614
2615 // How many registers become live when the node is scheduled.
2616 unsigned LScratch = calcMaxScratches(left);
2617 unsigned RScratch = calcMaxScratches(right);
2618 if (LScratch != RScratch)
2619 return LScratch > RScratch;
2620
2621 // Comparing latency against a call makes little sense unless the node
2622 // is register pressure-neutral.
2623 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2624 return (left->NodeQueueId > right->NodeQueueId);
2625
2626 // Do not compare latencies when one or both of the nodes are calls.
2627 if (!DisableSchedCycles &&
2628 !(left->isCall || right->isCall)) {
2629 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2630 if (result != 0)
2631 return result > 0;
2632 }
2633 else {
2634 if (left->getHeight() != right->getHeight())
2635 return left->getHeight() > right->getHeight();
2636
2637 if (left->getDepth() != right->getDepth())
2638 return left->getDepth() < right->getDepth();
2639 }
2640
2641 assert(left->NodeQueueId && right->NodeQueueId &&
2642 "NodeQueueId cannot be zero");
2643 return (left->NodeQueueId > right->NodeQueueId);
2644}
2645
2646// Bottom up
2647bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2648 if (int res = checkSpecialNodes(left, right))
2649 return res > 0;
2650
2651 return BURRSort(left, right, SPQ);
2652}
2653
2654// Source order, otherwise bottom up.
2655bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2656 if (int res = checkSpecialNodes(left, right))
2657 return res > 0;
2658
2659 unsigned LOrder = SPQ->getNodeOrdering(left);
2660 unsigned ROrder = SPQ->getNodeOrdering(right);
2661
2662 // Prefer an ordering where the lower the non-zero order number, the higher
2663 // the preference.
2664 if ((LOrder || ROrder) && LOrder != ROrder)
2665 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2666
2667 return BURRSort(left, right, SPQ);
2668}
2669
2670// If the time between now and when the instruction will be ready can cover
2671// the spill code, then avoid adding it to the ready queue. This gives long
2672// stalls highest priority and allows hoisting across calls. It should also
2673// speed up processing the available queue.
2674bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2675 static const unsigned ReadyDelay = 3;
2676
2677 if (SPQ->MayReduceRegPressure(SU)) return true;
2678
2679 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2680
2681 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2682 != ScheduleHazardRecognizer::NoHazard)
2683 return false;
2684
2685 return true;
2686}
2687
2688// Return true if right should be scheduled with higher priority than left.
2689bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2690 if (int res = checkSpecialNodes(left, right))
2691 return res > 0;
2692
2693 if (left->isCall || right->isCall)
2694 // No way to compute latency of calls.
2695 return BURRSort(left, right, SPQ);
2696
2697 bool LHigh = SPQ->HighRegPressure(left);
2698 bool RHigh = SPQ->HighRegPressure(right);
2699 // Avoid causing spills. If register pressure is high, schedule for
2700 // register pressure reduction.
2701 if (LHigh && !RHigh) {
2702 LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2703 << right->NodeNum << ")\n");
2704 return true;
2705 }
2706 else if (!LHigh && RHigh) {
2707 LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2708 << left->NodeNum << ")\n");
2709 return false;
2710 }
2711 if (!LHigh && !RHigh) {
2712 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2713 if (result != 0)
2714 return result > 0;
2715 }
2716 return BURRSort(left, right, SPQ);
2717}
2718
2719// Schedule as many instructions in each cycle as possible. So don't make an
2720// instruction available unless it is ready in the current cycle.
2721bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2722 if (SU->getHeight() > CurCycle) return false;
2723
2724 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2725 != ScheduleHazardRecognizer::NoHazard)
2726 return false;
2727
2728 return true;
2729}
2730
2731static bool canEnableCoalescing(SUnit *SU) {
2732 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2733 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2734 // CopyToReg should be close to its uses to facilitate coalescing and
2735 // avoid spilling.
2736 return true;
2737
2738 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2739 Opc == TargetOpcode::SUBREG_TO_REG ||
2740 Opc == TargetOpcode::INSERT_SUBREG)
2741 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2742 // close to their uses to facilitate coalescing.
2743 return true;
2744
2745 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2746 // If SU does not have a register def, schedule it close to its uses
2747 // because it does not lengthen any live ranges.
2748 return true;
2749
2750 return false;
2751}
2752
2753// list-ilp is currently an experimental scheduler that allows various
2754// heuristics to be enabled prior to the normal register reduction logic.
2755bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2756 if (int res = checkSpecialNodes(left, right))
2757 return res > 0;
2758
2759 if (left->isCall || right->isCall)
2760 // No way to compute latency of calls.
2761 return BURRSort(left, right, SPQ);
2762
2763 unsigned LLiveUses = 0, RLiveUses = 0;
2764 int LPDiff = 0, RPDiff = 0;
2765 if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2766 LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2767 RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2768 }
2769 if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2770 LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2771 << "): " << LPDiff << " != SU(" << right->NodeNum
2772 << "): " << RPDiff << "\n");
2773 return LPDiff > RPDiff;
2774 }
2775
2776 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2777 bool LReduce = canEnableCoalescing(left);
2778 bool RReduce = canEnableCoalescing(right);
2779 if (LReduce && !RReduce) return false;
2780 if (RReduce && !LReduce) return true;
2781 }
2782
2783 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2784 LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2785 << " != SU(" << right->NodeNum << "): " << RLiveUses
2786 << "\n");
2787 return LLiveUses < RLiveUses;
2788 }
2789
2790 if (!DisableSchedStalls) {
2791 bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2792 bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2793 if (LStall != RStall)
2794 return left->getHeight() > right->getHeight();
2795 }
2796
2797 if (!DisableSchedCriticalPath) {
2798 int spread = (int)left->getDepth() - (int)right->getDepth();
2799 if (std::abs(spread) > MaxReorderWindow) {
2800 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2801 << left->getDepth() << " != SU(" << right->NodeNum
2802 << "): " << right->getDepth() << "\n");
2803 return left->getDepth() < right->getDepth();
2804 }
2805 }
2806
2807 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2808 int spread = (int)left->getHeight() - (int)right->getHeight();
2809 if (std::abs(spread) > MaxReorderWindow)
2810 return left->getHeight() > right->getHeight();
2811 }
2812
2813 return BURRSort(left, right, SPQ);
2814}
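// Summary of the cascade above, in the order the checks are applied: special
// nodes first; calls fall back to BURRSort; then register pressure diff,
// coalescing opportunity, live uses, stalls, depth (critical path) and height
// within MaxReorderWindow; and finally the default BURRSort tie-break.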
2815
2816void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2817 SUnits = &sunits;
2818 // Add pseudo dependency edges for two-address nodes.
2819 if (!Disable2AddrHack)
2820 AddPseudoTwoAddrDeps();
2821 // Reroute edges to nodes with multiple uses.
2822 if (!TracksRegPressure && !SrcOrder)
2823 PrescheduleNodesWithMultipleUses();
2824 // Calculate node priorities.
2825 CalculateSethiUllmanNumbers();
2826
2827 // For single block loops, mark nodes that look like canonical IV increments.
2828 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2829 for (SUnit &SU : sunits)
2830 initVRegCycle(&SU);
2831}
2832
2833//===----------------------------------------------------------------------===//
2834// Preschedule for Register Pressure
2835//===----------------------------------------------------------------------===//
2836
2837bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2838 if (SU->isTwoAddress) {
2839 unsigned Opc = SU->getNode()->getMachineOpcode();
2840 const MCInstrDesc &MCID = TII->get(Opc);
2841 unsigned NumRes = MCID.getNumDefs();
2842 unsigned NumOps = MCID.getNumOperands() - NumRes;
2843 for (unsigned i = 0; i != NumOps; ++i) {
2844 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2845 SDNode *DU = SU->getNode()->getOperand(i).getNode();
2846 if (DU->getNodeId() != -1 &&
2847 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2848 return true;
2849 }
2850 }
2851 }
2852 return false;
2853}
2854
2855 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2856 /// successors' explicit physregs whose definition can reach DepSU.
2857/// i.e. DepSU should not be scheduled above SU.
2858static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2859 ScheduleDAGRRList *scheduleDAG,
2860 const TargetInstrInfo *TII,
2861 const TargetRegisterInfo *TRI) {
2862 ArrayRef<MCPhysReg> ImpDefs =
2863 TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
2864 const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2865 if (ImpDefs.empty() && !RegMask)
2866 return false;
2867
2868 for (const SDep &Succ : SU->Succs) {
2869 SUnit *SuccSU = Succ.getSUnit();
2870 for (const SDep &SuccPred : SuccSU->Preds) {
2871 if (!SuccPred.isAssignedRegDep())
2872 continue;
2873
2874 if (RegMask &&
2875 MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2876 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2877 return true;
2878
2879 for (MCPhysReg ImpDef : ImpDefs) {
2880 // Return true if SU clobbers this physical register use and the
2881 // definition of the register reaches from DepSU. IsReachable queries
2882 // a topological forward sort of the DAG (following the successors).
2883 if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
2884 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2885 return true;
2886 }
2887 }
2888 }
2889 return false;
2890}
2891
2892/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2893/// physical register defs.
2894static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2895 const TargetInstrInfo *TII,
2896 const TargetRegisterInfo *TRI) {
2897 SDNode *N = SuccSU->getNode();
2898 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2899 ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
2900 assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
2901 for (const SDNode *SUNode = SU->getNode(); SUNode;
2902 SUNode = SUNode->getGluedNode()) {
2903 if (!SUNode->isMachineOpcode())
2904 continue;
2905 ArrayRef<MCPhysReg> SUImpDefs =
2906 TII->get(SUNode->getMachineOpcode()).implicit_defs();
2907 const uint32_t *SURegMask = getNodeRegMask(SUNode);
2908 if (SUImpDefs.empty() && !SURegMask)
2909 continue;
2910 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2911 MVT VT = N->getSimpleValueType(i);
2912 if (VT == MVT::Glue || VT == MVT::Other)
2913 continue;
2914 if (!N->hasAnyUseOfValue(i))
2915 continue;
2916 MCPhysReg Reg = ImpDefs[i - NumDefs];
2917 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2918 return true;
2919 for (MCPhysReg SUReg : SUImpDefs) {
2920 if (TRI->regsOverlap(Reg, SUReg))
2921 return true;
2922 }
2923 }
2924 }
2925 return false;
2926}
2927
2928/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2929/// are not handled well by the general register pressure reduction
2930/// heuristics. When presented with code like this:
2931///
2932/// N
2933/// / |
2934/// / |
2935/// U store
2936/// |
2937/// ...
2938///
2939/// the heuristics tend to push the store up, but since the
2940/// operand of the store has another use (U), this would increase
2941/// the length of that other use (the U->N edge).
2942///
2943/// This function transforms code like the above to route U's
2944/// dependence through the store when possible, like this:
2945///
2946/// N
2947/// ||
2948/// ||
2949/// store
2950/// |
2951/// U
2952/// |
2953/// ...
2954///
2955/// This results in the store being scheduled immediately
2956/// after N, which shortens the U->N live range, reducing
2957/// register pressure.
2958void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2959 // Visit all the nodes in topological order, working top-down.
2960 for (SUnit &SU : *SUnits) {
2961 // For now, only look at nodes with no data successors, such as stores.
2962 // These are especially important, due to the heuristics in
2963 // getNodePriority for nodes with no data successors.
2964 if (SU.NumSuccs != 0)
2965 continue;
2966 // For now, only look at nodes with exactly one data predecessor.
2967 if (SU.NumPreds != 1)
2968 continue;
2969 // Avoid prescheduling copies to virtual registers, which don't behave
2970 // like other nodes from the perspective of scheduling heuristics.
2971 if (SDNode *N = SU.getNode())
2972 if (N->getOpcode() == ISD::CopyToReg &&
2973 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
2974 continue;
2975
2976 SDNode *PredFrameSetup = nullptr;
2977 for (const SDep &Pred : SU.Preds)
2978 if (Pred.isCtrl() && Pred.getSUnit()) {
2979 // Find the predecessor which is not data dependence.
2980 SDNode *PredND = Pred.getSUnit()->getNode();
2981
2982 // If PredND is a FrameSetup, we should not pre-schedule the node,
2983 // or else, when scheduling bottom-up, ADJCALLSTACKDOWN and
2984 // ADJCALLSTACKUP may hold the CallResource too long and prevent
2985 // other calls from being scheduled. If there's no other available
2986 // node to schedule, the scheduler will try to rename the register
2987 // by creating a copy to avoid the conflict, which will fail because
2988 // CallResource is not a real physical register.
2989 if (PredND && PredND->isMachineOpcode() &&
2990 (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2991 PredFrameSetup = PredND;
2992 break;
2993 }
2994 }
2995 // Skip the node if it has a FrameSetup parent.
2996 if (PredFrameSetup != nullptr)
2997 continue;
2998
2999 // Locate the single data predecessor.
3000 SUnit *PredSU = nullptr;
3001 for (const SDep &Pred : SU.Preds)
3002 if (!Pred.isCtrl()) {
3003 PredSU = Pred.getSUnit();
3004 break;
3005 }
3006 assert(PredSU);
3007
3008 // Don't rewrite edges that carry physregs, because that requires additional
3009 // support infrastructure.
3010 if (PredSU->hasPhysRegDefs)
3011 continue;
3012 // Short-circuit the case where SU is PredSU's only data successor.
3013 if (PredSU->NumSuccs == 1)
3014 continue;
3015 // Avoid prescheduling to copies from virtual registers, which don't behave
3016 // like other nodes from the perspective of scheduling heuristics.
3017 if (SDNode *N = SU.getNode())
3018 if (N->getOpcode() == ISD::CopyFromReg &&
3019 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
3020 continue;
3021
3022 // Perform checks on the successors of PredSU.
3023 for (const SDep &PredSucc : PredSU->Succs) {
3024 SUnit *PredSuccSU = PredSucc.getSUnit();
3025 if (PredSuccSU == &SU) continue;
3026 // If PredSU has another successor with no data successors, for
3027 // now don't attempt to choose either over the other.
3028 if (PredSuccSU->NumSuccs == 0)
3029 goto outer_loop_continue;
3030 // Don't break physical register dependencies.
3031 if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3032 if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3033 goto outer_loop_continue;
3034 // Don't introduce graph cycles.
3035 if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3036 goto outer_loop_continue;
3037 }
3038
3039 // Ok, the transformation is safe and the heuristics suggest it is
3040 // profitable. Update the graph.
3041 LLVM_DEBUG(
3042 dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3043 << PredSU->NodeNum
3044 << " to guide scheduling in the presence of multiple uses\n");
3045 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3046 SDep Edge = PredSU->Succs[i];
3047 assert(!Edge.isAssignedRegDep());
3048 SUnit *SuccSU = Edge.getSUnit();
3049 if (SuccSU != &SU) {
3050 Edge.setSUnit(PredSU);
3051 scheduleDAG->RemovePred(SuccSU, Edge);
3052 scheduleDAG->AddPredQueued(&SU, Edge);
3053 Edge.setSUnit(&SU);
3054 scheduleDAG->AddPredQueued(SuccSU, Edge);
3055 --i;
3056 }
3057 }
3058 outer_loop_continue:;
3059 }
3060}
3061
3062 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3063 /// it as a def&use operand, add a pseudo control edge from it to the other
3064 /// node (if it won't create a cycle) so the two-address one will be scheduled
3065/// first (lower in the schedule). If both nodes are two-address, favor the
3066/// one that has a CopyToReg use (more likely to be a loop induction update).
3067/// If both are two-address, but one is commutable while the other is not
3068/// commutable, favor the one that's not commutable.
3069void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3070 for (SUnit &SU : *SUnits) {
3071 if (!SU.isTwoAddress)
3072 continue;
3073
3074 SDNode *Node = SU.getNode();
3075 if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3076 continue;
3077
3078 bool isLiveOut = hasOnlyLiveOutUses(&SU);
3079 unsigned Opc = Node->getMachineOpcode();
3080 const MCInstrDesc &MCID = TII->get(Opc);
3081 unsigned NumRes = MCID.getNumDefs();
3082 unsigned NumOps = MCID.getNumOperands() - NumRes;
3083 for (unsigned j = 0; j != NumOps; ++j) {
3084 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3085 continue;
3086 SDNode *DU = SU.getNode()->getOperand(j).getNode();
3087 if (DU->getNodeId() == -1)
3088 continue;
3089 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3090 if (!DUSU)
3091 continue;
3092 for (const SDep &Succ : DUSU->Succs) {
3093 if (Succ.isCtrl())
3094 continue;
3095 SUnit *SuccSU = Succ.getSUnit();
3096 if (SuccSU == &SU)
3097 continue;
3098 // Be conservative. Ignore if nodes aren't at roughly the same
3099 // depth and height.
3100 if (SuccSU->getHeight() < SU.getHeight() &&
3101 (SU.getHeight() - SuccSU->getHeight()) > 1)
3102 continue;
3103 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3104 // constrains whatever is using the copy, instead of the copy
3105 // itself. In the case that the copy is coalesced, this
3106 // preserves the intent of the pseudo two-address heuristics.
3107 while (SuccSU->Succs.size() == 1 &&
3108 SuccSU->getNode()->isMachineOpcode() &&
3109 SuccSU->getNode()->getMachineOpcode() ==
3110 TargetOpcode::COPY_TO_REGCLASS)
3111 SuccSU = SuccSU->Succs.front().getSUnit();
3112 // Don't constrain non-instruction nodes.
3113 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3114 continue;
3115 // Don't constrain nodes with physical register defs if the
3116 // predecessor can clobber them.
3117 if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3118 if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3119 continue;
3120 }
3121 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3122 // these may be coalesced away. We want them close to their uses.
3123 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3124 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3125 SuccOpc == TargetOpcode::INSERT_SUBREG ||
3126 SuccOpc == TargetOpcode::SUBREG_TO_REG)
3127 continue;
3128 if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3129 (!canClobber(SuccSU, DUSU) ||
3130 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3131 (!SU.isCommutable && SuccSU->isCommutable)) &&
3132 !scheduleDAG->IsReachable(SuccSU, &SU)) {
3133 LLVM_DEBUG(dbgs()
3134 << "    Adding a pseudo-two-addr edge from SU #"
3135 << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3136 scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3137 }
3138 }
3139 }
3140 }
3141}
3142
3143//===----------------------------------------------------------------------===//
3144// Public Constructor Functions
3145//===----------------------------------------------------------------------===//
3146
3148 CodeGenOptLevel OptLevel) {
3149 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3150 const TargetInstrInfo *TII = STI.getInstrInfo();
3151 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3152
3153 BURegReductionPriorityQueue *PQ =
3154 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3155 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3156 PQ->setScheduleDAG(SD);
3157 return SD;
3158}
3159
3160 ScheduleDAGSDNodes *
3161 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3162                                    CodeGenOptLevel OptLevel) {
3163 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3164 const TargetInstrInfo *TII = STI.getInstrInfo();
3165 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3166
3167 SrcRegReductionPriorityQueue *PQ =
3168 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3169 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3170 PQ->setScheduleDAG(SD);
3171 return SD;
3172}
3173
3174 ScheduleDAGSDNodes *
3175 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3176                                    CodeGenOptLevel OptLevel) {
3177 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3178 const TargetInstrInfo *TII = STI.getInstrInfo();
3179 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3180 const TargetLowering *TLI = IS->TLI;
3181
3182 HybridBURRPriorityQueue *PQ =
3183 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3184
3185 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3186 PQ->setScheduleDAG(SD);
3187 return SD;
3188}
3189
3190 ScheduleDAGSDNodes *llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3191                                                      CodeGenOptLevel OptLevel) {
3192 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3193 const TargetInstrInfo *TII = STI.getInstrInfo();
3194 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3195 const TargetLowering *TLI = IS->TLI;
3196
3197 ILPBURRPriorityQueue *PQ =
3198 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3199 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3200 PQ->setScheduleDAG(SD);
3201 return SD;
3202}
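// Usage sketch: these factory functions are normally reached through the
// scheduler registry rather than called directly. Assuming the standard llc
// driver, the schedulers implemented in this file can be selected with the
// -pre-RA-sched option, for example:
//
//   llc -pre-RA-sched=list-burr   input.ll   # register pressure reduction
//   llc -pre-RA-sched=source      input.ll   # source order when possible
//   llc -pre-RA-sched=list-hybrid input.ll   # balance latency and pressure
//   llc -pre-RA-sched=list-ilp    input.ll   # balance ILP and pressure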
GetCostForDef - Looks up the register class and cost for a given definition.
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
static cl::opt< bool > DisableSchedRegPressure("disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp"))
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
static void initVRegCycle(SUnit *SU)
static constexpr unsigned RegSequenceCost
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
static bool isOperandOf(const SUnit *SU, SDNode *N)
static cl::opt< bool > DisableSchedVRegCycle("disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks"))
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle.
static cl::opt< unsigned > AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists."))
static bool hasVRegCycleUse(const SUnit *SU)
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers,...
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ)
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit * > LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask,...
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
This class represents an Operation in the Expression.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:265
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:481
MCRegAliasIterator enumerates all registers aliasing Reg.
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getIROrder() const
Return the node ordering.
void setNodeId(int Id)
Set unique node id.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
Definition: ScheduleDAG.h:211
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
void setSUnit(SUnit *SU)
Definition: ScheduleDAG.h:483
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
void setHeightToAtLeast(unsigned NewHeight)
If NewHeight is greater than this node's height value, set it to be the new height value.
unsigned NumSuccs
Definition: ScheduleDAG.h:267
unsigned NumPreds
Definition: ScheduleDAG.h:266
unsigned NodeQueueId
Queue id of node.
Definition: ScheduleDAG.h:265
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
bool hasPhysRegClobbers
Has any physreg defs, used or not.
Definition: ScheduleDAG.h:281
bool isCallOp
Is a function call operand.
Definition: ScheduleDAG.h:276
const TargetRegisterClass * CopyDstRC
Is a special copy node if != nullptr.
Definition: ScheduleDAG.h:302
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:439
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
unsigned short NumRegDefsLeft
Definition: ScheduleDAG.h:272
bool isPending
True once pending.
Definition: ScheduleDAG.h:282
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
bool isAvailable
True once available.
Definition: ScheduleDAG.h:283
bool isScheduleLow
True if preferable to schedule low.
Definition: ScheduleDAG.h:286
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
Sched::Preference SchedulingPref
Scheduling preference.
Definition: ScheduleDAG.h:290
const TargetRegisterClass * CopySrcRC
Definition: ScheduleDAG.h:304
SDNode * getNode() const
Returns the representative SDNode for this SUnit.
Definition: ScheduleDAG.h:355
bool isTwoAddress
Is a two-address instruction.
Definition: ScheduleDAG.h:277
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:278
bool isVRegCycle
May use and def the same vreg.
Definition: ScheduleDAG.h:274
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
RegDefIter - In place iteration over the values defined by an SUnit.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
SUnit * newSUnit(SDNode *N)
NewSUnit - Creates a new SUnit and return a ptr to it.
virtual void Schedule()=0
Schedule - Order nodes according to selected style, filling in the Sequence member.
virtual bool forceUnitLatencies() const
ForceUnitLatencies - Return true if all scheduling edges should be given a latency value of one.
SUnit * Clone(SUnit *Old)
Clone - Creates a clone of the specified SUnit.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:702
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:776
void AddSUnitWithoutPredecessors(const SUnit *SU)
Add a SUnit without predecessors to the end of the topological order.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit...
virtual void dumpNode(const SUnit &SU) const =0
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
This interface is used to plug different priorities computation algorithms into the list scheduler.
Definition: ScheduleDAG.h:496
void setCurCycle(unsigned Cycle)
Definition: ScheduleDAG.h:545
virtual void remove(SUnit *SU)=0
virtual void releaseState()=0
virtual SUnit * pop()=0
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
Definition: ScheduleDAG.h:541
virtual bool isReady(SUnit *) const
Definition: ScheduleDAG.h:520
virtual bool tracksRegPressure() const
Definition: ScheduleDAG.h:518
virtual void dump(ScheduleDAG *) const
Definition: ScheduleDAG.h:536
virtual void initNodes(std::vector< SUnit > &SUnits)=0
virtual bool empty() const =0
virtual void unscheduledNode(SUnit *)
Definition: ScheduleDAG.h:543
virtual void addNode(const SUnit *SU)=0
virtual void updateNode(const SUnit *SU)=0
virtual void push(SUnit *U)=0
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetLowering * TLI
MachineFunction * MF
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the 'representative' register class for the specified value type.
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
Return the register pressure "high water mark" for the specific register class.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ EH_LABEL
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
Definition: ISDOpcodes.h:1097
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1293
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1092
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1089
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition: PtrState.h:41
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1747
ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler.
ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createBURRListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source c...
ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1884
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Description of the encoding of one expression Op.