LLVM 20.0.0git
MachinePipeliner.h
Go to the documentation of this file.
1//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10//
11// Software pipelining (SWP) is an instruction scheduling technique for loops
12// that overlap loop iterations and exploits ILP via a compiler transformation.
13//
14// Swing Modulo Scheduling is an implementation of software pipelining
15// that generates schedules that are near optimal in terms of initiation
16// interval, register requirements, and stage count. See the papers:
17//
18// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20// Conference on Parallel Architectures and Compilation Techiniques.
21//
22// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24// Transactions on Computers, Vol. 50, No. 3, 2001.
25//
26// "An Implementation of Swing Modulo Scheduling With Extensions for
27// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28// Urbana-Champaign, 2005.
29//
30//
31// The SMS algorithm consists of three main steps after computing the minimal
32// initiation interval (MII).
33// 1) Analyze the dependence graph and compute information about each
34// instruction in the graph.
35// 2) Order the nodes (instructions) by priority based upon the heuristics
36// described in the algorithm.
37// 3) Attempt to schedule the nodes in the specified order using the MII.
38//
39//===----------------------------------------------------------------------===//
40#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
41#define LLVM_CODEGEN_MACHINEPIPELINER_H
42
43#include "llvm/ADT/SetVector.h"
54
55#include <deque>
56
57namespace llvm {
58
59class AAResults;
60class NodeSet;
61class SMSchedule;
62
63extern cl::opt<bool> SwpEnableCopyToPhi;
64extern cl::opt<int> SwpForceIssueWidth;
65
66/// The main class in the implementation of the target independent
67/// software pipeliner pass.
69public:
70 MachineFunction *MF = nullptr;
72 const MachineLoopInfo *MLI = nullptr;
73 const MachineDominatorTree *MDT = nullptr;
75 const TargetInstrInfo *TII = nullptr;
77 bool disabledByPragma = false;
78 unsigned II_setByPragma = 0;
79
80#ifndef NDEBUG
81 static int NumTries;
82#endif
83
84 /// Cache the target analysis information about the loop.
85 struct LoopInfo {
91 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
92 nullptr;
93 };
95
96 static char ID;
97
100 }
101
103
104 void getAnalysisUsage(AnalysisUsage &AU) const override;
105
106private:
107 void preprocessPhiNodes(MachineBasicBlock &B);
108 bool canPipelineLoop(MachineLoop &L);
109 bool scheduleLoop(MachineLoop &L);
110 bool swingModuloScheduler(MachineLoop &L);
111 void setPragmaPipelineOptions(MachineLoop &L);
112 bool runWindowScheduler(MachineLoop &L);
113 bool useSwingModuloScheduler();
114 bool useWindowScheduler(bool Changed);
115};
116
117/// This class builds the dependence graph for the instructions in a loop,
118/// and attempts to schedule the instructions using the SMS algorithm.
121 /// The minimum initiation interval between iterations for this schedule.
122 unsigned MII = 0;
123 /// The maximum initiation interval between iterations for this schedule.
124 unsigned MAX_II = 0;
125 /// Set to true if a valid pipelined schedule is found for the loop.
126 bool Scheduled = false;
128 LiveIntervals &LIS;
129 const RegisterClassInfo &RegClassInfo;
130 unsigned II_setByPragma = 0;
131 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
132
133 /// A toplogical ordering of the SUnits, which is needed for changing
134 /// dependences and iterating over the SUnits.
136
137 struct NodeInfo {
138 int ASAP = 0;
139 int ALAP = 0;
140 int ZeroLatencyDepth = 0;
141 int ZeroLatencyHeight = 0;
142
143 NodeInfo() = default;
144 };
145 /// Computed properties for each node in the graph.
146 std::vector<NodeInfo> ScheduleInfo;
147
148 enum OrderKind { BottomUp = 0, TopDown = 1 };
149 /// Computed node ordering for scheduling.
150 SetVector<SUnit *> NodeOrder;
151
156
157 /// Instructions to change when emitting the final schedule.
159
160 /// We may create a new instruction, so remember it because it
161 /// must be deleted when the pass is finished.
163
164 /// Ordered list of DAG postprocessing steps.
165 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
166
167 /// Helper class to implement Johnson's circuit finding algorithm.
168 class Circuits {
169 std::vector<SUnit> &SUnits;
170 SetVector<SUnit *> Stack;
171 BitVector Blocked;
174 // Node to Index from ScheduleDAGTopologicalSort
175 std::vector<int> *Node2Idx;
176 unsigned NumPaths = 0u;
177 static unsigned MaxPaths;
178
179 public:
180 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
181 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
182 Node2Idx = new std::vector<int>(SUs.size());
183 unsigned Idx = 0;
184 for (const auto &NodeNum : Topo)
185 Node2Idx->at(NodeNum) = Idx++;
186 }
187 Circuits &operator=(const Circuits &other) = delete;
188 Circuits(const Circuits &other) = delete;
189 ~Circuits() { delete Node2Idx; }
190
191 /// Reset the data structures used in the circuit algorithm.
192 void reset() {
193 Stack.clear();
194 Blocked.reset();
195 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
196 NumPaths = 0;
197 }
198
199 void createAdjacencyStructure(SwingSchedulerDAG *DAG);
200 bool circuit(int V, int S, NodeSetType &NodeSets,
201 const SwingSchedulerDAG *DAG, bool HasBackedge = false);
202 void unblock(int U);
203 };
204
205 struct CopyToPhiMutation : public ScheduleDAGMutation {
206 void apply(ScheduleDAGInstrs *DAG) override;
207 };
208
209public:
211 const RegisterClassInfo &rci, unsigned II,
213 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
214 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
215 Topo(SUnits, &ExitSU) {
216 P.MF->getSubtarget().getSMSMutations(Mutations);
218 Mutations.push_back(std::make_unique<CopyToPhiMutation>());
219 }
220
221 void schedule() override;
222 void finishBlock() override;
223
224 /// Return true if the loop kernel has been scheduled.
225 bool hasNewSchedule() { return Scheduled; }
226
227 /// Return the earliest time an instruction may be scheduled.
228 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
229
230 /// Return the latest time an instruction my be scheduled.
231 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
232
233 /// The mobility function, which the number of slots in which
234 /// an instruction may be scheduled.
235 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
236
237 /// The depth, in the dependence graph, for a node.
238 unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
239
240 /// The maximum unweighted length of a path from an arbitrary node to the
241 /// given node in which each edge has latency 0
243 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
244 }
245
246 /// The height, in the dependence graph, for a node.
247 unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
248
249 /// The maximum unweighted length of a path from the given node to an
250 /// arbitrary node in which each edge has latency 0
252 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
253 }
254
255 /// Return true if the dependence is a back-edge in the data dependence graph.
256 /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
257 /// using an anti dependence from a Phi to an instruction.
258 bool isBackedge(SUnit *Source, const SDep &Dep) {
259 if (Dep.getKind() != SDep::Anti)
260 return false;
261 return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
262 }
263
264 bool isLoopCarriedDep(SUnit *Source, const SDep &Dep,
265 bool isSucc = true) const;
266
267 /// The distance function, which indicates that operation V of iteration I
268 /// depends on operations U of iteration I-distance.
269 unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
270 // Instructions that feed a Phi have a distance of 1. Computing larger
271 // values for arrays requires data dependence information.
272 if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
273 return 1;
274 return 0;
275 }
276
277 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
278
279 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
280
281 /// Return the new base register that was stored away for the changed
282 /// instruction.
283 unsigned getInstrBaseReg(SUnit *SU) const {
285 InstrChanges.find(SU);
286 if (It != InstrChanges.end())
287 return It->second.first;
288 return 0;
289 }
290
291 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
292 Mutations.push_back(std::move(Mutation));
293 }
294
295 static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
296
297private:
298 void addLoopCarriedDependences(AAResults *AA);
299 void updatePhiDependences();
300 void changeDependences();
301 unsigned calculateResMII();
302 unsigned calculateRecMII(NodeSetType &RecNodeSets);
303 void findCircuits(NodeSetType &NodeSets);
304 void fuseRecs(NodeSetType &NodeSets);
305 void removeDuplicateNodes(NodeSetType &NodeSets);
306 void computeNodeFunctions(NodeSetType &NodeSets);
307 void registerPressureFilter(NodeSetType &NodeSets);
308 void colocateNodeSets(NodeSetType &NodeSets);
309 void checkNodeSets(NodeSetType &NodeSets);
310 void groupRemainingNodes(NodeSetType &NodeSets);
311 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
312 SetVector<SUnit *> &NodesAdded);
313 void computeNodeOrder(NodeSetType &NodeSets);
314 void checkValidNodeOrder(const NodeSetType &Circuits) const;
315 bool schedulePipeline(SMSchedule &Schedule);
316 bool computeDelta(MachineInstr &MI, unsigned &Delta) const;
317 MachineInstr *findDefInLoop(Register Reg);
318 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
319 unsigned &OffsetPos, unsigned &NewBase,
320 int64_t &NewOffset);
321 void postProcessDAG();
322 /// Set the Minimum Initiation Interval for this schedule attempt.
323 void setMII(unsigned ResMII, unsigned RecMII);
324 /// Set the Maximum Initiation Interval for this schedule attempt.
325 void setMAX_II();
326};
327
328/// A NodeSet contains a set of SUnit DAG nodes with additional information
329/// that assigns a priority to the set.
330class NodeSet {
331 SetVector<SUnit *> Nodes;
332 bool HasRecurrence = false;
333 unsigned RecMII = 0;
334 int MaxMOV = 0;
335 unsigned MaxDepth = 0;
336 unsigned Colocate = 0;
337 SUnit *ExceedPressure = nullptr;
338 unsigned Latency = 0;
339
340public:
342
343 NodeSet() = default;
345 : Nodes(S, E), HasRecurrence(true) {
346 // Calculate the latency of this node set.
347 // Example to demonstrate the calculation:
348 // Given: N0 -> N1 -> N2 -> N0
349 // Edges:
350 // (N0 -> N1, 3)
351 // (N0 -> N1, 5)
352 // (N1 -> N2, 2)
353 // (N2 -> N0, 1)
354 // The total latency which is a lower bound of the recurrence MII is the
355 // longest path from N0 back to N0 given only the edges of this node set.
356 // In this example, the latency is: 5 + 2 + 1 = 8.
357 //
358 // Hold a map from each SUnit in the circle to the maximum distance from the
359 // source node by only considering the nodes.
360 DenseMap<SUnit *, unsigned> SUnitToDistance;
361 for (auto *Node : Nodes)
362 SUnitToDistance[Node] = 0;
363
364 for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
365 SUnit *U = Nodes[I - 1];
366 SUnit *V = Nodes[I % Nodes.size()];
367 for (const SDep &Succ : U->Succs) {
368 SUnit *SuccSUnit = Succ.getSUnit();
369 if (V != SuccSUnit)
370 continue;
371 if (SUnitToDistance[U] + Succ.getLatency() > SUnitToDistance[V]) {
372 SUnitToDistance[V] = SUnitToDistance[U] + Succ.getLatency();
373 }
374 }
375 }
376 // Handle a back-edge in loop carried dependencies
377 SUnit *FirstNode = Nodes[0];
378 SUnit *LastNode = Nodes[Nodes.size() - 1];
379
380 for (auto &PI : LastNode->Preds) {
381 // If we have an order dep that is potentially loop carried then a
382 // back-edge exists between the last node and the first node that isn't
383 // modeled in the DAG. Handle it manually by adding 1 to the distance of
384 // the last node.
385 if (PI.getSUnit() != FirstNode || PI.getKind() != SDep::Order ||
386 !DAG->isLoopCarriedDep(LastNode, PI, false))
387 continue;
388 SUnitToDistance[FirstNode] =
389 std::max(SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1);
390 }
391
392 // The latency is the distance from the source node to itself.
393 Latency = SUnitToDistance[Nodes.front()];
394 }
395
396 bool insert(SUnit *SU) { return Nodes.insert(SU); }
397
398 void insert(iterator S, iterator E) { Nodes.insert(S, E); }
399
400 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
401 return Nodes.remove_if(P);
402 }
403
404 unsigned count(SUnit *SU) const { return Nodes.count(SU); }
405
406 bool hasRecurrence() { return HasRecurrence; };
407
408 unsigned size() const { return Nodes.size(); }
409
410 bool empty() const { return Nodes.empty(); }
411
412 SUnit *getNode(unsigned i) const { return Nodes[i]; };
413
414 void setRecMII(unsigned mii) { RecMII = mii; };
415
416 void setColocate(unsigned c) { Colocate = c; };
417
418 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
419
420 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
421
422 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
423
424 int getRecMII() { return RecMII; }
425
426 /// Summarize node functions for the entire node set.
428 for (SUnit *SU : *this) {
429 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
430 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
431 }
432 }
433
434 unsigned getLatency() { return Latency; }
435
436 unsigned getMaxDepth() { return MaxDepth; }
437
438 void clear() {
439 Nodes.clear();
440 RecMII = 0;
441 HasRecurrence = false;
442 MaxMOV = 0;
443 MaxDepth = 0;
444 Colocate = 0;
445 ExceedPressure = nullptr;
446 }
447
448 operator SetVector<SUnit *> &() { return Nodes; }
449
450 /// Sort the node sets by importance. First, rank them by recurrence MII,
451 /// then by mobility (least mobile done first), and finally by depth.
452 /// Each node set may contain a colocate value which is used as the first
453 /// tie breaker, if it's set.
454 bool operator>(const NodeSet &RHS) const {
455 if (RecMII == RHS.RecMII) {
456 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
457 return Colocate < RHS.Colocate;
458 if (MaxMOV == RHS.MaxMOV)
459 return MaxDepth > RHS.MaxDepth;
460 return MaxMOV < RHS.MaxMOV;
461 }
462 return RecMII > RHS.RecMII;
463 }
464
465 bool operator==(const NodeSet &RHS) const {
466 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
467 MaxDepth == RHS.MaxDepth;
468 }
469
470 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
471
472 iterator begin() { return Nodes.begin(); }
473 iterator end() { return Nodes.end(); }
474 void print(raw_ostream &os) const;
475
476#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
477 LLVM_DUMP_METHOD void dump() const;
478#endif
479};
480
481// 16 was selected based on the number of ProcResource kinds for all
482// existing Subtargets, so that SmallVector don't need to resize too often.
483static const int DefaultProcResSize = 16;
484
486private:
487 const MCSubtargetInfo *STI;
488 const MCSchedModel &SM;
489 const TargetSubtargetInfo *ST;
490 const TargetInstrInfo *TII;
492 const bool UseDFA;
493 /// DFA resources for each slot
495 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
496 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
498 /// The number of scheduled micro operations for each slot. Micro operations
499 /// are assumed to be scheduled one per cycle, starting with the cycle in
500 /// which the instruction is scheduled.
501 llvm::SmallVector<int> NumScheduledMops;
502 /// Each processor resource is associated with a so-called processor resource
503 /// mask. This vector allows to correlate processor resource IDs with
504 /// processor resource masks. There is exactly one element per each processor
505 /// resource declared by the scheduling model.
507 int InitiationInterval = 0;
508 /// The number of micro operations that can be scheduled at a cycle.
509 int IssueWidth;
510
511 int calculateResMIIDFA() const;
512 /// Check if MRT is overbooked
513 bool isOverbooked() const;
514 /// Reserve resources on MRT
515 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
516 /// Unreserve resources on MRT
517 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
518
519 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
520 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
521 /// II).
522 int positiveModulo(int Dividend, int Divisor) const {
523 assert(Divisor > 0);
524 int R = Dividend % Divisor;
525 if (R < 0)
526 R += Divisor;
527 return R;
528 }
529
530#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
531 LLVM_DUMP_METHOD void dumpMRT() const;
532#endif
533
534public:
536 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
537 DAG(DAG), UseDFA(ST->useDFAforSMS()),
538 ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
539 IssueWidth(SM.IssueWidth) {
540 initProcResourceVectors(SM, ProcResourceMasks);
541 if (IssueWidth <= 0)
542 // If IssueWidth is not specified, set a sufficiently large value
543 IssueWidth = 100;
544 if (SwpForceIssueWidth > 0)
545 IssueWidth = SwpForceIssueWidth;
546 }
547
550
551 /// Check if the resources occupied by a machine instruction are available
552 /// in the current state.
553 bool canReserveResources(SUnit &SU, int Cycle);
554
555 /// Reserve the resources occupied by a machine instruction and change the
556 /// current state to reflect that change.
557 void reserveResources(SUnit &SU, int Cycle);
558
559 int calculateResMII() const;
560
561 /// Initialize resources with the initiation interval II.
562 void init(int II);
563};
564
565/// This class represents the scheduled code. The main data structure is a
566/// map from scheduled cycle to instructions. During scheduling, the
567/// data structure explicitly represents all stages/iterations. When
568/// the algorithm finshes, the schedule is collapsed into a single stage,
569/// which represents instructions from different loop iterations.
570///
571/// The SMS algorithm allows negative values for cycles, so the first cycle
572/// in the schedule is the smallest cycle value.
574private:
575 /// Map from execution cycle to instructions.
576 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
577
578 /// Map from instruction to execution cycle.
579 std::map<SUnit *, int> InstrToCycle;
580
581 /// Keep track of the first cycle value in the schedule. It starts
582 /// as zero, but the algorithm allows negative values.
583 int FirstCycle = 0;
584
585 /// Keep track of the last cycle value in the schedule.
586 int LastCycle = 0;
587
588 /// The initiation interval (II) for the schedule.
589 int InitiationInterval = 0;
590
591 /// Target machine information.
592 const TargetSubtargetInfo &ST;
593
594 /// Virtual register information.
596
597 ResourceManager ProcItinResources;
598
599public:
601 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
602 ProcItinResources(&ST, DAG) {}
603
604 void reset() {
605 ScheduledInstrs.clear();
606 InstrToCycle.clear();
607 FirstCycle = 0;
608 LastCycle = 0;
609 InitiationInterval = 0;
610 }
611
612 /// Set the initiation interval for this schedule.
614 InitiationInterval = ii;
615 ProcItinResources.init(ii);
616 }
617
618 /// Return the initiation interval for this schedule.
619 int getInitiationInterval() const { return InitiationInterval; }
620
621 /// Return the first cycle in the completed schedule. This
622 /// can be a negative value.
623 int getFirstCycle() const { return FirstCycle; }
624
625 /// Return the last cycle in the finalized schedule.
626 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
627
628 /// Return the cycle of the earliest scheduled instruction in the dependence
629 /// chain.
630 int earliestCycleInChain(const SDep &Dep);
631
632 /// Return the cycle of the latest scheduled instruction in the dependence
633 /// chain.
634 int latestCycleInChain(const SDep &Dep);
635
636 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
637 SwingSchedulerDAG *DAG);
638 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
639
640 /// Iterators for the cycle to instruction map.
644
645 /// Return true if the instruction is scheduled at the specified stage.
646 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
647 return (stageScheduled(SU) == (int)StageNum);
648 }
649
650 /// Return the stage for a scheduled instruction. Return -1 if
651 /// the instruction has not been scheduled.
652 int stageScheduled(SUnit *SU) const {
653 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
654 if (it == InstrToCycle.end())
655 return -1;
656 return (it->second - FirstCycle) / InitiationInterval;
657 }
658
659 /// Return the cycle for a scheduled instruction. This function normalizes
660 /// the first cycle to be 0.
661 unsigned cycleScheduled(SUnit *SU) const {
662 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
663 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
664 return (it->second - FirstCycle) % InitiationInterval;
665 }
666
667 /// Return the maximum stage count needed for this schedule.
668 unsigned getMaxStageCount() {
669 return (LastCycle - FirstCycle) / InitiationInterval;
670 }
671
672 /// Return the instructions that are scheduled at the specified cycle.
673 std::deque<SUnit *> &getInstructions(int cycle) {
674 return ScheduledInstrs[cycle];
675 }
676
680
681 std::deque<SUnit *>
683 const std::deque<SUnit *> &Instrs) const;
684
685 bool
690 void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
691 std::deque<SUnit *> &Insts) const;
692 bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const;
694 MachineOperand &MO) const;
695
697 SwingSchedulerDAG *DAG) const;
698 void print(raw_ostream &os) const;
699 void dump() const;
700};
701
702} // end namespace llvm
703
704#endif // LLVM_CODEGEN_MACHINEPIPELINER_H
unsigned const MachineRegisterInfo * MRI
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
unsigned Reg
uint64_t IntrinsicInst * II
#define P(N)
PowerPC VSX FMA Mutation
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
Value * RHS
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:392
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Generic base class for all target subtargets.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isPHI() const
MachineOperand class - Representation of each machine instruction operand.
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFunction * MF
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
const InstrItineraryData * InstrItins
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
SetVector< SUnit * >::const_iterator iterator
void print(raw_ostream &os) const
bool isExceedSU(SUnit *SU)
void insert(iterator S, iterator E)
iterator begin()
void setRecMII(unsigned mii)
void computeNodeSetInfo(SwingSchedulerDAG *SSD)
Summarize node functions for the entire node set.
unsigned getMaxDepth()
unsigned count(SUnit *SU) const
NodeSet()=default
void setColocate(unsigned c)
unsigned getLatency()
NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
bool operator>(const NodeSet &RHS) const
Sort the node sets by importance.
int compareRecMII(NodeSet &RHS)
unsigned size() const
bool operator!=(const NodeSet &RHS) const
bool insert(SUnit *SU)
LLVM_DUMP_METHOD void dump() const
bool operator==(const NodeSet &RHS) const
bool remove_if(UnaryPredicate P)
bool empty() const
void setExceedPressure(SUnit *SU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:504
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, SwingSchedulerDAG *DAG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
A ScheduleDAG for scheduling lists of MachineInstr.
const MachineLoopInfo * MLI
Mutate the DAG as a postpass after normal DAG building.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:720
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:581
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:237
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:70
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true) const
Return true for an order or output dependence that is loop carried potentially.
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool hasNewSchedule()
Return true if the loop kernel has been scheduled.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, const RegisterClassInfo &rci, unsigned II, TargetInstrInfo::PipelinerLoopInfo *PLI)
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
static bool classof(const ScheduleDAGInstrs *DAG)
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
std::set< NodeId > NodeSet
Definition: RDFGraph.h:551
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeMachinePipelinerPass(PassRegistry &)
cl::opt< bool > SwpEnableCopyToPhi
static const int DefaultProcResSize
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:121
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:256
Cache the target analysis information about the loop.
SmallVector< MachineOperand, 4 > BrCond
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo