LLVM 23.0.0git
MachinePipeliner.h
Go to the documentation of this file.
1//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10//
11// Software pipelining (SWP) is an instruction scheduling technique for loops
12// that overlap loop iterations and exploits ILP via a compiler transformation.
13//
14// Swing Modulo Scheduling is an implementation of software pipelining
15// that generates schedules that are near optimal in terms of initiation
16// interval, register requirements, and stage count. See the papers:
17//
18// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20// Conference on Parallel Architectures and Compilation Techiniques.
21//
22// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24// Transactions on Computers, Vol. 50, No. 3, 2001.
25//
26// "An Implementation of Swing Modulo Scheduling With Extensions for
27// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28// Urbana-Champaign, 2005.
29//
30//
31// The SMS algorithm consists of three main steps after computing the minimal
32// initiation interval (MII).
33// 1) Analyze the dependence graph and compute information about each
34// instruction in the graph.
35// 2) Order the nodes (instructions) by priority based upon the heuristics
36// described in the algorithm.
37// 3) Attempt to schedule the nodes in the specified order using the MII.
38//
39//===----------------------------------------------------------------------===//
40#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
41#define LLVM_CODEGEN_MACHINEPIPELINER_H
42
43#include "llvm/ADT/STLExtras.h"
44#include "llvm/ADT/SetVector.h"
54
55#include <deque>
56
57namespace llvm {
58
59class AAResults;
60class NodeSet;
61class SMSchedule;
62
65
66/// The main class in the implementation of the target independent
67/// software pipeliner pass.
69public:
70 MachineFunction *MF = nullptr;
72 const MachineLoopInfo *MLI = nullptr;
73 const MachineDominatorTree *MDT = nullptr;
75 const TargetInstrInfo *TII = nullptr;
77 bool disabledByPragma = false;
78 unsigned II_setByPragma = 0;
79
80#ifndef NDEBUG
81 static int NumTries;
82#endif
83
84 /// Cache the target analysis information about the loop.
85 struct LoopInfo {
91 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
92 nullptr;
93 };
95
96 static char ID;
97
99
101
102 void getAnalysisUsage(AnalysisUsage &AU) const override;
103
104private:
105 void preprocessPhiNodes(MachineBasicBlock &B);
106 bool canPipelineLoop(MachineLoop &L);
107 bool scheduleLoop(MachineLoop &L);
108 bool swingModuloScheduler(MachineLoop &L);
109 void setPragmaPipelineOptions(MachineLoop &L);
110 bool runWindowScheduler(MachineLoop &L);
111 bool useSwingModuloScheduler();
112 bool useWindowScheduler(bool Changed);
113};
114
115/// Represents a dependence between two instruction.
117 SUnit *Dst = nullptr;
118 SDep Pred;
119 unsigned Distance = 0;
120 bool IsValidationOnly = false;
121
122public:
123 /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
124 /// \p Dep in the original DAG. This pair has no information about the
125 /// direction of the edge, so we need to pass an additional argument \p
126 /// IsSucc.
127 SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc,
128 bool IsValidationOnly)
129 : Dst(PredOrSucc), Pred(Dep), Distance(0u),
130 IsValidationOnly(IsValidationOnly) {
131 SUnit *Src = Dep.getSUnit();
132
133 if (IsSucc) {
134 std::swap(Src, Dst);
135 Pred.setSUnit(Src);
136 }
137
138 // An anti-dependence to PHI means loop-carried dependence.
139 if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) {
140 Distance = 1;
141 std::swap(Src, Dst);
142 auto Reg = Pred.getReg();
143 Pred = SDep(Src, SDep::Kind::Data, Reg);
144 }
145 }
146
147 /// Returns the SUnit from which the edge comes (source node).
148 SUnit *getSrc() const { return Pred.getSUnit(); }
149
150 /// Returns the SUnit to which the edge points (destination node).
151 SUnit *getDst() const { return Dst; }
152
153 /// Returns the latency value for the edge.
154 unsigned getLatency() const { return Pred.getLatency(); }
155
156 /// Sets the latency for the edge.
157 void setLatency(unsigned Latency) { Pred.setLatency(Latency); }
158
159 /// Returns the distance value for the edge.
160 unsigned getDistance() const { return Distance; }
161
162 /// Sets the distance value for the edge.
163 void setDistance(unsigned D) { Distance = D; }
164
165 /// Returns the register associated with the edge.
166 Register getReg() const { return Pred.getReg(); }
167
168 /// Returns true if the edge represents anti dependence.
169 bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; }
170
171 /// Returns true if the edge represents output dependence.
172 bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; }
173
174 /// Returns true if the edge represents a dependence that is not data, anti or
175 /// output dependence.
176 bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; }
177
178 /// Returns true if the edge represents unknown scheduling barrier.
179 bool isBarrier() const { return Pred.isBarrier(); }
180
181 /// Returns true if the edge represents an artificial dependence.
182 bool isArtificial() const { return Pred.isArtificial(); }
183
184 /// Tests if this is a Data dependence that is associated with a register.
185 bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); }
186
187 /// Returns true for DDG nodes that we ignore when computing the cost
188 /// functions. We ignore the back-edge recurrence in order to avoid unbounded
189 /// recursion in the calculation of the ASAP, ALAP, etc functions.
190 bool ignoreDependence(bool IgnoreAnti) const;
191
192 /// Returns true if this edge is intended to be used only for validating the
193 /// schedule.
194 bool isValidationOnly() const { return IsValidationOnly; }
195};
196
197/// Represents loop-carried dependencies. Because SwingSchedulerDAG doesn't
198/// assume cycle dependencies as the name suggests, such dependencies must be
199/// handled separately. After DAG construction is finished, these dependencies
200/// are added to SwingSchedulerDDG.
201/// TODO: Also handle output-dependencies introduced by physical registers.
205
207
209 auto Ite = OrderDeps.find(Key);
210 if (Ite == OrderDeps.end())
211 return nullptr;
212 return &Ite->second;
213 }
214
215 /// Adds some edges to the original DAG that correspond to loop-carried
216 /// dependencies. Historically, loop-carried edges are represented by using
217 /// non-loop-carried edges in the original DAG. This function appends such
218 /// edges to preserve the previous behavior.
219 void modifySUnits(std::vector<SUnit> &SUnits, const TargetInstrInfo *TII);
220
221 void dump(SUnit *SU, const TargetRegisterInfo *TRI,
222 const MachineRegisterInfo *MRI) const;
223};
224
225/// This class provides APIs to retrieve edges from/to an SUnit node, with a
226/// particular focus on loop-carried dependencies. Since SUnit is not designed
227/// to represent such edges, handling them directly using its APIs has required
228/// non-trivial logic in the past. This class serves as a wrapper around SUnit,
229/// offering a simpler interface for managing these dependencies.
232
233 struct SwingSchedulerDDGEdges {
234 EdgesType Preds;
235 EdgesType Succs;
236
237 /// This field is a subset of ValidationOnlyEdges. These edges are used only
238 /// by specific heuristics, mainly for cycle detection. Although they are
239 /// unnecessary in theory (i.e., ignoring them should still yield a valid
240 /// schedule), they are retained to preserve the existing behavior. Since we
241 /// only need which extra edges exist from a given SUnit, we only store the
242 /// destination SUnits.
243 SmallVector<SUnit *, 4> ExtraSuccs;
244 };
245
246 void initEdges(SUnit *SU);
247
248 SUnit *EntrySU;
249 SUnit *ExitSU;
250
251 std::vector<SwingSchedulerDDGEdges> EdgesVec;
252 SwingSchedulerDDGEdges EntrySUEdges;
253 SwingSchedulerDDGEdges ExitSUEdges;
254
255 /// Edges that are used only when validating the schedule. These edges are
256 /// not considered to drive the optimization heuristics.
257 SmallVector<SwingSchedulerDDGEdge, 8> ValidationOnlyEdges;
258
259 /// Adds a NON-validation-only edge to the DDG. Assumes to be called only by
260 /// the ctor.
261 void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
262
263 SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
264 const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
265
266public:
267 SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU,
268 const LoopCarriedEdges &LCE);
269
270 const EdgesType &getInEdges(const SUnit *SU) const;
271
272 const EdgesType &getOutEdges(const SUnit *SU) const;
273
275
276 bool isValidSchedule(const SMSchedule &Schedule) const;
277};
278
279/// This class builds the dependence graph for the instructions in a loop,
280/// and attempts to schedule the instructions using the SMS algorithm.
282 MachinePipeliner &Pass;
283
284 std::unique_ptr<SwingSchedulerDDG> DDG;
285
286 /// The minimum initiation interval between iterations for this schedule.
287 unsigned MII = 0;
288 /// The maximum initiation interval between iterations for this schedule.
289 unsigned MAX_II = 0;
290 /// Set to true if a valid pipelined schedule is found for the loop.
291 bool Scheduled = false;
292 MachineLoop &Loop;
293 LiveIntervals &LIS;
294 const RegisterClassInfo &RegClassInfo;
295 unsigned II_setByPragma = 0;
296 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
297
298 /// A topological ordering of the SUnits, which is needed for changing
299 /// dependences and iterating over the SUnits.
301
302 struct NodeInfo {
303 int ASAP = 0;
304 int ALAP = 0;
305 int ZeroLatencyDepth = 0;
306 int ZeroLatencyHeight = 0;
307
308 NodeInfo() = default;
309 };
310 /// Computed properties for each node in the graph.
311 std::vector<NodeInfo> ScheduleInfo;
312
313 enum OrderKind { BottomUp = 0, TopDown = 1 };
314 /// Computed node ordering for scheduling.
315 SetVector<SUnit *> NodeOrder;
316
317 using NodeSetType = SmallVector<NodeSet, 8>;
318 using ValueMapTy = DenseMap<unsigned, unsigned>;
319 using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
321
322 /// Instructions to change when emitting the final schedule.
324
325 /// We may create a new instruction, so remember it because it
326 /// must be deleted when the pass is finished.
328
329 /// Ordered list of DAG postprocessing steps.
330 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
331
332 /// Used to compute single-iteration dependencies (i.e., buildSchedGraph).
333 AliasAnalysis *AA;
334
335 /// Used to compute loop-carried dependencies (i.e.,
336 /// addLoopCarriedDependences).
337 BatchAAResults BAA;
338
339 /// Helper class to implement Johnson's circuit finding algorithm.
340 class Circuits {
341 std::vector<SUnit> &SUnits;
342 SetVector<SUnit *> Stack;
343 BitVector Blocked;
346 // Node to Index from ScheduleDAGTopologicalSort
347 std::vector<int> *Node2Idx;
348 unsigned NumPaths = 0u;
349 static unsigned MaxPaths;
350
351 public:
352 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
353 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
354 Node2Idx = new std::vector<int>(SUs.size());
355 unsigned Idx = 0;
356 for (const auto &NodeNum : Topo)
357 Node2Idx->at(NodeNum) = Idx++;
358 }
359 Circuits &operator=(const Circuits &other) = delete;
360 Circuits(const Circuits &other) = delete;
361 ~Circuits() { delete Node2Idx; }
362
363 /// Reset the data structures used in the circuit algorithm.
364 void reset() {
365 Stack.clear();
366 Blocked.reset();
367 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
368 NumPaths = 0;
369 }
370
371 void createAdjacencyStructure(SwingSchedulerDDG *DDG);
372 bool circuit(int V, int S, NodeSetType &NodeSets,
373 const SwingSchedulerDAG *DAG, bool HasBackedge = false);
374 void unblock(int U);
375 };
376
377 struct CopyToPhiMutation : public ScheduleDAGMutation {
378 void apply(ScheduleDAGInstrs *DAG) override;
379 };
380
381public:
383 const RegisterClassInfo &rci, unsigned II,
385 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
386 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
387 Topo(SUnits, &ExitSU), AA(AA), BAA(*AA) {
388 P.MF->getSubtarget().getSMSMutations(Mutations);
390 Mutations.push_back(std::make_unique<CopyToPhiMutation>());
391 BAA.enableCrossIterationMode();
392 }
393
394 void schedule() override;
395 void finishBlock() override;
396
397 /// Return true if the loop kernel has been scheduled.
398 bool hasNewSchedule() { return Scheduled; }
399
400 /// Return the earliest time an instruction may be scheduled.
401 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
402
403 /// Return the latest time an instruction my be scheduled.
404 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
405
406 /// The mobility function, which the number of slots in which
407 /// an instruction may be scheduled.
408 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
409
410 /// The depth, in the dependence graph, for a node.
411 unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
412
413 /// The maximum unweighted length of a path from an arbitrary node to the
414 /// given node in which each edge has latency 0
416 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
417 }
418
419 /// The height, in the dependence graph, for a node.
420 unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
421
422 /// The maximum unweighted length of a path from the given node to an
423 /// arbitrary node in which each edge has latency 0
425 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
426 }
427
428 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
429
430 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
431
432 /// Return the new base register that was stored away for the changed
433 /// instruction.
436 InstrChanges.find(SU);
437 if (It != InstrChanges.end())
438 return It->second.first;
439 return Register();
440 }
441
442 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
443 Mutations.push_back(std::move(Mutation));
444 }
445
446 static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
447
448 const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
449
450 bool mayOverlapInLaterIter(const MachineInstr *BaseMI,
451 const MachineInstr *OtherMI) const;
452
453private:
454 LoopCarriedEdges addLoopCarriedDependences();
455 void updatePhiDependences();
456 void changeDependences();
457 unsigned calculateResMII();
458 unsigned calculateRecMII(NodeSetType &RecNodeSets);
459 void findCircuits(NodeSetType &NodeSets);
460 void fuseRecs(NodeSetType &NodeSets);
461 void removeDuplicateNodes(NodeSetType &NodeSets);
462 void computeNodeFunctions(NodeSetType &NodeSets);
463 void registerPressureFilter(NodeSetType &NodeSets);
464 void colocateNodeSets(NodeSetType &NodeSets);
465 void checkNodeSets(NodeSetType &NodeSets);
466 void groupRemainingNodes(NodeSetType &NodeSets);
467 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
468 SetVector<SUnit *> &NodesAdded);
469 void computeNodeOrder(NodeSetType &NodeSets);
470 void checkValidNodeOrder(const NodeSetType &Circuits) const;
471 bool schedulePipeline(SMSchedule &Schedule);
472 bool computeDelta(const MachineInstr &MI, int &Delta) const;
473 MachineInstr *findDefInLoop(Register Reg);
474 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
475 unsigned &OffsetPos, Register &NewBase,
476 int64_t &NewOffset);
477 void postProcessDAG();
478 /// Set the Minimum Initiation Interval for this schedule attempt.
479 void setMII(unsigned ResMII, unsigned RecMII);
480 /// Set the Maximum Initiation Interval for this schedule attempt.
481 void setMAX_II();
482};
483
484/// A NodeSet contains a set of SUnit DAG nodes with additional information
485/// that assigns a priority to the set.
486class NodeSet {
487 SetVector<SUnit *> Nodes;
488 bool HasRecurrence = false;
489 unsigned RecMII = 0;
490 int MaxMOV = 0;
491 unsigned MaxDepth = 0;
492 unsigned Colocate = 0;
493 SUnit *ExceedPressure = nullptr;
494 unsigned Latency = 0;
495
496public:
498
499 NodeSet() = default;
501 : Nodes(S, E), HasRecurrence(true) {
502 // Calculate the latency of this node set.
503 // Example to demonstrate the calculation:
504 // Given: N0 -> N1 -> N2 -> N0
505 // Edges:
506 // (N0 -> N1, 3)
507 // (N0 -> N1, 5)
508 // (N1 -> N2, 2)
509 // (N2 -> N0, 1)
510 // The total latency which is a lower bound of the recurrence MII is the
511 // longest path from N0 back to N0 given only the edges of this node set.
512 // In this example, the latency is: 5 + 2 + 1 = 8.
513 //
514 // Hold a map from each SUnit in the circle to the maximum distance from the
515 // source node by only considering the nodes.
516 const SwingSchedulerDDG *DDG = DAG->getDDG();
517 DenseMap<SUnit *, unsigned> SUnitToDistance;
518 for (auto *Node : Nodes)
519 SUnitToDistance[Node] = 0;
520
521 for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
522 SUnit *U = Nodes[I - 1];
523 SUnit *V = Nodes[I % Nodes.size()];
524 for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) {
525 SUnit *SuccSUnit = Succ.getDst();
526 if (V != SuccSUnit)
527 continue;
528 unsigned &DU = SUnitToDistance[U];
529 unsigned &DV = SUnitToDistance[V];
530 if (DU + Succ.getLatency() > DV)
531 DV = DU + Succ.getLatency();
532 }
533 }
534 // Handle a back-edge in loop carried dependencies
535 SUnit *FirstNode = Nodes[0];
536 SUnit *LastNode = Nodes[Nodes.size() - 1];
537
538 for (SUnit *SU : DDG->getExtraOutEdges(LastNode)) {
539 // If we have an order dep that is potentially loop carried then a
540 // back-edge exists between the last node and the first node in extra
541 // edges. Handle it manually by adding 1 to the distance of the last node.
542 if (SU != FirstNode)
543 continue;
544 unsigned &First = SUnitToDistance[FirstNode];
545 unsigned Last = SUnitToDistance[LastNode];
546 First = std::max(First, Last + 1);
547 }
548
549 // The latency is the distance from the source node to itself.
550 Latency = SUnitToDistance[Nodes.front()];
551 }
552
553 bool insert(SUnit *SU) { return Nodes.insert(SU); }
554
555 void insert(iterator S, iterator E) { Nodes.insert(S, E); }
556
557 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
558 return Nodes.remove_if(P);
559 }
560
561 unsigned count(SUnit *SU) const { return Nodes.count(SU); }
562
563 bool hasRecurrence() { return HasRecurrence; };
564
565 unsigned size() const { return Nodes.size(); }
566
567 bool empty() const { return Nodes.empty(); }
568
569 SUnit *getNode(unsigned i) const { return Nodes[i]; };
570
571 void setRecMII(unsigned mii) { RecMII = mii; };
572
573 void setColocate(unsigned c) { Colocate = c; };
574
575 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
576
577 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
578
579 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
580
581 int getRecMII() { return RecMII; }
582
583 /// Summarize node functions for the entire node set.
585 for (SUnit *SU : *this) {
586 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
587 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
588 }
589 }
590
591 unsigned getLatency() { return Latency; }
592
593 unsigned getMaxDepth() { return MaxDepth; }
594
595 void clear() {
596 Nodes.clear();
597 RecMII = 0;
598 HasRecurrence = false;
599 MaxMOV = 0;
600 MaxDepth = 0;
601 Colocate = 0;
602 ExceedPressure = nullptr;
603 }
604
605 operator SetVector<SUnit *> &() { return Nodes; }
606
607 /// Sort the node sets by importance. First, rank them by recurrence MII,
608 /// then by mobility (least mobile done first), and finally by depth.
609 /// Each node set may contain a colocate value which is used as the first
610 /// tie breaker, if it's set.
611 bool operator>(const NodeSet &RHS) const {
612 if (RecMII == RHS.RecMII) {
613 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
614 return Colocate < RHS.Colocate;
615 if (MaxMOV == RHS.MaxMOV)
616 return MaxDepth > RHS.MaxDepth;
617 return MaxMOV < RHS.MaxMOV;
618 }
619 return RecMII > RHS.RecMII;
620 }
621
622 bool operator==(const NodeSet &RHS) const {
623 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
624 MaxDepth == RHS.MaxDepth;
625 }
626
627 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
628
629 iterator begin() { return Nodes.begin(); }
630 iterator end() { return Nodes.end(); }
631 void print(raw_ostream &os) const;
632
633#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
634 LLVM_DUMP_METHOD void dump() const;
635#endif
636};
637
638// 16 was selected based on the number of ProcResource kinds for all
639// existing Subtargets, so that SmallVector don't need to resize too often.
640static const int DefaultProcResSize = 16;
641
643private:
644 const MCSubtargetInfo *STI;
645 const MCSchedModel &SM;
646 const TargetSubtargetInfo *ST;
647 const TargetInstrInfo *TII;
649 const bool UseDFA;
650 /// DFA resources for each slot
652 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
653 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
655 /// The number of scheduled micro operations for each slot. Micro operations
656 /// are assumed to be scheduled one per cycle, starting with the cycle in
657 /// which the instruction is scheduled.
658 llvm::SmallVector<int> NumScheduledMops;
659 /// Each processor resource is associated with a so-called processor resource
660 /// mask. This vector allows to correlate processor resource IDs with
661 /// processor resource masks. There is exactly one element per each processor
662 /// resource declared by the scheduling model.
664 int InitiationInterval = 0;
665 /// The number of micro operations that can be scheduled at a cycle.
666 int IssueWidth;
667
668 int calculateResMIIDFA() const;
669 /// Check if MRT is overbooked
670 bool isOverbooked() const;
671 /// Reserve resources on MRT
672 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
673 /// Unreserve resources on MRT
674 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
675
676 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
677 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
678 /// II).
679 int positiveModulo(int Dividend, int Divisor) const {
680 assert(Divisor > 0);
681 int R = Dividend % Divisor;
682 if (R < 0)
683 R += Divisor;
684 return R;
685 }
686
687#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
688 LLVM_DUMP_METHOD void dumpMRT() const;
689#endif
690
691public:
693 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
694 DAG(DAG), UseDFA(ST->useDFAforSMS()),
695 ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
696 IssueWidth(SM.IssueWidth) {
697 initProcResourceVectors(SM, ProcResourceMasks);
698 if (IssueWidth <= 0)
699 // If IssueWidth is not specified, set a sufficiently large value
700 IssueWidth = 100;
701 if (SwpForceIssueWidth > 0)
702 IssueWidth = SwpForceIssueWidth;
703 }
704
705 void initProcResourceVectors(const MCSchedModel &SM,
707
708 /// Check if the resources occupied by a machine instruction are available
709 /// in the current state.
710 bool canReserveResources(SUnit &SU, int Cycle);
711
712 /// Reserve the resources occupied by a machine instruction and change the
713 /// current state to reflect that change.
714 void reserveResources(SUnit &SU, int Cycle);
715
716 int calculateResMII() const;
717
718 /// Initialize resources with the initiation interval II.
719 void init(int II);
720};
721
722/// This class represents the scheduled code. The main data structure is a
723/// map from scheduled cycle to instructions. During scheduling, the
724/// data structure explicitly represents all stages/iterations. When
725/// the algorithm finshes, the schedule is collapsed into a single stage,
726/// which represents instructions from different loop iterations.
727///
728/// The SMS algorithm allows negative values for cycles, so the first cycle
729/// in the schedule is the smallest cycle value.
731private:
732 /// Map from execution cycle to instructions.
733 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
734
735 /// Map from instruction to execution cycle.
736 std::map<SUnit *, int> InstrToCycle;
737
738 /// Keep track of the first cycle value in the schedule. It starts
739 /// as zero, but the algorithm allows negative values.
740 int FirstCycle = 0;
741
742 /// Keep track of the last cycle value in the schedule.
743 int LastCycle = 0;
744
745 /// The initiation interval (II) for the schedule.
746 int InitiationInterval = 0;
747
748 /// Target machine information.
749 const TargetSubtargetInfo &ST;
750
751 /// Virtual register information.
753
754 ResourceManager ProcItinResources;
755
756public:
758 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
759 ProcItinResources(&ST, DAG) {}
760
761 void reset() {
762 ScheduledInstrs.clear();
763 InstrToCycle.clear();
764 FirstCycle = 0;
765 LastCycle = 0;
766 InitiationInterval = 0;
767 }
768
769 /// Set the initiation interval for this schedule.
771 InitiationInterval = ii;
772 ProcItinResources.init(ii);
773 }
774
775 /// Return the initiation interval for this schedule.
776 int getInitiationInterval() const { return InitiationInterval; }
777
778 /// Return the first cycle in the completed schedule. This
779 /// can be a negative value.
780 int getFirstCycle() const { return FirstCycle; }
781
782 /// Return the last cycle in the finalized schedule.
783 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
784
785 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
786 SwingSchedulerDAG *DAG);
787 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
788
789 /// Iterators for the cycle to instruction map.
793
794 /// Return true if the instruction is scheduled at the specified stage.
795 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
796 return (stageScheduled(SU) == (int)StageNum);
797 }
798
799 /// Return the stage for a scheduled instruction. Return -1 if
800 /// the instruction has not been scheduled.
801 int stageScheduled(SUnit *SU) const {
802 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
803 if (it == InstrToCycle.end())
804 return -1;
805 return (it->second - FirstCycle) / InitiationInterval;
806 }
807
808 /// Return the cycle for a scheduled instruction. This function normalizes
809 /// the first cycle to be 0.
810 unsigned cycleScheduled(SUnit *SU) const {
811 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
812 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
813 return (it->second - FirstCycle) % InitiationInterval;
814 }
815
816 /// Return the maximum stage count needed for this schedule.
817 unsigned getMaxStageCount() {
818 return (LastCycle - FirstCycle) / InitiationInterval;
819 }
820
821 /// Return the instructions that are scheduled at the specified cycle.
822 std::deque<SUnit *> &getInstructions(int cycle) {
823 return ScheduledInstrs[cycle];
824 }
825
827 computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
829
830 std::deque<SUnit *>
831 reorderInstructions(const SwingSchedulerDAG *SSD,
832 const std::deque<SUnit *> &Instrs) const;
833
834 bool
835 normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
837 bool isValidSchedule(SwingSchedulerDAG *SSD);
838 void finalizeSchedule(SwingSchedulerDAG *SSD);
839 void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
840 std::deque<SUnit *> &Insts) const;
841 bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const;
842 bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def,
843 MachineOperand &MO) const;
844
845 bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU,
846 const SwingSchedulerDDG *DDG) const;
847 void print(raw_ostream &os) const;
848 void dump() const;
849};
850
851} // end namespace llvm
852
853#endif // LLVM_CODEGEN_MACHINEPIPELINER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr unsigned SM(unsigned Version)
uint64_t IntrinsicInst * II
#define P(N)
PowerPC VSX FMA Mutation
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
Value * RHS
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
BitVector & reset()
Definition BitVector.h:411
iterator end()
Definition DenseMap.h:81
Itinerary data supplied by a subtarget to be used by a target.
Generic base class for all target subtargets.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
const InstrItineraryData * InstrItins
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
SetVector< SUnit * >::const_iterator iterator
bool isExceedSU(SUnit *SU)
void insert(iterator S, iterator E)
void setRecMII(unsigned mii)
void computeNodeSetInfo(SwingSchedulerDAG *SSD)
Summarize node functions for the entire node set.
unsigned getMaxDepth()
unsigned count(SUnit *SU) const
NodeSet()=default
void setColocate(unsigned c)
unsigned getLatency()
NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
bool operator>(const NodeSet &RHS) const
Sort the node sets by importance.
int compareRecMII(NodeSet &RHS)
unsigned size() const
bool operator!=(const NodeSet &RHS) const
bool insert(SUnit *SU)
bool operator==(const NodeSet &RHS) const
bool remove_if(UnaryPredicate P)
bool empty() const
void setExceedPressure(SUnit *SU)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
@ Output
A register output-dependence (aka WAW).
Definition ScheduleDAG.h:57
@ Order
Any other ordering dependency.
Definition ScheduleDAG.h:58
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
This class represents the scheduled code.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
DenseMap< int, std::deque< SUnit * > >::iterator sched_iterator
Iterators for the cycle to instruction map.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
A ScheduleDAG for scheduling lists of MachineInstr.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
const MachineLoopInfo * MLI
Mutate the DAG as a postpass after normal DAG building.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
ScheduleDAG & operator=(const ScheduleDAG &)=delete
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
typename vector_type::const_iterator const_iterator
Definition SetVector.h:73
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool hasNewSchedule()
Return true if the loop kernel has been scheduled.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, const RegisterClassInfo &rci, unsigned II, TargetInstrInfo::PipelinerLoopInfo *PLI, AliasAnalysis *AA)
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
static bool classof(const ScheduleDAGInstrs *DAG)
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
Register getReg() const
Returns the register associated with the edge.
void setDistance(unsigned D)
Sets the distance value for the edge.
bool isBarrier() const
Returns true if the edge represents unknown scheduling barrier.
void setLatency(unsigned Latency)
Sets the latency for the edge.
SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc, bool IsValidationOnly)
Creates an edge corresponding to an edge represented by PredOrSucc and Dep in the original DAG.
bool isAntiDep() const
Returns true if the edge represents anti dependence.
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
bool isArtificial() const
Returns true if the edge represents an artificial dependence.
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
bool isOrderDep() const
Returns true if the edge represents a dependence that is not data, anti or output dependence.
unsigned getLatency() const
Returns the latency value for the edge.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
bool isValidationOnly() const
Returns true if this edge is intended to be used only for validating the schedule.
unsigned getDistance() const
Returns the distance value for the edge.
bool isOutputDep() const
Returns true if the edge represents output dependence.
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
ArrayRef< SUnit * > getExtraOutEdges(const SUnit *SU) const
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
cl::opt< bool > SwpEnableCopyToPhi
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
static const int DefaultProcResSize
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
DenseMap< SUnit *, OrderDep > OrderDepsType
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
Machine model for scheduling, bundling, and heuristics.
Definition MCSchedule.h:258
Cache the target analysis information about the loop.
SmallVector< MachineOperand, 4 > BrCond
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo