51#define DEBUG_TYPE "pipeliner"
55 "Number of loops that we attempt to use window scheduling");
57 "Number of times that we run list schedule in the window scheduling");
59 "Number of loops that we successfully use window scheduling");
61 "Window scheduling abort due to the failure of the II analysis");
64 WindowSearchNum(
"window-search-num",
65 cl::desc(
"The number of searches per loop in the window "
66 "algorithm. 0 means no search number limit."),
70 "window-search-ratio",
71 cl::desc(
"The ratio of searches per loop in the window algorithm. 100 "
72 "means search all positions in the loop, while 0 means not "
73 "performing any search."),
79 "The coefficient used when initializing II in the window algorithm."),
83 "window-region-limit",
85 "The lower limit of the scheduling region in the window algorithm."),
90 cl::desc(
"The lower limit of the difference between best II and base II in "
91 "the window algorithm. If the difference is smaller than "
92 "this lower limit, window scheduling will not be performed."),
100 cl::desc(
"The upper limit of II in the window algorithm."),
105 Subtarget(&MF->getSubtarget()),
TII(Subtarget->getInstrInfo()),
106 TRI(Subtarget->getRegisterInfo()),
MRI(&MF->getRegInfo()) {
107 TripleDAG = std::unique_ptr<ScheduleDAGInstrs>(
113 LLVM_DEBUG(
dbgs() <<
"The WindowScheduler failed to initialize!\n");
119 ++NumTryWindowSchedule;
125 for (
unsigned Idx : SearchIndexes) {
127 ++NumTryWindowSearch;
132 SchedDAG->startBlock(
MBB);
134 SchedDAG->schedule();
139 LLVM_DEBUG(
dbgs() <<
"Can't find a valid II. Keep searching...\n");
157 <<
" and Best II is " <<
BestII <<
".\n");
166 return OnlyBuildGraph
201 if (PrevUses.
count(Phi.getOperand(0).getReg()))
203 PrevDefs.
insert(Phi.getOperand(0).getReg());
204 for (
unsigned I = 1, E = Phi.getNumOperands();
I != E;
I += 2) {
205 if (PrevDefs.
count(Phi.getOperand(
I).getReg()))
207 PrevUses.
insert(Phi.getOperand(
I).getReg());
212 for (
auto &
MI : *
MBB) {
213 if (
MI.isMetaInstruction() ||
MI.isTerminator())
216 if (IsLoopCarried(
MI)) {
217 LLVM_DEBUG(
dbgs() <<
"Loop carried phis are not supported yet!\n");
226 dbgs() <<
"Boundary MI is not allowed in window scheduling!\n");
229 if (PLI->shouldIgnoreForPipelining(&
MI)) {
230 LLVM_DEBUG(
dbgs() <<
"Special MI defined by target is not allowed in "
231 "window scheduling!\n");
234 for (
auto &Def :
MI.all_defs())
235 if (Def.isReg() && Def.getReg().isPhysical())
239 LLVM_DEBUG(
dbgs() <<
"There are too few MIs in the window region!\n");
279 MI.eraseFromParent();
288 const unsigned DuplicateNum = 3;
297 if (
MI->isMetaInstruction() ||
MI->isTerminator())
301 DefPairs[
MI->getOperand(0).getReg()] = AntiReg;
310 for (
size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) {
312 if (
MI->isPHI() ||
MI->isMetaInstruction() ||
313 (
MI->isTerminator() && Cnt < DuplicateNum - 1))
318 for (
auto MO : NewMI->all_defs())
319 if (MO.isReg() && MO.getReg().isVirtual()) {
322 NewMI->substituteRegister(MO.getReg(), NewDef, 0, *
TRI);
323 NewDefs[MO.getReg()] = NewDef;
326 for (
auto DefRegPair : DefPairs)
327 if (NewMI->readsRegister(DefRegPair.first,
TRI)) {
328 Register NewUse = DefRegPair.second;
356 if (DefPairs.count(NewUse))
357 NewUse = DefPairs[NewUse];
358 NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *
TRI);
361 for (
auto &NewDef : NewDefs)
362 DefPairs[NewDef.first] = NewDef.second;
374 for (
auto &Phi :
MBB->
phis()) {
375 for (
auto DefRegPair : DefPairs)
376 if (Phi.readsRegister(DefRegPair.first,
TRI))
377 Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *
TRI);
387 std::advance(OldPos,
I);
388 auto CurPos =
MI->getIterator();
389 if (CurPos != OldPos) {
397 unsigned SearchRatio) {
402 assert(SearchRatio <= 100 &&
"SearchRatio should be equal or less than 100!");
404 unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1;
406 for (
unsigned Idx = 0;
Idx < MaxIdx;
Idx += Step)
408 return SearchIndexes;
414 for (
auto &SU : DAG.
SUnits)
431 int ExpectCycle = CurCycle;
433 for (
auto &Pred : SU->Preds) {
436 auto *PredMI = Pred.getSUnit()->
getInstr();
438 ExpectCycle = std::max(ExpectCycle, PredCycle + (
int)Pred.getLatency());
442 while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
447 RM.reserveResources(*SU, CurCycle);
487 int MaxStallCycle = 0;
492 for (
auto &Succ : SU->Succs) {
493 if (Succ.isWeak() || Succ.getSUnit() == &
TripleDAG->ExitSU)
496 if (DefCycle + (
int)Succ.getLatency() <= MaxCycle)
501 auto *SuccMI = Succ.getSUnit()->getInstr();
503 if (DefCycle < UseCycle)
506 int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
507 MaxStallCycle = std::max(MaxStallCycle, StallCycle);
510 LLVM_DEBUG(
dbgs() <<
"MaxStallCycle is " << MaxStallCycle <<
".\n");
511 return MaxStallCycle;
523 return MaxCycle + StallCycle + 1;
528 for (
auto &Phi :
MBB->
phis()) {
529 int LateCycle = INT_MAX;
531 for (
auto &Succ : SU->Succs) {
537 auto *SuccMI = Succ.getSUnit()->getInstr();
540 LateCycle = std::min(LateCycle,
Cycle);
546 if (AntiMI->getParent() ==
MBB) {
549 LateCycle = std::min(LateCycle, AntiCycle);
553 if (LateCycle == INT_MAX)
554 LateCycle = (int)(
II - 1);
555 LLVM_DEBUG(
dbgs() <<
"\tCycle range [0, " << LateCycle <<
"] " << Phi);
579 for (
auto *
MI : CycleToMIs[
Cycle])
580 IssueOrder[
MI] = Id++;
605 assert(IssueOrder.count(Pair.first) &&
"Cannot find original MI!");
606 SchedResult.push_back(std::make_tuple(Pair.first, Pair.second,
608 IssueOrder[Pair.first]));
615 [](
const std::tuple<MachineInstr *, int, int, int> &
A,
616 const std::tuple<MachineInstr *, int, int, int> &
B) {
617 return std::get<3>(
A) < std::get<3>(
B);
622 std::vector<MachineInstr *> OrderedInsts;
624 auto *
MI = std::get<0>(
Info);
625 OrderedInsts.push_back(
MI);
626 Cycles[
MI] = std::get<1>(
Info);
627 Stages[
MI] = std::get<2>(
Info);
643 if (!MO.isReg() || MO.getReg() == 0)
655 std::advance(RegionBegin,
Offset);
656 auto RegionEnd = RegionBegin;
657 std::advance(RegionEnd, Num);
675 "Cannot find OriMI in OriMIs!");
683 if (
MI->isMetaInstruction())
689 return Id >= (size_t)
Offset ? 1 : 0;
693 assert(Phi->isPHI() &&
"Expecting PHI!");
695 for (
auto MO : Phi->uses()) {
697 AntiReg = MO.getReg();
698 else if (MO.isMBB() && MO.getMBB() ==
MBB)
unsigned const MachineRegisterInfo * MRI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
static const unsigned MaxDepth
unsigned const TargetRegisterInfo * TRI
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
cl::opt< unsigned > WindowIILimit("window-ii-limit", cl::desc("The upper limit of II in the window algorithm."), cl::Hidden, cl::init(1000))
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
A possibly irreducible generalization of a Loop.
void repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, ArrayRef< Register > OrigRegs)
Update live intervals for instructions in a range of iterators.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndexes * getSlotIndexes() const
Represents a single loop in the control flow graph.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
void push_back(MachineInstr *MI)
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Represents a schedule for a single-block loop.
Wrapper class representing virtual and physical registers.
@ Data
Regular data dependence (aka true-dependence).
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
std::vector< SUnit > SUnits
The scheduling units.
void removeMachineInstrFromMaps(MachineInstr &MI, bool AllowBundled=false)
Removes machine instruction (bundle) MI from the mapping.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
virtual bool enableWindowScheduler() const
True if the subtarget should run WindowScheduler.
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
MachineInstr * getOriMI(MachineInstr *NewMI)
Get the original MI from which the new MI is cloned.
virtual ScheduleDAGInstrs * createMachineScheduler(bool OnlyBuildGraph=false)
Two types of ScheduleDAGs are needed, one for creating dependency graphs only, and the other for list...
unsigned SchedPhiNum
SchedPhiNum records the number of phi in the original MBB, and the scheduling starts with MI after ph...
virtual void preProcess()
Add some related processing before running window scheduling.
MachineSchedContext * Context
virtual void restoreTripleMBB()
Restore the order of MIs in TripleMBB after each list scheduling.
virtual void postProcess()
Add some related processing after running window scheduling.
DenseMap< MachineInstr *, int > OriToCycle
OriToCycle keeps the mappings between the original MI and its issue cycle.
virtual int calculateMaxCycle(ScheduleDAGInstrs &DAG, unsigned Offset)
Calculate MIs execution cycle after list scheduling.
MachineRegisterInfo * MRI
unsigned BestII
BestII and BestOffset record the characteristics of the best scheduling result and are used together ...
void backupMBB()
Back up the MIs in the original MBB and remove them from MBB.
DenseMap< MachineInstr *, MachineInstr * > TriToOri
TriToOri keeps the mappings between the MI clones in TripleMBB and their original MI.
int getEstimatedII(ScheduleDAGInstrs &DAG)
Estimate a II value at which all MIs will be scheduled successfully.
virtual void expand()
Using the scheduling infrastructure to expand the results of window scheduling.
DenseMap< MachineInstr *, int > getIssueOrder(unsigned Offset, unsigned II)
Get the final issue order of all scheduled MIs including phis.
Register getAntiRegister(MachineInstr *Phi)
Gets the register in phi which is generated from the current MBB.
unsigned getOriStage(MachineInstr *OriMI, unsigned Offset)
Get the scheduling stage, where the stage of the new MI is identical to the original MI.
const TargetRegisterInfo * TRI
virtual void updateLiveIntervals()
Update the live intervals for all registers used within MBB.
const TargetSubtargetInfo * Subtarget
std::unique_ptr< ScheduleDAGInstrs > TripleDAG
To innovatively identify the dependencies between MIs across two trips, we construct a DAG for a new ...
unsigned BaseII
BaseII is the II obtained when the window offset is SchedPhiNum.
virtual void schedulePhi(int Offset, unsigned &II)
Phis are scheduled separately after each list scheduling.
virtual unsigned analyseII(ScheduleDAGInstrs &DAG, unsigned Offset)
Analyzes the II value after each list scheduling.
int getOriCycle(MachineInstr *NewMI)
Get the issue cycle of the new MI based on the cycle of the original MI.
unsigned SchedInstrNum
SchedInstrNum records the MIs involved in scheduling in the original MBB, excluding debug instruction...
const TargetInstrInfo * TII
virtual void generateTripleMBB()
Make three copies of the original MBB to generate a new TripleMBB.
iterator_range< MachineBasicBlock::iterator > getScheduleRange(unsigned Offset, unsigned Num)
Gets the iterator range of MIs in the scheduling window.
virtual bool isScheduleValid()
Check whether the final result of window scheduling is valid.
virtual int calculateStallCycle(unsigned Offset, int MaxCycle)
Calculate the stall cycle between two trips after list scheduling.
virtual void updateScheduleResult(unsigned Offset, unsigned II)
Update the scheduling result after each list scheduling.
SmallVector< MachineInstr * > OriMIs
OriMIs keeps the MIs removed from the original MBB.
virtual bool initialize()
Initializes the algorithm and determines if it can be executed.
SmallVector< std::tuple< MachineInstr *, int, int, int >, 256 > SchedResult
SchedResult keeps the result of each list scheduling, and the format of the tuple is <MI pointer,...
virtual SmallVector< unsigned > getSearchIndexes(unsigned SearchNum, unsigned SearchRatio)
Give the folding position in the window algorithm, where different heuristics can be used.
void restoreMBB()
Erase the MIs in current MBB and restore the original MIs.
WindowScheduler(MachineSchedContext *C, MachineLoop &ML)
SmallVector< MachineInstr * > TriMIs
TriMIs keeps the MIs of TripleMBB, which is used to restore TripleMBB.
A range adaptor for a pair of iterators.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const TargetPassConfig * PassConfig