22#define DEBUG_TYPE "pipeliner"
27 cl::desc(
"Swap target blocks of a conditional branch for MVE expander"));
41 unsigned &InitVal,
unsigned &LoopVal) {
42 assert(Phi.isPHI() &&
"Expecting a Phi.");
46 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
48 InitVal = Phi.getOperand(i).getReg();
50 LoopVal = Phi.getOperand(i).getReg();
52 assert(InitVal != 0 && LoopVal != 0 &&
"Unexpected Phi structure.");
57 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59 return Phi.getOperand(i).getReg();
65 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67 return Phi.getOperand(i).getReg();
84 bool PhiIsSwapped =
false;
89 if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
92 if (isLoopCarried(*
MI))
97 MaxDiff = std::max(Diff, MaxDiff);
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
103 generatePipelinedLoop();
106void ModuloScheduleExpander::generatePipelinedLoop() {
119 ValueMapTy *VRMap =
new ValueMapTy[(MaxStageCount + 1) * 2];
124 ValueMapTy *VRMapPhi =
new ValueMapTy[(MaxStageCount + 1) * 2];
131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
139 unsigned StageNum = Schedule.
getStage(CI);
140 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
141 updateInstruction(NewMI,
false, MaxStageCount, StageNum, VRMap);
143 InstrMap[NewMI] = CI;
150 updateInstruction(NewMI,
false, MaxStageCount, 0, VRMap);
152 InstrMap[NewMI] = &
MI;
155 NewKernel = KernelBB;
159 generateExistingPhis(KernelBB, PrologBBs.
back(), KernelBB, KernelBB, VRMap,
160 InstrMap, MaxStageCount, MaxStageCount,
false);
161 generatePhis(KernelBB, PrologBBs.
back(), KernelBB, KernelBB, VRMap, VRMapPhi,
162 InstrMap, MaxStageCount, MaxStageCount,
false);
168 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
173 splitLifetimes(KernelBB, EpilogBBs);
176 removeDeadInstructions(KernelBB, EpilogBBs);
179 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
190 BB->eraseFromParent();
194void ModuloScheduleExpander::generateProlog(
unsigned LastStage,
197 MBBVectorTy &PrologBBs) {
204 for (
unsigned i = 0; i < LastStage; ++i) {
216 for (
int StageNum = i; StageNum >= 0; --StageNum) {
220 if (Schedule.
getStage(&*BBI) == StageNum) {
224 cloneAndChangeInstr(&*BBI, i, (
unsigned)StageNum);
225 updateInstruction(NewMI,
false, i, (
unsigned)StageNum, VRMap);
227 InstrMap[NewMI] = &*BBI;
231 rewritePhiValues(NewBB, i, VRMap, InstrMap);
233 dbgs() <<
"prolog:\n";
252void ModuloScheduleExpander::generateEpilog(
254 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
255 MBBVectorTy &PrologBBs) {
261 assert(!checkBranch &&
"generateEpilog must be able to analyze the branch");
266 if (*LoopExitI == KernelBB)
268 assert(LoopExitI != KernelBB->
succ_end() &&
"Expecting a successor");
278 int EpilogStage = LastStage + 1;
279 for (
unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
287 if (EpilogStart == LoopExitBB)
292 for (
unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
293 for (
auto &BBI : *BB) {
297 if ((
unsigned)Schedule.
getStage(In) == StageNum) {
301 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
303 InstrMap[NewMI] =
In;
307 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
308 InstrMap, LastStage, EpilogStage, i == 1);
309 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
310 InstrMap, LastStage, EpilogStage, i == 1);
314 dbgs() <<
"epilog:\n";
325 assert((OrigBB ==
TBB || OrigBB == FBB) &&
326 "Unable to determine looping branch direction");
332 if (EpilogBBs.size() > 0) {
347 if (O.getParent()->getParent() !=
MBB)
358 if (MO.getParent()->getParent() != BB)
366void ModuloScheduleExpander::generateExistingPhis(
369 unsigned LastStageNum,
unsigned CurStageNum,
bool IsLast) {
373 unsigned PrologStage = 0;
374 unsigned PrevStage = 0;
375 bool InKernel = (LastStageNum == CurStageNum);
377 PrologStage = LastStageNum - 1;
378 PrevStage = CurStageNum;
380 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
381 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
385 BBE = BB->getFirstNonPHI();
389 unsigned InitVal = 0;
390 unsigned LoopVal = 0;
396 unsigned PhiOp2 = LoopVal;
397 if (VRMap[LastStageNum].
count(LoopVal))
398 PhiOp2 = VRMap[LastStageNum][LoopVal];
400 int StageScheduled = Schedule.
getStage(&*BBI);
402 unsigned NumStages = getStagesForReg(Def, CurStageNum);
403 if (NumStages == 0) {
406 unsigned NewReg = VRMap[PrevStage][LoopVal];
407 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
409 if (VRMap[CurStageNum].
count(LoopVal))
410 VRMap[CurStageNum][
Def] = VRMap[CurStageNum][LoopVal];
416 unsigned MaxPhis = PrologStage + 2;
417 if (!InKernel && (
int)PrologStage <= LoopValStage)
418 MaxPhis = std::max((
int)MaxPhis - (
int)LoopValStage, 1);
419 unsigned NumPhis = std::min(NumStages, MaxPhis);
422 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
429 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
434 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
435 StageDiff = StageScheduled - LoopValStage;
436 for (
unsigned np = 0; np < NumPhis; ++np) {
440 if (np > PrologStage || StageScheduled >= (
int)LastStageNum)
443 else if (PrologStage >= AccessStage + StageDiff + np &&
444 VRMap[PrologStage - StageDiff - np].
count(LoopVal) != 0)
445 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
448 else if (PrologStage >= AccessStage + StageDiff + np) {
454 while (InstOp1 && InstOp1->
isPHI() && InstOp1->
getParent() == BB) {
455 int PhiStage = Schedule.
getStage(InstOp1);
456 if ((
int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
461 int PhiOpStage = Schedule.
getStage(InstOp1);
462 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
463 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
464 VRMap[PrologStage - StageAdj - Indirects - np].
count(PhiOp1)) {
465 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
479 bool LoopDefIsPhi = PhiInst && PhiInst->
isPHI();
484 int StageDiffAdj = 0;
485 if (LoopValStage != -1 && StageScheduled > LoopValStage)
486 StageDiffAdj = StageScheduled - LoopValStage;
489 if (np == 0 && PrevStage == LastStageNum &&
490 (StageScheduled != 0 || LoopValStage != 0) &&
491 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
492 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
495 else if (np > 0 && PrevStage == LastStageNum &&
496 VRMap[PrevStage - np + 1].
count(Def))
497 PhiOp2 = VRMap[PrevStage - np + 1][Def];
499 else if (
static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
500 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
501 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
504 else if (VRMap[PrevStage - np].
count(Def) &&
505 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
506 (LoopValStage == StageScheduled)))
507 PhiOp2 = VRMap[PrevStage - np][
Def];
515 if (
static_cast<int>(PrologStage - np) >= StageScheduled) {
516 int LVNumStages = getStagesForPhi(LoopVal);
517 int StageDiff = (StageScheduled - LoopValStage);
518 LVNumStages -= StageDiff;
520 if (LVNumStages > (
int)np && VRMap[CurStageNum].count(LoopVal)) {
522 unsigned ReuseStage = CurStageNum;
523 if (isLoopCarried(*PhiInst))
524 ReuseStage -= LVNumStages;
527 if (VRMap[ReuseStage - np].
count(LoopVal)) {
528 NewReg = VRMap[ReuseStage - np][LoopVal];
530 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
533 VRMap[CurStageNum - np][
Def] = NewReg;
535 if (VRMap[LastStageNum - np - 1].
count(LoopVal))
536 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
538 if (IsLast && np == NumPhis - 1)
544 if (InKernel && StageDiff > 0 &&
545 VRMap[CurStageNum - StageDiff - np].
count(LoopVal))
546 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
554 TII->
get(TargetOpcode::PHI), NewReg);
558 InstrMap[NewPhi] = &*BBI;
563 unsigned PrevReg = 0;
564 if (InKernel && VRMap[PrevStage - np].
count(LoopVal))
565 PrevReg = VRMap[PrevStage - np][LoopVal];
566 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
569 if (VRMap[CurStageNum - np].
count(Def)) {
570 unsigned R = VRMap[CurStageNum - np][
Def];
571 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
578 if (IsLast && np == NumPhis - 1)
586 VRMap[CurStageNum - np][
Def] = NewReg;
589 while (NumPhis++ < NumStages) {
590 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
596 if (NumStages == 0 && IsLast && VRMap[CurStageNum].
count(LoopVal))
604void ModuloScheduleExpander::generatePhis(
607 InstrMapTy &InstrMap,
unsigned LastStageNum,
unsigned CurStageNum,
611 unsigned PrologStage = 0;
612 unsigned PrevStage = 0;
613 unsigned StageDiff = CurStageNum - LastStageNum;
614 bool InKernel = (StageDiff == 0);
616 PrologStage = LastStageNum - 1;
617 PrevStage = CurStageNum;
619 PrologStage = LastStageNum - StageDiff;
620 PrevStage = LastStageNum + StageDiff - 1;
624 BBE = BB->instr_end();
626 for (
unsigned i = 0, e = BBI->getNumOperands(); i !=
e; ++i) {
631 int StageScheduled = Schedule.
getStage(&*BBI);
632 assert(StageScheduled != -1 &&
"Expecting scheduled instruction.");
634 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
638 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
641 if (!InKernel && (
unsigned)StageScheduled > PrologStage)
646 PhiOp2 = VRMap[PrevStage][
Def];
648 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
653 if (NumPhis > PrologStage + 1 - StageScheduled)
654 NumPhis = PrologStage + 1 - StageScheduled;
655 for (
unsigned np = 0; np < NumPhis; ++np) {
678 unsigned PhiOp1 = VRMap[PrologStage][
Def];
679 if (np <= PrologStage)
680 PhiOp1 = VRMap[PrologStage - np][
Def];
682 if (PrevStage == LastStageNum && np == 0)
683 PhiOp2 = VRMap[LastStageNum][
Def];
685 PhiOp2 = VRMapPhi[PrevStage - np][
Def];
693 TII->
get(TargetOpcode::PHI), NewReg);
697 InstrMap[NewPhi] = &*BBI;
702 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
704 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
708 VRMapPhi[PrevStage - np - 1][
Def] = NewReg;
710 VRMapPhi[CurStageNum - np][
Def] = NewReg;
711 if (np == NumPhis - 1)
712 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
715 if (IsLast && np == NumPhis - 1)
727 MBBVectorTy &EpilogBBs) {
735 if (
MI->isInlineAsm()) {
739 bool SawStore =
false;
742 if (!
MI->isSafeToMove(
nullptr, SawStore) && !
MI->isPHI()) {
756 unsigned realUses = 0;
760 if (
U.getParent()->getParent() != BB) {
772 MI++->eraseFromParent();
783 MI.eraseFromParent();
799 MBBVectorTy &EpilogBBs) {
801 for (
auto &
PHI : KernelBB->
phis()) {
808 if (
I->isPHI() &&
I->getParent() == KernelBB) {
814 if (!
MI ||
MI->getParent() != KernelBB ||
MI->isPHI())
818 unsigned SplitReg = 0;
821 if (BBJ.readsRegister(Def,
nullptr)) {
826 TII->
get(TargetOpcode::COPY), SplitReg)
829 BBJ.substituteRegister(Def, SplitReg, 0, *
TRI);
834 for (
auto &
Epilog : EpilogBBs)
836 if (
I.readsRegister(Def,
nullptr))
837 I.substituteRegister(Def, SplitReg, 0, *
TRI);
849 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2)
850 if (
MI.getOperand(i + 1).getMBB() ==
Incoming) {
851 MI.removeOperand(i + 1);
862 MBBVectorTy &PrologBBs,
864 MBBVectorTy &EpilogBBs,
866 assert(PrologBBs.size() == EpilogBBs.size() &&
"Prolog/Epilog mismatch");
873 unsigned MaxIter = PrologBBs.
size() - 1;
874 for (
unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --
j) {
881 std::optional<bool> StaticallyGreater =
883 unsigned numAdded = 0;
884 if (!StaticallyGreater) {
887 }
else if (*StaticallyGreater ==
false) {
889 Prolog->removeSuccessor(LastPro);
894 if (LastPro != LastEpi) {
898 if (LastPro == KernelBB) {
912 I != E && numAdded > 0; ++
I, --numAdded)
913 updateInstruction(&*
I,
false, j, 0, VRMap);
917 LoopInfo->setPreheader(PrologBBs[MaxIter]);
918 LoopInfo->adjustTripCount(-(MaxIter + 1));
924bool ModuloScheduleExpander::computeDelta(
MachineInstr &
MI,
unsigned &Delta) {
928 bool OffsetIsScalable;
933 if (OffsetIsScalable)
936 if (!BaseOp->
isReg())
944 if (BaseDef && BaseDef->
isPHI()) {
946 BaseDef =
MRI.getVRegDef(BaseReg);
962void ModuloScheduleExpander::updateMemOperands(
MachineInstr &NewMI,
974 if (MMO->isVolatile() || MMO->isAtomic() ||
975 (MMO->isInvariant() && MMO->isDereferenceable()) ||
976 (!MMO->getValue())) {
981 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
982 int64_t AdjOffset = Delta * Num;
996 unsigned CurStageNum,
997 unsigned InstStageNum) {
999 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1006MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1007 MachineInstr *OldMI,
unsigned CurStageNum,
unsigned InstStageNum) {
1009 auto It = InstrChanges.
find(OldMI);
1010 if (It != InstrChanges.
end()) {
1011 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1012 unsigned BasePos, OffsetPos;
1016 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1017 if (Schedule.
getStage(LoopDef) > (
signed)InstStageNum)
1018 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1021 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1027void ModuloScheduleExpander::updateInstruction(
MachineInstr *NewMI,
1029 unsigned CurStageNum,
1030 unsigned InstrStageNum,
1031 ValueMapTy *VRMap) {
1041 VRMap[CurStageNum][reg] = NewReg;
1044 }
else if (MO.
isUse()) {
1047 int DefStageNum = Schedule.
getStage(Def);
1048 unsigned StageNum = CurStageNum;
1049 if (DefStageNum != -1 && (
int)InstrStageNum > DefStageNum) {
1051 unsigned StageDiff = (InstrStageNum - DefStageNum);
1053 StageNum -= StageDiff;
1055 if (VRMap[StageNum].
count(reg))
1056 MO.
setReg(VRMap[StageNum][reg]);
1064MachineInstr *ModuloScheduleExpander::findDefInLoop(
unsigned Reg) {
1067 while (
Def->isPHI()) {
1068 if (!Visited.
insert(Def).second)
1070 for (
unsigned i = 1, e =
Def->getNumOperands(); i < e; i += 2)
1071 if (
Def->getOperand(i + 1).getMBB() == BB) {
1072 Def =
MRI.getVRegDef(
Def->getOperand(i).getReg());
1080unsigned ModuloScheduleExpander::getPrevMapVal(
1081 unsigned StageNum,
unsigned PhiStage,
unsigned LoopVal,
unsigned LoopStage,
1083 unsigned PrevVal = 0;
1084 if (StageNum > PhiStage) {
1086 if (PhiStage == LoopStage && VRMap[StageNum - 1].
count(LoopVal))
1088 PrevVal = VRMap[StageNum - 1][LoopVal];
1089 else if (VRMap[StageNum].
count(LoopVal))
1092 PrevVal = VRMap[StageNum][LoopVal];
1096 else if (StageNum == PhiStage + 1)
1099 else if (StageNum > PhiStage + 1 && LoopInst->
getParent() == BB)
1102 getPrevMapVal(StageNum - 1, PhiStage,
getLoopPhiReg(*LoopInst, BB),
1103 LoopStage, VRMap, BB);
1115 InstrMapTy &InstrMap) {
1116 for (
auto &
PHI : BB->
phis()) {
1117 unsigned InitVal = 0;
1118 unsigned LoopVal = 0;
1124 unsigned NumPhis = getStagesForPhi(PhiDef);
1125 if (NumPhis > StageNum)
1127 for (
unsigned np = 0; np <= NumPhis; ++np) {
1129 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1132 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &
PHI, PhiDef,
1141void ModuloScheduleExpander::rewriteScheduledInstr(
1143 unsigned PhiNum,
MachineInstr *Phi,
unsigned OldReg,
unsigned NewReg,
1146 int StagePhi = Schedule.
getStage(Phi) + PhiNum;
1161 assert(OrigInstr != InstrMap.end() &&
"Instruction not scheduled.");
1163 int StageSched = Schedule.
getStage(OrigMI);
1164 int CycleSched = Schedule.
getCycle(OrigMI);
1165 unsigned ReplaceReg = 0;
1167 if (StagePhi == StageSched &&
Phi->isPHI()) {
1168 int CyclePhi = Schedule.
getCycle(Phi);
1169 if (PrevReg && InProlog)
1170 ReplaceReg = PrevReg;
1171 else if (PrevReg && !isLoopCarried(*Phi) &&
1172 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1173 ReplaceReg = PrevReg;
1175 ReplaceReg = NewReg;
1179 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1180 ReplaceReg = NewReg;
1181 if (StagePhi > StageSched &&
Phi->isPHI())
1182 ReplaceReg = NewReg;
1183 if (!InProlog && !
Phi->isPHI() && StagePhi < StageSched)
1184 ReplaceReg = NewReg;
1187 MRI.constrainRegClass(ReplaceReg,
MRI.getRegClass(OldReg));
1189 UseOp.setReg(ReplaceReg);
1191 Register SplitReg =
MRI.createVirtualRegister(
MRI.getRegClass(OldReg));
1195 UseOp.setReg(SplitReg);
1201bool ModuloScheduleExpander::isLoopCarried(
MachineInstr &Phi) {
1204 int DefCycle = Schedule.
getCycle(&Phi);
1205 int DefStage = Schedule.
getStage(&Phi);
1207 unsigned InitVal = 0;
1208 unsigned LoopVal = 0;
1211 if (!
Use ||
Use->isPHI())
1215 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1230 bool Changed =
true;
1235 if (
MRI.use_empty(
MI.getOperand(0).getReg())) {
1238 MI.eraseFromParent();
1240 }
else if (!KeepSingleSrcPhi &&
MI.getNumExplicitOperands() == 3) {
1242 MRI.constrainRegClass(
MI.getOperand(1).getReg(),
1243 MRI.getRegClass(
MI.getOperand(0).getReg()));
1244 assert(ConstrainRegClass &&
1245 "Expected a valid constrained register class!");
1246 (void)ConstrainRegClass;
1247 MRI.replaceRegWith(
MI.getOperand(0).getReg(),
1248 MI.getOperand(1).getReg());
1251 MI.eraseFromParent();
1260class KernelRewriter {
1296 : S(S), BB(LoopBB), PreheaderBB(
L.getLoopPreheader()),
1297 ExitBB(
L.getExitBlock()),
MRI(BB->
getParent()->getRegInfo()),
1298 TII(BB->
getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1300 if (PreheaderBB == BB)
1304void KernelRewriter::rewrite() {
1314 if (
MI->getParent())
1315 MI->removeFromParent();
1320 assert(FirstMI &&
"Failed to find first MI in schedule");
1327 (
I++)->eraseFromParent();
1332 if (
MI.isPHI() ||
MI.isTerminator())
1341 EliminateDeadPhis(BB,
MRI, LIS);
1347 for (
auto MI = BB->getFirstNonPHI();
MI != BB->end(); ++
MI) {
1356 if (
MI.getParent() != BB) {
1377 int ProducerStage = S.
getStage(Producer);
1378 assert(ConsumerStage != -1 &&
1379 "In-loop consumer should always be scheduled!");
1380 assert(ConsumerStage >= ProducerStage);
1381 unsigned StageDiff = ConsumerStage - ProducerStage;
1383 for (
unsigned I = 0;
I < StageDiff; ++
I)
1393 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1396 LoopProducer =
MRI.getUniqueVRegDef(LoopReg);
1399 int LoopProducerStage = S.
getStage(LoopProducer);
1401 std::optional<Register> IllegalPhiDefault;
1403 if (LoopProducerStage == -1) {
1405 }
else if (LoopProducerStage > ConsumerStage) {
1411 int LoopProducerCycle = S.
getCycle(LoopProducer);
1414 assert(LoopProducerCycle <= ConsumerCycle);
1415 assert(LoopProducerStage == ConsumerStage + 1);
1422 IllegalPhiDefault = Defaults.
front();
1425 assert(ConsumerStage >= LoopProducerStage);
1426 int StageDiff = ConsumerStage - LoopProducerStage;
1427 if (StageDiff > 0) {
1429 <<
" to " << (Defaults.
size() + StageDiff) <<
"\n");
1434 Defaults.
empty() ? std::optional<Register>()
1440 auto DefaultI = Defaults.
rbegin();
1441 while (DefaultI != Defaults.
rend())
1442 LoopReg =
phi(LoopReg, *DefaultI++,
MRI.getRegClass(Reg));
1444 if (IllegalPhiDefault) {
1450 auto RC =
MRI.getRegClass(Reg);
1454 .
addReg(*IllegalPhiDefault)
1460 S.
setStage(IllegalPhi, LoopProducerStage);
1467Register KernelRewriter::phi(
Register LoopReg, std::optional<Register> InitReg,
1471 auto I = Phis.find({LoopReg, *InitReg});
1472 if (
I != Phis.end())
1475 for (
auto &KV : Phis) {
1476 if (KV.first.first == LoopReg)
1483 auto I = UndefPhis.find(LoopReg);
1484 if (
I != UndefPhis.end()) {
1492 MI->getOperand(1).setReg(*InitReg);
1493 Phis.insert({{LoopReg, *InitReg},
R});
1495 MRI.constrainRegClass(R,
MRI.getRegClass(*InitReg));
1496 assert(ConstrainRegClass &&
"Expected a valid constrained register class!");
1497 (void)ConstrainRegClass;
1504 RC =
MRI.getRegClass(LoopReg);
1508 MRI.constrainRegClass(R,
MRI.getRegClass(*InitReg));
1509 assert(ConstrainRegClass &&
"Expected a valid constrained register class!");
1510 (void)ConstrainRegClass;
1513 .
addReg(InitReg ? *InitReg : undef(RC))
1518 UndefPhis[LoopReg] =
R;
1520 Phis[{LoopReg, *InitReg}] =
R;
1530 R =
MRI.createVirtualRegister(RC);
1533 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1542class KernelOperandInfo {
1555 while (isRegInLoop(MO)) {
1557 if (
MI->isFullCopy()) {
1558 MO = &
MI->getOperand(1);
1565 MO = &
MI->getOperand(3);
1570 MO =
MI->getOperand(2).getMBB() == BB ? &
MI->getOperand(1)
1571 : &
MI->getOperand(3);
1578 return PhiDefaults.
size() ==
Other.PhiDefaults.size();
1582 OS <<
"use of " << *
Source <<
": distance(" << PhiDefaults.
size() <<
") in "
1589 MRI.getVRegDef(MO->
getReg())->getParent() == BB;
1601 for (
auto I =
BB->
begin(), NI = NewBB->
begin(); !
I->isTerminator();
1617 if (Stage == -1 || Stage >= MinStage)
1630 for (
auto &Sub : Subs)
1631 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1636 MI->eraseFromParent();
1666 MI.removeFromParent();
1670 BlockMIs.erase({SourceBB, KernelMI});
1680 assert(Def->findRegisterDefOperandIdx(
MI.getOperand(1).getReg(),
1683 MI.getOperand(0).setReg(PhiReg);
1687 for (
auto *
P : PhiToDelete)
1688 P->eraseFromParent();
1694 DestBB->
insert(InsertPt, NewMI);
1695 Register OrigR = Phi->getOperand(0).getReg();
1716 if (
Use &&
Use->isPHI() &&
Use->getParent() == SourceBB) {
1731 for (
unsigned I = 0;
I < distance; ++
I) {
1734 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1740 return CanonicalUseReg;
1765 EliminateDeadPhis(ExitingBB,
MRI,
LIS,
true);
1788 EliminateDeadPhis(
B,
MRI,
LIS,
true);
1792 for (
size_t I = 0;
I <
Epilogs.size();
I++) {
1794 for (
size_t J =
I; J <
Epilogs.size(); J++) {
1798 for (
size_t K = Iteration; K >
I; K--)
1812 for (; PI !=
Prologs.end(); ++PI, ++EI) {
1814 (*PI)->addSuccessor(*EI);
1818 if (
Use &&
Use->getParent() == Pred) {
1820 if (CanonicalUse->
isPHI()) {
1841 for (
auto I =
B->instr_rbegin();
1842 I != std::next(
B->getFirstNonPHI()->getReverseIterator());) {
1850 MI->eraseFromParent();
1856 EliminateDeadPhis(
B,
MRI,
LIS);
1857 EliminateDeadPhis(ExitingBB,
MRI,
LIS);
1876 if (
Use.getParent() !=
BB)
1879 Use->substituteRegister(OldR, R, 0,
1888 Exit->replacePhiUsesWith(
BB, NewBB);
1895 assert(CanAnalyzeBr &&
"Must be able to analyze the loop branch!");
1907 unsigned OpIdx =
MI->findRegisterDefOperandIdx(Reg,
nullptr);
1919 R =
MI->getOperand(1).getReg();
1924 MI->getOperand(0).setReg(PhiR);
1930 if (Stage == -1 ||
LiveStages.count(
MI->getParent()) == 0 ||
1945 for (
auto &Sub : Subs)
1946 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1951 MI->eraseFromParent();
1956 bool KernelDisposed =
false;
1965 std::optional<bool> StaticallyGreater =
1967 if (!StaticallyGreater) {
1971 }
else if (*StaticallyGreater ==
false) {
1975 Prolog->removeSuccessor(Fallthrough);
1981 KernelDisposed =
true;
1993 if (!KernelDisposed) {
2024 std::string ScheduleDump;
2031 assert(
LIS &&
"Requires LiveIntervals!");
2036 if (!ExpandedKernel) {
2054 IllegalPhis.
insert(&*NI);
2060 auto OI = ExpandedKernel->
begin();
2062 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2063 while (OI->isPHI() || OI->isFullCopy())
2065 while (NI->isPHI() || NI->isFullCopy())
2067 assert(OI->getOpcode() == NI->getOpcode() &&
"Opcodes don't match?!");
2069 for (
auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2070 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2072 KernelOperandInfo(&*NOpI,
MRI, IllegalPhis));
2076 for (
auto &OldAndNew : KOIs) {
2077 if (OldAndNew.first == OldAndNew.second)
2080 errs() <<
"Modulo kernel validation error: [\n";
2081 errs() <<
" [golden] ";
2082 OldAndNew.first.print(
errs());
2084 OldAndNew.second.print(
errs());
2089 errs() <<
"Golden reference kernel:\n";
2091 errs() <<
"New kernel:\n";
2093 errs() << ScheduleDump;
2095 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2118 if (Exit->pred_size() == 1)
2133 else if (FBB ==
Loop)
2139 Loop->replaceSuccessor(Exit, NewExit);
2140 TII->insertUnconditionalBranch(*NewExit, Exit,
DebugLoc());
2143 Exit->replacePhiUsesWith(
Loop, NewExit);
2153 InstrMapTy &LastStage0Insts,
2157 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC,
MBB,
Cond,
2176void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2279 Prolog->addSuccessor(NewKernel);
2284 Epilog->addSuccessor(NewPreheader);
2285 Epilog->addSuccessor(NewExit);
2287 InstrMapTy LastStage0Insts;
2288 insertCondBranch(*Check, Schedule.
getNumStages() + NumUnroll - 2,
2289 LastStage0Insts, *Prolog, *NewPreheader);
2294 generateProlog(PrologVRMap);
2295 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2296 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2300void ModuloScheduleExpanderMVE::updateInstrUse(
2311 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2316 if (!DefInst || DefInst->
getParent() != OrigKernel)
2318 unsigned InitReg = 0;
2319 unsigned DefReg = OrigReg;
2320 if (DefInst->
isPHI()) {
2323 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2326 DefInst =
MRI.getVRegDef(LoopReg);
2328 unsigned DefStageNum = Schedule.
getStage(DefInst);
2329 DiffStage += StageNum - DefStageNum;
2331 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].
count(DefReg))
2333 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2334 else if (!PrevVRMap)
2343 NewReg = (*PrevVRMap)[PrevVRMap->
size() - (DiffStage - PhaseNum)][DefReg];
2346 MRI.constrainRegClass(NewReg,
MRI.getRegClass(OrigReg));
2348 UseMO.setReg(NewReg);
2350 Register SplitReg =
MRI.createVirtualRegister(
MRI.getRegClass(OrigReg));
2351 BuildMI(*OrigKernel,
MI,
MI->getDebugLoc(), TII->
get(TargetOpcode::COPY),
2354 UseMO.setReg(SplitReg);
2363 unsigned InitVal, LoopVal;
2372void ModuloScheduleExpanderMVE::generatePhi(
2377 int StageNum = Schedule.
getStage(OrigMI);
2379 if (Schedule.
getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2380 UsePrologReg =
true;
2381 else if (Schedule.
getNumStages() - NumUnroll + UnrollNum == StageNum)
2382 UsePrologReg =
false;
2418 if (!DefMO.isReg() || DefMO.isDead())
2421 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2422 if (NewReg == KernelVRMap[UnrollNum].
end())
2426 int PrologNum = Schedule.
getNumStages() - NumUnroll + UnrollNum - 1;
2427 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2436 Register PhiReg =
MRI.createVirtualRegister(
MRI.getRegClass(OrigReg));
2438 TII->
get(TargetOpcode::PHI), PhiReg)
2443 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2449 for (
unsigned Idx = 1;
Idx < Phi.getNumOperands();
Idx += 2) {
2450 if (Phi.getOperand(
Idx).getReg() == OrigReg) {
2451 Phi.getOperand(
Idx).setReg(NewReg);
2452 Phi.getOperand(
Idx + 1).setMBB(NewMBB);
2459void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(
Register OrigReg,
2467 if (
O.getParent()->getParent() != OrigKernel &&
2468 O.getParent()->getParent() != Prolog &&
2469 O.getParent()->getParent() != NewKernel &&
2470 O.getParent()->getParent() != Epilog)
2472 if (
O.getParent()->getParent() == OrigKernel &&
O.getParent()->isPHI())
2478 if (!UsesAfterLoop.
empty()) {
2479 Register PhiReg =
MRI.createVirtualRegister(
MRI.getRegClass(OrigReg));
2481 TII->
get(TargetOpcode::PHI), PhiReg)
2496 if (!LoopPhis.
empty()) {
2498 unsigned InitReg, LoopReg;
2499 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2500 Register NewInit =
MRI.createVirtualRegister(
MRI.getRegClass(InitReg));
2502 TII->
get(TargetOpcode::PHI), NewInit)
2512void ModuloScheduleExpanderMVE::generateProlog(
2514 PrologVRMap.
clear();
2517 for (
int PrologNum = 0; PrologNum < Schedule.
getNumStages() - 1;
2523 if (StageNum > PrologNum)
2526 updateInstrDef(NewMI, PrologVRMap[PrologNum],
false);
2527 NewMIMap[NewMI] = {PrologNum, StageNum};
2528 Prolog->push_back(NewMI);
2532 for (
auto I : NewMIMap) {
2534 int PrologNum =
I.second.first;
2535 int StageNum =
I.second.second;
2536 updateInstrUse(
MI, StageNum, PrologNum, PrologVRMap,
nullptr);
2540 dbgs() <<
"prolog:\n";
2545void ModuloScheduleExpanderMVE::generateKernel(
2548 KernelVRMap.
clear();
2549 KernelVRMap.
resize(NumUnroll);
2551 PhiVRMap.
resize(NumUnroll);
2554 for (
int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2560 if (UnrollNum == NumUnroll - 1)
2561 LastStage0Insts[
MI] = NewMI;
2562 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2563 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2564 generatePhi(
MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2565 NewMIMap[NewMI] = {UnrollNum, StageNum};
2570 for (
auto I : NewMIMap) {
2572 int UnrollNum =
I.second.first;
2573 int StageNum =
I.second.second;
2574 updateInstrUse(
MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2578 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2582 dbgs() <<
"kernel:\n";
2587void ModuloScheduleExpanderMVE::generateEpilog(
2590 EpilogVRMap.
clear();
2593 for (
int EpilogNum = 0; EpilogNum < Schedule.
getNumStages() - 1;
2599 if (StageNum <= EpilogNum)
2602 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2603 NewMIMap[NewMI] = {EpilogNum, StageNum};
2604 Epilog->push_back(NewMI);
2608 for (
auto I : NewMIMap) {
2610 int EpilogNum =
I.second.first;
2611 int StageNum =
I.second.second;
2612 updateInstrUse(
MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2619 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2622 dbgs() <<
"epilog:\n";
2628void ModuloScheduleExpanderMVE::calcNumUnroll() {
2645 int NumUnrollLocal = 1;
2653 if (Inst2Idx[
MI] <= Inst2Idx[
DefMI])
2655 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2664void ModuloScheduleExpanderMVE::updateInstrDef(
MachineInstr *NewMI,
2674 VRMap[
Reg] = NewReg;
2676 mergeRegUsesAfterPipeline(Reg, NewReg);
2687 generatePipelinedLoop();
2692 if (!L.getExitBlock()) {
2694 dbgs() <<
"Can not apply MVE expander: No single exit block.\n";);
2710 if (
Ref.getParent() != BB ||
Ref.isPHI()) {
2712 <<
"Can not apply MVE expander: A phi result is "
2713 "referenced outside of the loop or by phi.\n";);
2720 unsigned InitVal, LoopVal;
2722 if (!
Register(LoopVal).isVirtual() ||
2723 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2725 dbgs() <<
"Can not apply MVE expander: A phi source value coming "
2726 "from the loop is not defined in the loop.\n";);
2729 if (UsedByPhi.
count(LoopVal)) {
2730 LLVM_DEBUG(
dbgs() <<
"Can not apply MVE expander: A value defined in the "
2731 "loop is referenced by two or more phis.\n";);
2734 UsedByPhi.
insert(LoopVal);
2773char ModuloScheduleTest::ID = 0;
2776 "Modulo Schedule test pass",
false,
false)
2784 for (
auto *L : MLI) {
2785 if (L->getTopBlock() != L->getBottomBlock())
2794 std::pair<StringRef, StringRef> StageAndCycle = getToken(S,
"_");
2795 std::pair<StringRef, StringRef> StageTokenAndValue =
2796 getToken(StageAndCycle.first,
"-");
2797 std::pair<StringRef, StringRef> CycleTokenAndValue =
2798 getToken(StageAndCycle.second,
"-");
2799 if (StageTokenAndValue.first !=
"Stage" ||
2800 CycleTokenAndValue.first !=
"_Cycle") {
2802 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2806 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2807 CycleTokenAndValue.second.drop_front().getAsInteger(10,
Cycle);
2809 dbgs() <<
" Stage=" << Stage <<
", Cycle=" <<
Cycle <<
"\n";
2815 dbgs() <<
"--- ModuloScheduleTest running on BB#" << BB->
getNumber() <<
"\n";
2818 std::vector<MachineInstr *> Instrs;
2820 if (
MI.isTerminator())
2822 Instrs.push_back(&
MI);
2824 dbgs() <<
"Parsing post-instr symbol for " <<
MI;
2847 MI->setPostInstrSymbol(MF,
Sym);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::optional< std::vector< StOtherPiece > > Other
DenseMap< Block *, BlockRelaxAux > Blocks
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)
Create a dedicated exit for Loop.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
modulo schedule Modulo Schedule test pass
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool rewrite(Function &F)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
succ_iterator succ_begin()
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
pred_iterator pred_begin()
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
ModuloSchedule & Schedule
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
const TargetInstrInfo * TII
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
MachineRegisterInfo & MRI
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
reverse_iterator rbegin()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt copy(R &&Range, OutputIt Out)
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...