LLVM 17.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
17#include "llvm/MC/MCContext.h"
18#include "llvm/Support/Debug.h"
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
26 for (MachineInstr *MI : ScheduledInstrs)
27 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
28}
29
30//===----------------------------------------------------------------------===//
31// ModuloScheduleExpander implementation
32//===----------------------------------------------------------------------===//
33
34/// Return the register values for the operands of a Phi instruction.
35/// This function assume the instruction is a Phi.
37 unsigned &InitVal, unsigned &LoopVal) {
38 assert(Phi.isPHI() && "Expecting a Phi.");
39
40 InitVal = 0;
41 LoopVal = 0;
42 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
43 if (Phi.getOperand(i + 1).getMBB() != Loop)
44 InitVal = Phi.getOperand(i).getReg();
45 else
46 LoopVal = Phi.getOperand(i).getReg();
47
48 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
49}
50
51/// Return the Phi register value that comes from the incoming block.
52static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
53 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
54 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
55 return Phi.getOperand(i).getReg();
56 return 0;
57}
58
59/// Return the Phi register value that comes the loop block.
60static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
61 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
62 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
63 return Phi.getOperand(i).getReg();
64 return 0;
65}
66
68 BB = Schedule.getLoop()->getTopBlock();
69 Preheader = *BB->pred_begin();
70 if (Preheader == BB)
71 Preheader = *std::next(BB->pred_begin());
72
73 // Iterate over the definitions in each instruction, and compute the
74 // stage difference for each use. Keep the maximum value.
75 for (MachineInstr *MI : Schedule.getInstructions()) {
76 int DefStage = Schedule.getStage(MI);
77 for (const MachineOperand &Op : MI->operands()) {
78 if (!Op.isReg() || !Op.isDef())
79 continue;
80
81 Register Reg = Op.getReg();
82 unsigned MaxDiff = 0;
83 bool PhiIsSwapped = false;
84 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
85 MachineInstr *UseMI = UseOp.getParent();
86 int UseStage = Schedule.getStage(UseMI);
87 unsigned Diff = 0;
88 if (UseStage != -1 && UseStage >= DefStage)
89 Diff = UseStage - DefStage;
90 if (MI->isPHI()) {
91 if (isLoopCarried(*MI))
92 ++Diff;
93 else
94 PhiIsSwapped = true;
95 }
96 MaxDiff = std::max(Diff, MaxDiff);
97 }
98 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
99 }
100 }
101
102 generatePipelinedLoop();
103}
104
105void ModuloScheduleExpander::generatePipelinedLoop() {
107 assert(LoopInfo && "Must be able to analyze loop!");
108
109 // Create a new basic block for the kernel and add it to the CFG.
111
112 unsigned MaxStageCount = Schedule.getNumStages() - 1;
113
114 // Remember the registers that are used in different stages. The index is
115 // the iteration, or stage, that the instruction is scheduled in. This is
116 // a map between register names in the original block and the names created
117 // in each stage of the pipelined loop.
118 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
119
120 // The renaming destination by Phis for the registers across stages.
121 // This map is updated during Phis generation to point to the most recent
122 // renaming destination.
123 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
124
125 InstrMapTy InstrMap;
126
128
129 // Generate the prolog instructions that set up the pipeline.
130 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
131 MF.insert(BB->getIterator(), KernelBB);
132
133 // Rearrange the instructions to generate the new, pipelined loop,
134 // and update register names as needed.
135 for (MachineInstr *CI : Schedule.getInstructions()) {
136 if (CI->isPHI())
137 continue;
138 unsigned StageNum = Schedule.getStage(CI);
139 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
140 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
141 KernelBB->push_back(NewMI);
142 InstrMap[NewMI] = CI;
143 }
144
145 // Copy any terminator instructions to the new kernel, and update
146 // names as needed.
147 for (MachineInstr &MI : BB->terminators()) {
148 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
149 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
150 KernelBB->push_back(NewMI);
151 InstrMap[NewMI] = &MI;
152 }
153
154 NewKernel = KernelBB;
155 KernelBB->transferSuccessors(BB);
156 KernelBB->replaceSuccessor(BB, KernelBB);
157
158 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
159 InstrMap, MaxStageCount, MaxStageCount, false);
160 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
161 InstrMap, MaxStageCount, MaxStageCount, false);
162
163 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
164
166 // Generate the epilog instructions to complete the pipeline.
167 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
168 PrologBBs);
169
170 // We need this step because the register allocation doesn't handle some
171 // situations well, so we insert copies to help out.
172 splitLifetimes(KernelBB, EpilogBBs);
173
174 // Remove dead instructions due to loop induction variables.
175 removeDeadInstructions(KernelBB, EpilogBBs);
176
177 // Add branches between prolog and epilog blocks.
178 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
179
180 delete[] VRMap;
181 delete[] VRMapPhi;
182}
183
185 // Remove the original loop since it's no longer referenced.
186 for (auto &I : *BB)
188 BB->clear();
189 BB->eraseFromParent();
190}
191
192/// Generate the pipeline prolog code.
193void ModuloScheduleExpander::generateProlog(unsigned LastStage,
194 MachineBasicBlock *KernelBB,
195 ValueMapTy *VRMap,
196 MBBVectorTy &PrologBBs) {
197 MachineBasicBlock *PredBB = Preheader;
198 InstrMapTy InstrMap;
199
200 // Generate a basic block for each stage, not including the last stage,
201 // which will be generated in the kernel. Each basic block may contain
202 // instructions from multiple stages/iterations.
203 for (unsigned i = 0; i < LastStage; ++i) {
204 // Create and insert the prolog basic block prior to the original loop
205 // basic block. The original loop is removed later.
207 PrologBBs.push_back(NewBB);
208 MF.insert(BB->getIterator(), NewBB);
209 NewBB->transferSuccessors(PredBB);
210 PredBB->addSuccessor(NewBB);
211 PredBB = NewBB;
212
213 // Generate instructions for each appropriate stage. Process instructions
214 // in original program order.
215 for (int StageNum = i; StageNum >= 0; --StageNum) {
217 BBE = BB->getFirstTerminator();
218 BBI != BBE; ++BBI) {
219 if (Schedule.getStage(&*BBI) == StageNum) {
220 if (BBI->isPHI())
221 continue;
222 MachineInstr *NewMI =
223 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
224 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
225 NewBB->push_back(NewMI);
226 InstrMap[NewMI] = &*BBI;
227 }
228 }
229 }
230 rewritePhiValues(NewBB, i, VRMap, InstrMap);
231 LLVM_DEBUG({
232 dbgs() << "prolog:\n";
233 NewBB->dump();
234 });
235 }
236
237 PredBB->replaceSuccessor(BB, KernelBB);
238
239 // Check if we need to remove the branch from the preheader to the original
240 // loop, and replace it with a branch to the new loop.
241 unsigned numBranches = TII->removeBranch(*Preheader);
242 if (numBranches) {
244 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
245 }
246}
247
248/// Generate the pipeline epilog code. The epilog code finishes the iterations
249/// that were started in either the prolog or the kernel. We create a basic
250/// block for each stage that needs to complete.
251void ModuloScheduleExpander::generateEpilog(
252 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
253 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
254 MBBVectorTy &PrologBBs) {
255 // We need to change the branch from the kernel to the first epilog block, so
256 // this call to analyze branch uses the kernel rather than the original BB.
257 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
259 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
260 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
261 if (checkBranch)
262 return;
263
264 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
265 if (*LoopExitI == KernelBB)
266 ++LoopExitI;
267 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
268 MachineBasicBlock *LoopExitBB = *LoopExitI;
269
270 MachineBasicBlock *PredBB = KernelBB;
271 MachineBasicBlock *EpilogStart = LoopExitBB;
272 InstrMapTy InstrMap;
273
274 // Generate a basic block for each stage, not including the last stage,
275 // which was generated for the kernel. Each basic block may contain
276 // instructions from multiple stages/iterations.
277 int EpilogStage = LastStage + 1;
278 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
280 EpilogBBs.push_back(NewBB);
281 MF.insert(BB->getIterator(), NewBB);
282
283 PredBB->replaceSuccessor(LoopExitBB, NewBB);
284 NewBB->addSuccessor(LoopExitBB);
285
286 if (EpilogStart == LoopExitBB)
287 EpilogStart = NewBB;
288
289 // Add instructions to the epilog depending on the current block.
290 // Process instructions in original program order.
291 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
292 for (auto &BBI : *BB) {
293 if (BBI.isPHI())
294 continue;
295 MachineInstr *In = &BBI;
296 if ((unsigned)Schedule.getStage(In) == StageNum) {
297 // Instructions with memoperands in the epilog are updated with
298 // conservative values.
299 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
300 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
301 NewBB->push_back(NewMI);
302 InstrMap[NewMI] = In;
303 }
304 }
305 }
306 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
307 InstrMap, LastStage, EpilogStage, i == 1);
308 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
309 InstrMap, LastStage, EpilogStage, i == 1);
310 PredBB = NewBB;
311
312 LLVM_DEBUG({
313 dbgs() << "epilog:\n";
314 NewBB->dump();
315 });
316 }
317
318 // Fix any Phi nodes in the loop exit block.
319 LoopExitBB->replacePhiUsesWith(BB, PredBB);
320
321 // Create a branch to the new epilog from the kernel.
322 // Remove the original branch and add a new branch to the epilog.
323 TII->removeBranch(*KernelBB);
324 assert((OrigBB == TBB || OrigBB == FBB) &&
325 "Unable to determine looping branch direction");
326 if (OrigBB != TBB)
327 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
328 else
329 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
330 // Add a branch to the loop exit.
331 if (EpilogBBs.size() > 0) {
332 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
334 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
335 }
336}
337
338/// Replace all uses of FromReg that appear outside the specified
339/// basic block with ToReg.
340static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
343 LiveIntervals &LIS) {
344 for (MachineOperand &O :
345 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
346 if (O.getParent()->getParent() != MBB)
347 O.setReg(ToReg);
348 if (!LIS.hasInterval(ToReg))
349 LIS.createEmptyInterval(ToReg);
350}
351
352/// Return true if the register has a use that occurs outside the
353/// specified loop.
354static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
356 for (const MachineOperand &MO : MRI.use_operands(Reg))
357 if (MO.getParent()->getParent() != BB)
358 return true;
359 return false;
360}
361
362/// Generate Phis for the specific block in the generated pipelined code.
363/// This function looks at the Phis from the original code to guide the
364/// creation of new Phis.
365void ModuloScheduleExpander::generateExistingPhis(
367 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
368 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
369 // Compute the stage number for the initial value of the Phi, which
370 // comes from the prolog. The prolog to use depends on to which kernel/
371 // epilog that we're adding the Phi.
372 unsigned PrologStage = 0;
373 unsigned PrevStage = 0;
374 bool InKernel = (LastStageNum == CurStageNum);
375 if (InKernel) {
376 PrologStage = LastStageNum - 1;
377 PrevStage = CurStageNum;
378 } else {
379 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
380 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
381 }
382
383 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
384 BBE = BB->getFirstNonPHI();
385 BBI != BBE; ++BBI) {
386 Register Def = BBI->getOperand(0).getReg();
387
388 unsigned InitVal = 0;
389 unsigned LoopVal = 0;
390 getPhiRegs(*BBI, BB, InitVal, LoopVal);
391
392 unsigned PhiOp1 = 0;
393 // The Phi value from the loop body typically is defined in the loop, but
394 // not always. So, we need to check if the value is defined in the loop.
395 unsigned PhiOp2 = LoopVal;
396 if (VRMap[LastStageNum].count(LoopVal))
397 PhiOp2 = VRMap[LastStageNum][LoopVal];
398
399 int StageScheduled = Schedule.getStage(&*BBI);
400 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
401 unsigned NumStages = getStagesForReg(Def, CurStageNum);
402 if (NumStages == 0) {
403 // We don't need to generate a Phi anymore, but we need to rename any uses
404 // of the Phi value.
405 unsigned NewReg = VRMap[PrevStage][LoopVal];
406 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
407 InitVal, NewReg);
408 if (VRMap[CurStageNum].count(LoopVal))
409 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
410 }
411 // Adjust the number of Phis needed depending on the number of prologs left,
412 // and the distance from where the Phi is first scheduled. The number of
413 // Phis cannot exceed the number of prolog stages. Each stage can
414 // potentially define two values.
415 unsigned MaxPhis = PrologStage + 2;
416 if (!InKernel && (int)PrologStage <= LoopValStage)
417 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
418 unsigned NumPhis = std::min(NumStages, MaxPhis);
419
420 unsigned NewReg = 0;
421 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
422 // In the epilog, we may need to look back one stage to get the correct
423 // Phi name, because the epilog and prolog blocks execute the same stage.
424 // The correct name is from the previous block only when the Phi has
425 // been completely scheduled prior to the epilog, and Phi value is not
426 // needed in multiple stages.
427 int StageDiff = 0;
428 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
429 NumPhis == 1)
430 StageDiff = 1;
431 // Adjust the computations below when the phi and the loop definition
432 // are scheduled in different stages.
433 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
434 StageDiff = StageScheduled - LoopValStage;
435 for (unsigned np = 0; np < NumPhis; ++np) {
436 // If the Phi hasn't been scheduled, then use the initial Phi operand
437 // value. Otherwise, use the scheduled version of the instruction. This
438 // is a little complicated when a Phi references another Phi.
439 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
440 PhiOp1 = InitVal;
441 // Check if the Phi has already been scheduled in a prolog stage.
442 else if (PrologStage >= AccessStage + StageDiff + np &&
443 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
444 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
445 // Check if the Phi has already been scheduled, but the loop instruction
446 // is either another Phi, or doesn't occur in the loop.
447 else if (PrologStage >= AccessStage + StageDiff + np) {
448 // If the Phi references another Phi, we need to examine the other
449 // Phi to get the correct value.
450 PhiOp1 = LoopVal;
451 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
452 int Indirects = 1;
453 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
454 int PhiStage = Schedule.getStage(InstOp1);
455 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
456 PhiOp1 = getInitPhiReg(*InstOp1, BB);
457 else
458 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
459 InstOp1 = MRI.getVRegDef(PhiOp1);
460 int PhiOpStage = Schedule.getStage(InstOp1);
461 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
462 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
463 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
464 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
465 break;
466 }
467 ++Indirects;
468 }
469 } else
470 PhiOp1 = InitVal;
471 // If this references a generated Phi in the kernel, get the Phi operand
472 // from the incoming block.
473 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
474 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
475 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
476
477 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
478 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
479 // In the epilog, a map lookup is needed to get the value from the kernel,
480 // or previous epilog block. How is does this depends on if the
481 // instruction is scheduled in the previous block.
482 if (!InKernel) {
483 int StageDiffAdj = 0;
484 if (LoopValStage != -1 && StageScheduled > LoopValStage)
485 StageDiffAdj = StageScheduled - LoopValStage;
486 // Use the loop value defined in the kernel, unless the kernel
487 // contains the last definition of the Phi.
488 if (np == 0 && PrevStage == LastStageNum &&
489 (StageScheduled != 0 || LoopValStage != 0) &&
490 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
491 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
492 // Use the value defined by the Phi. We add one because we switch
493 // from looking at the loop value to the Phi definition.
494 else if (np > 0 && PrevStage == LastStageNum &&
495 VRMap[PrevStage - np + 1].count(Def))
496 PhiOp2 = VRMap[PrevStage - np + 1][Def];
497 // Use the loop value defined in the kernel.
498 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
499 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
500 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
501 // Use the value defined by the Phi, unless we're generating the first
502 // epilog and the Phi refers to a Phi in a different stage.
503 else if (VRMap[PrevStage - np].count(Def) &&
504 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
505 (LoopValStage == StageScheduled)))
506 PhiOp2 = VRMap[PrevStage - np][Def];
507 }
508
509 // Check if we can reuse an existing Phi. This occurs when a Phi
510 // references another Phi, and the other Phi is scheduled in an
511 // earlier stage. We can try to reuse an existing Phi up until the last
512 // stage of the current Phi.
513 if (LoopDefIsPhi) {
514 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
515 int LVNumStages = getStagesForPhi(LoopVal);
516 int StageDiff = (StageScheduled - LoopValStage);
517 LVNumStages -= StageDiff;
518 // Make sure the loop value Phi has been processed already.
519 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
520 NewReg = PhiOp2;
521 unsigned ReuseStage = CurStageNum;
522 if (isLoopCarried(*PhiInst))
523 ReuseStage -= LVNumStages;
524 // Check if the Phi to reuse has been generated yet. If not, then
525 // there is nothing to reuse.
526 if (VRMap[ReuseStage - np].count(LoopVal)) {
527 NewReg = VRMap[ReuseStage - np][LoopVal];
528
529 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
530 Def, NewReg);
531 // Update the map with the new Phi name.
532 VRMap[CurStageNum - np][Def] = NewReg;
533 PhiOp2 = NewReg;
534 if (VRMap[LastStageNum - np - 1].count(LoopVal))
535 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
536
537 if (IsLast && np == NumPhis - 1)
538 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
539 continue;
540 }
541 }
542 }
543 if (InKernel && StageDiff > 0 &&
544 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
545 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
546 }
547
548 const TargetRegisterClass *RC = MRI.getRegClass(Def);
549 NewReg = MRI.createVirtualRegister(RC);
550
551 MachineInstrBuilder NewPhi =
552 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
553 TII->get(TargetOpcode::PHI), NewReg);
554 NewPhi.addReg(PhiOp1).addMBB(BB1);
555 NewPhi.addReg(PhiOp2).addMBB(BB2);
556 if (np == 0)
557 InstrMap[NewPhi] = &*BBI;
558
559 // We define the Phis after creating the new pipelined code, so
560 // we need to rename the Phi values in scheduled instructions.
561
562 unsigned PrevReg = 0;
563 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
564 PrevReg = VRMap[PrevStage - np][LoopVal];
565 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
566 NewReg, PrevReg);
567 // If the Phi has been scheduled, use the new name for rewriting.
568 if (VRMap[CurStageNum - np].count(Def)) {
569 unsigned R = VRMap[CurStageNum - np][Def];
570 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
571 NewReg);
572 }
573
574 // Check if we need to rename any uses that occurs after the loop. The
575 // register to replace depends on whether the Phi is scheduled in the
576 // epilog.
577 if (IsLast && np == NumPhis - 1)
578 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
579
580 // In the kernel, a dependent Phi uses the value from this Phi.
581 if (InKernel)
582 PhiOp2 = NewReg;
583
584 // Update the map with the new Phi name.
585 VRMap[CurStageNum - np][Def] = NewReg;
586 }
587
588 while (NumPhis++ < NumStages) {
589 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
590 NewReg, 0);
591 }
592
593 // Check if we need to rename a Phi that has been eliminated due to
594 // scheduling.
595 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
596 replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
597 }
598}
599
600/// Generate Phis for the specified block in the generated pipelined code.
601/// These are new Phis needed because the definition is scheduled after the
602/// use in the pipelined sequence.
603void ModuloScheduleExpander::generatePhis(
605 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
606 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
607 bool IsLast) {
608 // Compute the stage number that contains the initial Phi value, and
609 // the Phi from the previous stage.
610 unsigned PrologStage = 0;
611 unsigned PrevStage = 0;
612 unsigned StageDiff = CurStageNum - LastStageNum;
613 bool InKernel = (StageDiff == 0);
614 if (InKernel) {
615 PrologStage = LastStageNum - 1;
616 PrevStage = CurStageNum;
617 } else {
618 PrologStage = LastStageNum - StageDiff;
619 PrevStage = LastStageNum + StageDiff - 1;
620 }
621
622 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
623 BBE = BB->instr_end();
624 BBI != BBE; ++BBI) {
625 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
626 MachineOperand &MO = BBI->getOperand(i);
627 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
628 continue;
629
630 int StageScheduled = Schedule.getStage(&*BBI);
631 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
632 Register Def = MO.getReg();
633 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
634 // An instruction scheduled in stage 0 and is used after the loop
635 // requires a phi in the epilog for the last definition from either
636 // the kernel or prolog.
637 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
638 hasUseAfterLoop(Def, BB, MRI))
639 NumPhis = 1;
640 if (!InKernel && (unsigned)StageScheduled > PrologStage)
641 continue;
642
643 unsigned PhiOp2;
644 if (InKernel) {
645 PhiOp2 = VRMap[PrevStage][Def];
646 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
647 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
648 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
649 }
650 // The number of Phis can't exceed the number of prolog stages. The
651 // prolog stage number is zero based.
652 if (NumPhis > PrologStage + 1 - StageScheduled)
653 NumPhis = PrologStage + 1 - StageScheduled;
654 for (unsigned np = 0; np < NumPhis; ++np) {
655 // Example for
656 // Org:
657 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
658 //
659 // Prolog0 (Stage0):
660 // %Clone0 = ...
661 // Prolog1 (Stage1):
662 // %Clone1 = ...
663 // Kernel (Stage2):
664 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
665 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
666 // %Clone2 = ...
667 // Epilog0 (Stage3):
668 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
669 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
670 // Epilog1 (Stage4):
671 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
672 //
673 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
674 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
675 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
676
677 unsigned PhiOp1 = VRMap[PrologStage][Def];
678 if (np <= PrologStage)
679 PhiOp1 = VRMap[PrologStage - np][Def];
680 if (!InKernel) {
681 if (PrevStage == LastStageNum && np == 0)
682 PhiOp2 = VRMap[LastStageNum][Def];
683 else
684 PhiOp2 = VRMapPhi[PrevStage - np][Def];
685 }
686
687 const TargetRegisterClass *RC = MRI.getRegClass(Def);
688 Register NewReg = MRI.createVirtualRegister(RC);
689
690 MachineInstrBuilder NewPhi =
691 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
692 TII->get(TargetOpcode::PHI), NewReg);
693 NewPhi.addReg(PhiOp1).addMBB(BB1);
694 NewPhi.addReg(PhiOp2).addMBB(BB2);
695 if (np == 0)
696 InstrMap[NewPhi] = &*BBI;
697
698 // Rewrite uses and update the map. The actions depend upon whether
699 // we generating code for the kernel or epilog blocks.
700 if (InKernel) {
701 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
702 NewReg);
703 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
704 NewReg);
705
706 PhiOp2 = NewReg;
707 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
708 } else {
709 VRMapPhi[CurStageNum - np][Def] = NewReg;
710 if (np == NumPhis - 1)
711 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
712 NewReg);
713 }
714 if (IsLast && np == NumPhis - 1)
715 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
716 }
717 }
718 }
719}
720
721/// Remove instructions that generate values with no uses.
722/// Typically, these are induction variable operations that generate values
723/// used in the loop itself. A dead instruction has a definition with
724/// no uses, or uses that occur in the original loop only.
725void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
726 MBBVectorTy &EpilogBBs) {
727 // For each epilog block, check that the value defined by each instruction
728 // is used. If not, delete it.
729 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
731 ME = MBB->instr_rend();
732 MI != ME;) {
733 // From DeadMachineInstructionElem. Don't delete inline assembly.
734 if (MI->isInlineAsm()) {
735 ++MI;
736 continue;
737 }
738 bool SawStore = false;
739 // Check if it's safe to remove the instruction due to side effects.
740 // We can, and want to, remove Phis here.
741 if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
742 ++MI;
743 continue;
744 }
745 bool used = true;
746 for (const MachineOperand &MO : MI->operands()) {
747 if (!MO.isReg() || !MO.isDef())
748 continue;
749 Register reg = MO.getReg();
750 // Assume physical registers are used, unless they are marked dead.
751 if (reg.isPhysical()) {
752 used = !MO.isDead();
753 if (used)
754 break;
755 continue;
756 }
757 unsigned realUses = 0;
758 for (const MachineOperand &U : MRI.use_operands(reg)) {
759 // Check if there are any uses that occur only in the original
760 // loop. If so, that's not a real use.
761 if (U.getParent()->getParent() != BB) {
762 realUses++;
763 used = true;
764 break;
765 }
766 }
767 if (realUses > 0)
768 break;
769 used = false;
770 }
771 if (!used) {
773 MI++->eraseFromParent();
774 continue;
775 }
776 ++MI;
777 }
778 // In the kernel block, check if we can remove a Phi that generates a value
779 // used in an instruction removed in the epilog block.
780 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
781 Register reg = MI.getOperand(0).getReg();
782 if (MRI.use_begin(reg) == MRI.use_end()) {
784 MI.eraseFromParent();
785 }
786 }
787}
788
789/// For loop carried definitions, we split the lifetime of a virtual register
790/// that has uses past the definition in the next iteration. A copy with a new
791/// virtual register is inserted before the definition, which helps with
792/// generating a better register assignment.
793///
794/// v1 = phi(a, v2) v1 = phi(a, v2)
795/// v2 = phi(b, v3) v2 = phi(b, v3)
796/// v3 = .. v4 = copy v1
797/// .. = V1 v3 = ..
798/// .. = v4
799void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
800 MBBVectorTy &EpilogBBs) {
802 for (auto &PHI : KernelBB->phis()) {
803 Register Def = PHI.getOperand(0).getReg();
804 // Check for any Phi definition that used as an operand of another Phi
805 // in the same block.
807 E = MRI.use_instr_end();
808 I != E; ++I) {
809 if (I->isPHI() && I->getParent() == KernelBB) {
810 // Get the loop carried definition.
811 unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
812 if (!LCDef)
813 continue;
814 MachineInstr *MI = MRI.getVRegDef(LCDef);
815 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
816 continue;
817 // Search through the rest of the block looking for uses of the Phi
818 // definition. If one occurs, then split the lifetime.
819 unsigned SplitReg = 0;
821 KernelBB->instr_end()))
822 if (BBJ.readsRegister(Def)) {
823 // We split the lifetime when we find the first use.
824 if (SplitReg == 0) {
825 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
826 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
827 TII->get(TargetOpcode::COPY), SplitReg)
828 .addReg(Def);
829 }
830 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
831 }
832 if (!SplitReg)
833 continue;
834 // Search through each of the epilog blocks for any uses to be renamed.
835 for (auto &Epilog : EpilogBBs)
836 for (auto &I : *Epilog)
837 if (I.readsRegister(Def))
838 I.substituteRegister(Def, SplitReg, 0, *TRI);
839 break;
840 }
841 }
842 }
843}
844
845/// Remove the incoming block from the Phis in a basic block.
846static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
847 for (MachineInstr &MI : *BB) {
848 if (!MI.isPHI())
849 break;
850 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
851 if (MI.getOperand(i + 1).getMBB() == Incoming) {
852 MI.removeOperand(i + 1);
853 MI.removeOperand(i);
854 break;
855 }
856 }
857}
858
859/// Create branches from each prolog basic block to the appropriate epilog
860/// block. These edges are needed if the loop ends before reaching the
861/// kernel.
862void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
863 MBBVectorTy &PrologBBs,
864 MachineBasicBlock *KernelBB,
865 MBBVectorTy &EpilogBBs,
866 ValueMapTy *VRMap) {
867 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
868 MachineBasicBlock *LastPro = KernelBB;
869 MachineBasicBlock *LastEpi = KernelBB;
870
871 // Start from the blocks connected to the kernel and work "out"
872 // to the first prolog and the last epilog blocks.
874 unsigned MaxIter = PrologBBs.size() - 1;
875 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
876 // Add branches to the prolog that go to the corresponding
877 // epilog, and the fall-thru prolog/kernel block.
878 MachineBasicBlock *Prolog = PrologBBs[j];
879 MachineBasicBlock *Epilog = EpilogBBs[i];
880
882 std::optional<bool> StaticallyGreater =
883 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
884 unsigned numAdded = 0;
885 if (!StaticallyGreater) {
886 Prolog->addSuccessor(Epilog);
887 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
888 } else if (*StaticallyGreater == false) {
889 Prolog->addSuccessor(Epilog);
890 Prolog->removeSuccessor(LastPro);
891 LastEpi->removeSuccessor(Epilog);
892 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
893 removePhis(Epilog, LastEpi);
894 // Remove the blocks that are no longer referenced.
895 if (LastPro != LastEpi) {
896 LastEpi->clear();
897 LastEpi->eraseFromParent();
898 }
899 if (LastPro == KernelBB) {
900 LoopInfo->disposed();
901 NewKernel = nullptr;
902 }
903 LastPro->clear();
904 LastPro->eraseFromParent();
905 } else {
906 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
908 }
909 LastPro = Prolog;
910 LastEpi = Epilog;
912 E = Prolog->instr_rend();
913 I != E && numAdded > 0; ++I, --numAdded)
914 updateInstruction(&*I, false, j, 0, VRMap);
915 }
916
917 if (NewKernel) {
918 LoopInfo->setPreheader(PrologBBs[MaxIter]);
919 LoopInfo->adjustTripCount(-(MaxIter + 1));
920 }
921}
922
923/// Return true if we can compute the amount the instruction changes
924/// during each iteration. Set Delta to the amount of the change.
925bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
927 const MachineOperand *BaseOp;
928 int64_t Offset;
929 bool OffsetIsScalable;
930 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
931 return false;
932
933 // FIXME: This algorithm assumes instructions have fixed-size offsets.
934 if (OffsetIsScalable)
935 return false;
936
937 if (!BaseOp->isReg())
938 return false;
939
940 Register BaseReg = BaseOp->getReg();
941
943 // Check if there is a Phi. If so, get the definition in the loop.
944 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
945 if (BaseDef && BaseDef->isPHI()) {
946 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
947 BaseDef = MRI.getVRegDef(BaseReg);
948 }
949 if (!BaseDef)
950 return false;
951
952 int D = 0;
953 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
954 return false;
955
956 Delta = D;
957 return true;
958}
959
960/// Update the memory operand with a new offset when the pipeliner
961/// generates a new copy of the instruction that refers to a
962/// different memory location.
963void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
964 MachineInstr &OldMI,
965 unsigned Num) {
966 if (Num == 0)
967 return;
968 // If the instruction has memory operands, then adjust the offset
969 // when the instruction appears in different stages.
970 if (NewMI.memoperands_empty())
971 return;
973 for (MachineMemOperand *MMO : NewMI.memoperands()) {
974 // TODO: Figure out whether isAtomic is really necessary (see D57601).
975 if (MMO->isVolatile() || MMO->isAtomic() ||
976 (MMO->isInvariant() && MMO->isDereferenceable()) ||
977 (!MMO->getValue())) {
978 NewMMOs.push_back(MMO);
979 continue;
980 }
981 unsigned Delta;
982 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
983 int64_t AdjOffset = Delta * Num;
984 NewMMOs.push_back(
985 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
986 } else {
987 NewMMOs.push_back(
989 }
990 }
991 NewMI.setMemRefs(MF, NewMMOs);
992}
993
994/// Clone the instruction for the new pipelined loop and update the
995/// memory operands, if needed.
996MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
997 unsigned CurStageNum,
998 unsigned InstStageNum) {
999 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1000 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1001 return NewMI;
1002}
1003
1004/// Clone the instruction for the new pipelined loop. If needed, this
1005/// function updates the instruction using the values saved in the
1006/// InstrChanges structure.
1007MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1008 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1009 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1010 auto It = InstrChanges.find(OldMI);
1011 if (It != InstrChanges.end()) {
1012 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1013 unsigned BasePos, OffsetPos;
1014 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1015 return nullptr;
1016 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1017 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1018 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1019 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1020 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1021 }
1022 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1023 return NewMI;
1024}
1025
1026/// Update the machine instruction with new virtual registers. This
1027/// function may change the definitions and/or uses.
1028void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1029 bool LastDef,
1030 unsigned CurStageNum,
1031 unsigned InstrStageNum,
1032 ValueMapTy *VRMap) {
1033 for (MachineOperand &MO : NewMI->operands()) {
1034 if (!MO.isReg() || !MO.getReg().isVirtual())
1035 continue;
1036 Register reg = MO.getReg();
1037 if (MO.isDef()) {
1038 // Create a new virtual register for the definition.
1039 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1040 Register NewReg = MRI.createVirtualRegister(RC);
1041 MO.setReg(NewReg);
1042 VRMap[CurStageNum][reg] = NewReg;
1043 if (LastDef)
1044 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1045 } else if (MO.isUse()) {
1046 MachineInstr *Def = MRI.getVRegDef(reg);
1047 // Compute the stage that contains the last definition for instruction.
1048 int DefStageNum = Schedule.getStage(Def);
1049 unsigned StageNum = CurStageNum;
1050 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1051 // Compute the difference in stages between the defintion and the use.
1052 unsigned StageDiff = (InstrStageNum - DefStageNum);
1053 // Make an adjustment to get the last definition.
1054 StageNum -= StageDiff;
1055 }
1056 if (VRMap[StageNum].count(reg))
1057 MO.setReg(VRMap[StageNum][reg]);
1058 }
1059 }
1060}
1061
1062/// Return the instruction in the loop that defines the register.
1063/// If the definition is a Phi, then follow the Phi operand to
1064/// the instruction in the loop.
1065MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1067 MachineInstr *Def = MRI.getVRegDef(Reg);
1068 while (Def->isPHI()) {
1069 if (!Visited.insert(Def).second)
1070 break;
1071 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1072 if (Def->getOperand(i + 1).getMBB() == BB) {
1073 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1074 break;
1075 }
1076 }
1077 return Def;
1078}
1079
1080/// Return the new name for the value from the previous stage.
1081unsigned ModuloScheduleExpander::getPrevMapVal(
1082 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1083 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1084 unsigned PrevVal = 0;
1085 if (StageNum > PhiStage) {
1086 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1087 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1088 // The name is defined in the previous stage.
1089 PrevVal = VRMap[StageNum - 1][LoopVal];
1090 else if (VRMap[StageNum].count(LoopVal))
1091 // The previous name is defined in the current stage when the instruction
1092 // order is swapped.
1093 PrevVal = VRMap[StageNum][LoopVal];
1094 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1095 // The loop value hasn't yet been scheduled.
1096 PrevVal = LoopVal;
1097 else if (StageNum == PhiStage + 1)
1098 // The loop value is another phi, which has not been scheduled.
1099 PrevVal = getInitPhiReg(*LoopInst, BB);
1100 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1101 // The loop value is another phi, which has been scheduled.
1102 PrevVal =
1103 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1104 LoopStage, VRMap, BB);
1105 }
1106 return PrevVal;
1107}
1108
1109/// Rewrite the Phi values in the specified block to use the mappings
1110/// from the initial operand. Once the Phi is scheduled, we switch
1111/// to using the loop value instead of the Phi value, so those names
1112/// do not need to be rewritten.
1113void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1114 unsigned StageNum,
1115 ValueMapTy *VRMap,
1116 InstrMapTy &InstrMap) {
1117 for (auto &PHI : BB->phis()) {
1118 unsigned InitVal = 0;
1119 unsigned LoopVal = 0;
1120 getPhiRegs(PHI, BB, InitVal, LoopVal);
1121 Register PhiDef = PHI.getOperand(0).getReg();
1122
1123 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1124 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1125 unsigned NumPhis = getStagesForPhi(PhiDef);
1126 if (NumPhis > StageNum)
1127 NumPhis = StageNum;
1128 for (unsigned np = 0; np <= NumPhis; ++np) {
1129 unsigned NewVal =
1130 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1131 if (!NewVal)
1132 NewVal = InitVal;
1133 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1134 NewVal);
1135 }
1136 }
1137}
1138
1139/// Rewrite a previously scheduled instruction to use the register value
1140/// from the new instruction. Make sure the instruction occurs in the
1141/// basic block, and we don't change the uses in the new instruction.
1142void ModuloScheduleExpander::rewriteScheduledInstr(
1143 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1144 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1145 unsigned PrevReg) {
1146 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1147 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1148 // Rewrite uses that have been scheduled already to use the new
1149 // Phi register.
1150 for (MachineOperand &UseOp :
1151 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1152 MachineInstr *UseMI = UseOp.getParent();
1153 if (UseMI->getParent() != BB)
1154 continue;
1155 if (UseMI->isPHI()) {
1156 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1157 continue;
1158 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1159 continue;
1160 }
1161 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1162 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1163 MachineInstr *OrigMI = OrigInstr->second;
1164 int StageSched = Schedule.getStage(OrigMI);
1165 int CycleSched = Schedule.getCycle(OrigMI);
1166 unsigned ReplaceReg = 0;
1167 // This is the stage for the scheduled instruction.
1168 if (StagePhi == StageSched && Phi->isPHI()) {
1169 int CyclePhi = Schedule.getCycle(Phi);
1170 if (PrevReg && InProlog)
1171 ReplaceReg = PrevReg;
1172 else if (PrevReg && !isLoopCarried(*Phi) &&
1173 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1174 ReplaceReg = PrevReg;
1175 else
1176 ReplaceReg = NewReg;
1177 }
1178 // The scheduled instruction occurs before the scheduled Phi, and the
1179 // Phi is not loop carried.
1180 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1181 ReplaceReg = NewReg;
1182 if (StagePhi > StageSched && Phi->isPHI())
1183 ReplaceReg = NewReg;
1184 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1185 ReplaceReg = NewReg;
1186 if (ReplaceReg) {
1187 const TargetRegisterClass *NRC =
1188 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1189 if (NRC)
1190 UseOp.setReg(ReplaceReg);
1191 else {
1192 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1193 BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1194 SplitReg)
1195 .addReg(ReplaceReg);
1196 UseOp.setReg(SplitReg);
1197 }
1198 }
1199 }
1200}
1201
1202bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1203 if (!Phi.isPHI())
1204 return false;
1205 int DefCycle = Schedule.getCycle(&Phi);
1206 int DefStage = Schedule.getStage(&Phi);
1207
1208 unsigned InitVal = 0;
1209 unsigned LoopVal = 0;
1210 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1211 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1212 if (!Use || Use->isPHI())
1213 return true;
1214 int LoopCycle = Schedule.getCycle(Use);
1215 int LoopStage = Schedule.getStage(Use);
1216 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1217}
1218
1219//===----------------------------------------------------------------------===//
1220// PeelingModuloScheduleExpander implementation
1221//===----------------------------------------------------------------------===//
1222// This is a reimplementation of ModuloScheduleExpander that works by creating
1223// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1224//===----------------------------------------------------------------------===//
1225
1226namespace {
1227// Remove any dead phis in MBB. Dead phis either have only one block as input
1228// (in which case they are the identity) or have no uses.
1229void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1230 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1231 bool Changed = true;
1232 while (Changed) {
1233 Changed = false;
1235 assert(MI.isPHI());
1236 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1237 if (LIS)
1239 MI.eraseFromParent();
1240 Changed = true;
1241 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1242 const TargetRegisterClass *ConstrainRegClass =
1243 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1244 MRI.getRegClass(MI.getOperand(0).getReg()));
1245 assert(ConstrainRegClass &&
1246 "Expected a valid constrained register class!");
1247 (void)ConstrainRegClass;
1248 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1249 MI.getOperand(1).getReg());
1250 if (LIS)
1252 MI.eraseFromParent();
1253 Changed = true;
1254 }
1255 }
1256 }
1257}
1258
1259/// Rewrites the kernel block in-place to adhere to the given schedule.
1260/// KernelRewriter holds all of the state required to perform the rewriting.
1261class KernelRewriter {
1262 ModuloSchedule &S;
1264 MachineBasicBlock *PreheaderBB, *ExitBB;
1266 const TargetInstrInfo *TII;
1267 LiveIntervals *LIS;
1268
1269 // Map from register class to canonical undef register for that class.
1271 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1272 // this map is only used when InitReg is non-undef.
1274 // Map from LoopReg to phi register where the InitReg is undef.
1276
1277 // Reg is used by MI. Return the new register MI should use to adhere to the
1278 // schedule. Insert phis as necessary.
1279 Register remapUse(Register Reg, MachineInstr &MI);
1280 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1281 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1282 // or will be chosen so as to share another phi.
1283 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1284 const TargetRegisterClass *RC = nullptr);
1285 // Create an undef register of the given register class.
1286 Register undef(const TargetRegisterClass *RC);
1287
1288public:
1289 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1290 LiveIntervals *LIS = nullptr);
1291 void rewrite();
1292};
1293} // namespace
1294
1295KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1296 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1297 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1298 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1299 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1300 PreheaderBB = *BB->pred_begin();
1301 if (PreheaderBB == BB)
1302 PreheaderBB = *std::next(BB->pred_begin());
1303}
1304
1305void KernelRewriter::rewrite() {
1306 // Rearrange the loop to be in schedule order. Note that the schedule may
1307 // contain instructions that are not owned by the loop block (InstrChanges and
1308 // friends), so we gracefully handle unowned instructions and delete any
1309 // instructions that weren't in the schedule.
1310 auto InsertPt = BB->getFirstTerminator();
1311 MachineInstr *FirstMI = nullptr;
1312 for (MachineInstr *MI : S.getInstructions()) {
1313 if (MI->isPHI())
1314 continue;
1315 if (MI->getParent())
1316 MI->removeFromParent();
1317 BB->insert(InsertPt, MI);
1318 if (!FirstMI)
1319 FirstMI = MI;
1320 }
1321 assert(FirstMI && "Failed to find first MI in schedule");
1322
1323 // At this point all of the scheduled instructions are between FirstMI
1324 // and the end of the block. Kill from the first non-phi to FirstMI.
1325 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1326 if (LIS)
1328 (I++)->eraseFromParent();
1329 }
1330
1331 // Now remap every instruction in the loop.
1332 for (MachineInstr &MI : *BB) {
1333 if (MI.isPHI() || MI.isTerminator())
1334 continue;
1335 for (MachineOperand &MO : MI.uses()) {
1336 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1337 continue;
1338 Register Reg = remapUse(MO.getReg(), MI);
1339 MO.setReg(Reg);
1340 }
1341 }
1342 EliminateDeadPhis(BB, MRI, LIS);
1343
1344 // Ensure a phi exists for all instructions that are either referenced by
1345 // an illegal phi or by an instruction outside the loop. This allows us to
1346 // treat remaps of these values the same as "normal" values that come from
1347 // loop-carried phis.
1348 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1349 if (MI->isPHI()) {
1350 Register R = MI->getOperand(0).getReg();
1351 phi(R);
1352 continue;
1353 }
1354
1355 for (MachineOperand &Def : MI->defs()) {
1356 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1357 if (MI.getParent() != BB) {
1358 phi(Def.getReg());
1359 break;
1360 }
1361 }
1362 }
1363 }
1364}
1365
1366Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1367 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1368 if (!Producer)
1369 return Reg;
1370
1371 int ConsumerStage = S.getStage(&MI);
1372 if (!Producer->isPHI()) {
1373 // Non-phi producers are simple to remap. Insert as many phis as the
1374 // difference between the consumer and producer stages.
1375 if (Producer->getParent() != BB)
1376 // Producer was not inside the loop. Use the register as-is.
1377 return Reg;
1378 int ProducerStage = S.getStage(Producer);
1379 assert(ConsumerStage != -1 &&
1380 "In-loop consumer should always be scheduled!");
1381 assert(ConsumerStage >= ProducerStage);
1382 unsigned StageDiff = ConsumerStage - ProducerStage;
1383
1384 for (unsigned I = 0; I < StageDiff; ++I)
1385 Reg = phi(Reg);
1386 return Reg;
1387 }
1388
1389 // First, dive through the phi chain to find the defaults for the generated
1390 // phis.
1392 Register LoopReg = Reg;
1393 auto LoopProducer = Producer;
1394 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1395 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1396 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1397 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1398 assert(LoopProducer);
1399 }
1400 int LoopProducerStage = S.getStage(LoopProducer);
1401
1402 std::optional<Register> IllegalPhiDefault;
1403
1404 if (LoopProducerStage == -1) {
1405 // Do nothing.
1406 } else if (LoopProducerStage > ConsumerStage) {
1407 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1408 // In addition, Consumer's cycle must be scheduled after Producer in the
1409 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1410 // functions.
1411#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1412 int LoopProducerCycle = S.getCycle(LoopProducer);
1413 int ConsumerCycle = S.getCycle(&MI);
1414#endif
1415 assert(LoopProducerCycle <= ConsumerCycle);
1416 assert(LoopProducerStage == ConsumerStage + 1);
1417 // Peel off the first phi from Defaults and insert a phi between producer
1418 // and consumer. This phi will not be at the front of the block so we
1419 // consider it illegal. It will only exist during the rewrite process; it
1420 // needs to exist while we peel off prologs because these could take the
1421 // default value. After that we can replace all uses with the loop producer
1422 // value.
1423 IllegalPhiDefault = Defaults.front();
1424 Defaults.erase(Defaults.begin());
1425 } else {
1426 assert(ConsumerStage >= LoopProducerStage);
1427 int StageDiff = ConsumerStage - LoopProducerStage;
1428 if (StageDiff > 0) {
1429 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1430 << " to " << (Defaults.size() + StageDiff) << "\n");
1431 // If we need more phis than we have defaults for, pad out with undefs for
1432 // the earliest phis, which are at the end of the defaults chain (the
1433 // chain is in reverse order).
1434 Defaults.resize(Defaults.size() + StageDiff,
1435 Defaults.empty() ? std::optional<Register>()
1436 : Defaults.back());
1437 }
1438 }
1439
1440 // Now we know the number of stages to jump back, insert the phi chain.
1441 auto DefaultI = Defaults.rbegin();
1442 while (DefaultI != Defaults.rend())
1443 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1444
1445 if (IllegalPhiDefault) {
1446 // The consumer optionally consumes LoopProducer in the same iteration
1447 // (because the producer is scheduled at an earlier cycle than the consumer)
1448 // or the initial value. To facilitate this we create an illegal block here
1449 // by embedding a phi in the middle of the block. We will fix this up
1450 // immediately prior to pruning.
1451 auto RC = MRI.getRegClass(Reg);
1452 Register R = MRI.createVirtualRegister(RC);
1453 MachineInstr *IllegalPhi =
1454 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1455 .addReg(*IllegalPhiDefault)
1456 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1457 .addReg(LoopReg)
1458 .addMBB(BB); // Block choice is arbitrary and has no effect.
1459 // Illegal phi should belong to the producer stage so that it can be
1460 // filtered correctly during peeling.
1461 S.setStage(IllegalPhi, LoopProducerStage);
1462 return R;
1463 }
1464
1465 return LoopReg;
1466}
1467
1468Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1469 const TargetRegisterClass *RC) {
1470 // If the init register is not undef, try and find an existing phi.
1471 if (InitReg) {
1472 auto I = Phis.find({LoopReg, *InitReg});
1473 if (I != Phis.end())
1474 return I->second;
1475 } else {
1476 for (auto &KV : Phis) {
1477 if (KV.first.first == LoopReg)
1478 return KV.second;
1479 }
1480 }
1481
1482 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1483 // find a phi that takes undef as input.
1484 auto I = UndefPhis.find(LoopReg);
1485 if (I != UndefPhis.end()) {
1486 Register R = I->second;
1487 if (!InitReg)
1488 // Found a phi taking undef as input, and this input is undef so return
1489 // without any more changes.
1490 return R;
1491 // Found a phi taking undef as input, so rewrite it to take InitReg.
1492 MachineInstr *MI = MRI.getVRegDef(R);
1493 MI->getOperand(1).setReg(*InitReg);
1494 Phis.insert({{LoopReg, *InitReg}, R});
1495 const TargetRegisterClass *ConstrainRegClass =
1496 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1497 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1498 (void)ConstrainRegClass;
1499 UndefPhis.erase(I);
1500 return R;
1501 }
1502
1503 // Failed to find any existing phi to reuse, so create a new one.
1504 if (!RC)
1505 RC = MRI.getRegClass(LoopReg);
1506 Register R = MRI.createVirtualRegister(RC);
1507 if (InitReg) {
1508 const TargetRegisterClass *ConstrainRegClass =
1509 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1510 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1511 (void)ConstrainRegClass;
1512 }
1513 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1514 .addReg(InitReg ? *InitReg : undef(RC))
1515 .addMBB(PreheaderBB)
1516 .addReg(LoopReg)
1517 .addMBB(BB);
1518 if (!InitReg)
1519 UndefPhis[LoopReg] = R;
1520 else
1521 Phis[{LoopReg, *InitReg}] = R;
1522 return R;
1523}
1524
1525Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1526 Register &R = Undefs[RC];
1527 if (R == 0) {
1528 // Create an IMPLICIT_DEF that defines this register if we need it.
1529 // All uses of this should be removed by the time we have finished unrolling
1530 // prologs and epilogs.
1531 R = MRI.createVirtualRegister(RC);
1532 auto *InsertBB = &PreheaderBB->getParent()->front();
1533 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1534 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1535 }
1536 return R;
1537}
1538
1539namespace {
1540/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1541/// the operand are discovered, such as how many in-loop PHIs it has to jump
1542/// through and defaults for these phis.
1543class KernelOperandInfo {
1546 SmallVector<Register, 4> PhiDefaults;
1549
1550public:
1551 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1552 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1553 : MRI(MRI) {
1554 Source = MO;
1555 BB = MO->getParent()->getParent();
1556 while (isRegInLoop(MO)) {
1557 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1558 if (MI->isFullCopy()) {
1559 MO = &MI->getOperand(1);
1560 continue;
1561 }
1562 if (!MI->isPHI())
1563 break;
1564 // If this is an illegal phi, don't count it in distance.
1565 if (IllegalPhis.count(MI)) {
1566 MO = &MI->getOperand(3);
1567 continue;
1568 }
1569
1571 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1572 : &MI->getOperand(3);
1573 PhiDefaults.push_back(Default);
1574 }
1575 Target = MO;
1576 }
1577
1578 bool operator==(const KernelOperandInfo &Other) const {
1579 return PhiDefaults.size() == Other.PhiDefaults.size();
1580 }
1581
1582 void print(raw_ostream &OS) const {
1583 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1584 << *Source->getParent();
1585 }
1586
1587private:
1588 bool isRegInLoop(MachineOperand *MO) {
1589 return MO->isReg() && MO->getReg().isVirtual() &&
1590 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1591 }
1592};
1593} // namespace
1594
1598 if (LPD == LPD_Front)
1599 PeeledFront.push_back(NewBB);
1600 else
1601 PeeledBack.push_front(NewBB);
1602 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1603 ++I, ++NI) {
1604 CanonicalMIs[&*I] = &*I;
1605 CanonicalMIs[&*NI] = &*I;
1606 BlockMIs[{NewBB, &*I}] = &*NI;
1607 BlockMIs[{BB, &*I}] = &*I;
1608 }
1609 return NewBB;
1610}
1611
1613 int MinStage) {
1614 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1615 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1616 MachineInstr *MI = &*I++;
1617 int Stage = getStage(MI);
1618 if (Stage == -1 || Stage >= MinStage)
1619 continue;
1620
1621 for (MachineOperand &DefMO : MI->defs()) {
1623 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1624 // Only PHIs can use values from this block by construction.
1625 // Match with the equivalent PHI in B.
1626 assert(UseMI.isPHI());
1627 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1628 MI->getParent());
1629 Subs.emplace_back(&UseMI, Reg);
1630 }
1631 for (auto &Sub : Subs)
1632 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1634 }
1635 if (LIS)
1637 MI->eraseFromParent();
1638 }
1639}
1640
1642 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1643 auto InsertPt = DestBB->getFirstNonPHI();
1646 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1647 if (MI.isPHI()) {
1648 // This is an illegal PHI. If we move any instructions using an illegal
1649 // PHI, we need to create a legal Phi.
1650 if (getStage(&MI) != Stage) {
1651 // The legal Phi is not necessary if the illegal phi's stage
1652 // is being moved.
1653 Register PhiR = MI.getOperand(0).getReg();
1654 auto RC = MRI.getRegClass(PhiR);
1656 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1657 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1658 .addReg(PhiR)
1659 .addMBB(SourceBB);
1660 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1662 Remaps[PhiR] = NR;
1663 }
1664 }
1665 if (getStage(&MI) != Stage)
1666 continue;
1667 MI.removeFromParent();
1668 DestBB->insert(InsertPt, &MI);
1669 auto *KernelMI = CanonicalMIs[&MI];
1670 BlockMIs[{DestBB, KernelMI}] = &MI;
1671 BlockMIs.erase({SourceBB, KernelMI});
1672 }
1674 for (MachineInstr &MI : DestBB->phis()) {
1675 assert(MI.getNumOperands() == 3);
1676 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1677 // If the instruction referenced by the phi is moved inside the block
1678 // we don't need the phi anymore.
1679 if (getStage(Def) == Stage) {
1680 Register PhiReg = MI.getOperand(0).getReg();
1681 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
1682 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1683 MI.getOperand(0).setReg(PhiReg);
1684 PhiToDelete.push_back(&MI);
1685 }
1686 }
1687 for (auto *P : PhiToDelete)
1688 P->eraseFromParent();
1689 InsertPt = DestBB->getFirstNonPHI();
1690 // Helper to clone Phi instructions into the destination block. We clone Phi
1691 // greedily to avoid combinatorial explosion of Phi instructions.
1692 auto clonePhi = [&](MachineInstr *Phi) {
1693 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1694 DestBB->insert(InsertPt, NewMI);
1695 Register OrigR = Phi->getOperand(0).getReg();
1697 NewMI->getOperand(0).setReg(R);
1698 NewMI->getOperand(1).setReg(OrigR);
1699 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1700 Remaps[OrigR] = R;
1701 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1702 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1704 return R;
1705 };
1706 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1707 for (MachineOperand &MO : I->uses()) {
1708 if (!MO.isReg())
1709 continue;
1710 if (Remaps.count(MO.getReg()))
1711 MO.setReg(Remaps[MO.getReg()]);
1712 else {
1713 // If we are using a phi from the source block we need to add a new phi
1714 // pointing to the old one.
1716 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1717 Register R = clonePhi(Use);
1718 MO.setReg(R);
1719 }
1720 }
1721 }
1722 }
1723}
1724
1727 MachineInstr *Phi) {
1728 unsigned distance = PhiNodeLoopIteration[Phi];
1729 MachineInstr *CanonicalUse = CanonicalPhi;
1730 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1731 for (unsigned I = 0; I < distance; ++I) {
1732 assert(CanonicalUse->isPHI());
1733 assert(CanonicalUse->getNumOperands() == 5);
1734 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1735 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1736 std::swap(LoopRegIdx, InitRegIdx);
1737 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1738 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1739 }
1740 return CanonicalUseReg;
1741}
1742
1744 BitVector LS(Schedule.getNumStages(), true);
1745 BitVector AS(Schedule.getNumStages(), true);
1746 LiveStages[BB] = LS;
1747 AvailableStages[BB] = AS;
1748
1749 // Peel out the prologs.
1750 LS.reset();
1751 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1752 LS[I] = true;
1753 Prologs.push_back(peelKernel(LPD_Front));
1754 LiveStages[Prologs.back()] = LS;
1755 AvailableStages[Prologs.back()] = LS;
1756 }
1757
1758 // Create a block that will end up as the new loop exiting block (dominated by
1759 // all prologs and epilogs). It will only contain PHIs, in the same order as
1760 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1761 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1762 // property that any value deffed in BB but used outside of BB is used by a
1763 // PHI in the exiting block.
1765 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1766 // Push out the epilogs, again in reverse order.
1767 // We can't assume anything about the minumum loop trip count at this point,
1768 // so emit a fairly complex epilog.
1769
1770 // We first peel number of stages minus one epilogue. Then we remove dead
1771 // stages and reorder instructions based on their stage. If we have 3 stages
1772 // we generate first:
1773 // E0[3, 2, 1]
1774 // E1[3', 2']
1775 // E2[3'']
1776 // And then we move instructions based on their stages to have:
1777 // E0[3]
1778 // E1[2, 3']
1779 // E2[1, 2', 3'']
1780 // The transformation is legal because we only move instructions past
1781 // instructions of a previous loop iteration.
1782 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1783 Epilogs.push_back(peelKernel(LPD_Back));
1784 MachineBasicBlock *B = Epilogs.back();
1786 // Keep track at which iteration each phi belongs to. We need it to know
1787 // what version of the variable to use during prologue/epilogue stitching.
1788 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1789 for (MachineInstr &Phi : B->phis())
1791 }
1792 for (size_t I = 0; I < Epilogs.size(); I++) {
1793 LS.reset();
1794 for (size_t J = I; J < Epilogs.size(); J++) {
1795 int Iteration = J;
1796 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1797 // Move stage one block at a time so that Phi nodes are updated correctly.
1798 for (size_t K = Iteration; K > I; K--)
1799 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1800 LS[Stage] = true;
1801 }
1802 LiveStages[Epilogs[I]] = LS;
1803 AvailableStages[Epilogs[I]] = AS;
1804 }
1805
1806 // Now we've defined all the prolog and epilog blocks as a fallthrough
1807 // sequence, add the edges that will be followed if the loop trip count is
1808 // lower than the number of stages (connecting prologs directly with epilogs).
1809 auto PI = Prologs.begin();
1810 auto EI = Epilogs.begin();
1811 assert(Prologs.size() == Epilogs.size());
1812 for (; PI != Prologs.end(); ++PI, ++EI) {
1813 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1814 (*PI)->addSuccessor(*EI);
1815 for (MachineInstr &MI : (*EI)->phis()) {
1816 Register Reg = MI.getOperand(1).getReg();
1818 if (Use && Use->getParent() == Pred) {
1819 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1820 if (CanonicalUse->isPHI()) {
1821 // If the use comes from a phi we need to skip as many phi as the
1822 // distance between the epilogue and the kernel. Trace through the phi
1823 // chain to find the right value.
1824 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1825 }
1826 Reg = getEquivalentRegisterIn(Reg, *PI);
1827 }
1828 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1829 MI.addOperand(MachineOperand::CreateMBB(*PI));
1830 }
1831 }
1832
1833 // Create a list of all blocks in order.
1835 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1836 Blocks.push_back(BB);
1837 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1838
1839 // Iterate in reverse order over all instructions, remapping as we go.
1840 for (MachineBasicBlock *B : reverse(Blocks)) {
1841 for (auto I = B->instr_rbegin();
1842 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1844 rewriteUsesOf(&*MI);
1845 }
1846 }
1847 for (auto *MI : IllegalPhisToDelete) {
1848 if (LIS)
1850 MI->eraseFromParent();
1851 }
1853
1854 // Now all remapping has been done, we're free to optimize the generated code.
1856 EliminateDeadPhis(B, MRI, LIS);
1857 EliminateDeadPhis(ExitingBB, MRI, LIS);
1858}
1859
1862 MachineBasicBlock *Exit = *BB->succ_begin();
1863 if (Exit == BB)
1864 Exit = *std::next(BB->succ_begin());
1865
1867 MF.insert(std::next(BB->getIterator()), NewBB);
1868
1869 // Clone all phis in BB into NewBB and rewrite.
1870 for (MachineInstr &MI : BB->phis()) {
1871 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1872 Register OldR = MI.getOperand(3).getReg();
1875 for (MachineInstr &Use : MRI.use_instructions(OldR))
1876 if (Use.getParent() != BB)
1877 Uses.push_back(&Use);
1878 for (MachineInstr *Use : Uses)
1879 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1881 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1882 .addReg(OldR)
1883 .addMBB(BB);
1884 BlockMIs[{NewBB, &MI}] = NI;
1885 CanonicalMIs[NI] = &MI;
1886 }
1887 BB->replaceSuccessor(Exit, NewBB);
1888 Exit->replacePhiUsesWith(BB, NewBB);
1889 NewBB->addSuccessor(Exit);
1890
1891 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1893 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1894 (void)CanAnalyzeBr;
1895 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1896 TII->removeBranch(*BB);
1897 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1898 Cond, DebugLoc());
1899 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1900 return NewBB;
1901}
1902
1905 MachineBasicBlock *BB) {
1907 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1908 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1909}
1910
1912 if (MI->isPHI()) {
1913 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1914 // and it is produced by this block.
1915 Register PhiR = MI->getOperand(0).getReg();
1916 Register R = MI->getOperand(3).getReg();
1917 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1918 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1919 R = MI->getOperand(1).getReg();
1920 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1921 MRI.replaceRegWith(PhiR, R);
1922 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1923 // later to figure out how to remap registers.
1924 MI->getOperand(0).setReg(PhiR);
1926 return;
1927 }
1928
1929 int Stage = getStage(MI);
1930 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1931 LiveStages[MI->getParent()].test(Stage))
1932 // Instruction is live, no rewriting to do.
1933 return;
1934
1935 for (MachineOperand &DefMO : MI->defs()) {
1937 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1938 // Only PHIs can use values from this block by construction.
1939 // Match with the equivalent PHI in B.
1940 assert(UseMI.isPHI());
1941 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1942 MI->getParent());
1943 Subs.emplace_back(&UseMI, Reg);
1944 }
1945 for (auto &Sub : Subs)
1946 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1948 }
1949 if (LIS)
1951 MI->eraseFromParent();
1952}
1953
1955 // Work outwards from the kernel.
1956 bool KernelDisposed = false;
1957 int TC = Schedule.getNumStages() - 1;
1958 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1959 ++PI, ++EI, --TC) {
1961 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1965 std::optional<bool> StaticallyGreater =
1966 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1967 if (!StaticallyGreater) {
1968 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1969 // Dynamically branch based on Cond.
1970 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1971 } else if (*StaticallyGreater == false) {
1972 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1973 // Prolog never falls through; branch to epilog and orphan interior
1974 // blocks. Leave it to unreachable-block-elim to clean up.
1975 Prolog->removeSuccessor(Fallthrough);
1976 for (MachineInstr &P : Fallthrough->phis()) {
1977 P.removeOperand(2);
1978 P.removeOperand(1);
1979 }
1981 KernelDisposed = true;
1982 } else {
1983 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1984 // Prolog always falls through; remove incoming values in epilog.
1985 Prolog->removeSuccessor(Epilog);
1986 for (MachineInstr &P : Epilog->phis()) {
1987 P.removeOperand(4);
1988 P.removeOperand(3);
1989 }
1990 }
1991 }
1992
1993 if (!KernelDisposed) {
1994 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1995 LoopInfo->setPreheader(Prologs.back());
1996 } else {
1997 LoopInfo->disposed();
1998 }
1999}
2000
2002 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2003 KR.rewrite();
2004}
2005
2012
2013 rewriteKernel();
2015 fixupBranches();
2016}
2017
2021
2022 // Dump the schedule before we invalidate and remap all its instructions.
2023 // Stash it in a string so we can print it if we found an error.
2024 std::string ScheduleDump;
2025 raw_string_ostream OS(ScheduleDump);
2026 Schedule.print(OS);
2027 OS.flush();
2028
2029 // First, run the normal ModuleScheduleExpander. We don't support any
2030 // InstrChanges.
2031 assert(LIS && "Requires LiveIntervals!");
2034 MSE.expand();
2035 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2036 if (!ExpandedKernel) {
2037 // The expander optimized away the kernel. We can't do any useful checking.
2038 MSE.cleanup();
2039 return;
2040 }
2041 // Before running the KernelRewriter, re-add BB into the CFG.
2043
2044 // Now run the new expansion algorithm.
2045 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2046 KR.rewrite();
2048
2049 // Collect all illegal phis that the new algorithm created. We'll give these
2050 // to KernelOperandInfo.
2052 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2053 if (NI->isPHI())
2054 IllegalPhis.insert(&*NI);
2055 }
2056
2057 // Co-iterate across both kernels. We expect them to be identical apart from
2058 // phis and full COPYs (we look through both).
2060 auto OI = ExpandedKernel->begin();
2061 auto NI = BB->begin();
2062 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2063 while (OI->isPHI() || OI->isFullCopy())
2064 ++OI;
2065 while (NI->isPHI() || NI->isFullCopy())
2066 ++NI;
2067 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2068 // Analyze every operand separately.
2069 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2070 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2071 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2072 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2073 }
2074
2075 bool Failed = false;
2076 for (auto &OldAndNew : KOIs) {
2077 if (OldAndNew.first == OldAndNew.second)
2078 continue;
2079 Failed = true;
2080 errs() << "Modulo kernel validation error: [\n";
2081 errs() << " [golden] ";
2082 OldAndNew.first.print(errs());
2083 errs() << " ";
2084 OldAndNew.second.print(errs());
2085 errs() << "]\n";
2086 }
2087
2088 if (Failed) {
2089 errs() << "Golden reference kernel:\n";
2090 ExpandedKernel->print(errs());
2091 errs() << "New kernel:\n";
2092 BB->print(errs());
2093 errs() << ScheduleDump;
2095 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2096 }
2097
2098 // Cleanup by removing BB from the CFG again as the original
2099 // ModuloScheduleExpander intended.
2101 MSE.cleanup();
2102}
2103
2104//===----------------------------------------------------------------------===//
2105// ModuloScheduleTestPass implementation
2106//===----------------------------------------------------------------------===//
2107// This pass constructs a ModuloSchedule from its module and runs
2108// ModuloScheduleExpander.
2109//
2110// The module is expected to contain a single-block analyzable loop.
2111// The total order of instructions is taken from the loop as-is.
2112// Instructions are expected to be annotated with a PostInstrSymbol.
2113// This PostInstrSymbol must have the following format:
2114// "Stage=%d Cycle=%d".
2115//===----------------------------------------------------------------------===//
2116
2117namespace {
2118class ModuloScheduleTest : public MachineFunctionPass {
2119public:
2120 static char ID;
2121
2122 ModuloScheduleTest() : MachineFunctionPass(ID) {
2124 }
2125
2126 bool runOnMachineFunction(MachineFunction &MF) override;
2127 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2128
2129 void getAnalysisUsage(AnalysisUsage &AU) const override {
2133 }
2134};
2135} // namespace
2136
2137char ModuloScheduleTest::ID = 0;
2138
2139INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2140 "Modulo Schedule test pass", false, false)
2143INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2144 "Modulo Schedule test pass", false, false)
2145
2146bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2147 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2148 for (auto *L : MLI) {
2149 if (L->getTopBlock() != L->getBottomBlock())
2150 continue;
2151 runOnLoop(MF, *L);
2152 return false;
2153 }
2154 return false;
2155}
2156
2157static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2158 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2159 std::pair<StringRef, StringRef> StageTokenAndValue =
2160 getToken(StageAndCycle.first, "-");
2161 std::pair<StringRef, StringRef> CycleTokenAndValue =
2162 getToken(StageAndCycle.second, "-");
2163 if (StageTokenAndValue.first != "Stage" ||
2164 CycleTokenAndValue.first != "_Cycle") {
2166 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2167 return;
2168 }
2169
2170 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2171 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2172
2173 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2174}
2175
2176void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2177 LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2178 MachineBasicBlock *BB = L.getTopBlock();
2179 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2180
2182 std::vector<MachineInstr *> Instrs;
2183 for (MachineInstr &MI : *BB) {
2184 if (MI.isTerminator())
2185 continue;
2186 Instrs.push_back(&MI);
2187 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2188 dbgs() << "Parsing post-instr symbol for " << MI;
2189 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2190 }
2191 }
2192
2193 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2194 std::move(Stage));
2196 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2197 MSE.expand();
2198 MSE.cleanup();
2199}
2200
2201//===----------------------------------------------------------------------===//
2202// ModuloScheduleTestAnnotater implementation
2203//===----------------------------------------------------------------------===//
2204
2206 for (MachineInstr *MI : S.getInstructions()) {
2209 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2211 MI->setPostInstrSymbol(MF, Sym);
2212 }
2213}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineBasicBlock & MBB
Rewrite undef for PHI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
@ Default
Definition: DwarfDebug.cpp:86
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1269
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:491
Symbol * Sym
Definition: ELF_riscv.cpp:463
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
This file provides utility analysis objects describing memory locations.
modulo schedule test
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
modulo schedule Modulo Schedule test pass
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static bool rewrite(Function &F)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
A possibly irreducible generalization of a Loop.
bool hasInterval(Register Reg) const
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:743
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:641
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:713
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Iterator for intrusive lists based on ilist_node.
self_iterator getIterator()
Definition: ilist_node.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:672
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:31
constexpr double phi
Definition: MathExtras.h:45
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
CycleInfo::CycleT Cycle
Definition: CycleAnalysis.h:28
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:2011
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1921
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860