1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/CodeGen/ModuloSchedule.h"
10#include "llvm/ADT/StringRef.h"
11#include "llvm/Analysis/MemoryLocation.h"
12#include "llvm/CodeGen/LiveIntervals.h"
13#include "llvm/CodeGen/MachineInstrBuilder.h"
14#include "llvm/CodeGen/MachineLoopInfo.h"
15#include "llvm/CodeGen/MachineRegisterInfo.h"
16#include "llvm/InitializePasses.h"
17#include "llvm/MC/MCContext.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/raw_ostream.h"
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
26 "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28
29void ModuloSchedule::print(raw_ostream &OS) {
30 for (MachineInstr *MI : ScheduledInstrs)
31 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
32}
33
34//===----------------------------------------------------------------------===//
35// ModuloScheduleExpander implementation
36//===----------------------------------------------------------------------===//
37
38/// Return the register values for the operands of a Phi instruction.
39/// This function assumes the instruction is a Phi.
40static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
41 unsigned &InitVal, unsigned &LoopVal) {
42 assert(Phi.isPHI() && "Expecting a Phi.");
43
44 InitVal = 0;
45 LoopVal = 0;
46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47 if (Phi.getOperand(i + 1).getMBB() != Loop)
48 InitVal = Phi.getOperand(i).getReg();
49 else
50 LoopVal = Phi.getOperand(i).getReg();
51
52 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
53}
54
55/// Return the Phi register value that comes from the incoming block.
56static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59 return Phi.getOperand(i).getReg();
60 return 0;
61}
62
63/// Return the Phi register value that comes from the loop block.
64static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67 return Phi.getOperand(i).getReg();
68 return 0;
69}
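// The helpers above rely on the MachineInstr PHI operand layout: operand 0 is
// the result, followed by (incoming value, predecessor block) pairs, which is
// why the loops start at index 1 and step by 2. As an illustrative sketch only
// (this helper is hypothetical and not part of the upstream file), the same
// walk can collect every incoming pair of a Phi:
static void collectPhiIncoming(
    MachineInstr &Phi,
    SmallVectorImpl<std::pair<Register, MachineBasicBlock *>> &Incoming) {
  assert(Phi.isPHI() && "Expecting a Phi.");
  // Operands (1,2), (3,4), ... are value/block pairs.
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    Incoming.emplace_back(Phi.getOperand(i).getReg(),
                          Phi.getOperand(i + 1).getMBB());
}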
70
71void ModuloScheduleExpander::expand() {
72 BB = Schedule.getLoop()->getTopBlock();
73 Preheader = *BB->pred_begin();
74 if (Preheader == BB)
75 Preheader = *std::next(BB->pred_begin());
76
77 // Iterate over the definitions in each instruction, and compute the
78 // stage difference for each use. Keep the maximum value.
79 for (MachineInstr *MI : Schedule.getInstructions()) {
80 int DefStage = Schedule.getStage(MI);
81 for (const MachineOperand &Op : MI->all_defs()) {
82 Register Reg = Op.getReg();
83 unsigned MaxDiff = 0;
84 bool PhiIsSwapped = false;
85 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
86 MachineInstr *UseMI = UseOp.getParent();
87 int UseStage = Schedule.getStage(UseMI);
88 unsigned Diff = 0;
89 if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
91 if (MI->isPHI()) {
92 if (isLoopCarried(*MI))
93 ++Diff;
94 else
95 PhiIsSwapped = true;
96 }
97 MaxDiff = std::max(Diff, MaxDiff);
98 }
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
100 }
101 }
102
103 generatePipelinedLoop();
104}
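// Usage sketch (illustrative only; the surrounding names MF, MS and LIS are
// assumptions, not taken from this file): once a ModuloSchedule has been
// computed for a loop, a MachinePipeliner-style client is expected to drive
// the expander roughly like this:
static void expandModuloSchedule(MachineFunction &MF, ModuloSchedule &MS,
                                 LiveIntervals &LIS) {
  ModuloScheduleExpander::InstrChangesTy InstrChanges; // no base/offset fixups
  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(InstrChanges));
  MSE.expand();  // emit prolog blocks, the kernel and epilog blocks
  MSE.cleanup(); // erase the original loop body
}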
105
106void ModuloScheduleExpander::generatePipelinedLoop() {
107 LoopInfo = TII->analyzeLoopForPipelining(BB);
108 assert(LoopInfo && "Must be able to analyze loop!");
109
110 // Create a new basic block for the kernel and add it to the CFG.
111 MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
112
113 unsigned MaxStageCount = Schedule.getNumStages() - 1;
114
115 // Remember the registers that are used in different stages. The index is
116 // the iteration, or stage, that the instruction is scheduled in. This is
117 // a map between register names in the original block and the names created
118 // in each stage of the pipelined loop.
119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120
121 // The renaming destination by Phis for the registers across stages.
122 // This map is updated during Phis generation to point to the most recent
123 // renaming destination.
124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125
126 InstrMapTy InstrMap;
127
128 SmallVector<MachineBasicBlock *, 4> PrologBBs;
129
130 // Generate the prolog instructions that set up the pipeline.
131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
132 MF.insert(BB->getIterator(), KernelBB);
133
134 // Rearrange the instructions to generate the new, pipelined loop,
135 // and update register names as needed.
136 for (MachineInstr *CI : Schedule.getInstructions()) {
137 if (CI->isPHI())
138 continue;
139 unsigned StageNum = Schedule.getStage(CI);
140 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
141 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
142 KernelBB->push_back(NewMI);
143 InstrMap[NewMI] = CI;
144 }
145
146 // Copy any terminator instructions to the new kernel, and update
147 // names as needed.
148 for (MachineInstr &MI : BB->terminators()) {
149 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
150 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
151 KernelBB->push_back(NewMI);
152 InstrMap[NewMI] = &MI;
153 }
154
155 NewKernel = KernelBB;
156 KernelBB->transferSuccessors(BB);
157 KernelBB->replaceSuccessor(BB, KernelBB);
158
159 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
160 InstrMap, MaxStageCount, MaxStageCount, false);
161 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
162 InstrMap, MaxStageCount, MaxStageCount, false);
163
164 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
165
166 SmallVector<MachineBasicBlock *, 4> EpilogBBs;
167 // Generate the epilog instructions to complete the pipeline.
168 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
169 PrologBBs);
170
171 // We need this step because the register allocation doesn't handle some
172 // situations well, so we insert copies to help out.
173 splitLifetimes(KernelBB, EpilogBBs);
174
175 // Remove dead instructions due to loop induction variables.
176 removeDeadInstructions(KernelBB, EpilogBBs);
177
178 // Add branches between prolog and epilog blocks.
179 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
180
181 delete[] VRMap;
182 delete[] VRMapPhi;
183}
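// VRMap is an array of per-stage rename maps: VRMap[Stage][OriginalReg] holds
// the virtual register created for that stage's copy of a definition. A
// minimal lookup sketch, assuming the per-stage map is a DenseMap from the
// original register number to the renamed one (hypothetical helper, not part
// of the upstream file):
static Register lookupStageReg(DenseMap<unsigned, unsigned> *VRMap,
                               unsigned StageNum, Register OrigReg) {
  auto It = VRMap[StageNum].find(OrigReg);
  // Fall back to the original name if this stage never renamed the register.
  return It == VRMap[StageNum].end() ? OrigReg : Register(It->second);
}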
184
185void ModuloScheduleExpander::cleanup() {
186 // Remove the original loop since it's no longer referenced.
187 for (auto &I : *BB)
188 LIS.RemoveMachineInstrFromMaps(I);
189 BB->clear();
190 BB->eraseFromParent();
191}
192
193/// Generate the pipeline prolog code.
194void ModuloScheduleExpander::generateProlog(unsigned LastStage,
195 MachineBasicBlock *KernelBB,
196 ValueMapTy *VRMap,
197 MBBVectorTy &PrologBBs) {
198 MachineBasicBlock *PredBB = Preheader;
199 InstrMapTy InstrMap;
200
201 // Generate a basic block for each stage, not including the last stage,
202 // which will be generated in the kernel. Each basic block may contain
203 // instructions from multiple stages/iterations.
204 for (unsigned i = 0; i < LastStage; ++i) {
205 // Create and insert the prolog basic block prior to the original loop
206 // basic block. The original loop is removed later.
207 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
208 PrologBBs.push_back(NewBB);
209 MF.insert(BB->getIterator(), NewBB);
210 NewBB->transferSuccessors(PredBB);
211 PredBB->addSuccessor(NewBB);
212 PredBB = NewBB;
213
214 // Generate instructions for each appropriate stage. Process instructions
215 // in original program order.
216 for (int StageNum = i; StageNum >= 0; --StageNum) {
217 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
218 BBE = BB->getFirstTerminator();
219 BBI != BBE; ++BBI) {
220 if (Schedule.getStage(&*BBI) == StageNum) {
221 if (BBI->isPHI())
222 continue;
223 MachineInstr *NewMI =
224 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
225 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
226 NewBB->push_back(NewMI);
227 InstrMap[NewMI] = &*BBI;
228 }
229 }
230 }
231 rewritePhiValues(NewBB, i, VRMap, InstrMap);
232 LLVM_DEBUG({
233 dbgs() << "prolog:\n";
234 NewBB->dump();
235 });
236 }
237
238 PredBB->replaceSuccessor(BB, KernelBB);
239
240 // Check if we need to remove the branch from the preheader to the original
241 // loop, and replace it with a branch to the new loop.
242 unsigned numBranches = TII->removeBranch(*Preheader);
243 if (numBranches) {
244 SmallVector<MachineOperand, 0> Cond;
245 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
246 }
247}
248
249/// Generate the pipeline epilog code. The epilog code finishes the iterations
250/// that were started in either the prolog or the kernel. We create a basic
251/// block for each stage that needs to complete.
252void ModuloScheduleExpander::generateEpilog(
253 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
254 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
255 MBBVectorTy &PrologBBs) {
256 // We need to change the branch from the kernel to the first epilog block, so
257 // this call to analyze branch uses the kernel rather than the original BB.
258 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
259 SmallVector<MachineOperand, 4> Cond;
260 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
261 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
262 if (checkBranch)
263 return;
264
265 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
266 if (*LoopExitI == KernelBB)
267 ++LoopExitI;
268 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
269 MachineBasicBlock *LoopExitBB = *LoopExitI;
270
271 MachineBasicBlock *PredBB = KernelBB;
272 MachineBasicBlock *EpilogStart = LoopExitBB;
273 InstrMapTy InstrMap;
274
275 // Generate a basic block for each stage, not including the last stage,
276 // which was generated for the kernel. Each basic block may contain
277 // instructions from multiple stages/iterations.
278 int EpilogStage = LastStage + 1;
279 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
280 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
281 EpilogBBs.push_back(NewBB);
282 MF.insert(BB->getIterator(), NewBB);
283
284 PredBB->replaceSuccessor(LoopExitBB, NewBB);
285 NewBB->addSuccessor(LoopExitBB);
286
287 if (EpilogStart == LoopExitBB)
288 EpilogStart = NewBB;
289
290 // Add instructions to the epilog depending on the current block.
291 // Process instructions in original program order.
292 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
293 for (auto &BBI : *BB) {
294 if (BBI.isPHI())
295 continue;
296 MachineInstr *In = &BBI;
297 if ((unsigned)Schedule.getStage(In) == StageNum) {
298 // Instructions with memoperands in the epilog are updated with
299 // conservative values.
300 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
301 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
302 NewBB->push_back(NewMI);
303 InstrMap[NewMI] = In;
304 }
305 }
306 }
307 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
308 InstrMap, LastStage, EpilogStage, i == 1);
309 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
310 InstrMap, LastStage, EpilogStage, i == 1);
311 PredBB = NewBB;
312
313 LLVM_DEBUG({
314 dbgs() << "epilog:\n";
315 NewBB->dump();
316 });
317 }
318
319 // Fix any Phi nodes in the loop exit block.
320 LoopExitBB->replacePhiUsesWith(BB, PredBB);
321
322 // Create a branch to the new epilog from the kernel.
323 // Remove the original branch and add a new branch to the epilog.
324 TII->removeBranch(*KernelBB);
325 assert((OrigBB == TBB || OrigBB == FBB) &&
326 "Unable to determine looping branch direction");
327 if (OrigBB != TBB)
328 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
329 else
330 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
331 // Add a branch to the loop exit.
332 if (EpilogBBs.size() > 0) {
333 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
334 SmallVector<MachineOperand, 4> Cond1;
335 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
336 }
337}
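// The overall block structure produced by generateProlog/generateEpilog, as a
// sketch (hypothetical helper, not part of the upstream file): prolog block i
// holds copies of stages 0..i, the kernel holds every stage, and the epilogs
// are generated from the deepest stage outward, each finishing stages
// i..LastStage of the iterations started earlier.
static void describePipelineShape(unsigned NumStages, raw_ostream &OS) {
  if (NumStages == 0)
    return;
  unsigned LastStage = NumStages - 1;
  for (unsigned i = 0; i < LastStage; ++i)
    OS << "prolog " << i << ": stages [0.." << i << "]\n";
  OS << "kernel  : stages [0.." << LastStage << "]\n";
  for (unsigned i = LastStage; i >= 1; --i)
    OS << "epilog  : stages [" << i << ".." << LastStage << "]\n";
}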
338
339/// Replace all uses of FromReg that appear outside the specified
340/// basic block with ToReg.
341static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
342 MachineBasicBlock *MBB,
343 MachineRegisterInfo &MRI,
344 LiveIntervals &LIS) {
345 for (MachineOperand &O :
346 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
347 if (O.getParent()->getParent() != MBB)
348 O.setReg(ToReg);
349 if (!LIS.hasInterval(ToReg))
350 LIS.createEmptyInterval(ToReg);
351}
352
353/// Return true if the register has a use that occurs outside the
354/// specified loop.
355static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
356 MachineRegisterInfo &MRI) {
357 for (const MachineOperand &MO : MRI.use_operands(Reg))
358 if (MO.getParent()->getParent() != BB)
359 return true;
360 return false;
361}
362
363/// Generate Phis for the specific block in the generated pipelined code.
364/// This function looks at the Phis from the original code to guide the
365/// creation of new Phis.
366void ModuloScheduleExpander::generateExistingPhis(
367 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
368 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
369 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
370 // Compute the stage number for the initial value of the Phi, which
371 // comes from the prolog. The prolog to use depends on which kernel or
372 // epilog block the Phi is being added to.
373 unsigned PrologStage = 0;
374 unsigned PrevStage = 0;
375 bool InKernel = (LastStageNum == CurStageNum);
376 if (InKernel) {
377 PrologStage = LastStageNum - 1;
378 PrevStage = CurStageNum;
379 } else {
380 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
381 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
382 }
383
384 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
385 BBE = BB->getFirstNonPHI();
386 BBI != BBE; ++BBI) {
387 Register Def = BBI->getOperand(0).getReg();
388
389 unsigned InitVal = 0;
390 unsigned LoopVal = 0;
391 getPhiRegs(*BBI, BB, InitVal, LoopVal);
392
393 unsigned PhiOp1 = 0;
394 // The Phi value from the loop body typically is defined in the loop, but
395 // not always. So, we need to check if the value is defined in the loop.
396 unsigned PhiOp2 = LoopVal;
397 if (VRMap[LastStageNum].count(LoopVal))
398 PhiOp2 = VRMap[LastStageNum][LoopVal];
399
400 int StageScheduled = Schedule.getStage(&*BBI);
401 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
402 unsigned NumStages = getStagesForReg(Def, CurStageNum);
403 if (NumStages == 0) {
404 // We don't need to generate a Phi anymore, but we need to rename any uses
405 // of the Phi value.
406 unsigned NewReg = VRMap[PrevStage][LoopVal];
407 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
408 InitVal, NewReg);
409 if (VRMap[CurStageNum].count(LoopVal))
410 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
411 }
412 // Adjust the number of Phis needed depending on the number of prologs left,
413 // and the distance from where the Phi is first scheduled. The number of
414 // Phis cannot exceed the number of prolog stages. Each stage can
415 // potentially define two values.
416 unsigned MaxPhis = PrologStage + 2;
417 if (!InKernel && (int)PrologStage <= LoopValStage)
418 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
419 unsigned NumPhis = std::min(NumStages, MaxPhis);
420
421 unsigned NewReg = 0;
422 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
423 // In the epilog, we may need to look back one stage to get the correct
424 // Phi name, because the epilog and prolog blocks execute the same stage.
425 // The correct name is from the previous block only when the Phi has
426 // been completely scheduled prior to the epilog, and the Phi value is not
427 // needed in multiple stages.
428 int StageDiff = 0;
429 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
430 NumPhis == 1)
431 StageDiff = 1;
432 // Adjust the computations below when the phi and the loop definition
433 // are scheduled in different stages.
434 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
435 StageDiff = StageScheduled - LoopValStage;
436 for (unsigned np = 0; np < NumPhis; ++np) {
437 // If the Phi hasn't been scheduled, then use the initial Phi operand
438 // value. Otherwise, use the scheduled version of the instruction. This
439 // is a little complicated when a Phi references another Phi.
440 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
441 PhiOp1 = InitVal;
442 // Check if the Phi has already been scheduled in a prolog stage.
443 else if (PrologStage >= AccessStage + StageDiff + np &&
444 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
445 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
446 // Check if the Phi has already been scheduled, but the loop instruction
447 // is either another Phi, or doesn't occur in the loop.
448 else if (PrologStage >= AccessStage + StageDiff + np) {
449 // If the Phi references another Phi, we need to examine the other
450 // Phi to get the correct value.
451 PhiOp1 = LoopVal;
452 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
453 int Indirects = 1;
454 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
455 int PhiStage = Schedule.getStage(InstOp1);
456 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
457 PhiOp1 = getInitPhiReg(*InstOp1, BB);
458 else
459 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
460 InstOp1 = MRI.getVRegDef(PhiOp1);
461 int PhiOpStage = Schedule.getStage(InstOp1);
462 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
463 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
464 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
465 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
466 break;
467 }
468 ++Indirects;
469 }
470 } else
471 PhiOp1 = InitVal;
472 // If this references a generated Phi in the kernel, get the Phi operand
473 // from the incoming block.
474 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
475 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
476 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
477
478 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
479 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
480 // In the epilog, a map lookup is needed to get the value from the kernel,
481 // or previous epilog block. How this is done depends on whether the
482 // instruction is scheduled in the previous block.
483 if (!InKernel) {
484 int StageDiffAdj = 0;
485 if (LoopValStage != -1 && StageScheduled > LoopValStage)
486 StageDiffAdj = StageScheduled - LoopValStage;
487 // Use the loop value defined in the kernel, unless the kernel
488 // contains the last definition of the Phi.
489 if (np == 0 && PrevStage == LastStageNum &&
490 (StageScheduled != 0 || LoopValStage != 0) &&
491 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
492 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
493 // Use the value defined by the Phi. We add one because we switch
494 // from looking at the loop value to the Phi definition.
495 else if (np > 0 && PrevStage == LastStageNum &&
496 VRMap[PrevStage - np + 1].count(Def))
497 PhiOp2 = VRMap[PrevStage - np + 1][Def];
498 // Use the loop value defined in the kernel.
499 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
500 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
501 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
502 // Use the value defined by the Phi, unless we're generating the first
503 // epilog and the Phi refers to a Phi in a different stage.
504 else if (VRMap[PrevStage - np].count(Def) &&
505 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
506 (LoopValStage == StageScheduled)))
507 PhiOp2 = VRMap[PrevStage - np][Def];
508 }
509
510 // Check if we can reuse an existing Phi. This occurs when a Phi
511 // references another Phi, and the other Phi is scheduled in an
512 // earlier stage. We can try to reuse an existing Phi up until the last
513 // stage of the current Phi.
514 if (LoopDefIsPhi) {
515 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
516 int LVNumStages = getStagesForPhi(LoopVal);
517 int StageDiff = (StageScheduled - LoopValStage);
518 LVNumStages -= StageDiff;
519 // Make sure the loop value Phi has been processed already.
520 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
521 NewReg = PhiOp2;
522 unsigned ReuseStage = CurStageNum;
523 if (isLoopCarried(*PhiInst))
524 ReuseStage -= LVNumStages;
525 // Check if the Phi to reuse has been generated yet. If not, then
526 // there is nothing to reuse.
527 if (VRMap[ReuseStage - np].count(LoopVal)) {
528 NewReg = VRMap[ReuseStage - np][LoopVal];
529
530 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
531 Def, NewReg);
532 // Update the map with the new Phi name.
533 VRMap[CurStageNum - np][Def] = NewReg;
534 PhiOp2 = NewReg;
535 if (VRMap[LastStageNum - np - 1].count(LoopVal))
536 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
537
538 if (IsLast && np == NumPhis - 1)
539 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
540 continue;
541 }
542 }
543 }
544 if (InKernel && StageDiff > 0 &&
545 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
546 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
547 }
548
549 const TargetRegisterClass *RC = MRI.getRegClass(Def);
550 NewReg = MRI.createVirtualRegister(RC);
551
552 MachineInstrBuilder NewPhi =
553 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
554 TII->get(TargetOpcode::PHI), NewReg);
555 NewPhi.addReg(PhiOp1).addMBB(BB1);
556 NewPhi.addReg(PhiOp2).addMBB(BB2);
557 if (np == 0)
558 InstrMap[NewPhi] = &*BBI;
559
560 // We define the Phis after creating the new pipelined code, so
561 // we need to rename the Phi values in scheduled instructions.
562
563 unsigned PrevReg = 0;
564 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
565 PrevReg = VRMap[PrevStage - np][LoopVal];
566 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
567 NewReg, PrevReg);
568 // If the Phi has been scheduled, use the new name for rewriting.
569 if (VRMap[CurStageNum - np].count(Def)) {
570 unsigned R = VRMap[CurStageNum - np][Def];
571 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
572 NewReg);
573 }
574
575 // Check if we need to rename any uses that occur after the loop. The
576 // register to replace depends on whether the Phi is scheduled in the
577 // epilog.
578 if (IsLast && np == NumPhis - 1)
579 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
580
581 // In the kernel, a dependent Phi uses the value from this Phi.
582 if (InKernel)
583 PhiOp2 = NewReg;
584
585 // Update the map with the new Phi name.
586 VRMap[CurStageNum - np][Def] = NewReg;
587 }
588
589 while (NumPhis++ < NumStages) {
590 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
591 NewReg, 0);
592 }
593
594 // Check if we need to rename a Phi that has been eliminated due to
595 // scheduling.
596 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
597 replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
598 }
599}
600
601/// Generate Phis for the specified block in the generated pipelined code.
602/// These are new Phis needed because the definition is scheduled after the
603/// use in the pipelined sequence.
604void ModuloScheduleExpander::generatePhis(
605 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
606 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
607 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
608 bool IsLast) {
609 // Compute the stage number that contains the initial Phi value, and
610 // the Phi from the previous stage.
611 unsigned PrologStage = 0;
612 unsigned PrevStage = 0;
613 unsigned StageDiff = CurStageNum - LastStageNum;
614 bool InKernel = (StageDiff == 0);
615 if (InKernel) {
616 PrologStage = LastStageNum - 1;
617 PrevStage = CurStageNum;
618 } else {
619 PrologStage = LastStageNum - StageDiff;
620 PrevStage = LastStageNum + StageDiff - 1;
621 }
622
623 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
624 BBE = BB->instr_end();
625 BBI != BBE; ++BBI) {
626 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
627 MachineOperand &MO = BBI->getOperand(i);
628 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
629 continue;
630
631 int StageScheduled = Schedule.getStage(&*BBI);
632 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
633 Register Def = MO.getReg();
634 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
635 // An instruction that is scheduled in stage 0 and used after the loop
636 // requires a phi in the epilog for the last definition from either
637 // the kernel or prolog.
638 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
639 hasUseAfterLoop(Def, BB, MRI))
640 NumPhis = 1;
641 if (!InKernel && (unsigned)StageScheduled > PrologStage)
642 continue;
643
644 unsigned PhiOp2;
645 if (InKernel) {
646 PhiOp2 = VRMap[PrevStage][Def];
647 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
648 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
649 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
650 }
651 // The number of Phis can't exceed the number of prolog stages. The
652 // prolog stage number is zero based.
653 if (NumPhis > PrologStage + 1 - StageScheduled)
654 NumPhis = PrologStage + 1 - StageScheduled;
655 for (unsigned np = 0; np < NumPhis; ++np) {
656 // Example for
657 // Org:
658 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
659 //
660 // Prolog0 (Stage0):
661 // %Clone0 = ...
662 // Prolog1 (Stage1):
663 // %Clone1 = ...
664 // Kernel (Stage2):
665 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
666 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
667 // %Clone2 = ...
668 // Epilog0 (Stage3):
669 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
670 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
671 // Epilog1 (Stage4):
672 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
673 //
674 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
675 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
676 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
677
678 unsigned PhiOp1 = VRMap[PrologStage][Def];
679 if (np <= PrologStage)
680 PhiOp1 = VRMap[PrologStage - np][Def];
681 if (!InKernel) {
682 if (PrevStage == LastStageNum && np == 0)
683 PhiOp2 = VRMap[LastStageNum][Def];
684 else
685 PhiOp2 = VRMapPhi[PrevStage - np][Def];
686 }
687
688 const TargetRegisterClass *RC = MRI.getRegClass(Def);
689 Register NewReg = MRI.createVirtualRegister(RC);
690
691 MachineInstrBuilder NewPhi =
692 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
693 TII->get(TargetOpcode::PHI), NewReg);
694 NewPhi.addReg(PhiOp1).addMBB(BB1);
695 NewPhi.addReg(PhiOp2).addMBB(BB2);
696 if (np == 0)
697 InstrMap[NewPhi] = &*BBI;
698
699 // Rewrite uses and update the map. The actions depend upon whether
700 // we are generating code for the kernel or epilog blocks.
701 if (InKernel) {
702 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
703 NewReg);
704 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
705 NewReg);
706
707 PhiOp2 = NewReg;
708 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
709 } else {
710 VRMapPhi[CurStageNum - np][Def] = NewReg;
711 if (np == NumPhis - 1)
712 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
713 NewReg);
714 }
715 if (IsLast && np == NumPhis - 1)
716 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
717 }
718 }
719 }
720}
721
722/// Remove instructions that generate values with no uses.
723/// Typically, these are induction variable operations that generate values
724/// used in the loop itself. A dead instruction has a definition with
725/// no uses, or uses that occur in the original loop only.
726void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
727 MBBVectorTy &EpilogBBs) {
728 // For each epilog block, check that the value defined by each instruction
729 // is used. If not, delete it.
730 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
731 for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
732 ME = MBB->instr_rend();
733 MI != ME;) {
735 // From DeadMachineInstructionElim. Don't delete inline assembly.
735 if (MI->isInlineAsm()) {
736 ++MI;
737 continue;
738 }
739 bool SawStore = false;
740 // Check if it's safe to remove the instruction due to side effects.
741 // We can, and want to, remove Phis here.
742 if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
743 ++MI;
744 continue;
745 }
746 bool used = true;
747 for (const MachineOperand &MO : MI->all_defs()) {
748 Register reg = MO.getReg();
749 // Assume physical registers are used, unless they are marked dead.
750 if (reg.isPhysical()) {
751 used = !MO.isDead();
752 if (used)
753 break;
754 continue;
755 }
756 unsigned realUses = 0;
757 for (const MachineOperand &U : MRI.use_operands(reg)) {
758 // Check if there are any uses that occur only in the original
759 // loop. If so, that's not a real use.
760 if (U.getParent()->getParent() != BB) {
761 realUses++;
762 used = true;
763 break;
764 }
765 }
766 if (realUses > 0)
767 break;
768 used = false;
769 }
770 if (!used) {
771 LIS.RemoveMachineInstrFromMaps(*MI);
772 MI++->eraseFromParent();
773 continue;
774 }
775 ++MI;
776 }
777 // In the kernel block, check if we can remove a Phi that generates a value
778 // used in an instruction removed in the epilog block.
779 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
780 Register reg = MI.getOperand(0).getReg();
781 if (MRI.use_begin(reg) == MRI.use_end()) {
782 LIS.RemoveMachineInstrFromMaps(MI);
783 MI.eraseFromParent();
784 }
785 }
786}
787
788/// For loop carried definitions, we split the lifetime of a virtual register
789/// that has uses past the definition in the next iteration. A copy with a new
790/// virtual register is inserted before the definition, which helps with
791/// generating a better register assignment.
792///
793/// v1 = phi(a, v2) v1 = phi(a, v2)
794/// v2 = phi(b, v3) v2 = phi(b, v3)
795/// v3 = .. v4 = copy v1
796/// .. = v1 v3 = ..
797/// .. = v4
798void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
799 MBBVectorTy &EpilogBBs) {
800 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
801 for (auto &PHI : KernelBB->phis()) {
802 Register Def = PHI.getOperand(0).getReg();
803 // Check for any Phi definition that is used as an operand of another Phi
804 // in the same block.
805 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
806 E = MRI.use_instr_end();
807 I != E; ++I) {
808 if (I->isPHI() && I->getParent() == KernelBB) {
809 // Get the loop carried definition.
810 unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
811 if (!LCDef)
812 continue;
813 MachineInstr *MI = MRI.getVRegDef(LCDef);
814 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
815 continue;
816 // Search through the rest of the block looking for uses of the Phi
817 // definition. If one occurs, then split the lifetime.
818 unsigned SplitReg = 0;
819 for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
820 KernelBB->instr_end()))
821 if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
822 // We split the lifetime when we find the first use.
823 if (SplitReg == 0) {
824 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
825 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
826 TII->get(TargetOpcode::COPY), SplitReg)
827 .addReg(Def);
828 }
829 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
830 }
831 if (!SplitReg)
832 continue;
833 // Search through each of the epilog blocks for any uses to be renamed.
834 for (auto &Epilog : EpilogBBs)
835 for (auto &I : *Epilog)
836 if (I.readsRegister(Def, /*TRI=*/nullptr))
837 I.substituteRegister(Def, SplitReg, 0, *TRI);
838 break;
839 }
840 }
841 }
842}
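// The core of the transformation above, as a sketch (hypothetical helper, not
// part of the upstream file): copy the loop-carried value into a fresh vreg
// just before the instruction that redefines it, so later reads in the kernel
// and epilogs can be renamed to the copy.
static Register splitLoopCarriedRange(MachineBasicBlock &KernelBB,
                                      MachineInstr &RedefMI, Register Def,
                                      MachineRegisterInfo &MRI,
                                      const TargetInstrInfo *TII) {
  Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
  BuildMI(KernelBB, RedefMI, RedefMI.getDebugLoc(),
          TII->get(TargetOpcode::COPY), SplitReg)
      .addReg(Def);
  return SplitReg;
}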
843
844/// Remove the incoming block from the Phis in a basic block.
845static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
846 for (MachineInstr &MI : *BB) {
847 if (!MI.isPHI())
848 break;
849 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
850 if (MI.getOperand(i + 1).getMBB() == Incoming) {
851 MI.removeOperand(i + 1);
852 MI.removeOperand(i);
853 break;
854 }
855 }
856}
857
858/// Create branches from each prolog basic block to the appropriate epilog
859/// block. These edges are needed if the loop ends before reaching the
860/// kernel.
861void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
862 MBBVectorTy &PrologBBs,
863 MachineBasicBlock *KernelBB,
864 MBBVectorTy &EpilogBBs,
865 ValueMapTy *VRMap) {
866 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
867 MachineBasicBlock *LastPro = KernelBB;
868 MachineBasicBlock *LastEpi = KernelBB;
869
870 // Start from the blocks connected to the kernel and work "out"
871 // to the first prolog and the last epilog blocks.
873 unsigned MaxIter = PrologBBs.size() - 1;
874 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
875 // Add branches to the prolog that go to the corresponding
876 // epilog, and the fall-thru prolog/kernel block.
877 MachineBasicBlock *Prolog = PrologBBs[j];
878 MachineBasicBlock *Epilog = EpilogBBs[i];
879
880 SmallVector<MachineOperand, 4> Cond;
881 std::optional<bool> StaticallyGreater =
882 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
883 unsigned numAdded = 0;
884 if (!StaticallyGreater) {
885 Prolog->addSuccessor(Epilog);
886 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
887 } else if (*StaticallyGreater == false) {
888 Prolog->addSuccessor(Epilog);
889 Prolog->removeSuccessor(LastPro);
890 LastEpi->removeSuccessor(Epilog);
891 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
892 removePhis(Epilog, LastEpi);
893 // Remove the blocks that are no longer referenced.
894 if (LastPro != LastEpi) {
895 LastEpi->clear();
896 LastEpi->eraseFromParent();
897 }
898 if (LastPro == KernelBB) {
899 LoopInfo->disposed();
900 NewKernel = nullptr;
901 }
902 LastPro->clear();
903 LastPro->eraseFromParent();
904 } else {
905 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
906 removePhis(Epilog, Prolog);
907 }
908 LastPro = Prolog;
909 LastEpi = Epilog;
910 for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
911 E = Prolog->instr_rend();
912 I != E && numAdded > 0; ++I, --numAdded)
913 updateInstruction(&*I, false, j, 0, VRMap);
914 }
915
916 if (NewKernel) {
917 LoopInfo->setPreheader(PrologBBs[MaxIter]);
918 LoopInfo->adjustTripCount(-(MaxIter + 1));
919 }
920}
921
922/// Return true if we can compute the amount the instruction changes
923/// during each iteration. Set Delta to the amount of the change.
924bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
925 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
926 const MachineOperand *BaseOp;
927 int64_t Offset;
928 bool OffsetIsScalable;
929 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
930 return false;
931
932 // FIXME: This algorithm assumes instructions have fixed-size offsets.
933 if (OffsetIsScalable)
934 return false;
935
936 if (!BaseOp->isReg())
937 return false;
938
939 Register BaseReg = BaseOp->getReg();
940
942 // Check if there is a Phi. If so, get the definition in the loop.
943 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
944 if (BaseDef && BaseDef->isPHI()) {
945 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
946 BaseDef = MRI.getVRegDef(BaseReg);
947 }
948 if (!BaseDef)
949 return false;
950
951 int D = 0;
952 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
953 return false;
954
955 Delta = D;
956 return true;
957}
958
959/// Update the memory operand with a new offset when the pipeliner
960/// generates a new copy of the instruction that refers to a
961/// different memory location.
962void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
963 MachineInstr &OldMI,
964 unsigned Num) {
965 if (Num == 0)
966 return;
967 // If the instruction has memory operands, then adjust the offset
968 // when the instruction appears in different stages.
969 if (NewMI.memoperands_empty())
970 return;
971 SmallVector<MachineMemOperand *, 2> NewMMOs;
972 for (MachineMemOperand *MMO : NewMI.memoperands()) {
973 // TODO: Figure out whether isAtomic is really necessary (see D57601).
974 if (MMO->isVolatile() || MMO->isAtomic() ||
975 (MMO->isInvariant() && MMO->isDereferenceable()) ||
976 (!MMO->getValue())) {
977 NewMMOs.push_back(MMO);
978 continue;
979 }
980 unsigned Delta;
981 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
982 int64_t AdjOffset = Delta * Num;
983 NewMMOs.push_back(
984 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
985 } else {
988 }
989 }
990 NewMI.setMemRefs(MF, NewMMOs);
991}
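// Worked example of the adjustment above (hypothetical helper, not part of
// the upstream file): if the base register advances by Delta bytes each
// iteration and the clone sits Num stages away from the original, its memory
// operand is rebased by Delta * Num bytes, matching AdjOffset in
// updateMemOperands.
static int64_t adjustedMemOffset(unsigned Delta, unsigned Num) {
  return static_cast<int64_t>(Delta) * Num;
}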
992
993/// Clone the instruction for the new pipelined loop and update the
994/// memory operands, if needed.
995MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
996 unsigned CurStageNum,
997 unsigned InstStageNum) {
998 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
999 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1000 return NewMI;
1001}
1002
1003/// Clone the instruction for the new pipelined loop. If needed, this
1004/// function updates the instruction using the values saved in the
1005/// InstrChanges structure.
1006MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1007 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1008 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1009 auto It = InstrChanges.find(OldMI);
1010 if (It != InstrChanges.end()) {
1011 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1012 unsigned BasePos, OffsetPos;
1013 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1014 return nullptr;
1015 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1016 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1017 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1018 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1019 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1020 }
1021 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1022 return NewMI;
1023}
1024
1025/// Update the machine instruction with new virtual registers. This
1026/// function may change the definitions and/or uses.
1027void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1028 bool LastDef,
1029 unsigned CurStageNum,
1030 unsigned InstrStageNum,
1031 ValueMapTy *VRMap) {
1032 for (MachineOperand &MO : NewMI->operands()) {
1033 if (!MO.isReg() || !MO.getReg().isVirtual())
1034 continue;
1035 Register reg = MO.getReg();
1036 if (MO.isDef()) {
1037 // Create a new virtual register for the definition.
1038 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1039 Register NewReg = MRI.createVirtualRegister(RC);
1040 MO.setReg(NewReg);
1041 VRMap[CurStageNum][reg] = NewReg;
1042 if (LastDef)
1043 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1044 } else if (MO.isUse()) {
1045 MachineInstr *Def = MRI.getVRegDef(reg);
1046 // Compute the stage that contains the last definition for instruction.
1047 int DefStageNum = Schedule.getStage(Def);
1048 unsigned StageNum = CurStageNum;
1049 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1050 // Compute the difference in stages between the definition and the use.
1051 unsigned StageDiff = (InstrStageNum - DefStageNum);
1052 // Make an adjustment to get the last definition.
1053 StageNum -= StageDiff;
1054 }
1055 if (VRMap[StageNum].count(reg))
1056 MO.setReg(VRMap[StageNum][reg]);
1057 }
1058 }
1059}
1060
1061/// Return the instruction in the loop that defines the register.
1062/// If the definition is a Phi, then follow the Phi operand to
1063/// the instruction in the loop.
1064MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1065 SmallPtrSet<MachineInstr *, 8> Visited;
1066 MachineInstr *Def = MRI.getVRegDef(Reg);
1067 while (Def->isPHI()) {
1068 if (!Visited.insert(Def).second)
1069 break;
1070 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1071 if (Def->getOperand(i + 1).getMBB() == BB) {
1072 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1073 break;
1074 }
1075 }
1076 return Def;
1077}
1078
1079/// Return the new name for the value from the previous stage.
1080unsigned ModuloScheduleExpander::getPrevMapVal(
1081 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1082 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1083 unsigned PrevVal = 0;
1084 if (StageNum > PhiStage) {
1085 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1086 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1087 // The name is defined in the previous stage.
1088 PrevVal = VRMap[StageNum - 1][LoopVal];
1089 else if (VRMap[StageNum].count(LoopVal))
1090 // The previous name is defined in the current stage when the instruction
1091 // order is swapped.
1092 PrevVal = VRMap[StageNum][LoopVal];
1093 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1094 // The loop value hasn't yet been scheduled.
1095 PrevVal = LoopVal;
1096 else if (StageNum == PhiStage + 1)
1097 // The loop value is another phi, which has not been scheduled.
1098 PrevVal = getInitPhiReg(*LoopInst, BB);
1099 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1100 // The loop value is another phi, which has been scheduled.
1101 PrevVal =
1102 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1103 LoopStage, VRMap, BB);
1104 }
1105 return PrevVal;
1106}
1107
1108/// Rewrite the Phi values in the specified block to use the mappings
1109/// from the initial operand. Once the Phi is scheduled, we switch
1110/// to using the loop value instead of the Phi value, so those names
1111/// do not need to be rewritten.
1112void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1113 unsigned StageNum,
1114 ValueMapTy *VRMap,
1115 InstrMapTy &InstrMap) {
1116 for (auto &PHI : BB->phis()) {
1117 unsigned InitVal = 0;
1118 unsigned LoopVal = 0;
1119 getPhiRegs(PHI, BB, InitVal, LoopVal);
1120 Register PhiDef = PHI.getOperand(0).getReg();
1121
1122 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1123 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1124 unsigned NumPhis = getStagesForPhi(PhiDef);
1125 if (NumPhis > StageNum)
1126 NumPhis = StageNum;
1127 for (unsigned np = 0; np <= NumPhis; ++np) {
1128 unsigned NewVal =
1129 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1130 if (!NewVal)
1131 NewVal = InitVal;
1132 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1133 NewVal);
1134 }
1135 }
1136}
1137
1138/// Rewrite a previously scheduled instruction to use the register value
1139/// from the new instruction. Make sure the instruction occurs in the
1140/// basic block, and we don't change the uses in the new instruction.
1141void ModuloScheduleExpander::rewriteScheduledInstr(
1142 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1143 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1144 unsigned PrevReg) {
1145 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1146 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1147 // Rewrite uses that have been scheduled already to use the new
1148 // Phi register.
1149 for (MachineOperand &UseOp :
1150 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1151 MachineInstr *UseMI = UseOp.getParent();
1152 if (UseMI->getParent() != BB)
1153 continue;
1154 if (UseMI->isPHI()) {
1155 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1156 continue;
1157 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1158 continue;
1159 }
1160 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1161 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1162 MachineInstr *OrigMI = OrigInstr->second;
1163 int StageSched = Schedule.getStage(OrigMI);
1164 int CycleSched = Schedule.getCycle(OrigMI);
1165 unsigned ReplaceReg = 0;
1166 // This is the stage for the scheduled instruction.
1167 if (StagePhi == StageSched && Phi->isPHI()) {
1168 int CyclePhi = Schedule.getCycle(Phi);
1169 if (PrevReg && InProlog)
1170 ReplaceReg = PrevReg;
1171 else if (PrevReg && !isLoopCarried(*Phi) &&
1172 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1173 ReplaceReg = PrevReg;
1174 else
1175 ReplaceReg = NewReg;
1176 }
1177 // The scheduled instruction occurs before the scheduled Phi, and the
1178 // Phi is not loop carried.
1179 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1180 ReplaceReg = NewReg;
1181 if (StagePhi > StageSched && Phi->isPHI())
1182 ReplaceReg = NewReg;
1183 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1184 ReplaceReg = NewReg;
1185 if (ReplaceReg) {
1186 const TargetRegisterClass *NRC =
1187 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1188 if (NRC)
1189 UseOp.setReg(ReplaceReg);
1190 else {
1191 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1192 BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1193 SplitReg)
1194 .addReg(ReplaceReg);
1195 UseOp.setReg(SplitReg);
1196 }
1197 }
1198 }
1199}
1200
1201bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1202 if (!Phi.isPHI())
1203 return false;
1204 int DefCycle = Schedule.getCycle(&Phi);
1205 int DefStage = Schedule.getStage(&Phi);
1206
1207 unsigned InitVal = 0;
1208 unsigned LoopVal = 0;
1209 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1210 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1211 if (!Use || Use->isPHI())
1212 return true;
1213 int LoopCycle = Schedule.getCycle(Use);
1214 int LoopStage = Schedule.getStage(Use);
1215 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1216}
1217
1218//===----------------------------------------------------------------------===//
1219// PeelingModuloScheduleExpander implementation
1220//===----------------------------------------------------------------------===//
1221// This is a reimplementation of ModuloScheduleExpander that works by creating
1222// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1223//===----------------------------------------------------------------------===//
1224
1225namespace {
1226// Remove any dead phis in MBB. Dead phis either have only one block as input
1227// (in which case they are the identity) or have no uses.
1228void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1229 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1230 bool Changed = true;
1231 while (Changed) {
1232 Changed = false;
1233 for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
1234 assert(MI.isPHI());
1235 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1236 if (LIS)
1237 LIS->RemoveMachineInstrFromMaps(MI);
1238 MI.eraseFromParent();
1239 Changed = true;
1240 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1241 const TargetRegisterClass *ConstrainRegClass =
1242 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1243 MRI.getRegClass(MI.getOperand(0).getReg()));
1244 assert(ConstrainRegClass &&
1245 "Expected a valid constrained register class!");
1246 (void)ConstrainRegClass;
1247 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1248 MI.getOperand(1).getReg());
1249 if (LIS)
1250 LIS->RemoveMachineInstrFromMaps(MI);
1251 MI.eraseFromParent();
1252 Changed = true;
1253 }
1254 }
1255 }
1256}
1257
1258/// Rewrites the kernel block in-place to adhere to the given schedule.
1259/// KernelRewriter holds all of the state required to perform the rewriting.
1260class KernelRewriter {
1261 ModuloSchedule &S;
1262 MachineBasicBlock *BB;
1263 MachineBasicBlock *PreheaderBB, *ExitBB;
1264 MachineRegisterInfo &MRI;
1265 const TargetInstrInfo *TII;
1266 LiveIntervals *LIS;
1267
1268 // Map from register class to canonical undef register for that class.
1269 DenseMap<const TargetRegisterClass *, Register> Undefs;
1270 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1271 // this map is only used when InitReg is non-undef.
1272 DenseMap<std::pair<Register, Register>, Register> Phis;
1273 // Map from LoopReg to phi register where the InitReg is undef.
1274 DenseMap<Register, Register> UndefPhis;
1275
1276 // Reg is used by MI. Return the new register MI should use to adhere to the
1277 // schedule. Insert phis as necessary.
1278 Register remapUse(Register Reg, MachineInstr &MI);
1279 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1280 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1281 // or will be chosen so as to share another phi.
1282 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1283 const TargetRegisterClass *RC = nullptr);
1284 // Create an undef register of the given register class.
1285 Register undef(const TargetRegisterClass *RC);
1286
1287public:
1288 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1289 LiveIntervals *LIS = nullptr);
1290 void rewrite();
1291};
1292} // namespace
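// Usage sketch (illustrative only; the surrounding names are assumptions): the
// peeling expander rewrites the steady-state kernel in place before peeling
// prologs and epilogs off it, roughly as follows:
static void rewriteKernelInPlace(MachineLoop &L, ModuloSchedule &S,
                                 MachineBasicBlock *LoopBB,
                                 LiveIntervals *LIS) {
  KernelRewriter KR(L, S, LoopBB, LIS);
  KR.rewrite(); // reorder to schedule order and insert loop-carried PHIs
}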
1293
1294KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1295 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1296 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1297 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1298 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1299 PreheaderBB = *BB->pred_begin();
1300 if (PreheaderBB == BB)
1301 PreheaderBB = *std::next(BB->pred_begin());
1302}
1303
1304void KernelRewriter::rewrite() {
1305 // Rearrange the loop to be in schedule order. Note that the schedule may
1306 // contain instructions that are not owned by the loop block (InstrChanges and
1307 // friends), so we gracefully handle unowned instructions and delete any
1308 // instructions that weren't in the schedule.
1309 auto InsertPt = BB->getFirstTerminator();
1310 MachineInstr *FirstMI = nullptr;
1311 for (MachineInstr *MI : S.getInstructions()) {
1312 if (MI->isPHI())
1313 continue;
1314 if (MI->getParent())
1315 MI->removeFromParent();
1316 BB->insert(InsertPt, MI);
1317 if (!FirstMI)
1318 FirstMI = MI;
1319 }
1320 assert(FirstMI && "Failed to find first MI in schedule");
1321
1322 // At this point all of the scheduled instructions are between FirstMI
1323 // and the end of the block. Kill from the first non-phi to FirstMI.
1324 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1325 if (LIS)
1326 LIS->RemoveMachineInstrFromMaps(*I);
1327 (I++)->eraseFromParent();
1328 }
1329
1330 // Now remap every instruction in the loop.
1331 for (MachineInstr &MI : *BB) {
1332 if (MI.isPHI() || MI.isTerminator())
1333 continue;
1334 for (MachineOperand &MO : MI.uses()) {
1335 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1336 continue;
1337 Register Reg = remapUse(MO.getReg(), MI);
1338 MO.setReg(Reg);
1339 }
1340 }
1341 EliminateDeadPhis(BB, MRI, LIS);
1342
1343 // Ensure a phi exists for all instructions that are either referenced by
1344 // an illegal phi or by an instruction outside the loop. This allows us to
1345 // treat remaps of these values the same as "normal" values that come from
1346 // loop-carried phis.
1347 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1348 if (MI->isPHI()) {
1349 Register R = MI->getOperand(0).getReg();
1350 phi(R);
1351 continue;
1352 }
1353
1354 for (MachineOperand &Def : MI->defs()) {
1355 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1356 if (MI.getParent() != BB) {
1357 phi(Def.getReg());
1358 break;
1359 }
1360 }
1361 }
1362 }
1363}
1364
1365Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1366 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1367 if (!Producer)
1368 return Reg;
1369
1370 int ConsumerStage = S.getStage(&MI);
1371 if (!Producer->isPHI()) {
1372 // Non-phi producers are simple to remap. Insert as many phis as the
1373 // difference between the consumer and producer stages.
1374 if (Producer->getParent() != BB)
1375 // Producer was not inside the loop. Use the register as-is.
1376 return Reg;
1377 int ProducerStage = S.getStage(Producer);
1378 assert(ConsumerStage != -1 &&
1379 "In-loop consumer should always be scheduled!");
1380 assert(ConsumerStage >= ProducerStage);
1381 unsigned StageDiff = ConsumerStage - ProducerStage;
1382
1383 for (unsigned I = 0; I < StageDiff; ++I)
1384 Reg = phi(Reg);
1385 return Reg;
1386 }
1387
1388 // First, dive through the phi chain to find the defaults for the generated
1389 // phis.
1390 SmallVector<std::optional<Register>, 4> Defaults;
1391 Register LoopReg = Reg;
1392 auto LoopProducer = Producer;
1393 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1394 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1395 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1396 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1397 assert(LoopProducer);
1398 }
1399 int LoopProducerStage = S.getStage(LoopProducer);
1400
1401 std::optional<Register> IllegalPhiDefault;
1402
1403 if (LoopProducerStage == -1) {
1404 // Do nothing.
1405 } else if (LoopProducerStage > ConsumerStage) {
1406 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1407 // In addition, Consumer's cycle must be scheduled after Producer in the
1408 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1409 // functions.
1410#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1411 int LoopProducerCycle = S.getCycle(LoopProducer);
1412 int ConsumerCycle = S.getCycle(&MI);
1413#endif
1414 assert(LoopProducerCycle <= ConsumerCycle);
1415 assert(LoopProducerStage == ConsumerStage + 1);
1416 // Peel off the first phi from Defaults and insert a phi between producer
1417 // and consumer. This phi will not be at the front of the block so we
1418 // consider it illegal. It will only exist during the rewrite process; it
1419 // needs to exist while we peel off prologs because these could take the
1420 // default value. After that we can replace all uses with the loop producer
1421 // value.
1422 IllegalPhiDefault = Defaults.front();
1423 Defaults.erase(Defaults.begin());
1424 } else {
1425 assert(ConsumerStage >= LoopProducerStage);
1426 int StageDiff = ConsumerStage - LoopProducerStage;
1427 if (StageDiff > 0) {
1428 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1429 << " to " << (Defaults.size() + StageDiff) << "\n");
1430 // If we need more phis than we have defaults for, pad out with undefs for
1431 // the earliest phis, which are at the end of the defaults chain (the
1432 // chain is in reverse order).
1433 Defaults.resize(Defaults.size() + StageDiff,
1434 Defaults.empty() ? std::optional<Register>()
1435 : Defaults.back());
1436 }
1437 }
1438
1439 // Now we know the number of stages to jump back, insert the phi chain.
1440 auto DefaultI = Defaults.rbegin();
1441 while (DefaultI != Defaults.rend())
1442 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1443
1444 if (IllegalPhiDefault) {
1445 // The consumer optionally consumes LoopProducer in the same iteration
1446 // (because the producer is scheduled at an earlier cycle than the consumer)
1447 // or the initial value. To facilitate this we create an illegal block here
1448 // by embedding a phi in the middle of the block. We will fix this up
1449 // immediately prior to pruning.
1450 auto RC = MRI.getRegClass(Reg);
1451 Register R = MRI.createVirtualRegister(RC);
1452 MachineInstr *IllegalPhi =
1453 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1454 .addReg(*IllegalPhiDefault)
1455 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1456 .addReg(LoopReg)
1457 .addMBB(BB); // Block choice is arbitrary and has no effect.
1458 // Illegal phi should belong to the producer stage so that it can be
1459 // filtered correctly during peeling.
1460 S.setStage(IllegalPhi, LoopProducerStage);
1461 return R;
1462 }
1463
1464 return LoopReg;
1465}
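// For a non-PHI producer, the remapping above inserts one loop-carried PHI per
// stage of distance between producer and consumer. A minimal sketch of that
// count (hypothetical helper, not part of the upstream file):
static unsigned phiChainLength(int ProducerStage, int ConsumerStage) {
  assert(ConsumerStage >= ProducerStage && "Consumer scheduled too early");
  return static_cast<unsigned>(ConsumerStage - ProducerStage);
}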
1466
1467Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1468 const TargetRegisterClass *RC) {
1469 // If the init register is not undef, try and find an existing phi.
1470 if (InitReg) {
1471 auto I = Phis.find({LoopReg, *InitReg});
1472 if (I != Phis.end())
1473 return I->second;
1474 } else {
1475 for (auto &KV : Phis) {
1476 if (KV.first.first == LoopReg)
1477 return KV.second;
1478 }
1479 }
1480
1481 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1482 // find a phi that takes undef as input.
1483 auto I = UndefPhis.find(LoopReg);
1484 if (I != UndefPhis.end()) {
1485 Register R = I->second;
1486 if (!InitReg)
1487 // Found a phi taking undef as input, and this input is undef so return
1488 // without any more changes.
1489 return R;
1490 // Found a phi taking undef as input, so rewrite it to take InitReg.
1491 MachineInstr *MI = MRI.getVRegDef(R);
1492 MI->getOperand(1).setReg(*InitReg);
1493 Phis.insert({{LoopReg, *InitReg}, R});
1494 const TargetRegisterClass *ConstrainRegClass =
1495 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1496 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1497 (void)ConstrainRegClass;
1498 UndefPhis.erase(I);
1499 return R;
1500 }
1501
1502 // Failed to find any existing phi to reuse, so create a new one.
1503 if (!RC)
1504 RC = MRI.getRegClass(LoopReg);
1505 Register R = MRI.createVirtualRegister(RC);
1506 if (InitReg) {
1507 const TargetRegisterClass *ConstrainRegClass =
1508 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1509 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1510 (void)ConstrainRegClass;
1511 }
1512 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1513 .addReg(InitReg ? *InitReg : undef(RC))
1514 .addMBB(PreheaderBB)
1515 .addReg(LoopReg)
1516 .addMBB(BB);
1517 if (!InitReg)
1518 UndefPhis[LoopReg] = R;
1519 else
1520 Phis[{LoopReg, *InitReg}] = R;
1521 return R;
1522}
1523
1524Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1525 Register &R = Undefs[RC];
1526 if (R == 0) {
1527 // Create an IMPLICIT_DEF that defines this register if we need it.
1528 // All uses of this should be removed by the time we have finished unrolling
1529 // prologs and epilogs.
1530 R = MRI.createVirtualRegister(RC);
1531 auto *InsertBB = &PreheaderBB->getParent()->front();
1532 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1533 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1534 }
1535 return R;
1536}
1537
1538namespace {
1539/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1540/// the operand are discovered, such as how many in-loop PHIs it has to jump
1541/// through and defaults for these phis.
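/// For example, an operand whose value is reached through two in-loop PHIs,
/// each with its own incoming default, gets distance 2 and those two defaults
/// recorded in order. operator== deliberately compares only the distance,
/// since corresponding operands in the kernels being compared generally use
/// different virtual registers.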
1542class KernelOperandInfo {
1543 MachineBasicBlock *BB;
1544 MachineRegisterInfo &MRI;
1545 SmallVector<Register, 4> PhiDefaults;
1546 MachineOperand *Source;
1547 MachineOperand *Target;
1548
1549public:
1550 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1551 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1552 : MRI(MRI) {
1553 Source = MO;
1554 BB = MO->getParent()->getParent();
1555 while (isRegInLoop(MO)) {
1556 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1557 if (MI->isFullCopy()) {
1558 MO = &MI->getOperand(1);
1559 continue;
1560 }
1561 if (!MI->isPHI())
1562 break;
1563 // If this is an illegal phi, don't count it in distance.
1564 if (IllegalPhis.count(MI)) {
1565 MO = &MI->getOperand(3);
1566 continue;
1567 }
1568
1569 Register Default = getInitPhiReg(*MI, BB);
1570 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1571 : &MI->getOperand(3);
1572 PhiDefaults.push_back(Default);
1573 }
1574 Target = MO;
1575 }
1576
1577 bool operator==(const KernelOperandInfo &Other) const {
1578 return PhiDefaults.size() == Other.PhiDefaults.size();
1579 }
1580
1581 void print(raw_ostream &OS) const {
1582 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1583 << *Source->getParent();
1584 }
1585
1586private:
1587 bool isRegInLoop(MachineOperand *MO) {
1588 return MO->isReg() && MO->getReg().isVirtual() &&
1589 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1590 }
1591};
1592} // namespace
1593
1594 MachineBasicBlock *
1595 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1596 MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1597 if (LPD == LPD_Front)
1598 PeeledFront.push_back(NewBB);
1599 else
1600 PeeledBack.push_front(NewBB);
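 // Record the mapping between kernel instructions and their clones: every
 // clone maps back to its kernel instruction through CanonicalMIs (kernel
 // instructions map to themselves), and BlockMIs[{NewBB, I}] recovers NewBB's
 // clone of kernel instruction I (BlockMIs[{BB, I}] is I itself).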
1601 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1602 ++I, ++NI) {
1603 CanonicalMIs[&*I] = &*I;
1604 CanonicalMIs[&*NI] = &*I;
1605 BlockMIs[{NewBB, &*I}] = &*NI;
1606 BlockMIs[{BB, &*I}] = &*I;
1607 }
1608 return NewBB;
1609}
1610
1611 void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
1612 int MinStage) {
1613 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1614 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1615 MachineInstr *MI = &*I++;
1616 int Stage = getStage(MI);
1617 if (Stage == -1 || Stage >= MinStage)
1618 continue;
1619
1620 for (MachineOperand &DefMO : MI->defs()) {
1621 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1622 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1623 // Only PHIs can use values from this block by construction.
1624 // Match with the equivalent PHI in B.
1625 assert(UseMI.isPHI());
1626 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1627 MI->getParent());
1628 Subs.emplace_back(&UseMI, Reg);
1629 }
1630 for (auto &Sub : Subs)
1631 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1632 *MRI.getTargetRegisterInfo());
1633 }
1634 if (LIS)
1635 LIS->RemoveMachineInstrFromMaps(*MI);
1636 MI->eraseFromParent();
1637 }
1638}
1639
1640 void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1641 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1642 auto InsertPt = DestBB->getFirstNonPHI();
1643 DenseMap<Register, Register> Remaps;
1644 for (MachineInstr &MI : llvm::make_early_inc_range(
1645 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1646 if (MI.isPHI()) {
1647 // This is an illegal PHI. If we move any instructions using an illegal
1648 // PHI, we need to create a legal Phi.
1649 if (getStage(&MI) != Stage) {
1650 // The legal Phi is not necessary if the illegal phi's stage
1651 // is being moved.
1652 Register PhiR = MI.getOperand(0).getReg();
1653 auto RC = MRI.getRegClass(PhiR);
1654 Register NR = MRI.createVirtualRegister(RC);
1655 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1656 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1657 .addReg(PhiR)
1658 .addMBB(SourceBB);
1659 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1661 Remaps[PhiR] = NR;
1662 }
1663 }
1664 if (getStage(&MI) != Stage)
1665 continue;
1666 MI.removeFromParent();
1667 DestBB->insert(InsertPt, &MI);
1668 auto *KernelMI = CanonicalMIs[&MI];
1669 BlockMIs[{DestBB, KernelMI}] = &MI;
1670 BlockMIs.erase({SourceBB, KernelMI});
1671 }
1672 SmallVector<MachineInstr *, 4> PhiToDelete;
1673 for (MachineInstr &MI : DestBB->phis()) {
1674 assert(MI.getNumOperands() == 3);
1675 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1676 // If the instruction referenced by the phi is moved inside the block
1677 // we don't need the phi anymore.
1678 if (getStage(Def) == Stage) {
1679 Register PhiReg = MI.getOperand(0).getReg();
1680 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1681 /*TRI=*/nullptr) != -1);
1682 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1683 MI.getOperand(0).setReg(PhiReg);
1684 PhiToDelete.push_back(&MI);
1685 }
1686 }
1687 for (auto *P : PhiToDelete)
1688 P->eraseFromParent();
1689 InsertPt = DestBB->getFirstNonPHI();
1690 // Helper to clone Phi instructions into the destination block. We clone Phi
1691 // greedily to avoid combinatorial explosion of Phi instructions.
1692 auto clonePhi = [&](MachineInstr *Phi) {
1693 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1694 DestBB->insert(InsertPt, NewMI);
1695 Register OrigR = Phi->getOperand(0).getReg();
1696 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1697 NewMI->getOperand(0).setReg(R);
1698 NewMI->getOperand(1).setReg(OrigR);
1699 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1700 Remaps[OrigR] = R;
1701 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1702 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1704 return R;
1705 };
1706 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1707 for (MachineOperand &MO : I->uses()) {
1708 if (!MO.isReg())
1709 continue;
1710 if (Remaps.count(MO.getReg()))
1711 MO.setReg(Remaps[MO.getReg()]);
1712 else {
1713 // If we are using a phi from the source block we need to add a new phi
1714 // pointing to the old one.
1715 MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1716 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1717 Register R = clonePhi(Use);
1718 MO.setReg(R);
1719 }
1720 }
1721 }
1722 }
1723}
1724
1725 Register
1726 PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
1727 MachineInstr *Phi) {
1728 unsigned distance = PhiNodeLoopIteration[Phi];
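 // E.g. with distance == 2 we hop over two loop-carried operands: take the
 // loop operand of CanonicalPhi, step to its defining phi, take that phi's
 // loop operand, and return the register reached.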
1729 MachineInstr *CanonicalUse = CanonicalPhi;
1730 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1731 for (unsigned I = 0; I < distance; ++I) {
1732 assert(CanonicalUse->isPHI());
1733 assert(CanonicalUse->getNumOperands() == 5);
1734 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1735 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1736 std::swap(LoopRegIdx, InitRegIdx);
1737 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1738 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1739 }
1740 return CanonicalUseReg;
1741}
1742
1743 void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1744 BitVector LS(Schedule.getNumStages(), true);
1745 BitVector AS(Schedule.getNumStages(), true);
1746 LiveStages[BB] = LS;
1747 AvailableStages[BB] = AS;
1748
1749 // Peel out the prologs.
1750 LS.reset();
1751 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1752 LS[I] = true;
1753 Prologs.push_back(peelKernel(LPD_Front));
1754 LiveStages[Prologs.back()] = LS;
1755 AvailableStages[Prologs.back()] = LS;
1756 }
1757
1758 // Create a block that will end up as the new loop exiting block (dominated by
1759 // all prologs and epilogs). It will only contain PHIs, in the same order as
1760 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1761 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1762 // property that any value defined in BB but used outside of BB is used by a
1763 // PHI in the exiting block.
1764 MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
1765 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1766 // Push out the epilogs, again in reverse order.
1767 // We can't assume anything about the minimum loop trip count at this point,
1768 // so emit a fairly complex epilog.
1769
1770 // We first peel number of stages minus one epilogue. Then we remove dead
1771 // stages and reorder instructions based on their stage. If we have 3 stages
1772 // we generate first:
1773 // E0[3, 2, 1]
1774 // E1[3', 2']
1775 // E2[3'']
1776 // And then we move instructions based on their stages to have:
1777 // E0[3]
1778 // E1[2, 3']
1779 // E2[1, 2', 3'']
1780 // The transformation is legal because we only move instructions past
1781 // instructions of a previous loop iteration.
1782 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1783 Epilogs.push_back(peelKernel(LPD_Back));
1784 MachineBasicBlock *B = Epilogs.back();
1785 filterInstructions(B, Schedule.getNumStages() - I);
1786 // Keep track of which iteration each phi belongs to. We need it to know
1787 // what version of the variable to use during prologue/epilogue stitching.
1788 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1789 for (MachineInstr &Phi : B->phis())
1791 }
1792 for (size_t I = 0; I < Epilogs.size(); I++) {
1793 LS.reset();
1794 for (size_t J = I; J < Epilogs.size(); J++) {
1795 int Iteration = J;
1796 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1797 // Move stage one block at a time so that Phi nodes are updated correctly.
1798 for (size_t K = Iteration; K > I; K--)
1799 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1800 LS[Stage] = true;
1801 }
1802 LiveStages[Epilogs[I]] = LS;
1803 AvailableStages[Epilogs[I]] = AS;
1804 }
1805
1806 // Now we've defined all the prolog and epilog blocks as a fallthrough
1807 // sequence, add the edges that will be followed if the loop trip count is
1808 // lower than the number of stages (connecting prologs directly with epilogs).
1809 auto PI = Prologs.begin();
1810 auto EI = Epilogs.begin();
1811 assert(Prologs.size() == Epilogs.size());
1812 for (; PI != Prologs.end(); ++PI, ++EI) {
1813 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1814 (*PI)->addSuccessor(*EI);
1815 for (MachineInstr &MI : (*EI)->phis()) {
1816 Register Reg = MI.getOperand(1).getReg();
1818 if (Use && Use->getParent() == Pred) {
1819 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1820 if (CanonicalUse->isPHI()) {
1821 // If the use comes from a phi we need to skip as many phis as the
1822 // distance between the epilogue and the kernel. Trace through the phi
1823 // chain to find the right value.
1824 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1825 }
1826 Reg = getEquivalentRegisterIn(Reg, *PI);
1827 }
1828 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1829 MI.addOperand(MachineOperand::CreateMBB(*PI));
1830 }
1831 }
1832
1833 // Create a list of all blocks in order.
1834 SmallVector<MachineBasicBlock *, 8> Blocks;
1835 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1836 Blocks.push_back(BB);
1837 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1838
1839 // Iterate in reverse order over all instructions, remapping as we go.
1840 for (MachineBasicBlock *B : reverse(Blocks)) {
1841 for (auto I = B->instr_rbegin();
1842 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1843 MachineInstr *MI = &*I++;
1844 rewriteUsesOf(&*MI);
1845 }
1846 }
1847 for (auto *MI : IllegalPhisToDelete) {
1848 if (LIS)
1849 LIS->RemoveMachineInstrFromMaps(*MI);
1850 MI->eraseFromParent();
1851 }
1853
1854 // Now all remapping has been done, we're free to optimize the generated code.
1855 for (MachineBasicBlock *B : reverse(Blocks))
1856 EliminateDeadPhis(B, MRI, LIS);
1857 EliminateDeadPhis(ExitingBB, MRI, LIS);
1858}
1859
1860 MachineBasicBlock *
1861 PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1862 MachineBasicBlock *Exit = *BB->succ_begin();
1863 if (Exit == BB)
1864 Exit = *std::next(BB->succ_begin());
1865
1866 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1867 MF.insert(std::next(BB->getIterator()), NewBB);
1868
1869 // Clone all phis in BB into NewBB and rewrite.
1870 for (MachineInstr &MI : BB->phis()) {
1871 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1872 Register OldR = MI.getOperand(3).getReg();
1873 Register R = MRI.createVirtualRegister(RC);
1874 SmallVector<MachineInstr *, 4> Uses;
1875 for (MachineInstr &Use : MRI.use_instructions(OldR))
1876 if (Use.getParent() != BB)
1877 Uses.push_back(&Use);
1878 for (MachineInstr *Use : Uses)
1879 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1880 *MRI.getTargetRegisterInfo());
1881 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1882 .addReg(OldR)
1883 .addMBB(BB);
1884 BlockMIs[{NewBB, &MI}] = NI;
1885 CanonicalMIs[NI] = &MI;
1886 }
1887 BB->replaceSuccessor(Exit, NewBB);
1888 Exit->replacePhiUsesWith(BB, NewBB);
1889 NewBB->addSuccessor(Exit);
1890
1891 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1892 SmallVector<MachineOperand, 4> Cond;
1893 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1894 (void)CanAnalyzeBr;
1895 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1896 TII->removeBranch(*BB);
1897 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1898 Cond, DebugLoc());
1899 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1900 return NewBB;
1901}
1902
1903 Register
1904 PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
1905 MachineBasicBlock *BB) {
1906 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1907 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
1908 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1909}
1910
1911 void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
1912 if (MI->isPHI()) {
1913 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1914 // and it is produced by this block.
1915 Register PhiR = MI->getOperand(0).getReg();
1916 Register R = MI->getOperand(3).getReg();
1917 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1918 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1919 R = MI->getOperand(1).getReg();
1920 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1921 MRI.replaceRegWith(PhiR, R);
1922 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1923 // later to figure out how to remap registers.
1924 MI->getOperand(0).setReg(PhiR);
1926 return;
1927 }
1928
1929 int Stage = getStage(MI);
1930 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1931 LiveStages[MI->getParent()].test(Stage))
1932 // Instruction is live, no rewriting to do.
1933 return;
1934
1935 for (MachineOperand &DefMO : MI->defs()) {
1936 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1937 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1938 // Only PHIs can use values from this block by construction.
1939 // Match with the equivalent PHI in B.
1940 assert(UseMI.isPHI());
1941 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1942 MI->getParent());
1943 Subs.emplace_back(&UseMI, Reg);
1944 }
1945 for (auto &Sub : Subs)
1946 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1947 *MRI.getTargetRegisterInfo());
1948 }
1949 if (LIS)
1950 LIS->RemoveMachineInstrFromMaps(*MI);
1951 MI->eraseFromParent();
1952}
1953
1954 void PeelingModuloScheduleExpander::fixupBranches() {
1955 // Work outwards from the kernel.
1956 bool KernelDisposed = false;
1957 int TC = Schedule.getNumStages() - 1;
1958 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1959 ++PI, ++EI, --TC) {
1960 MachineBasicBlock *Prolog = *PI;
1961 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1962 MachineBasicBlock *Epilog = *EI;
1963 SmallVector<MachineOperand, 4> Cond;
1965 std::optional<bool> StaticallyGreater =
1966 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1967 if (!StaticallyGreater) {
1968 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1969 // Dynamically branch based on Cond.
1970 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1971 } else if (*StaticallyGreater == false) {
1972 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1973 // Prolog never falls through; branch to epilog and orphan interior
1974 // blocks. Leave it to unreachable-block-elim to clean up.
1975 Prolog->removeSuccessor(Fallthrough);
1976 for (MachineInstr &P : Fallthrough->phis()) {
1977 P.removeOperand(2);
1978 P.removeOperand(1);
1979 }
1981 KernelDisposed = true;
1982 } else {
1983 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1984 // Prolog always falls through; remove incoming values in epilog.
1985 Prolog->removeSuccessor(Epilog);
1986 for (MachineInstr &P : Epilog->phis()) {
1987 P.removeOperand(4);
1988 P.removeOperand(3);
1989 }
1990 }
1991 }
1992
1993 if (!KernelDisposed) {
1994 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1995 LoopInfo->setPreheader(Prologs.back());
1996 } else {
1997 LoopInfo->disposed();
1998 }
1999}
2000
2001 void PeelingModuloScheduleExpander::rewriteKernel() {
2002 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2003 KR.rewrite();
2004}
2005
2012
2013 rewriteKernel();
2015 fixupBranches();
2016}
2017
2021
2022 // Dump the schedule before we invalidate and remap all its instructions.
2023 // Stash it in a string so we can print it if we found an error.
2024 std::string ScheduleDump;
2025 raw_string_ostream OS(ScheduleDump);
2026 Schedule.print(OS);
2027 OS.flush();
2028
2029 // First, run the normal ModuloScheduleExpander. We don't support any
2030 // InstrChanges.
2031 assert(LIS && "Requires LiveIntervals!");
2032 ModuloScheduleExpander MSE(MF, Schedule, *LIS,
2033 ModuloScheduleExpander::InstrChangesTy());
2034 MSE.expand();
2035 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2036 if (!ExpandedKernel) {
2037 // The expander optimized away the kernel. We can't do any useful checking.
2038 MSE.cleanup();
2039 return;
2040 }
2041 // Before running the KernelRewriter, re-add BB into the CFG.
2043
2044 // Now run the new expansion algorithm.
2045 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2046 KR.rewrite();
2048
2049 // Collect all illegal phis that the new algorithm created. We'll give these
2050 // to KernelOperandInfo.
2051 SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2052 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2053 if (NI->isPHI())
2054 IllegalPhis.insert(&*NI);
2055 }
2056
2057 // Co-iterate across both kernels. We expect them to be identical apart from
2058 // phis and full COPYs (we look through both).
2059 SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
2060 auto OI = ExpandedKernel->begin();
2061 auto NI = BB->begin();
2062 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2063 while (OI->isPHI() || OI->isFullCopy())
2064 ++OI;
2065 while (NI->isPHI() || NI->isFullCopy())
2066 ++NI;
2067 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2068 // Analyze every operand separately.
2069 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2070 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2071 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2072 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2073 }
2074
2075 bool Failed = false;
2076 for (auto &OldAndNew : KOIs) {
2077 if (OldAndNew.first == OldAndNew.second)
2078 continue;
2079 Failed = true;
2080 errs() << "Modulo kernel validation error: [\n";
2081 errs() << " [golden] ";
2082 OldAndNew.first.print(errs());
2083 errs() << " ";
2084 OldAndNew.second.print(errs());
2085 errs() << "]\n";
2086 }
2087
2088 if (Failed) {
2089 errs() << "Golden reference kernel:\n";
2090 ExpandedKernel->print(errs());
2091 errs() << "New kernel:\n";
2092 BB->print(errs());
2093 errs() << ScheduleDump;
2094 report_fatal_error(
2095 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2096 }
2097
2098 // Cleanup by removing BB from the CFG again as the original
2099 // ModuloScheduleExpander intended.
2101 MSE.cleanup();
2102}
2103
2104MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2105 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2106
2107 // TODO: Offset information needs to be corrected.
2108 NewMI->dropMemRefs(MF);
2109
2110 return NewMI;
2111}
2112
2113/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2114/// If it is already dedicated exit, return it. Otherwise, insert a new
2115/// block between them and return the new block.
2117 MachineBasicBlock *Exit) {
2118 if (Exit->pred_size() == 1)
2119 return Exit;
2120
2121 MachineFunction *MF = Loop->getParent();
2122 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2123
2124 MachineBasicBlock *NewExit =
2125 MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2126 MF->insert(Loop->getIterator(), NewExit);
2127
2128 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2129 SmallVector<MachineOperand, 4> Cond;
2130 TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2131 if (TBB == Loop)
2132 FBB = NewExit;
2133 else if (FBB == Loop)
2134 TBB = NewExit;
2135 else
2136 llvm_unreachable("unexpected loop structure");
2137 TII->removeBranch(*Loop);
2138 TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2139 Loop->replaceSuccessor(Exit, NewExit);
2140 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2141 NewExit->addSuccessor(Exit);
2142
2143 Exit->replacePhiUsesWith(Loop, NewExit);
2144
2145 return NewExit;
2146}
2147
2148/// Insert branch code into the end of MBB. It branches to GreaterThan if the
2149/// remaining trip count for instructions in LastStage0Insts is greater than
2150/// RequiredTC, and to Otherwise otherwise.
2151void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2152 int RequiredTC,
2153 InstrMapTy &LastStage0Insts,
2154 MachineBasicBlock &GreaterThan,
2155 MachineBasicBlock &Otherwise) {
2156 SmallVector<MachineOperand, 4> Cond;
2157 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2158 LastStage0Insts);
2159
2161 // Set SwapBranchTargetsMVE to true if a target prefers to swap TBB and
2162 // FBB for optimal performance.
2163 if (TII->reverseBranchCondition(Cond))
2164 llvm_unreachable("can not reverse branch condition");
2165 TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2166 } else {
2167 TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2168 }
2169}
2170
2171 /// Generate a pipelined loop that is unrolled by using the MVE algorithm and
2172 /// any other necessary blocks. The control flow is modified to execute the
2173/// pipelined loop if the trip count satisfies the condition, otherwise the
2174/// original loop. The original loop is also used to execute the remainder
2175/// iterations which occur due to unrolling.
2176void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2177 // The control flow for pipelining with MVE:
2178 //
2179 // OrigPreheader:
2180 // // The block that is originally the loop preheader
2181 // goto Check
2182 //
2183 // Check:
2184 // // Check whether the trip count satisfies the requirements to pipeline.
2185 // if (LoopCounter > NumStages + NumUnroll - 2)
2186 // // The minimum number of iterations to pipeline =
2187 // // iterations executed in prolog/epilog (NumStages-1) +
2188 // // iterations executed in one kernel run (NumUnroll)
2189 // goto Prolog
2190 // // fallback to the original loop
2191 // goto NewPreheader
2192 //
2193 // Prolog:
2194 // // All prolog stages. There are no direct branches to the epilogue.
2195 // goto NewKernel
2196 //
2197 // NewKernel:
2198 // // NumUnroll copies of the kernel
2199 // if (LoopCounter > MVE-1)
2200 // goto NewKernel
2201 // goto Epilog
2202 //
2203 // Epilog:
2204 // // All epilog stages.
2205 // if (LoopCounter > 0)
2206 // // The remainder is executed in the original loop
2207 // goto NewPreheader
2208 // goto NewExit
2209 //
2210 // NewPreheader:
2211 // // Newly created preheader for the original loop.
2212 // // The initial values of the phis in the loop are merged from two paths.
2213 // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2214 // goto OrigKernel
2215 //
2216 // OrigKernel:
2217 // // The original loop block.
2218 // if (LoopCounter != 0)
2219 // goto OrigKernel
2220 // goto NewExit
2221 //
2222 // NewExit:
2223 // // Newly created dedicated exit for the original loop.
2224 // // Merge values which are referenced after the loop
2225 // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2226 // goto OrigExit
2227 //
2228 // OrigExit:
2229 // // The block that is originally the loop exit.
2230 // // If it is already a dedicated exit, NewExit is not created.
2231
2232 // An example of where each stage is executed:
2233 // Assume #Stages 3, #MVE 4, #Iterations 12
2234 // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2235 // -------------------------------------------------
2236 // Stage 0 Prolog#0
2237 // Stage 1 0 Prolog#1
2238 // Stage 2 1 0 Kernel Unroll#0 Iter#0
2239 // Stage 2 1 0 Kernel Unroll#1 Iter#0
2240 // Stage 2 1 0 Kernel Unroll#2 Iter#0
2241 // Stage 2 1 0 Kernel Unroll#3 Iter#0
2242 // Stage 2 1 0 Kernel Unroll#0 Iter#1
2243 // Stage 2 1 0 Kernel Unroll#1 Iter#1
2244 // Stage 2 1 0 Kernel Unroll#2 Iter#1
2245 // Stage 2 1 0 Kernel Unroll#3 Iter#1
2246 // Stage 2 1 Epilog#0
2247 // Stage 2 Epilog#1
2248 // Stage 0-2 OrigKernel
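 // With these numbers: the Check block requires LoopCounter > 3 + 4 - 2 = 5,
 // which 12 satisfies. The two kernel passes retire iterations 0-7, the epilog
 // finishes iterations 8-9, and the remaining iterations 10-11 run in
 // OrigKernel.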
2249
2250 LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2251 assert(LoopInfo && "Must be able to analyze loop!");
2252
2253 calcNumUnroll();
2254
2255 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2256 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2257 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2258 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2259 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2260
2261 MF.insert(OrigKernel->getIterator(), Check);
2262 MF.insert(OrigKernel->getIterator(), Prolog);
2263 MF.insert(OrigKernel->getIterator(), NewKernel);
2264 MF.insert(OrigKernel->getIterator(), Epilog);
2265 MF.insert(OrigKernel->getIterator(), NewPreheader);
2266
2267 NewExit = createDedicatedExit(OrigKernel, OrigExit);
2268
2269 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2270 TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2271
2272 OrigPreheader->addSuccessor(Check);
2273 TII->removeBranch(*OrigPreheader);
2274 TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2275
2276 Check->addSuccessor(Prolog);
2277 Check->addSuccessor(NewPreheader);
2278
2279 Prolog->addSuccessor(NewKernel);
2280
2281 NewKernel->addSuccessor(NewKernel);
2282 NewKernel->addSuccessor(Epilog);
2283
2284 Epilog->addSuccessor(NewPreheader);
2285 Epilog->addSuccessor(NewExit);
2286
2287 InstrMapTy LastStage0Insts;
2288 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2289 LastStage0Insts, *Prolog, *NewPreheader);
2290
2291 // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2292 // register#
2293 SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2294 generateProlog(PrologVRMap);
2295 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2296 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2297}
2298
2299/// Replace MI's use operands according to the maps.
2300void ModuloScheduleExpanderMVE::updateInstrUse(
2301 MachineInstr *MI, int StageNum, int PhaseNum,
2303 SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2304 // If MI is in the prolog/kernel/epilog block, CurVRMap is
2305 // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2306 // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2307 // Refer to the appropriate map according to the stage difference between
2308 // MI and the definition of an operand.
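 // For example, with NumUnroll == 4, an operand read in kernel copy
 // PhaseNum = 1 whose definition is DiffStage = 3 stages away cannot be
 // satisfied from CurVRMap, so it reads (*PrevVRMap)[4 - (3 - 1)],
 // i.e. PhiVRMap[2].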
2309
2310 for (MachineOperand &UseMO : MI->uses()) {
2311 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2312 continue;
2313 int DiffStage = 0;
2314 Register OrigReg = UseMO.getReg();
2315 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2316 if (!DefInst || DefInst->getParent() != OrigKernel)
2317 continue;
2318 unsigned InitReg = 0;
2319 unsigned DefReg = OrigReg;
2320 if (DefInst->isPHI()) {
2321 ++DiffStage;
2322 unsigned LoopReg;
2323 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2324 // LoopReg is guaranteed to be defined within the loop by canApply()
2325 DefReg = LoopReg;
2326 DefInst = MRI.getVRegDef(LoopReg);
2327 }
2328 unsigned DefStageNum = Schedule.getStage(DefInst);
2329 DiffStage += StageNum - DefStageNum;
2330 Register NewReg;
2331 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2332 // NewReg is defined in a previous phase of the same block
2333 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2334 else if (!PrevVRMap)
2335 // Since this is the first iteration, refer to the initial register of the
2336 // loop
2337 NewReg = InitReg;
2338 else
2339 // Cases where DiffStage is larger than PhaseNum.
2340 // If MI is in the kernel block, the value is defined by the previous
2341 // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2342 // value is defined in the kernel block and KernelVRMap is referenced.
2343 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2344
2345 const TargetRegisterClass *NRC =
2346 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2347 if (NRC)
2348 UseMO.setReg(NewReg);
2349 else {
2350 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2351 BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
2352 SplitReg)
2353 .addReg(NewReg);
2354 UseMO.setReg(SplitReg);
2355 }
2356 }
2357}
2358
2359 /// Return the phi whose loop-carried value is Reg, or nullptr if none exists.
2360 /// canApply() guarantees that at most one such phi exists.
2361 static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
2362 for (MachineInstr &Phi : Loop->phis()) {
2363 unsigned InitVal, LoopVal;
2364 getPhiRegs(Phi, Loop, InitVal, LoopVal);
2365 if (LoopVal == Reg)
2366 return &Phi;
2367 }
2368 return nullptr;
2369}
2370
2371/// Generate phis for registers defined by OrigMI.
2372void ModuloScheduleExpanderMVE::generatePhi(
2373 MachineInstr *OrigMI, int UnrollNum,
2374 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2375 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2376 SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2377 int StageNum = Schedule.getStage(OrigMI);
2378 bool UsePrologReg;
2379 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2380 UsePrologReg = true;
2381 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2382 UsePrologReg = false;
2383 else
2384 return;
2385
2386 // Examples that show which stages are merged by phi.
2387 // Meaning of the symbol following the stage number:
2388 // a/b: Stages with the same letter are merged (UsePrologReg == true)
2389 // +: Merged with the initial value (UsePrologReg == false)
2390 // *: No phis required
2391 //
2392 // #Stages 3, #MVE 4
2393 // Iter 0 1 2 3 4 5 6 7 8
2394 // -----------------------------------------
2395 // Stage 0a Prolog#0
2396 // Stage 1a 0b Prolog#1
2397 // Stage 2* 1* 0* Kernel Unroll#0
2398 // Stage 2* 1* 0+ Kernel Unroll#1
2399 // Stage 2* 1+ 0a Kernel Unroll#2
2400 // Stage 2+ 1a 0b Kernel Unroll#3
2401 //
2402 // #Stages 3, #MVE 2
2403 // Iter 0 1 2 3 4 5 6 7 8
2404 // -----------------------------------------
2405 // Stage 0a Prolog#0
2406 // Stage 1a 0b Prolog#1
2407 // Stage 2* 1+ 0a Kernel Unroll#0
2408 // Stage 2+ 1a 0b Kernel Unroll#1
2409 //
2410 // #Stages 3, #MVE 1
2411 // Iter 0 1 2 3 4 5 6 7 8
2412 // -----------------------------------------
2413 // Stage 0* Prolog#0
2414 // Stage 1a 0b Prolog#1
2415 // Stage 2+ 1a 0b Kernel Unroll#0
2416
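 // E.g. in the first table (#Stages 3, #MVE 4), the stage-0 copy in Unroll#2
 // satisfies 3 - 4 + 2 - 1 == 0 >= 0, so it is merged with the value from
 // Prolog#0 (PrologVRMap[0]; the matching 'a' markers), while the stage-1
 // copy hits 3 - 4 + 2 == 1 == StageNum and is merged with the loop's
 // initial value (the '+' marker).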
2417 for (MachineOperand &DefMO : OrigMI->defs()) {
2418 if (!DefMO.isReg() || DefMO.isDead())
2419 continue;
2420 Register OrigReg = DefMO.getReg();
2421 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2422 if (NewReg == KernelVRMap[UnrollNum].end())
2423 continue;
2424 Register CorrespondReg;
2425 if (UsePrologReg) {
2426 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2427 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2428 } else {
2429 MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2430 if (!Phi)
2431 continue;
2432 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2433 }
2434
2435 assert(CorrespondReg.isValid());
2436 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2437 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2438 TII->get(TargetOpcode::PHI), PhiReg)
2439 .addReg(NewReg->second)
2440 .addMBB(NewKernel)
2441 .addReg(CorrespondReg)
2442 .addMBB(Prolog);
2443 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2444 }
2445}
2446
2447static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2448 MachineBasicBlock *NewMBB) {
2449 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2450 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2451 Phi.getOperand(Idx).setReg(NewReg);
2452 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2453 return;
2454 }
2455 }
2456}
2457
2458/// Generate phis that merge values from multiple routes
2459void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2460 Register NewReg) {
2461 SmallVector<MachineOperand *> UsesAfterLoop;
2462 SmallVector<MachineInstr *> LoopPhis;
2463 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2464 E = MRI.use_end();
2465 I != E; ++I) {
2466 MachineOperand &O = *I;
2467 if (O.getParent()->getParent() != OrigKernel &&
2468 O.getParent()->getParent() != Prolog &&
2469 O.getParent()->getParent() != NewKernel &&
2470 O.getParent()->getParent() != Epilog)
2471 UsesAfterLoop.push_back(&O);
2472 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2473 LoopPhis.push_back(O.getParent());
2474 }
2475
2476 // Merge the route that only executes the pipelined loop (when there are no
2477 // remaining iterations) with the route that executes the original loop.
2478 if (!UsesAfterLoop.empty()) {
2479 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2480 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2481 TII->get(TargetOpcode::PHI), PhiReg)
2482 .addReg(OrigReg)
2483 .addMBB(OrigKernel)
2484 .addReg(NewReg)
2485 .addMBB(Epilog);
2486
2487 for (MachineOperand *MO : UsesAfterLoop)
2488 MO->setReg(PhiReg);
2489
2490 if (!LIS.hasInterval(PhiReg))
2491 LIS.createEmptyInterval(PhiReg);
2492 }
2493
2494 // Merge routes from the pipelined loop and the bypassed route before the
2495 // original loop
2496 if (!LoopPhis.empty()) {
2497 for (MachineInstr *Phi : LoopPhis) {
2498 unsigned InitReg, LoopReg;
2499 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2500 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2501 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(),
2502 TII->get(TargetOpcode::PHI), NewInit)
2503 .addReg(InitReg)
2504 .addMBB(Check)
2505 .addReg(NewReg)
2506 .addMBB(Epilog);
2507 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2508 }
2509 }
2510}
2511
2512void ModuloScheduleExpanderMVE::generateProlog(
2513 SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2514 PrologVRMap.clear();
2515 PrologVRMap.resize(Schedule.getNumStages() - 1);
2516 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2517 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2518 ++PrologNum) {
2519 for (MachineInstr *MI : Schedule.getInstructions()) {
2520 if (MI->isPHI())
2521 continue;
2522 int StageNum = Schedule.getStage(MI);
2523 if (StageNum > PrologNum)
2524 continue;
2525 MachineInstr *NewMI = cloneInstr(MI);
2526 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2527 NewMIMap[NewMI] = {PrologNum, StageNum};
2528 Prolog->push_back(NewMI);
2529 }
2530 }
2531
2532 for (auto I : NewMIMap) {
2533 MachineInstr *MI = I.first;
2534 int PrologNum = I.second.first;
2535 int StageNum = I.second.second;
2536 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2537 }
2538
2539 LLVM_DEBUG({
2540 dbgs() << "prolog:\n";
2541 Prolog->dump();
2542 });
2543}
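To see the per-phase renaming in isolation, here is a small standalone sketch of how prolog copies get fresh definitions and resolve their uses through phase-indexed maps, in the spirit of generateProlog, updateInstrDef, and updateInstrUse. The names Inst and DefStage are invented for the illustration, it does not use the LLVM API, and the InitReg fallback and phi handling are omitted.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Inst {
  std::string Def;                // value defined by this instruction
  std::vector<std::string> Uses;  // values it reads
  int Stage;                      // modulo-schedule stage
};

int main() {
  // Toy 3-stage kernel: a (stage 0), b = f(a) (stage 1), c = g(b) (stage 2).
  std::vector<Inst> Kernel = {{"a", {}, 0}, {"b", {"a"}, 1}, {"c", {"b"}, 2}};
  std::map<std::string, int> DefStage = {{"a", 0}, {"b", 1}, {"c", 2}};
  const int NumStages = 3;
  int Counter = 0;

  // One value map per prolog copy, mirroring PrologVRMap.
  std::vector<std::map<std::string, std::string>> PrologVRMap(NumStages - 1);
  for (int PrologNum = 0; PrologNum < NumStages - 1; ++PrologNum) {
    for (const Inst &I : Kernel) {
      if (I.Stage > PrologNum)
        continue; // this stage has not started yet in this prolog copy
      // updateInstrDef: give the cloned definition a fresh name.
      std::string NewDef = I.Def + "." + std::to_string(Counter++);
      PrologVRMap[PrologNum][I.Def] = NewDef;
      std::printf("%s =", NewDef.c_str());
      // updateInstrUse: a use whose def is DiffStage stages earlier reads the
      // name recorded in the map of phase PrologNum - DiffStage.
      for (const std::string &U : I.Uses) {
        int DiffStage = I.Stage - DefStage[U];
        std::printf(" %s", PrologVRMap[PrologNum - DiffStage][U].c_str());
      }
      std::printf("   ; prolog %d, stage %d\n", PrologNum, I.Stage);
    }
  }
  // Prints:
  //   a.0 =   ; prolog 0, stage 0
  //   a.1 =   ; prolog 1, stage 0
  //   b.2 = a.0   ; prolog 1, stage 1
  return 0;
}

The kernel and epilog phases follow the same pattern, with PhiVRMap and KernelVRMap taking the role of the previous-phase map.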
2544
2545void ModuloScheduleExpanderMVE::generateKernel(
2546 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2547 SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2548 KernelVRMap.clear();
2549 KernelVRMap.resize(NumUnroll);
2550 SmallVector<ValueMapTy> PhiVRMap;
2551 PhiVRMap.resize(NumUnroll);
2552 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2554 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2555 for (MachineInstr *MI : Schedule.getInstructions()) {
2556 if (MI->isPHI())
2557 continue;
2558 int StageNum = Schedule.getStage(MI);
2559 MachineInstr *NewMI = cloneInstr(MI);
2560 if (UnrollNum == NumUnroll - 1)
2561 LastStage0Insts[MI] = NewMI;
2562 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2563 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2564 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2565 NewMIMap[NewMI] = {UnrollNum, StageNum};
2566 NewKernel->push_back(NewMI);
2567 }
2568 }
2569
2570 for (auto I : NewMIMap) {
2571 MachineInstr *MI = I.first;
2572 int UnrollNum = I.second.first;
2573 int StageNum = I.second.second;
2574 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2575 }
2576
2577 // If the remaining trip count is greater than NumUnroll-1, the loop continues.
2578 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2579 *Epilog);
2580
2581 LLVM_DEBUG({
2582 dbgs() << "kernel:\n";
2583 NewKernel->dump();
2584 });
2585}
2586
2587void ModuloScheduleExpanderMVE::generateEpilog(
2588 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2589 SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2590 EpilogVRMap.clear();
2591 EpilogVRMap.resize(Schedule.getNumStages() - 1);
2592 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2593 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2594 ++EpilogNum) {
2595 for (MachineInstr *MI : Schedule.getInstructions()) {
2596 if (MI->isPHI())
2597 continue;
2598 int StageNum = Schedule.getStage(MI);
2599 if (StageNum <= EpilogNum)
2600 continue;
2601 MachineInstr *NewMI = cloneInstr(MI);
2602 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2603 NewMIMap[NewMI] = {EpilogNum, StageNum};
2604 Epilog->push_back(NewMI);
2605 }
2606 }
2607
2608 for (auto I : NewMIMap) {
2609 MachineInstr *MI = I.first;
2610 int EpilogNum = I.second.first;
2611 int StageNum = I.second.second;
2612 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2613 }
2614
2615 // If there are remaining iterations, they are executed in the original loop.
2616 // Instructions related to loop control, such as loop counter comparison,
2617 // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
2618 // in stage 0. Thus, LastStage0Insts holds the copies from the kernel's last unroll.
2619 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2620
2621 LLVM_DEBUG({
2622 dbgs() << "epilog:\n";
2623 Epilog->dump();
2624 });
2625}
2626
2627 /// Calculate the unroll factor required and store it in NumUnroll.
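/// For example, a use in stage 2 of a value defined (by a non-phi) in stage 0
/// starts from NumUnrollLocal = 1 + (2 - 0) = 3; if the definition appears at
/// or after the use in the kernel's instruction order, one copy is saved,
/// giving NumUnrollLocal = 2.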
2628void ModuloScheduleExpanderMVE::calcNumUnroll() {
2629 DenseMap<MachineInstr *, unsigned> Inst2Idx;
2630 NumUnroll = 1;
2631 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2632 Inst2Idx[Schedule.getInstructions()[I]] = I;
2633
2634 for (MachineInstr *MI : Schedule.getInstructions()) {
2635 if (MI->isPHI())
2636 continue;
2637 int StageNum = Schedule.getStage(MI);
2638 for (const MachineOperand &MO : MI->uses()) {
2639 if (!MO.isReg() || !MO.getReg().isVirtual())
2640 continue;
2641 MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2642 if (DefMI->getParent() != OrigKernel)
2643 continue;
2644
2645 int NumUnrollLocal = 1;
2646 if (DefMI->isPHI()) {
2647 ++NumUnrollLocal;
2648 // canApply() guarantees that DefMI is not a phi and is an instruction in
2649 // the loop
2650 DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2651 }
2652 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2653 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2654 --NumUnrollLocal;
2655 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2656 }
2657 }
2658 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2659}
2660
2661/// Create new virtual registers for definitions of NewMI and update NewMI.
2662/// If the definitions are referenced after the pipelined loop, phis are
2663/// created to merge with other routes.
2664void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2665 ValueMapTy &VRMap,
2666 bool LastDef) {
2667 for (MachineOperand &MO : NewMI->operands()) {
2668 if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef())
2669 continue;
2670 Register Reg = MO.getReg();
2671 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2672 Register NewReg = MRI.createVirtualRegister(RC);
2673 MO.setReg(NewReg);
2674 VRMap[Reg] = NewReg;
2675 if (LastDef)
2676 mergeRegUsesAfterPipeline(Reg, NewReg);
2677 }
2678}
2679
2680 void ModuloScheduleExpanderMVE::expand() {
2681 OrigKernel = Schedule.getLoop()->getTopBlock();
2682 OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2683 OrigExit = Schedule.getLoop()->getExitBlock();
2684
2685 LLVM_DEBUG(Schedule.dump());
2686
2687 generatePipelinedLoop();
2688}
2689
2690/// Check if ModuloScheduleExpanderMVE can be applied to L
2691 bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
2692 if (!L.getExitBlock()) {
2693 LLVM_DEBUG(
2694 dbgs() << "Can not apply MVE expander: No single exit block.\n";);
2695 return false;
2696 }
2697
2698 MachineBasicBlock *BB = L.getTopBlock();
2699 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2700
2701 // Put some constraints on the operands of the phis to simplify the
2702 // transformation
2703 DenseSet<unsigned> UsedByPhi;
2704 for (MachineInstr &MI : BB->phis()) {
2705 // Registers defined by phis must be used only inside the loop and never
2706 // used by phis.
2707 for (MachineOperand &MO : MI.defs())
2708 if (MO.isReg())
2709 for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2710 if (Ref.getParent() != BB || Ref.isPHI()) {
2711 LLVM_DEBUG(dbgs()
2712 << "Can not apply MVE expander: A phi result is "
2713 "referenced outside of the loop or by phi.\n";);
2714 return false;
2715 }
2716
2717 // A source register from the loop block must be defined inside the loop.
2718 // A register defined inside the loop must be referenced by at most one
2719 // phi.
2720 unsigned InitVal, LoopVal;
2721 getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2722 if (!Register(LoopVal).isVirtual() ||
2723 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2724 LLVM_DEBUG(
2725 dbgs() << "Can not apply MVE expander: A phi source value coming "
2726 "from the loop is not defined in the loop.\n";);
2727 return false;
2728 }
2729 if (UsedByPhi.count(LoopVal)) {
2730 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2731 "loop is referenced by two or more phis.\n";);
2732 return false;
2733 }
2734 UsedByPhi.insert(LoopVal);
2735 }
2736
2737 return true;
2738}
2739
2740//===----------------------------------------------------------------------===//
2741// ModuloScheduleTestPass implementation
2742//===----------------------------------------------------------------------===//
2743// This pass constructs a ModuloSchedule from its module and runs
2744// ModuloScheduleExpander.
2745//
2746// The module is expected to contain a single-block analyzable loop.
2747// The total order of instructions is taken from the loop as-is.
2748// Instructions are expected to be annotated with a PostInstrSymbol.
2749// This PostInstrSymbol must have the following format:
2750// "Stage=%d Cycle=%d".
2751//===----------------------------------------------------------------------===//
2752
2753namespace {
2754class ModuloScheduleTest : public MachineFunctionPass {
2755public:
2756 static char ID;
2757
2758 ModuloScheduleTest() : MachineFunctionPass(ID) {
2760 }
2761
2762 bool runOnMachineFunction(MachineFunction &MF) override;
2763 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2764
2765 void getAnalysisUsage(AnalysisUsage &AU) const override {
2769 }
2770};
2771} // namespace
2772
2773char ModuloScheduleTest::ID = 0;
2774
2775INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2776 "Modulo Schedule test pass", false, false)
2779INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2780 "Modulo Schedule test pass", false, false)
2781
2782bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2783 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2784 for (auto *L : MLI) {
2785 if (L->getTopBlock() != L->getBottomBlock())
2786 continue;
2787 runOnLoop(MF, *L);
2788 return false;
2789 }
2790 return false;
2791}
2792
2793static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2794 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2795 std::pair<StringRef, StringRef> StageTokenAndValue =
2796 getToken(StageAndCycle.first, "-");
2797 std::pair<StringRef, StringRef> CycleTokenAndValue =
2798 getToken(StageAndCycle.second, "-");
2799 if (StageTokenAndValue.first != "Stage" ||
2800 CycleTokenAndValue.first != "_Cycle") {
2802 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2803 return;
2804 }
2805
2806 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2807 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2808
2809 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2810}
2811
2812void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2813 LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2814 MachineBasicBlock *BB = L.getTopBlock();
2815 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2816
2818 std::vector<MachineInstr *> Instrs;
2819 for (MachineInstr &MI : *BB) {
2820 if (MI.isTerminator())
2821 continue;
2822 Instrs.push_back(&MI);
2823 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2824 dbgs() << "Parsing post-instr symbol for " << MI;
2825 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2826 }
2827 }
2828
2829 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2830 std::move(Stage));
2831 ModuloScheduleExpander MSE(
2832 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2833 MSE.expand();
2834 MSE.cleanup();
2835}
2836
2837//===----------------------------------------------------------------------===//
2838// ModuloScheduleTestAnnotater implementation
2839//===----------------------------------------------------------------------===//
2840
2841 void ModuloScheduleTestAnnotater::annotate() {
2842 for (MachineInstr *MI : S.getInstructions()) {
2845 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2847 MI->setPostInstrSymbol(MF, Sym);
2848 }
2849}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
Rewrite undef for PHI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
@ Default
Definition: DwarfDebug.cpp:87
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1293
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Symbol * Sym
Definition: ELF_riscv.cpp:479
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
This file provides utility analysis objects describing memory locations.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)
Create a dedicated exit for Loop.
modulo schedule test
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
modulo schedule Modulo Schedule test pass
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static bool rewrite(Function &F)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:213
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:572
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:812
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:722
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:782
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or null if none is found.
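Taken together, getRegClass, createVirtualRegister, and replaceRegWith are the standard recipe for renaming a value wholesale. A minimal sketch, assuming Reg is a virtual register (the helper name remapToFreshVReg is hypothetical):

  #include "llvm/CodeGen/MachineRegisterInfo.h"

  // Give Reg a brand-new virtual register of the same class and rewrite every
  // def and use of the old register to the new one.
  static llvm::Register remapToFreshVReg(llvm::MachineRegisterInfo &MRI,
                                         llvm::Register Reg) {
    const llvm::TargetRegisterClass *RC = MRI.getRegClass(Reg);
    llvm::Register NewReg = MRI.createVirtualRegister(RC);
    MRI.replaceRegWith(Reg, NewReg);
    return NewReg;
  }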
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and inserting prologs and epilogs as required.
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
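The accessors above are all that is needed to inspect a schedule from outside the expander. A small illustrative helper built only on getNumStages(), getInstructions(), and getStage(); the function name is an assumption:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/ModuloSchedule.h"

  // Count how many instructions the schedule assigns to each stage.
  static llvm::SmallVector<unsigned, 4>
  instructionsPerStage(llvm::ModuloSchedule &MS) {
    llvm::SmallVector<unsigned, 4> Counts(MS.getNumStages(), 0);
    for (llvm::MachineInstr *MI : MS.getInstructions()) {
      int Stage = MS.getStage(MI);
      if (Stage >= 0)
        ++Counts[Stage];
    }
    return Counts;
  }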
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has a corollary in all other blocks.
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all prologs and epilogs.
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated by PeelingModuloScheduleExpander.
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out prologue.
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
Definition: SmallPtrSet.h:323
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if it cannot be reversed.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enough produce a PipelinerLoopInfo object.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e.g. it's a switch dispatch or isn't implemented for a target).
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
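The analyzeBranch/removeBranch/insertBranch hooks above compose into the usual "re-point a terminator" pattern. A simplified sketch under assumed LLVM context: it ignores successor lists, branch probabilities, and debug locations, and the helper name retargetTerminator is made up:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"

  // Re-point MBB's terminator at NewTarget, keeping the analyzed condition if
  // there is one. Returns false if the branch could not be analyzed.
  static bool retargetTerminator(llvm::MachineBasicBlock &MBB,
                                 llvm::MachineBasicBlock *NewTarget,
                                 const llvm::TargetInstrInfo *TII) {
    llvm::MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    llvm::SmallVector<llvm::MachineOperand, 4> Cond;
    if (TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
      return false;                 // analyzeBranch returns true on failure
    TII->removeBranch(MBB);
    llvm::DebugLoc DL;              // no source location for the new branch
    if (Cond.empty())
      TII->insertBranch(MBB, NewTarget, nullptr, Cond, DL);   // unconditional
    else
      TII->insertBranch(MBB, TBB, NewTarget, Cond, DL);       // new false target
    return true;
  }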
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
constexpr double phi
Definition: MathExtras.h:61
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
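A hypothetical call site for PeelSingleBlockLoop, using the LPD_Front direction documented further down; the wrapper name peelFront is an assumption:

  #include "llvm/CodeGen/ModuloSchedule.h"

  // Peel one iteration off the front of a single-block loop; the returned
  // block runs exactly once before the loop body.
  static llvm::MachineBasicBlock *peelFront(llvm::MachineBasicBlock *Loop,
                                            llvm::MachineRegisterInfo &MRI,
                                            const llvm::TargetInstrInfo *TII) {
    return llvm::PeelSingleBlockLoop(llvm::LPD_Front, Loop, MRI, TII);
  }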
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:656
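make_early_inc_range pairs naturally with MachineRegisterInfo::use_operands when uses are rewritten in place. A hedged sketch; the helper name rewriteUsesInBlock and the Only parameter are illustrative:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"

  // Rewrite only the uses of Reg that sit inside block Only. setReg() moves
  // the operand to another use list, so the early-increment range keeps the
  // iteration valid while mutating.
  static void rewriteUsesInBlock(llvm::MachineRegisterInfo &MRI,
                                 llvm::Register Reg, llvm::Register NewReg,
                                 llvm::MachineBasicBlock *Only) {
    for (llvm::MachineOperand &UseOp :
         llvm::make_early_inc_range(MRI.use_operands(Reg)))
      if (UseOp.getParent()->getParent() == Only)
        UseOp.setReg(NewReg);
  }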
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition: STLExtras.h:1914
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are...