1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/CodeGen/ModuloSchedule.h"
10#include "llvm/ADT/StringRef.h"
11#include "llvm/Analysis/MemoryLocation.h"
12#include "llvm/CodeGen/LiveIntervals.h"
13#include "llvm/CodeGen/MachineInstrBuilder.h"
14#include "llvm/CodeGen/MachineLoopInfo.h"
15#include "llvm/CodeGen/MachineRegisterInfo.h"
16#include "llvm/InitializePasses.h"
17#include "llvm/MC/MCContext.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/raw_ostream.h"
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
26 "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28
29void ModuloSchedule::print(raw_ostream &OS) {
30 for (MachineInstr *MI : ScheduledInstrs)
31 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
32}
33
34//===----------------------------------------------------------------------===//
35// ModuloScheduleExpander implementation
36//===----------------------------------------------------------------------===//
37
38/// Return the register values for the operands of a Phi instruction.
39/// This function assumes the instruction is a Phi.
40static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
41 unsigned &InitVal, unsigned &LoopVal) {
42 assert(Phi.isPHI() && "Expecting a Phi.");
43
44 InitVal = 0;
45 LoopVal = 0;
46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47 if (Phi.getOperand(i + 1).getMBB() != Loop)
48 InitVal = Phi.getOperand(i).getReg();
49 else
50 LoopVal = Phi.getOperand(i).getReg();
51
52 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
53}
54
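// Note: a machine-IR PHI lists its incoming values as (register, block)
// operand pairs, e.g. "%v = PHI %init, %preheader, %loopval, %loop", so
// operands (1,2) and (3,4) are the pairs the helpers below inspect; the init
// value is simply the pair whose block operand is not the loop block.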
55/// Return the Phi register value that comes from the incoming block.
56static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59 return Phi.getOperand(i).getReg();
60 return 0;
61}
62
63/// Return the Phi register value that comes from the loop block.
64static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67 return Phi.getOperand(i).getReg();
68 return 0;
69}
70
71void ModuloScheduleExpander::expand() {
72 BB = Schedule.getLoop()->getTopBlock();
73 Preheader = *BB->pred_begin();
74 if (Preheader == BB)
75 Preheader = *std::next(BB->pred_begin());
76
77 // Iterate over the definitions in each instruction, and compute the
78 // stage difference for each use. Keep the maximum value.
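 // For example, a value defined in stage 0 whose last use is in stage 2 is
 // recorded with a stage difference of 2; a definition by a loop-carried Phi
 // counts one extra stage because its value must survive into the next
 // iteration.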
79 for (MachineInstr *MI : Schedule.getInstructions()) {
80 int DefStage = Schedule.getStage(MI);
81 for (const MachineOperand &Op : MI->all_defs()) {
82 Register Reg = Op.getReg();
83 unsigned MaxDiff = 0;
84 bool PhiIsSwapped = false;
85 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
86 MachineInstr *UseMI = UseOp.getParent();
87 int UseStage = Schedule.getStage(UseMI);
88 unsigned Diff = 0;
89 if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
91 if (MI->isPHI()) {
92 if (isLoopCarried(*MI))
93 ++Diff;
94 else
95 PhiIsSwapped = true;
96 }
97 MaxDiff = std::max(Diff, MaxDiff);
98 }
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
100 }
101 }
102
103 generatePipelinedLoop();
104}
105
106void ModuloScheduleExpander::generatePipelinedLoop() {
107 LoopInfo = TII->analyzeLoopForPipelining(BB);
108 assert(LoopInfo && "Must be able to analyze loop!");
109
110 // Create a new basic block for the kernel and add it to the CFG.
111 MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
112
113 unsigned MaxStageCount = Schedule.getNumStages() - 1;
114
115 // Remember the registers that are used in different stages. The index is
116 // the iteration, or stage, that the instruction is scheduled in. This is
117 // a map between register names in the original block and the names created
118 // in each stage of the pipelined loop.
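 // In other words, VRMap[Stage][OrigReg] names the clone of OrigReg created
 // for that stage; e.g. the stage-1 prolog copy of %v is later looked up as
 // VRMap[1][%v].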
119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120
121 // The renaming destinations created by Phis for the registers across stages.
122 // This map is updated during Phis generation to point to the most recent
123 // renaming destination.
124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125
126 InstrMapTy InstrMap;
127
128 MBBVectorTy PrologBBs;
129
130 // Generate the prolog instructions that set up the pipeline.
131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
132 MF.insert(BB->getIterator(), KernelBB);
133 LIS.insertMBBInMaps(KernelBB);
134
135 // Rearrange the instructions to generate the new, pipelined loop,
136 // and update register names as needed.
137 for (MachineInstr *CI : Schedule.getInstructions()) {
138 if (CI->isPHI())
139 continue;
140 unsigned StageNum = Schedule.getStage(CI);
141 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
143 KernelBB->push_back(NewMI);
144 InstrMap[NewMI] = CI;
145 }
146
147 // Copy any terminator instructions to the new kernel, and update
148 // names as needed.
149 for (MachineInstr &MI : BB->terminators()) {
150 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
151 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
152 KernelBB->push_back(NewMI);
153 InstrMap[NewMI] = &MI;
154 }
155
156 NewKernel = KernelBB;
157 KernelBB->transferSuccessors(BB);
158 KernelBB->replaceSuccessor(BB, KernelBB);
159
160 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
161 InstrMap, MaxStageCount, MaxStageCount, false);
162 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
163 InstrMap, MaxStageCount, MaxStageCount, false);
164
165 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
166
167 MBBVectorTy EpilogBBs;
168 // Generate the epilog instructions to complete the pipeline.
169 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
170 PrologBBs);
171
172 // We need this step because the register allocation doesn't handle some
173 // situations well, so we insert copies to help out.
174 splitLifetimes(KernelBB, EpilogBBs);
175
176 // Remove dead instructions due to loop induction variables.
177 removeDeadInstructions(KernelBB, EpilogBBs);
178
179 // Add branches between prolog and epilog blocks.
180 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
181
182 delete[] VRMap;
183 delete[] VRMapPhi;
184}
185
186void ModuloScheduleExpander::cleanup() {
187 // Remove the original loop since it's no longer referenced.
188 for (auto &I : *BB)
189 LIS.RemoveMachineInstrFromMaps(I);
190 BB->clear();
191 BB->eraseFromParent();
192}
193
194/// Generate the pipeline prolog code.
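/// For example, with three stages the first prolog block runs stage 0 of
/// iteration 0, and the second runs stage 1 of iteration 0 together with
/// stage 0 of iteration 1; only once the kernel is reached do all stages
/// execute in the same block.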
195void ModuloScheduleExpander::generateProlog(unsigned LastStage,
196 MachineBasicBlock *KernelBB,
197 ValueMapTy *VRMap,
198 MBBVectorTy &PrologBBs) {
199 MachineBasicBlock *PredBB = Preheader;
200 InstrMapTy InstrMap;
201
202 // Generate a basic block for each stage, not including the last stage,
203 // which will be generated in the kernel. Each basic block may contain
204 // instructions from multiple stages/iterations.
205 for (unsigned i = 0; i < LastStage; ++i) {
206 // Create and insert the prolog basic block prior to the original loop
207 // basic block. The original loop is removed later.
208 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
209 PrologBBs.push_back(NewBB);
210 MF.insert(BB->getIterator(), NewBB);
211 NewBB->transferSuccessors(PredBB);
212 PredBB->addSuccessor(NewBB);
213 PredBB = NewBB;
214 LIS.insertMBBInMaps(NewBB);
215
216 // Generate instructions for each appropriate stage. Process instructions
217 // in original program order.
218 for (int StageNum = i; StageNum >= 0; --StageNum) {
219 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
220 BBE = BB->getFirstTerminator();
221 BBI != BBE; ++BBI) {
222 if (Schedule.getStage(&*BBI) == StageNum) {
223 if (BBI->isPHI())
224 continue;
225 MachineInstr *NewMI =
226 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
227 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
228 NewBB->push_back(NewMI);
229 InstrMap[NewMI] = &*BBI;
230 }
231 }
232 }
233 rewritePhiValues(NewBB, i, VRMap, InstrMap);
234 LLVM_DEBUG({
235 dbgs() << "prolog:\n";
236 NewBB->dump();
237 });
238 }
239
240 PredBB->replaceSuccessor(BB, KernelBB);
241
242 // Check if we need to remove the branch from the preheader to the original
243 // loop, and replace it with a branch to the new loop.
244 unsigned numBranches = TII->removeBranch(*Preheader);
245 if (numBranches) {
246 SmallVector<MachineOperand, 4> Cond;
247 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
248 }
249}
250
251/// Generate the pipeline epilog code. The epilog code finishes the iterations
252/// that were started in either the prolog or the kernel. We create a basic
253/// block for each stage that needs to complete.
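/// For example, with three stages the first epilog block runs stage 2 for the
/// iteration that had already finished stages 0 and 1 in the kernel, and the
/// second runs stages 1 and 2 for the iteration that had only finished
/// stage 0.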
254void ModuloScheduleExpander::generateEpilog(
255 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
256 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
257 MBBVectorTy &PrologBBs) {
258 // We need to change the branch from the kernel to the first epilog block, so
259 // this call to analyze branch uses the kernel rather than the original BB.
260 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
261 SmallVector<MachineOperand, 4> Cond;
262 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
263 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
264 if (checkBranch)
265 return;
266
267 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
268 if (*LoopExitI == KernelBB)
269 ++LoopExitI;
270 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
271 MachineBasicBlock *LoopExitBB = *LoopExitI;
272
273 MachineBasicBlock *PredBB = KernelBB;
274 MachineBasicBlock *EpilogStart = LoopExitBB;
275 InstrMapTy InstrMap;
276
277 // Generate a basic block for each stage, not including the last stage,
278 // which was generated for the kernel. Each basic block may contain
279 // instructions from multiple stages/iterations.
280 int EpilogStage = LastStage + 1;
281 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
282 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
283 EpilogBBs.push_back(NewBB);
284 MF.insert(BB->getIterator(), NewBB);
285
286 PredBB->replaceSuccessor(LoopExitBB, NewBB);
287 NewBB->addSuccessor(LoopExitBB);
288 LIS.insertMBBInMaps(NewBB);
289
290 if (EpilogStart == LoopExitBB)
291 EpilogStart = NewBB;
292
293 // Add instructions to the epilog depending on the current block.
294 // Process instructions in original program order.
295 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
296 for (auto &BBI : *BB) {
297 if (BBI.isPHI())
298 continue;
299 MachineInstr *In = &BBI;
300 if ((unsigned)Schedule.getStage(In) == StageNum) {
301 // Instructions with memoperands in the epilog are updated with
302 // conservative values.
303 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
304 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
305 NewBB->push_back(NewMI);
306 InstrMap[NewMI] = In;
307 }
308 }
309 }
310 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
311 InstrMap, LastStage, EpilogStage, i == 1);
312 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
313 InstrMap, LastStage, EpilogStage, i == 1);
314 PredBB = NewBB;
315
316 LLVM_DEBUG({
317 dbgs() << "epilog:\n";
318 NewBB->dump();
319 });
320 }
321
322 // Fix any Phi nodes in the loop exit block.
323 LoopExitBB->replacePhiUsesWith(BB, PredBB);
324
325 // Create a branch to the new epilog from the kernel.
326 // Remove the original branch and add a new branch to the epilog.
327 TII->removeBranch(*KernelBB);
328 assert((OrigBB == TBB || OrigBB == FBB) &&
329 "Unable to determine looping branch direction");
330 if (OrigBB != TBB)
331 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
332 else
333 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
334 // Add a branch to the loop exit.
335 if (EpilogBBs.size() > 0) {
336 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
337 SmallVector<MachineOperand, 4> Cond1;
338 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
339 }
340}
341
342/// Replace all uses of FromReg that appear outside the specified
343/// basic block with ToReg.
344static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
345 MachineBasicBlock *MBB,
346 MachineRegisterInfo &MRI,
347 LiveIntervals &LIS) {
348 for (MachineOperand &O :
349 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
350 if (O.getParent()->getParent() != MBB)
351 O.setReg(ToReg);
352 if (!LIS.hasInterval(ToReg))
353 LIS.createEmptyInterval(ToReg);
354}
355
356/// Return true if the register has a use that occurs outside the
357/// specified loop.
358static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
359 MachineRegisterInfo &MRI) {
360 for (const MachineOperand &MO : MRI.use_operands(Reg))
361 if (MO.getParent()->getParent() != BB)
362 return true;
363 return false;
364}
365
366/// Generate Phis for the specific block in the generated pipelined code.
367/// This function looks at the Phis from the original code to guide the
368/// creation of new Phis.
369void ModuloScheduleExpander::generateExistingPhis(
370 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
371 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
372 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
373 // Compute the stage number for the initial value of the Phi, which
374 // comes from the prolog. The prolog to use depends on which kernel or
375 // epilog block we're adding the Phi to.
376 unsigned PrologStage = 0;
377 unsigned PrevStage = 0;
378 bool InKernel = (LastStageNum == CurStageNum);
379 if (InKernel) {
380 PrologStage = LastStageNum - 1;
381 PrevStage = CurStageNum;
382 } else {
383 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
384 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
385 }
386
387 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
388 BBE = BB->getFirstNonPHI();
389 BBI != BBE; ++BBI) {
390 Register Def = BBI->getOperand(0).getReg();
391
392 unsigned InitVal = 0;
393 unsigned LoopVal = 0;
394 getPhiRegs(*BBI, BB, InitVal, LoopVal);
395
396 unsigned PhiOp1 = 0;
397 // The Phi value from the loop body typically is defined in the loop, but
398 // not always. So, we need to check if the value is defined in the loop.
399 unsigned PhiOp2 = LoopVal;
400 if (VRMap[LastStageNum].count(LoopVal))
401 PhiOp2 = VRMap[LastStageNum][LoopVal];
402
403 int StageScheduled = Schedule.getStage(&*BBI);
404 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
405 unsigned NumStages = getStagesForReg(Def, CurStageNum);
406 if (NumStages == 0) {
407 // We don't need to generate a Phi anymore, but we need to rename any uses
408 // of the Phi value.
409 unsigned NewReg = VRMap[PrevStage][LoopVal];
410 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
411 InitVal, NewReg);
412 if (VRMap[CurStageNum].count(LoopVal))
413 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
414 }
415 // Adjust the number of Phis needed depending on the number of prologs left,
416 // and the distance from where the Phi is first scheduled. The number of
417 // Phis cannot exceed the number of prolog stages. Each stage can
418 // potentially define two values.
419 unsigned MaxPhis = PrologStage + 2;
420 if (!InKernel && (int)PrologStage <= LoopValStage)
421 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
422 unsigned NumPhis = std::min(NumStages, MaxPhis);
423
424 unsigned NewReg = 0;
425 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
426 // In the epilog, we may need to look back one stage to get the correct
427 // Phi name, because the epilog and prolog blocks execute the same stage.
428 // The correct name is from the previous block only when the Phi has
429 // been completely scheduled prior to the epilog, and the Phi value is not
430 // needed in multiple stages.
431 int StageDiff = 0;
432 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
433 NumPhis == 1)
434 StageDiff = 1;
435 // Adjust the computations below when the phi and the loop definition
436 // are scheduled in different stages.
437 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
438 StageDiff = StageScheduled - LoopValStage;
439 for (unsigned np = 0; np < NumPhis; ++np) {
440 // If the Phi hasn't been scheduled, then use the initial Phi operand
441 // value. Otherwise, use the scheduled version of the instruction. This
442 // is a little complicated when a Phi references another Phi.
443 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
444 PhiOp1 = InitVal;
445 // Check if the Phi has already been scheduled in a prolog stage.
446 else if (PrologStage >= AccessStage + StageDiff + np &&
447 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
448 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
449 // Check if the Phi has already been scheduled, but the loop instruction
450 // is either another Phi, or doesn't occur in the loop.
451 else if (PrologStage >= AccessStage + StageDiff + np) {
452 // If the Phi references another Phi, we need to examine the other
453 // Phi to get the correct value.
454 PhiOp1 = LoopVal;
455 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
456 int Indirects = 1;
457 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
458 int PhiStage = Schedule.getStage(InstOp1);
459 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
460 PhiOp1 = getInitPhiReg(*InstOp1, BB);
461 else
462 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
463 InstOp1 = MRI.getVRegDef(PhiOp1);
464 int PhiOpStage = Schedule.getStage(InstOp1);
465 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
466 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
467 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
468 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
469 break;
470 }
471 ++Indirects;
472 }
473 } else
474 PhiOp1 = InitVal;
475 // If this references a generated Phi in the kernel, get the Phi operand
476 // from the incoming block.
477 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
478 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
479 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
480
481 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
482 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
483 // In the epilog, a map lookup is needed to get the value from the kernel,
484 // or previous epilog block. How this is done depends on whether the
485 // instruction is scheduled in the previous block.
486 if (!InKernel) {
487 int StageDiffAdj = 0;
488 if (LoopValStage != -1 && StageScheduled > LoopValStage)
489 StageDiffAdj = StageScheduled - LoopValStage;
490 // Use the loop value defined in the kernel, unless the kernel
491 // contains the last definition of the Phi.
492 if (np == 0 && PrevStage == LastStageNum &&
493 (StageScheduled != 0 || LoopValStage != 0) &&
494 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
495 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
496 // Use the value defined by the Phi. We add one because we switch
497 // from looking at the loop value to the Phi definition.
498 else if (np > 0 && PrevStage == LastStageNum &&
499 VRMap[PrevStage - np + 1].count(Def))
500 PhiOp2 = VRMap[PrevStage - np + 1][Def];
501 // Use the loop value defined in the kernel.
502 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
503 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
504 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
505 // Use the value defined by the Phi, unless we're generating the first
506 // epilog and the Phi refers to a Phi in a different stage.
507 else if (VRMap[PrevStage - np].count(Def) &&
508 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
509 (LoopValStage == StageScheduled)))
510 PhiOp2 = VRMap[PrevStage - np][Def];
511 }
512
513 // Check if we can reuse an existing Phi. This occurs when a Phi
514 // references another Phi, and the other Phi is scheduled in an
515 // earlier stage. We can try to reuse an existing Phi up until the last
516 // stage of the current Phi.
517 if (LoopDefIsPhi) {
518 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
519 int LVNumStages = getStagesForPhi(LoopVal);
520 int StageDiff = (StageScheduled - LoopValStage);
521 LVNumStages -= StageDiff;
522 // Make sure the loop value Phi has been processed already.
523 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
524 NewReg = PhiOp2;
525 unsigned ReuseStage = CurStageNum;
526 if (isLoopCarried(*PhiInst))
527 ReuseStage -= LVNumStages;
528 // Check if the Phi to reuse has been generated yet. If not, then
529 // there is nothing to reuse.
530 if (VRMap[ReuseStage - np].count(LoopVal)) {
531 NewReg = VRMap[ReuseStage - np][LoopVal];
532
533 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
534 Def, NewReg);
535 // Update the map with the new Phi name.
536 VRMap[CurStageNum - np][Def] = NewReg;
537 PhiOp2 = NewReg;
538 if (VRMap[LastStageNum - np - 1].count(LoopVal))
539 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
540
541 if (IsLast && np == NumPhis - 1)
542 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
543 continue;
544 }
545 }
546 }
547 if (InKernel && StageDiff > 0 &&
548 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
549 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
550 }
551
552 const TargetRegisterClass *RC = MRI.getRegClass(Def);
553 NewReg = MRI.createVirtualRegister(RC);
554
555 MachineInstrBuilder NewPhi =
556 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
557 TII->get(TargetOpcode::PHI), NewReg);
558 NewPhi.addReg(PhiOp1).addMBB(BB1);
559 NewPhi.addReg(PhiOp2).addMBB(BB2);
560 if (np == 0)
561 InstrMap[NewPhi] = &*BBI;
562
563 // We define the Phis after creating the new pipelined code, so
564 // we need to rename the Phi values in scheduled instructions.
565
566 unsigned PrevReg = 0;
567 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
568 PrevReg = VRMap[PrevStage - np][LoopVal];
569 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
570 NewReg, PrevReg);
571 // If the Phi has been scheduled, use the new name for rewriting.
572 if (VRMap[CurStageNum - np].count(Def)) {
573 unsigned R = VRMap[CurStageNum - np][Def];
574 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
575 NewReg);
576 }
577
578 // Check if we need to rename any uses that occur after the loop. The
579 // register to replace depends on whether the Phi is scheduled in the
580 // epilog.
581 if (IsLast && np == NumPhis - 1)
582 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
583
584 // In the kernel, a dependent Phi uses the value from this Phi.
585 if (InKernel)
586 PhiOp2 = NewReg;
587
588 // Update the map with the new Phi name.
589 VRMap[CurStageNum - np][Def] = NewReg;
590 }
591
592 while (NumPhis++ < NumStages) {
593 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
594 NewReg, 0);
595 }
596
597 // Check if we need to rename a Phi that has been eliminated due to
598 // scheduling.
599 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
600 replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
601 }
602}
603
604/// Generate Phis for the specified block in the generated pipelined code.
605/// These are new Phis needed because the definition is scheduled after the
606/// use in the pipelined sequence.
607void ModuloScheduleExpander::generatePhis(
608 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
609 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
610 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
611 bool IsLast) {
612 // Compute the stage number that contains the initial Phi value, and
613 // the Phi from the previous stage.
614 unsigned PrologStage = 0;
615 unsigned PrevStage = 0;
616 unsigned StageDiff = CurStageNum - LastStageNum;
617 bool InKernel = (StageDiff == 0);
618 if (InKernel) {
619 PrologStage = LastStageNum - 1;
620 PrevStage = CurStageNum;
621 } else {
622 PrologStage = LastStageNum - StageDiff;
623 PrevStage = LastStageNum + StageDiff - 1;
624 }
625
626 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
627 BBE = BB->instr_end();
628 BBI != BBE; ++BBI) {
629 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
630 MachineOperand &MO = BBI->getOperand(i);
631 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
632 continue;
633
634 int StageScheduled = Schedule.getStage(&*BBI);
635 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
636 Register Def = MO.getReg();
637 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
638 // An instruction that is scheduled in stage 0 and used after the loop
639 // requires a phi in the epilog for the last definition from either
640 // the kernel or prolog.
641 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
642 hasUseAfterLoop(Def, BB, MRI))
643 NumPhis = 1;
644 if (!InKernel && (unsigned)StageScheduled > PrologStage)
645 continue;
646
647 unsigned PhiOp2;
648 if (InKernel) {
649 PhiOp2 = VRMap[PrevStage][Def];
650 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
651 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
652 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
653 }
654 // The number of Phis can't exceed the number of prolog stages. The
655 // prolog stage number is zero based.
656 if (NumPhis > PrologStage + 1 - StageScheduled)
657 NumPhis = PrologStage + 1 - StageScheduled;
658 for (unsigned np = 0; np < NumPhis; ++np) {
659 // Example for
660 // Org:
661 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
662 //
663 // Prolog0 (Stage0):
664 // %Clone0 = ...
665 // Prolog1 (Stage1):
666 // %Clone1 = ...
667 // Kernel (Stage2):
668 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
669 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
670 // %Clone2 = ...
671 // Epilog0 (Stage3):
672 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
673 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
674 // Epilog1 (Stage4):
675 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
676 //
677 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
678 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
679 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
680
681 unsigned PhiOp1 = VRMap[PrologStage][Def];
682 if (np <= PrologStage)
683 PhiOp1 = VRMap[PrologStage - np][Def];
684 if (!InKernel) {
685 if (PrevStage == LastStageNum && np == 0)
686 PhiOp2 = VRMap[LastStageNum][Def];
687 else
688 PhiOp2 = VRMapPhi[PrevStage - np][Def];
689 }
690
691 const TargetRegisterClass *RC = MRI.getRegClass(Def);
692 Register NewReg = MRI.createVirtualRegister(RC);
693
694 MachineInstrBuilder NewPhi =
695 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
696 TII->get(TargetOpcode::PHI), NewReg);
697 NewPhi.addReg(PhiOp1).addMBB(BB1);
698 NewPhi.addReg(PhiOp2).addMBB(BB2);
699 if (np == 0)
700 InstrMap[NewPhi] = &*BBI;
701
702 // Rewrite uses and update the map. The actions depend upon whether
703 // we are generating code for the kernel or epilog blocks.
704 if (InKernel) {
705 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
706 NewReg);
707 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
708 NewReg);
709
710 PhiOp2 = NewReg;
711 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
712 } else {
713 VRMapPhi[CurStageNum - np][Def] = NewReg;
714 if (np == NumPhis - 1)
715 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
716 NewReg);
717 }
718 if (IsLast && np == NumPhis - 1)
719 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
720 }
721 }
722 }
723}
724
725/// Remove instructions that generate values with no uses.
726/// Typically, these are induction variable operations that generate values
727/// used in the loop itself. A dead instruction has a definition with
728/// no uses, or uses that occur in the original loop only.
729void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
730 MBBVectorTy &EpilogBBs) {
731 // For each epilog block, check that the value defined by each instruction
732 // is used. If not, delete it.
733 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
734 for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
735 ME = MBB->instr_rend();
736 MI != ME;) {
738 // From DeadMachineInstructionElim. Don't delete inline assembly.
738 if (MI->isInlineAsm()) {
739 ++MI;
740 continue;
741 }
742 bool SawStore = false;
743 // Check if it's safe to remove the instruction due to side effects.
744 // We can, and want to, remove Phis here.
745 if (!MI->isSafeToMove(SawStore) && !MI->isPHI()) {
746 ++MI;
747 continue;
748 }
749 bool used = true;
750 for (const MachineOperand &MO : MI->all_defs()) {
751 Register reg = MO.getReg();
752 // Assume physical registers are used, unless they are marked dead.
753 if (reg.isPhysical()) {
754 used = !MO.isDead();
755 if (used)
756 break;
757 continue;
758 }
759 unsigned realUses = 0;
760 for (const MachineOperand &U : MRI.use_operands(reg)) {
761 // Check if there are any uses that occur only in the original
762 // loop. If so, that's not a real use.
763 if (U.getParent()->getParent() != BB) {
764 realUses++;
765 used = true;
766 break;
767 }
768 }
769 if (realUses > 0)
770 break;
771 used = false;
772 }
773 if (!used) {
774 LIS.RemoveMachineInstrFromMaps(*MI);
775 MI++->eraseFromParent();
776 continue;
777 }
778 ++MI;
779 }
780 // In the kernel block, check if we can remove a Phi that generates a value
781 // used in an instruction removed in the epilog block.
782 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
783 Register reg = MI.getOperand(0).getReg();
784 if (MRI.use_begin(reg) == MRI.use_end()) {
785 LIS.RemoveMachineInstrFromMaps(MI);
786 MI.eraseFromParent();
787 }
788 }
789}
790
791/// For loop carried definitions, we split the lifetime of a virtual register
792/// that has uses past the definition in the next iteration. A copy with a new
793/// virtual register is inserted before the definition, which helps with
794/// generating a better register assignment.
795///
796///   v1 = phi(a, v2)      v1 = phi(a, v2)
797///   v2 = phi(b, v3)      v2 = phi(b, v3)
798///   v3 = ..              v4 = copy v1
799///   .. = v1              v3 = ..
800///                        .. = v4
801void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
802 MBBVectorTy &EpilogBBs) {
803 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
804 for (auto &PHI : KernelBB->phis()) {
805 Register Def = PHI.getOperand(0).getReg();
806 // Check for any Phi definition that is used as an operand of another Phi
807 // in the same block.
808 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
809 E = MRI.use_instr_end();
810 I != E; ++I) {
811 if (I->isPHI() && I->getParent() == KernelBB) {
812 // Get the loop carried definition.
813 unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
814 if (!LCDef)
815 continue;
816 MachineInstr *MI = MRI.getVRegDef(LCDef);
817 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
818 continue;
819 // Search through the rest of the block looking for uses of the Phi
820 // definition. If one occurs, then split the lifetime.
821 unsigned SplitReg = 0;
822 for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
823 KernelBB->instr_end()))
824 if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
825 // We split the lifetime when we find the first use.
826 if (SplitReg == 0) {
827 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
828 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
829 TII->get(TargetOpcode::COPY), SplitReg)
830 .addReg(Def);
831 }
832 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
833 }
834 if (!SplitReg)
835 continue;
836 // Search through each of the epilog blocks for any uses to be renamed.
837 for (auto &Epilog : EpilogBBs)
838 for (auto &I : *Epilog)
839 if (I.readsRegister(Def, /*TRI=*/nullptr))
840 I.substituteRegister(Def, SplitReg, 0, *TRI);
841 break;
842 }
843 }
844 }
845}
846
847/// Remove the incoming block from the Phis in a basic block.
848static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
849 for (MachineInstr &MI : *BB) {
850 if (!MI.isPHI())
851 break;
852 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
853 if (MI.getOperand(i + 1).getMBB() == Incoming) {
854 MI.removeOperand(i + 1);
855 MI.removeOperand(i);
856 break;
857 }
858 }
859}
860
861/// Create branches from each prolog basic block to the appropriate epilog
862/// block. These edges are needed if the loop ends before reaching the
863/// kernel.
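/// For example, if the trip count is exhausted while only the prolog blocks
/// have started iterations, control must branch from that prolog block
/// directly to the epilog block that completes the in-flight iterations,
/// bypassing the kernel entirely.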
864void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
865 MBBVectorTy &PrologBBs,
866 MachineBasicBlock *KernelBB,
867 MBBVectorTy &EpilogBBs,
868 ValueMapTy *VRMap) {
869 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
870 MachineBasicBlock *LastPro = KernelBB;
871 MachineBasicBlock *LastEpi = KernelBB;
872
873 // Start from the blocks connected to the kernel and work "out"
874 // to the first prolog and the last epilog blocks.
876 unsigned MaxIter = PrologBBs.size() - 1;
877 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
878 // Add branches to the prolog that go to the corresponding
879 // epilog, and the fall-thru prolog/kernel block.
880 MachineBasicBlock *Prolog = PrologBBs[j];
881 MachineBasicBlock *Epilog = EpilogBBs[i];
882
883 SmallVector<MachineOperand, 4> Cond;
884 std::optional<bool> StaticallyGreater =
885 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
886 unsigned numAdded = 0;
887 if (!StaticallyGreater) {
888 Prolog->addSuccessor(Epilog);
889 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
890 } else if (*StaticallyGreater == false) {
891 Prolog->addSuccessor(Epilog);
892 Prolog->removeSuccessor(LastPro);
893 LastEpi->removeSuccessor(Epilog);
894 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
895 removePhis(Epilog, LastEpi);
896 // Remove the blocks that are no longer referenced.
897 if (LastPro != LastEpi) {
898 LastEpi->clear();
899 LastEpi->eraseFromParent();
900 }
901 if (LastPro == KernelBB) {
902 LoopInfo->disposed();
903 NewKernel = nullptr;
904 }
905 LastPro->clear();
906 LastPro->eraseFromParent();
907 } else {
908 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
909 removePhis(Epilog, Prolog);
910 }
911 LastPro = Prolog;
912 LastEpi = Epilog;
913 for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
914 E = Prolog->instr_rend();
915 I != E && numAdded > 0; ++I, --numAdded)
916 updateInstruction(&*I, false, j, 0, VRMap);
917 }
918
919 if (NewKernel) {
920 LoopInfo->setPreheader(PrologBBs[MaxIter]);
921 LoopInfo->adjustTripCount(-(MaxIter + 1));
922 }
923}
924
925/// Return true if we can compute the amount the instruction changes
926/// during each iteration. Set Delta to the amount of the change.
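/// For example, a load whose base register is advanced by 4 on every loop
/// iteration yields Delta = 4.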
927bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
928 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
929 const MachineOperand *BaseOp;
930 int64_t Offset;
931 bool OffsetIsScalable;
932 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
933 return false;
934
935 // FIXME: This algorithm assumes instructions have fixed-size offsets.
936 if (OffsetIsScalable)
937 return false;
938
939 if (!BaseOp->isReg())
940 return false;
941
942 Register BaseReg = BaseOp->getReg();
943
945 // Check if there is a Phi. If so, get the definition in the loop.
946 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
947 if (BaseDef && BaseDef->isPHI()) {
948 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
949 BaseDef = MRI.getVRegDef(BaseReg);
950 }
951 if (!BaseDef)
952 return false;
953
954 int D = 0;
955 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
956 return false;
957
958 Delta = D;
959 return true;
960}
961
962/// Update the memory operand with a new offset when the pipeliner
963/// generates a new copy of the instruction that refers to a
964/// different memory location.
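/// For example, if the base address advances by Delta each iteration, the
/// clone of a load emitted Num stages later really accesses the original
/// offset plus Delta * Num, so the memory operand is rewritten accordingly
/// (or widened conservatively when the delta is unknown).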
965void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
966 MachineInstr &OldMI,
967 unsigned Num) {
968 if (Num == 0)
969 return;
970 // If the instruction has memory operands, then adjust the offset
971 // when the instruction appears in different stages.
972 if (NewMI.memoperands_empty())
973 return;
974 SmallVector<MachineMemOperand *, 2> NewMMOs;
975 for (MachineMemOperand *MMO : NewMI.memoperands()) {
976 // TODO: Figure out whether isAtomic is really necessary (see D57601).
977 if (MMO->isVolatile() || MMO->isAtomic() ||
978 (MMO->isInvariant() && MMO->isDereferenceable()) ||
979 (!MMO->getValue())) {
980 NewMMOs.push_back(MMO);
981 continue;
982 }
983 unsigned Delta;
984 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
985 int64_t AdjOffset = Delta * Num;
986 NewMMOs.push_back(
987 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
988 } else {
989 NewMMOs.push_back(MF.getMachineMemOperand(
990 MMO, 0, LocationSize::beforeOrAfterPointer()));
991 }
992 }
993 NewMI.setMemRefs(MF, NewMMOs);
994}
995
996/// Clone the instruction for the new pipelined loop and update the
997/// memory operands, if needed.
998MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
999 unsigned CurStageNum,
1000 unsigned InstStageNum) {
1001 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1002 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1003 return NewMI;
1004}
1005
1006/// Clone the instruction for the new pipelined loop. If needed, this
1007/// function updates the instruction using the values saved in the
1008/// InstrChanges structure.
1009MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1010 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1011 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1012 auto It = InstrChanges.find(OldMI);
1013 if (It != InstrChanges.end()) {
1014 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1015 unsigned BasePos, OffsetPos;
1016 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1017 return nullptr;
1018 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1019 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1020 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1021 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1022 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1023 }
1024 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1025 return NewMI;
1026}
1027
1028/// Update the machine instruction with new virtual registers. This
1029/// function may change the definitions and/or uses.
1030void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1031 bool LastDef,
1032 unsigned CurStageNum,
1033 unsigned InstrStageNum,
1034 ValueMapTy *VRMap) {
1035 for (MachineOperand &MO : NewMI->operands()) {
1036 if (!MO.isReg() || !MO.getReg().isVirtual())
1037 continue;
1038 Register reg = MO.getReg();
1039 if (MO.isDef()) {
1040 // Create a new virtual register for the definition.
1041 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1042 Register NewReg = MRI.createVirtualRegister(RC);
1043 MO.setReg(NewReg);
1044 VRMap[CurStageNum][reg] = NewReg;
1045 if (LastDef)
1046 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1047 } else if (MO.isUse()) {
1048 MachineInstr *Def = MRI.getVRegDef(reg);
1049 // Compute the stage that contains the last definition for the instruction.
1050 int DefStageNum = Schedule.getStage(Def);
1051 unsigned StageNum = CurStageNum;
1052 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1053 // Compute the difference in stages between the definition and the use.
1054 unsigned StageDiff = (InstrStageNum - DefStageNum);
1055 // Make an adjustment to get the last definition.
1056 StageNum -= StageDiff;
1057 }
1058 if (VRMap[StageNum].count(reg))
1059 MO.setReg(VRMap[StageNum][reg]);
1060 }
1061 }
1062}
1063
1064/// Return the instruction in the loop that defines the register.
1065/// If the definition is a Phi, then follow the Phi operand to
1066/// the instruction in the loop.
1067MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1068 SmallPtrSet<MachineInstr *, 8> Visited;
1069 MachineInstr *Def = MRI.getVRegDef(Reg);
1070 while (Def->isPHI()) {
1071 if (!Visited.insert(Def).second)
1072 break;
1073 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1074 if (Def->getOperand(i + 1).getMBB() == BB) {
1075 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1076 break;
1077 }
1078 }
1079 return Def;
1080}
1081
1082/// Return the new name for the value from the previous stage.
1083unsigned ModuloScheduleExpander::getPrevMapVal(
1084 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1085 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1086 unsigned PrevVal = 0;
1087 if (StageNum > PhiStage) {
1088 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1089 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1090 // The name is defined in the previous stage.
1091 PrevVal = VRMap[StageNum - 1][LoopVal];
1092 else if (VRMap[StageNum].count(LoopVal))
1093 // The previous name is defined in the current stage when the instruction
1094 // order is swapped.
1095 PrevVal = VRMap[StageNum][LoopVal];
1096 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1097 // The loop value hasn't yet been scheduled.
1098 PrevVal = LoopVal;
1099 else if (StageNum == PhiStage + 1)
1100 // The loop value is another phi, which has not been scheduled.
1101 PrevVal = getInitPhiReg(*LoopInst, BB);
1102 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1103 // The loop value is another phi, which has been scheduled.
1104 PrevVal =
1105 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1106 LoopStage, VRMap, BB);
1107 }
1108 return PrevVal;
1109}
1110
1111/// Rewrite the Phi values in the specified block to use the mappings
1112/// from the initial operand. Once the Phi is scheduled, we switch
1113/// to using the loop value instead of the Phi value, so those names
1114/// do not need to be rewritten.
1115void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1116 unsigned StageNum,
1117 ValueMapTy *VRMap,
1118 InstrMapTy &InstrMap) {
1119 for (auto &PHI : BB->phis()) {
1120 unsigned InitVal = 0;
1121 unsigned LoopVal = 0;
1122 getPhiRegs(PHI, BB, InitVal, LoopVal);
1123 Register PhiDef = PHI.getOperand(0).getReg();
1124
1125 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1126 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1127 unsigned NumPhis = getStagesForPhi(PhiDef);
1128 if (NumPhis > StageNum)
1129 NumPhis = StageNum;
1130 for (unsigned np = 0; np <= NumPhis; ++np) {
1131 unsigned NewVal =
1132 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1133 if (!NewVal)
1134 NewVal = InitVal;
1135 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1136 NewVal);
1137 }
1138 }
1139}
1140
1141/// Rewrite a previously scheduled instruction to use the register value
1142/// from the new instruction. Make sure the instruction occurs in the
1143/// basic block, and we don't change the uses in the new instruction.
1144void ModuloScheduleExpander::rewriteScheduledInstr(
1145 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1146 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1147 unsigned PrevReg) {
1148 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1149 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1150 // Rewrite uses that have been scheduled already to use the new
1151 // Phi register.
1152 for (MachineOperand &UseOp :
1153 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1154 MachineInstr *UseMI = UseOp.getParent();
1155 if (UseMI->getParent() != BB)
1156 continue;
1157 if (UseMI->isPHI()) {
1158 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1159 continue;
1160 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1161 continue;
1162 }
1163 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1164 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1165 MachineInstr *OrigMI = OrigInstr->second;
1166 int StageSched = Schedule.getStage(OrigMI);
1167 int CycleSched = Schedule.getCycle(OrigMI);
1168 unsigned ReplaceReg = 0;
1169 // This is the stage for the scheduled instruction.
1170 if (StagePhi == StageSched && Phi->isPHI()) {
1171 int CyclePhi = Schedule.getCycle(Phi);
1172 if (PrevReg && InProlog)
1173 ReplaceReg = PrevReg;
1174 else if (PrevReg && !isLoopCarried(*Phi) &&
1175 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1176 ReplaceReg = PrevReg;
1177 else
1178 ReplaceReg = NewReg;
1179 }
1180 // The scheduled instruction occurs before the scheduled Phi, and the
1181 // Phi is not loop carried.
1182 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1183 ReplaceReg = NewReg;
1184 if (StagePhi > StageSched && Phi->isPHI())
1185 ReplaceReg = NewReg;
1186 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1187 ReplaceReg = NewReg;
1188 if (ReplaceReg) {
1189 const TargetRegisterClass *NRC =
1190 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1191 if (NRC)
1192 UseOp.setReg(ReplaceReg);
1193 else {
1194 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1195 BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1196 SplitReg)
1197 .addReg(ReplaceReg);
1198 UseOp.setReg(SplitReg);
1199 }
1200 }
1201 }
1202}
1203
1204bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1205 if (!Phi.isPHI())
1206 return false;
1207 int DefCycle = Schedule.getCycle(&Phi);
1208 int DefStage = Schedule.getStage(&Phi);
1209
1210 unsigned InitVal = 0;
1211 unsigned LoopVal = 0;
1212 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1213 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1214 if (!Use || Use->isPHI())
1215 return true;
1216 int LoopCycle = Schedule.getCycle(Use);
1217 int LoopStage = Schedule.getStage(Use);
1218 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1219}
1220
1221//===----------------------------------------------------------------------===//
1222// PeelingModuloScheduleExpander implementation
1223//===----------------------------------------------------------------------===//
1224// This is a reimplementation of ModuloScheduleExpander that works by creating
1225// a fully correct steady-state kernel and peeling off the prolog and epilogs.
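// For example, a three-stage schedule is expanded by cloning the kernel twice
// before the loop and twice after it, then deleting the instructions that
// belong to stages that have not yet started (in the prolog copies) or have
// already finished (in the epilog copies).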
1226//===----------------------------------------------------------------------===//
1227
1228namespace {
1229// Remove any dead phis in MBB. Dead phis either have only one block as input
1230// (in which case they are the identity) or have no uses.
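// For example, "%a = PHI %b, %bb0" carries a single incoming value, so every
// use of %a can simply be replaced with %b and the PHI deleted.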
1231void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1232 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1233 bool Changed = true;
1234 while (Changed) {
1235 Changed = false;
1236 for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
1237 assert(MI.isPHI());
1238 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1239 if (LIS)
1240 LIS->RemoveMachineInstrFromMaps(MI);
1241 MI.eraseFromParent();
1242 Changed = true;
1243 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1244 const TargetRegisterClass *ConstrainRegClass =
1245 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1246 MRI.getRegClass(MI.getOperand(0).getReg()));
1247 assert(ConstrainRegClass &&
1248 "Expected a valid constrained register class!");
1249 (void)ConstrainRegClass;
1250 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1251 MI.getOperand(1).getReg());
1252 if (LIS)
1253 LIS->RemoveMachineInstrFromMaps(MI);
1254 MI.eraseFromParent();
1255 Changed = true;
1256 }
1257 }
1258 }
1259}
1260
1261/// Rewrites the kernel block in-place to adhere to the given schedule.
1262/// KernelRewriter holds all of the state required to perform the rewriting.
1263class KernelRewriter {
1264 ModuloSchedule &S;
1265 MachineBasicBlock *BB;
1266 MachineBasicBlock *PreheaderBB, *ExitBB;
1267 MachineRegisterInfo &MRI;
1268 const TargetInstrInfo *TII;
1269 LiveIntervals *LIS;
1270
1271 // Map from register class to canonical undef register for that class.
1272 DenseMap<const TargetRegisterClass *, Register> Undefs;
1273 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1274 // this map is only used when InitReg is non-undef.
1275 DenseMap<std::pair<Register, Register>, Register> Phis;
1276 // Map from LoopReg to phi register where the InitReg is undef.
1277 DenseMap<Register, Register> UndefPhis;
1278
1279 // Reg is used by MI. Return the new register MI should use to adhere to the
1280 // schedule. Insert phis as necessary.
1281 Register remapUse(Register Reg, MachineInstr &MI);
1282 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1283 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1284 // or will be chosen so as to share another phi.
1285 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1286 const TargetRegisterClass *RC = nullptr);
1287 // Create an undef register of the given register class.
1288 Register undef(const TargetRegisterClass *RC);
1289
1290public:
1291 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1292 LiveIntervals *LIS = nullptr);
1293 void rewrite();
1294};
1295} // namespace
1296
1297KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1298 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1299 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1300 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1301 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1302 PreheaderBB = *BB->pred_begin();
1303 if (PreheaderBB == BB)
1304 PreheaderBB = *std::next(BB->pred_begin());
1305}
1306
1307void KernelRewriter::rewrite() {
1308 // Rearrange the loop to be in schedule order. Note that the schedule may
1309 // contain instructions that are not owned by the loop block (InstrChanges and
1310 // friends), so we gracefully handle unowned instructions and delete any
1311 // instructions that weren't in the schedule.
1312 auto InsertPt = BB->getFirstTerminator();
1313 MachineInstr *FirstMI = nullptr;
1314 for (MachineInstr *MI : S.getInstructions()) {
1315 if (MI->isPHI())
1316 continue;
1317 if (MI->getParent())
1318 MI->removeFromParent();
1319 BB->insert(InsertPt, MI);
1320 if (!FirstMI)
1321 FirstMI = MI;
1322 }
1323 assert(FirstMI && "Failed to find first MI in schedule");
1324
1325 // At this point all of the scheduled instructions are between FirstMI
1326 // and the end of the block. Kill from the first non-phi to FirstMI.
1327 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1328 if (LIS)
1329 LIS->RemoveMachineInstrFromMaps(*I);
1330 (I++)->eraseFromParent();
1331 }
1332
1333 // Now remap every instruction in the loop.
1334 for (MachineInstr &MI : *BB) {
1335 if (MI.isPHI() || MI.isTerminator())
1336 continue;
1337 for (MachineOperand &MO : MI.uses()) {
1338 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1339 continue;
1340 Register Reg = remapUse(MO.getReg(), MI);
1341 MO.setReg(Reg);
1342 }
1343 }
1344 EliminateDeadPhis(BB, MRI, LIS);
1345
1346 // Ensure a phi exists for all instructions that are either referenced by
1347 // an illegal phi or by an instruction outside the loop. This allows us to
1348 // treat remaps of these values the same as "normal" values that come from
1349 // loop-carried phis.
1350 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1351 if (MI->isPHI()) {
1352 Register R = MI->getOperand(0).getReg();
1353 phi(R);
1354 continue;
1355 }
1356
1357 for (MachineOperand &Def : MI->defs()) {
1358 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1359 if (MI.getParent() != BB) {
1360 phi(Def.getReg());
1361 break;
1362 }
1363 }
1364 }
1365 }
1366}
1367
1368Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1369 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1370 if (!Producer)
1371 return Reg;
1372
1373 int ConsumerStage = S.getStage(&MI);
1374 if (!Producer->isPHI()) {
1375 // Non-phi producers are simple to remap. Insert as many phis as the
1376 // difference between the consumer and producer stages.
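 // For example, a value produced in stage 0 and consumed in stage 2 is routed
 // through two loop-carried phis, so the consumer reads the copy produced two
 // kernel iterations earlier.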
1377 if (Producer->getParent() != BB)
1378 // Producer was not inside the loop. Use the register as-is.
1379 return Reg;
1380 int ProducerStage = S.getStage(Producer);
1381 assert(ConsumerStage != -1 &&
1382 "In-loop consumer should always be scheduled!");
1383 assert(ConsumerStage >= ProducerStage);
1384 unsigned StageDiff = ConsumerStage - ProducerStage;
1385
1386 for (unsigned I = 0; I < StageDiff; ++I)
1387 Reg = phi(Reg);
1388 return Reg;
1389 }
1390
1391 // First, dive through the phi chain to find the defaults for the generated
1392 // phis.
1393 SmallVector<std::optional<Register>, 4> Defaults;
1394 Register LoopReg = Reg;
1395 auto LoopProducer = Producer;
1396 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1397 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1398 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1399 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1400 assert(LoopProducer);
1401 }
1402 int LoopProducerStage = S.getStage(LoopProducer);
1403
1404 std::optional<Register> IllegalPhiDefault;
1405
1406 if (LoopProducerStage == -1) {
1407 // Do nothing.
1408 } else if (LoopProducerStage > ConsumerStage) {
1409 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1410 // In addition, Consumer's cycle must be scheduled after Producer in the
1411 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1412 // functions.
1413#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1414 int LoopProducerCycle = S.getCycle(LoopProducer);
1415 int ConsumerCycle = S.getCycle(&MI);
1416#endif
1417 assert(LoopProducerCycle <= ConsumerCycle);
1418 assert(LoopProducerStage == ConsumerStage + 1);
1419 // Peel off the first phi from Defaults and insert a phi between producer
1420 // and consumer. This phi will not be at the front of the block so we
1421 // consider it illegal. It will only exist during the rewrite process; it
1422 // needs to exist while we peel off prologs because these could take the
1423 // default value. After that we can replace all uses with the loop producer
1424 // value.
1425 IllegalPhiDefault = Defaults.front();
1426 Defaults.erase(Defaults.begin());
1427 } else {
1428 assert(ConsumerStage >= LoopProducerStage);
1429 int StageDiff = ConsumerStage - LoopProducerStage;
1430 if (StageDiff > 0) {
1431 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1432 << " to " << (Defaults.size() + StageDiff) << "\n");
1433 // If we need more phis than we have defaults for, pad out with undefs for
1434 // the earliest phis, which are at the end of the defaults chain (the
1435 // chain is in reverse order).
1436 Defaults.resize(Defaults.size() + StageDiff,
1437 Defaults.empty() ? std::optional<Register>()
1438 : Defaults.back());
1439 }
1440 }
1441
1442 // Now we know the number of stages to jump back, insert the phi chain.
1443 auto DefaultI = Defaults.rbegin();
1444 while (DefaultI != Defaults.rend())
1445 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1446
1447 if (IllegalPhiDefault) {
1448 // The consumer optionally consumes LoopProducer in the same iteration
1449 // (because the producer is scheduled at an earlier cycle than the consumer)
1450 // or the initial value. To facilitate this we create an illegal block here
1451 // by embedding a phi in the middle of the block. We will fix this up
1452 // immediately prior to pruning.
1453 auto RC = MRI.getRegClass(Reg);
1454 Register R = MRI.createVirtualRegister(RC);
1455 MachineInstr *IllegalPhi =
1456 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1457 .addReg(*IllegalPhiDefault)
1458 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1459 .addReg(LoopReg)
1460 .addMBB(BB); // Block choice is arbitrary and has no effect.
1461 // Illegal phi should belong to the producer stage so that it can be
1462 // filtered correctly during peeling.
1463 S.setStage(IllegalPhi, LoopProducerStage);
1464 return R;
1465 }
1466
1467 return LoopReg;
1468}
1469
1470Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1471 const TargetRegisterClass *RC) {
1472 // If the init register is not undef, try and find an existing phi.
1473 if (InitReg) {
1474 auto I = Phis.find({LoopReg, *InitReg});
1475 if (I != Phis.end())
1476 return I->second;
1477 } else {
1478 for (auto &KV : Phis) {
1479 if (KV.first.first == LoopReg)
1480 return KV.second;
1481 }
1482 }
1483
1484 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1485 // find a phi that takes undef as input.
1486 auto I = UndefPhis.find(LoopReg);
1487 if (I != UndefPhis.end()) {
1488 Register R = I->second;
1489 if (!InitReg)
1490 // Found a phi taking undef as input, and this input is undef so return
1491 // without any more changes.
1492 return R;
1493 // Found a phi taking undef as input, so rewrite it to take InitReg.
1494 MachineInstr *MI = MRI.getVRegDef(R);
1495 MI->getOperand(1).setReg(*InitReg);
1496 Phis.insert({{LoopReg, *InitReg}, R});
1497 const TargetRegisterClass *ConstrainRegClass =
1498 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1499 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1500 (void)ConstrainRegClass;
1501 UndefPhis.erase(I);
1502 return R;
1503 }
1504
1505 // Failed to find any existing phi to reuse, so create a new one.
1506 if (!RC)
1507 RC = MRI.getRegClass(LoopReg);
1508 Register R = MRI.createVirtualRegister(RC);
1509 if (InitReg) {
1510 const TargetRegisterClass *ConstrainRegClass =
1511 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1512 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1513 (void)ConstrainRegClass;
1514 }
1515 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1516 .addReg(InitReg ? *InitReg : undef(RC))
1517 .addMBB(PreheaderBB)
1518 .addReg(LoopReg)
1519 .addMBB(BB);
1520 if (!InitReg)
1521 UndefPhis[LoopReg] = R;
1522 else
1523 Phis[{LoopReg, *InitReg}] = R;
1524 return R;
1525}
1526
1527Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1528 Register &R = Undefs[RC];
1529 if (R == 0) {
1530 // Create an IMPLICIT_DEF that defines this register if we need it.
1531 // All uses of this should be removed by the time we have finished unrolling
1532 // prologs and epilogs.
1533 R = MRI.createVirtualRegister(RC);
1534 auto *InsertBB = &PreheaderBB->getParent()->front();
1535 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1536 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1537 }
1538 return R;
1539}
1540
1541namespace {
1542/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1543/// the operand are discovered, such as how many in-loop PHIs it has to jump
1544/// through and defaults for these phis.
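/// For example (an illustrative sketch, not a specific kernel): a use whose
/// value reaches its in-loop producer only after walking back through two
/// kernel PHIs has a distance of two, and the two PHI initial values are
/// recorded as its defaults.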
1545class KernelOperandInfo {
1546 MachineBasicBlock *BB;
1547 MachineRegisterInfo &MRI;
1548 SmallVector<Register, 4> PhiDefaults;
1549 MachineOperand *Source;
1550 MachineOperand *Target;
1551
1552public:
1553 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1554 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1555 : MRI(MRI) {
1556 Source = MO;
1557 BB = MO->getParent()->getParent();
1558 while (isRegInLoop(MO)) {
1559 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1560 if (MI->isFullCopy()) {
1561 MO = &MI->getOperand(1);
1562 continue;
1563 }
1564 if (!MI->isPHI())
1565 break;
1566 // If this is an illegal phi, don't count it in distance.
1567 if (IllegalPhis.count(MI)) {
1568 MO = &MI->getOperand(3);
1569 continue;
1570 }
1571
1572 Register Default = getInitPhiReg(*MI, BB);
1573 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1574 : &MI->getOperand(3);
1575 PhiDefaults.push_back(Default);
1576 }
1577 Target = MO;
1578 }
1579
1580 bool operator==(const KernelOperandInfo &Other) const {
1581 return PhiDefaults.size() == Other.PhiDefaults.size();
1582 }
1583
1584 void print(raw_ostream &OS) const {
1585 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1586 << *Source->getParent();
1587 }
1588
1589private:
1590 bool isRegInLoop(MachineOperand *MO) {
1591 return MO->isReg() && MO->getReg().isVirtual() &&
1592 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1593 }
1594};
1595} // namespace
1596
1597 MachineBasicBlock *
1598 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1599 MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1600 if (LPD == LPD_Front)
1601 PeeledFront.push_back(NewBB);
1602 else
1603 PeeledBack.push_front(NewBB);
1604 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1605 ++I, ++NI) {
1606 CanonicalMIs[&*I] = &*I;
1607 CanonicalMIs[&*NI] = &*I;
1608 BlockMIs[{NewBB, &*I}] = &*NI;
1609 BlockMIs[{BB, &*I}] = &*I;
1610 }
1611 return NewBB;
1612}
1613
1614 void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
1615 int MinStage) {
1616 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1617 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1618 MachineInstr *MI = &*I++;
1619 int Stage = getStage(MI);
1620 if (Stage == -1 || Stage >= MinStage)
1621 continue;
1622
1623 for (MachineOperand &DefMO : MI->defs()) {
1624 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1625 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1626 // Only PHIs can use values from this block by construction.
1627 // Match with the equivalent PHI in B.
1628 assert(UseMI.isPHI());
1629 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1630 MI->getParent());
1631 Subs.emplace_back(&UseMI, Reg);
1632 }
1633 for (auto &Sub : Subs)
1634 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1635 *MRI.getTargetRegisterInfo());
1636 }
1637 if (LIS)
1638 LIS->RemoveMachineInstrFromMaps(*MI);
1639 MI->eraseFromParent();
1640 }
1641}
1642
1643 void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1644 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1645 auto InsertPt = DestBB->getFirstNonPHI();
1646 DenseMap<Register, Register> Remaps;
1647 for (MachineInstr &MI : llvm::make_early_inc_range(
1648 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1649 if (MI.isPHI()) {
1650 // This is an illegal PHI. If we move any instructions using an illegal
1651 // PHI, we need to create a legal Phi.
1652 if (getStage(&MI) != Stage) {
1653 // The legal Phi is not necessary if the illegal phi's stage
1654 // is being moved.
1655 Register PhiR = MI.getOperand(0).getReg();
1656 auto RC = MRI.getRegClass(PhiR);
1657 Register NR = MRI.createVirtualRegister(RC);
1658 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1659 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1660 .addReg(PhiR)
1661 .addMBB(SourceBB);
1662 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1663 CanonicalMIs[NI] = CanonicalMIs[&MI];
1664 Remaps[PhiR] = NR;
1665 }
1666 }
1667 if (getStage(&MI) != Stage)
1668 continue;
1669 MI.removeFromParent();
1670 DestBB->insert(InsertPt, &MI);
1671 auto *KernelMI = CanonicalMIs[&MI];
1672 BlockMIs[{DestBB, KernelMI}] = &MI;
1673 BlockMIs.erase({SourceBB, KernelMI});
1674 }
1675 SmallVector<MachineInstr *, 4> PhiToDelete;
1676 for (MachineInstr &MI : DestBB->phis()) {
1677 assert(MI.getNumOperands() == 3);
1678 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1679 // If the instruction referenced by the phi is moved inside the block
1680 // we don't need the phi anymore.
1681 if (getStage(Def) == Stage) {
1682 Register PhiReg = MI.getOperand(0).getReg();
1683 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1684 /*TRI=*/nullptr) != -1);
1685 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1686 MI.getOperand(0).setReg(PhiReg);
1687 PhiToDelete.push_back(&MI);
1688 }
1689 }
1690 for (auto *P : PhiToDelete)
1691 P->eraseFromParent();
1692 InsertPt = DestBB->getFirstNonPHI();
1693 // Helper to clone Phi instructions into the destination block. We clone Phi
1694 // greedily to avoid combinatorial explosion of Phi instructions.
1695 auto clonePhi = [&](MachineInstr *Phi) {
1696 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1697 DestBB->insert(InsertPt, NewMI);
1698 Register OrigR = Phi->getOperand(0).getReg();
1699 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1700 NewMI->getOperand(0).setReg(R);
1701 NewMI->getOperand(1).setReg(OrigR);
1702 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1703 Remaps[OrigR] = R;
1704 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1705 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1706 PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
1707 return R;
1708 };
1709 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1710 for (MachineOperand &MO : I->uses()) {
1711 if (!MO.isReg())
1712 continue;
1713 if (Remaps.count(MO.getReg()))
1714 MO.setReg(Remaps[MO.getReg()]);
1715 else {
1716 // If we are using a phi from the source block we need to add a new phi
1717 // pointing to the old one.
1718 MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1719 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1720 Register R = clonePhi(Use);
1721 MO.setReg(R);
1722 }
1723 }
1724 }
1725 }
1726}
1727
1728 Register
1729 PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
1730 MachineInstr *Phi) {
1731 unsigned distance = PhiNodeLoopIteration[Phi];
1732 MachineInstr *CanonicalUse = CanonicalPhi;
1733 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1734 for (unsigned I = 0; I < distance; ++I) {
1735 assert(CanonicalUse->isPHI());
1736 assert(CanonicalUse->getNumOperands() == 5);
1737 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1738 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1739 std::swap(LoopRegIdx, InitRegIdx);
1740 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1741 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1742 }
1743 return CanonicalUseReg;
1744}
1745
1746 void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1747 BitVector LS(Schedule.getNumStages(), true);
1748 BitVector AS(Schedule.getNumStages(), true);
1749 LiveStages[BB] = LS;
1750 AvailableStages[BB] = AS;
1751
1752 // Peel out the prologs.
1753 LS.reset();
1754 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1755 LS[I] = true;
1756 Prologs.push_back(peelKernel(LPD_Front));
1757 LiveStages[Prologs.back()] = LS;
1758 AvailableStages[Prologs.back()] = LS;
1759 }
1760
1761 // Create a block that will end up as the new loop exiting block (dominated by
1762 // all prologs and epilogs). It will only contain PHIs, in the same order as
1763 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1764 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1765 // property that any value defined in BB but used outside of BB is used by a
1766 // PHI in the exiting block.
1767 MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
1768 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1769 // Push out the epilogs, again in reverse order.
1770 // We can't assume anything about the minimum loop trip count at this point,
1771 // so emit a fairly complex epilog.
1772
1773 // We first peel off (number of stages minus one) epilogue blocks. Then we remove dead
1774 // stages and reorder instructions based on their stage. If we have 3 stages
1775 // we generate first:
1776 // E0[3, 2, 1]
1777 // E1[3', 2']
1778 // E2[3'']
1779 // And then we move instructions based on their stages to have:
1780 // E0[3]
1781 // E1[2, 3']
1782 // E2[1, 2', 3'']
1783 // The transformation is legal because we only move instructions past
1784 // instructions of a previous loop iteration.
1785 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1786 Epilogs.push_back(peelKernel(LPD_Back));
1787 MachineBasicBlock *B = Epilogs.back();
1788 filterInstructions(B, Schedule.getNumStages() - I);
1789 // Keep track of which iteration each phi belongs to. We need it to know
1790 // what version of the variable to use during prologue/epilogue stitching.
1791 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1792 for (MachineInstr &Phi : B->phis())
1793 PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
1794 }
1795 for (size_t I = 0; I < Epilogs.size(); I++) {
1796 LS.reset();
1797 for (size_t J = I; J < Epilogs.size(); J++) {
1798 int Iteration = J;
1799 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1800 // Move stage one block at a time so that Phi nodes are updated correctly.
1801 for (size_t K = Iteration; K > I; K--)
1802 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1803 LS[Stage] = true;
1804 }
1805 LiveStages[Epilogs[I]] = LS;
1806 AvailableStages[Epilogs[I]] = AS;
1807 }
1808
1809 // Now we've defined all the prolog and epilog blocks as a fallthrough
1810 // sequence, add the edges that will be followed if the loop trip count is
1811 // lower than the number of stages (connecting prologs directly with epilogs).
1812 auto PI = Prologs.begin();
1813 auto EI = Epilogs.begin();
1814 assert(Prologs.size() == Epilogs.size());
1815 for (; PI != Prologs.end(); ++PI, ++EI) {
1816 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1817 (*PI)->addSuccessor(*EI);
1818 for (MachineInstr &MI : (*EI)->phis()) {
1819 Register Reg = MI.getOperand(1).getReg();
1820 MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1821 if (Use && Use->getParent() == Pred) {
1822 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1823 if (CanonicalUse->isPHI()) {
1824 // If the use comes from a phi we need to skip as many phis as the
1825 // distance between the epilogue and the kernel. Trace through the phi
1826 // chain to find the right value.
1827 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1828 }
1829 Reg = getEquivalentRegisterIn(Reg, *PI);
1830 }
1831 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1832 MI.addOperand(MachineOperand::CreateMBB(*PI));
1833 }
1834 }
1835
1836 // Create a list of all blocks in order.
1837 SmallVector<MachineBasicBlock *, 8> Blocks;
1838 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1839 Blocks.push_back(BB);
1840 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1841
1842 // Iterate in reverse order over all instructions, remapping as we go.
1843 for (MachineBasicBlock *B : reverse(Blocks)) {
1844 for (auto I = B->instr_rbegin();
1845 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1846 MachineBasicBlock::reverse_instr_iterator MI = I++;
1847 rewriteUsesOf(&*MI);
1848 }
1849 }
1850 for (auto *MI : IllegalPhisToDelete) {
1851 if (LIS)
1852 LIS->RemoveMachineInstrFromMaps(*MI);
1853 MI->eraseFromParent();
1854 }
1855 IllegalPhisToDelete.clear();
1856
1857 // Now that all remapping has been done, we're free to optimize the generated code.
1858 for (MachineBasicBlock *B : reverse(Blocks))
1859 EliminateDeadPhis(B, MRI, LIS);
1860 EliminateDeadPhis(ExitingBB, MRI, LIS);
1861}
1862
1863 MachineBasicBlock *
1864 PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1865 MachineBasicBlock *Exit = *BB->succ_begin();
1866 if (Exit == BB)
1867 Exit = *std::next(BB->succ_begin());
1868
1869 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1870 MF.insert(std::next(BB->getIterator()), NewBB);
1871
1872 // Clone all phis in BB into NewBB and rewrite.
1873 for (MachineInstr &MI : BB->phis()) {
1874 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1875 Register OldR = MI.getOperand(3).getReg();
1876 Register R = MRI.createVirtualRegister(RC);
1877 SmallVector<MachineInstr *, 4> Uses;
1878 for (MachineInstr &Use : MRI.use_instructions(OldR))
1879 if (Use.getParent() != BB)
1880 Uses.push_back(&Use);
1881 for (MachineInstr *Use : Uses)
1882 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1883 *MRI.getTargetRegisterInfo());
1884 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1885 .addReg(OldR)
1886 .addMBB(BB);
1887 BlockMIs[{NewBB, &MI}] = NI;
1888 CanonicalMIs[NI] = &MI;
1889 }
1890 BB->replaceSuccessor(Exit, NewBB);
1891 Exit->replacePhiUsesWith(BB, NewBB);
1892 NewBB->addSuccessor(Exit);
1893
1894 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1895 SmallVector<MachineOperand, 4> Cond;
1896 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1897 (void)CanAnalyzeBr;
1898 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1899 TII->removeBranch(*BB);
1900 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1901 Cond, DebugLoc());
1902 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1903 return NewBB;
1904}
1905
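// Sketch of the lookup below: every peeled block is a clone of the kernel, so
// for a register defined by some instruction I, the equivalent value in block
// BB is the register defined in the same operand slot by BB's copy of I,
// found by going I -> CanonicalMIs[I] -> BlockMIs[{BB, CanonicalMIs[I]}].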
1906 Register
1907 PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
1908 MachineBasicBlock *BB) {
1909 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1910 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
1911 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1912}
1913
1914 void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
1915 if (MI->isPHI()) {
1916 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1917 // and it is produced by this block.
1918 Register PhiR = MI->getOperand(0).getReg();
1919 Register R = MI->getOperand(3).getReg();
1920 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1921 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1922 R = MI->getOperand(1).getReg();
1923 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1924 MRI.replaceRegWith(PhiR, R);
1925 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1926 // later to figure out how to remap registers.
1927 MI->getOperand(0).setReg(PhiR);
1928 IllegalPhisToDelete.push_back(MI);
1929 return;
1930 }
1931
1932 int Stage = getStage(MI);
1933 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1934 LiveStages[MI->getParent()].test(Stage))
1935 // Instruction is live, no rewriting to do.
1936 return;
1937
1938 for (MachineOperand &DefMO : MI->defs()) {
1939 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1940 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1941 // Only PHIs can use values from this block by construction.
1942 // Match with the equivalent PHI in B.
1943 assert(UseMI.isPHI());
1944 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1945 MI->getParent());
1946 Subs.emplace_back(&UseMI, Reg);
1947 }
1948 for (auto &Sub : Subs)
1949 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1950 *MRI.getTargetRegisterInfo());
1951 }
1952 if (LIS)
1953 LIS->RemoveMachineInstrFromMaps(*MI);
1954 MI->eraseFromParent();
1955}
1956
1957 void PeelingModuloScheduleExpander::fixupBranches() {
1958 // Work outwards from the kernel.
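// For example, assuming a schedule with 3 stages: the prolog closest to the
// kernel is visited first and tests "trip count > 2" (enough iterations to
// reach the kernel at all), the next prolog out tests "trip count > 1", and
// so on, with TC dropping by one for each prolog/epilog pair moving outwards.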
1959 bool KernelDisposed = false;
1960 int TC = Schedule.getNumStages() - 1;
1961 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1962 ++PI, ++EI, --TC) {
1963 MachineBasicBlock *Prolog = *PI;
1964 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1965 MachineBasicBlock *Epilog = *EI;
1966 SmallVector<MachineOperand, 4> Cond;
1967 TII->removeBranch(*Prolog);
1968 std::optional<bool> StaticallyGreater =
1969 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1970 if (!StaticallyGreater) {
1971 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1972 // Dynamically branch based on Cond.
1973 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1974 } else if (*StaticallyGreater == false) {
1975 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1976 // Prolog never falls through; branch to epilog and orphan interior
1977 // blocks. Leave it to unreachable-block-elim to clean up.
1978 Prolog->removeSuccessor(Fallthrough);
1979 for (MachineInstr &P : Fallthrough->phis()) {
1980 P.removeOperand(2);
1981 P.removeOperand(1);
1982 }
1983 TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
1984 KernelDisposed = true;
1985 } else {
1986 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1987 // Prolog always falls through; remove incoming values in epilog.
1988 Prolog->removeSuccessor(Epilog);
1989 for (MachineInstr &P : Epilog->phis()) {
1990 P.removeOperand(4);
1991 P.removeOperand(3);
1992 }
1993 }
1994 }
1995
1996 if (!KernelDisposed) {
1997 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1998 LoopInfo->setPreheader(Prologs.back());
1999 } else {
2000 LoopInfo->disposed();
2001 }
2002}
2003
2004 void PeelingModuloScheduleExpander::rewriteKernel() {
2005 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2006 KR.rewrite();
2007}
2008
2015
2016 rewriteKernel();
2017 peelPrologAndEpilogs();
2018 fixupBranches();
2019}
2020
2024
2025 // Dump the schedule before we invalidate and remap all its instructions.
2026 // Stash it in a string so we can print it if we found an error.
2027 std::string ScheduleDump;
2028 raw_string_ostream OS(ScheduleDump);
2029 Schedule.print(OS);
2030 OS.flush();
2031
2032 // First, run the normal ModuloScheduleExpander. We don't support any
2033 // InstrChanges.
2034 assert(LIS && "Requires LiveIntervals!");
2035 ModuloScheduleExpander MSE(
2036 MF, Schedule, *LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2037 MSE.expand();
2038 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2039 if (!ExpandedKernel) {
2040 // The expander optimized away the kernel. We can't do any useful checking.
2041 MSE.cleanup();
2042 return;
2043 }
2044 // Before running the KernelRewriter, re-add BB into the CFG.
2046
2047 // Now run the new expansion algorithm.
2048 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2049 KR.rewrite();
2051
2052 // Collect all illegal phis that the new algorithm created. We'll give these
2053 // to KernelOperandInfo.
2054 SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2055 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2056 if (NI->isPHI())
2057 IllegalPhis.insert(&*NI);
2058 }
2059
2060 // Co-iterate across both kernels. We expect them to be identical apart from
2061 // phis and full COPYs (we look through both).
2062 SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
2063 auto OI = ExpandedKernel->begin();
2064 auto NI = BB->begin();
2065 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2066 while (OI->isPHI() || OI->isFullCopy())
2067 ++OI;
2068 while (NI->isPHI() || NI->isFullCopy())
2069 ++NI;
2070 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2071 // Analyze every operand separately.
2072 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2073 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2074 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2075 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2076 }
2077
2078 bool Failed = false;
2079 for (auto &OldAndNew : KOIs) {
2080 if (OldAndNew.first == OldAndNew.second)
2081 continue;
2082 Failed = true;
2083 errs() << "Modulo kernel validation error: [\n";
2084 errs() << " [golden] ";
2085 OldAndNew.first.print(errs());
2086 errs() << " ";
2087 OldAndNew.second.print(errs());
2088 errs() << "]\n";
2089 }
2090
2091 if (Failed) {
2092 errs() << "Golden reference kernel:\n";
2093 ExpandedKernel->print(errs());
2094 errs() << "New kernel:\n";
2095 BB->print(errs());
2096 errs() << ScheduleDump;
2098 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2099 }
2100
2101 // Cleanup by removing BB from the CFG again as the original
2102 // ModuloScheduleExpander intended.
2104 MSE.cleanup();
2105}
2106
2107MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2108 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2109
2110 // TODO: Offset information needs to be corrected.
2111 NewMI->dropMemRefs(MF);
2112
2113 return NewMI;
2114}
2115
2116/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2117 /// If it is already a dedicated exit, return it. Otherwise, insert a new
2118 /// block between them and return the new block.
2119 static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop,
2120 MachineBasicBlock *Exit) {
2121 if (Exit->pred_size() == 1)
2122 return Exit;
2123
2124 MachineFunction *MF = Loop->getParent();
2125 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2126
2127 MachineBasicBlock *NewExit =
2128 MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2129 MF->insert(Loop->getIterator(), NewExit);
2130
2131 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2132 SmallVector<MachineOperand, 4> Cond;
2133 TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2134 if (TBB == Loop)
2135 FBB = NewExit;
2136 else if (FBB == Loop)
2137 TBB = NewExit;
2138 else
2139 llvm_unreachable("unexpected loop structure");
2140 TII->removeBranch(*Loop);
2141 TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2142 Loop->replaceSuccessor(Exit, NewExit);
2143 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2144 NewExit->addSuccessor(Exit);
2145
2146 Exit->replacePhiUsesWith(Loop, NewExit);
2147
2148 return NewExit;
2149}
2150
2151 /// Insert branch code into the end of MBB. It branches to GreaterThan if the
2152 /// remaining trip count for instructions in LastStage0Insts is greater than
2153 /// RequiredTC, and to Otherwise if it is not.
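/// For instance, the Check block created below calls this with
/// RequiredTC = NumStages + NumUnroll - 2, so control only reaches the
/// pipelined prolog when enough iterations remain to fill the pipeline and
/// run the unrolled kernel at least once; otherwise the original loop is used.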
2154void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2155 int RequiredTC,
2156 InstrMapTy &LastStage0Insts,
2157 MachineBasicBlock &GreaterThan,
2158 MachineBasicBlock &Otherwise) {
2159 SmallVector<MachineOperand, 4> Cond;
2160 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2161 LastStage0Insts);
2162
2163 if (SwapBranchTargetsMVE) {
2164 // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2165 // FBB for optimal performance.
2166 if (TII->reverseBranchCondition(Cond))
2167 llvm_unreachable("can not reverse branch condition");
2168 TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2169 } else {
2170 TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2171 }
2172}
2173
2174 /// Generate a pipelined loop that is unrolled by using the MVE algorithm and any
2175/// other necessary blocks. The control flow is modified to execute the
2176/// pipelined loop if the trip count satisfies the condition, otherwise the
2177/// original loop. The original loop is also used to execute the remainder
2178/// iterations which occur due to unrolling.
2179void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2180 // The control flow for pipelining with MVE:
2181 //
2182 // OrigPreheader:
2183 // // The block that is originally the loop preheader
2184 // goto Check
2185 //
2186 // Check:
2187 // // Check whether the trip count satisfies the requirements to pipeline.
2188 // if (LoopCounter > NumStages + NumUnroll - 2)
2189 // // The minimum number of iterations to pipeline =
2190 // // iterations executed in prolog/epilog (NumStages-1) +
2191 // // iterations executed in one kernel run (NumUnroll)
2192 // goto Prolog
2193 // // fallback to the original loop
2194 // goto NewPreheader
2195 //
2196 // Prolog:
2197 // // All prolog stages. There are no direct branches to the epilogue.
2198 // goto NewKernel
2199 //
2200 // NewKernel:
2201 // // NumUnroll copies of the kernel
2202 // if (LoopCounter > MVE-1)
2203 // goto NewKernel
2204 // goto Epilog
2205 //
2206 // Epilog:
2207 // // All epilog stages.
2208 // if (LoopCounter > 0)
2209 // // The remainder is executed in the original loop
2210 // goto NewPreheader
2211 // goto NewExit
2212 //
2213 // NewPreheader:
2214 // // Newly created preheader for the original loop.
2215 // // The initial values of the phis in the loop are merged from two paths.
2216 // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2217 // goto OrigKernel
2218 //
2219 // OrigKernel:
2220 // // The original loop block.
2221 // if (LoopCounter != 0)
2222 // goto OrigKernel
2223 // goto NewExit
2224 //
2225 // NewExit:
2226 // // Newly created dedicated exit for the original loop.
2227 // // Merge values which are referenced after the loop
2228 // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2229 // goto OrigExit
2230 //
2231 // OrigExit:
2232 // // The block that is originally the loop exit.
2233 // // If it is already a dedicated exit, NewExit is not created.
2234
2235 // An example of where each stage is executed:
2236 // Assume #Stages 3, #MVE 4, #Iterations 12
2237 // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2238 // -------------------------------------------------
2239 // Stage 0 Prolog#0
2240 // Stage 1 0 Prolog#1
2241 // Stage 2 1 0 Kernel Unroll#0 Iter#0
2242 // Stage 2 1 0 Kernel Unroll#1 Iter#0
2243 // Stage 2 1 0 Kernel Unroll#2 Iter#0
2244 // Stage 2 1 0 Kernel Unroll#3 Iter#0
2245 // Stage 2 1 0 Kernel Unroll#0 Iter#1
2246 // Stage 2 1 0 Kernel Unroll#1 Iter#1
2247 // Stage 2 1 0 Kernel Unroll#2 Iter#1
2248 // Stage 2 1 0 Kernel Unroll#3 Iter#1
2249 // Stage 2 1 Epilog#0
2250 // Stage 2 Epilog#1
2251 // Stage 0-2 OrigKernel
2252
2253 LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2254 assert(LoopInfo && "Must be able to analyze loop!");
2255
2256 calcNumUnroll();
2257
2258 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2259 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2260 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2261 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2262 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2263
2264 MF.insert(OrigKernel->getIterator(), Check);
2265 MF.insert(OrigKernel->getIterator(), Prolog);
2266 MF.insert(OrigKernel->getIterator(), NewKernel);
2267 MF.insert(OrigKernel->getIterator(), Epilog);
2268 MF.insert(OrigKernel->getIterator(), NewPreheader);
2269
2270 NewExit = createDedicatedExit(OrigKernel, OrigExit);
2271
2272 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2273 TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2274
2275 OrigPreheader->addSuccessor(Check);
2276 TII->removeBranch(*OrigPreheader);
2277 TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2278
2279 Check->addSuccessor(Prolog);
2280 Check->addSuccessor(NewPreheader);
2281
2282 Prolog->addSuccessor(NewKernel);
2283
2284 NewKernel->addSuccessor(NewKernel);
2285 NewKernel->addSuccessor(Epilog);
2286
2287 Epilog->addSuccessor(NewPreheader);
2288 Epilog->addSuccessor(NewExit);
2289
2290 InstrMapTy LastStage0Insts;
2291 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2292 LastStage0Insts, *Prolog, *NewPreheader);
2293
2294 // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2295 // register#
2296 SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2297 generateProlog(PrologVRMap);
2298 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2299 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2300}
2301
2302/// Replace MI's use operands according to the maps.
2303void ModuloScheduleExpanderMVE::updateInstrUse(
2304 MachineInstr *MI, int StageNum, int PhaseNum,
2305 SmallVectorImpl<ValueMapTy> &CurVRMap,
2306 SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2307 // If MI is in the prolog/kernel/epilog block, CurVRMap is
2308 // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2309 // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2310 // Refer to the appropriate map according to the stage difference between
2311 // MI and the definition of an operand.
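// A small worked example of the selection below (illustrative only): let MI
// be a kernel instruction (PhaseNum is its unroll index) in stage 1 that uses
// a value defined directly, not through a phi, by a stage-0 instruction, so
// DiffStage == 1. In unroll copy 2 the use reads CurVRMap[1], the register
// created for that def in the previous unroll copy; in unroll copy 0 we have
// PhaseNum < DiffStage, so the value comes from the previous kernel iteration
// through (*PrevVRMap)[PrevVRMap->size() - 1], i.e. the PhiVRMap entry of the
// last unroll copy.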
2312
2313 for (MachineOperand &UseMO : MI->uses()) {
2314 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2315 continue;
2316 int DiffStage = 0;
2317 Register OrigReg = UseMO.getReg();
2318 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2319 if (!DefInst || DefInst->getParent() != OrigKernel)
2320 continue;
2321 unsigned InitReg = 0;
2322 unsigned DefReg = OrigReg;
2323 if (DefInst->isPHI()) {
2324 ++DiffStage;
2325 unsigned LoopReg;
2326 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2327 // LoopReg is guaranteed to be defined within the loop by canApply()
2328 DefReg = LoopReg;
2329 DefInst = MRI.getVRegDef(LoopReg);
2330 }
2331 unsigned DefStageNum = Schedule.getStage(DefInst);
2332 DiffStage += StageNum - DefStageNum;
2333 Register NewReg;
2334 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2335 // NewReg is defined in a previous phase of the same block
2336 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2337 else if (!PrevVRMap)
2338 // Since this is the first iteration, refer to the initial register of the
2339 // loop
2340 NewReg = InitReg;
2341 else
2342 // Cases where DiffStage is larger than PhaseNum.
2343 // If MI is in the kernel block, the value is defined by the previous
2344 // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2345 // value is defined in the kernel block and KernelVRMap is referenced.
2346 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2347
2348 const TargetRegisterClass *NRC =
2349 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2350 if (NRC)
2351 UseMO.setReg(NewReg);
2352 else {
2353 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2354 BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
2355 SplitReg)
2356 .addReg(NewReg);
2357 UseMO.setReg(SplitReg);
2358 }
2359 }
2360}
2361
2362/// Return a phi if Reg is referenced by the phi.
2363 /// canApply() guarantees that at most one such phi exists.
2364 static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
2365 for (MachineInstr &Phi : Loop->phis()) {
2366 unsigned InitVal, LoopVal;
2367 getPhiRegs(Phi, Loop, InitVal, LoopVal);
2368 if (LoopVal == Reg)
2369 return &Phi;
2370 }
2371 return nullptr;
2372}
2373
2374/// Generate phis for registers defined by OrigMI.
2375void ModuloScheduleExpanderMVE::generatePhi(
2376 MachineInstr *OrigMI, int UnrollNum,
2377 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2378 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2379 SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2380 int StageNum = Schedule.getStage(OrigMI);
2381 bool UsePrologReg;
2382 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2383 UsePrologReg = true;
2384 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2385 UsePrologReg = false;
2386 else
2387 return;
2388
2389 // Examples that show which stages are merged by phi.
2390 // Meaning of the symbol following the stage number:
2391 // a/b: Stages with the same letter are merged (UsePrologReg == true)
2392 // +: Merged with the initial value (UsePrologReg == false)
2393 // *: No phis required
2394 //
2395 // #Stages 3, #MVE 4
2396 // Iter 0 1 2 3 4 5 6 7 8
2397 // -----------------------------------------
2398 // Stage 0a Prolog#0
2399 // Stage 1a 0b Prolog#1
2400 // Stage 2* 1* 0* Kernel Unroll#0
2401 // Stage 2* 1* 0+ Kernel Unroll#1
2402 // Stage 2* 1+ 0a Kernel Unroll#2
2403 // Stage 2+ 1a 0b Kernel Unroll#3
2404 //
2405 // #Stages 3, #MVE 2
2406 // Iter 0 1 2 3 4 5 6 7 8
2407 // -----------------------------------------
2408 // Stage 0a Prolog#0
2409 // Stage 1a 0b Prolog#1
2410 // Stage 2* 1+ 0a Kernel Unroll#0
2411 // Stage 2+ 1a 0b Kernel Unroll#1
2412 //
2413 // #Stages 3, #MVE 1
2414 // Iter 0 1 2 3 4 5 6 7 8
2415 // -----------------------------------------
2416 // Stage 0* Prolog#0
2417 // Stage 1a 0b Prolog#1
2418 // Stage 2+ 1a 0b Kernel Unroll#0
2419
2420 for (MachineOperand &DefMO : OrigMI->defs()) {
2421 if (!DefMO.isReg() || DefMO.isDead())
2422 continue;
2423 Register OrigReg = DefMO.getReg();
2424 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2425 if (NewReg == KernelVRMap[UnrollNum].end())
2426 continue;
2427 Register CorrespondReg;
2428 if (UsePrologReg) {
2429 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2430 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2431 } else {
2432 MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2433 if (!Phi)
2434 continue;
2435 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2436 }
2437
2438 assert(CorrespondReg.isValid());
2439 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2440 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2441 TII->get(TargetOpcode::PHI), PhiReg)
2442 .addReg(NewReg->second)
2443 .addMBB(NewKernel)
2444 .addReg(CorrespondReg)
2445 .addMBB(Prolog);
2446 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2447 }
2448}
2449
2450static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2451 MachineBasicBlock *NewMBB) {
2452 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2453 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2454 Phi.getOperand(Idx).setReg(NewReg);
2455 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2456 return;
2457 }
2458 }
2459}
2460
2461/// Generate phis that merge values from multiple routes
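/// Roughly: a value of OrigReg that is still live after the loops gets a PHI
/// in NewExit merging the original-loop value with the epilog value NewReg,
/// and each loop-carried phi in OrigKernel has its initial value replaced by
/// a PHI in NewPreheader merging the Check-path initial value with NewReg.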
2462void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2463 Register NewReg) {
2464 SmallVector<MachineOperand *> UsesAfterLoop;
2465 SmallVector<MachineInstr *> LoopPhis;
2466 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2467 E = MRI.use_end();
2468 I != E; ++I) {
2469 MachineOperand &O = *I;
2470 if (O.getParent()->getParent() != OrigKernel &&
2471 O.getParent()->getParent() != Prolog &&
2472 O.getParent()->getParent() != NewKernel &&
2473 O.getParent()->getParent() != Epilog)
2474 UsesAfterLoop.push_back(&O);
2475 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2476 LoopPhis.push_back(O.getParent());
2477 }
2478
2479 // Merge the route that only executes the pipelined loop (when there are no
2480 // remaining iterations) with the route that executes the original loop.
2481 if (!UsesAfterLoop.empty()) {
2482 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2483 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2484 TII->get(TargetOpcode::PHI), PhiReg)
2485 .addReg(OrigReg)
2486 .addMBB(OrigKernel)
2487 .addReg(NewReg)
2488 .addMBB(Epilog);
2489
2490 for (MachineOperand *MO : UsesAfterLoop)
2491 MO->setReg(PhiReg);
2492
2493 if (!LIS.hasInterval(PhiReg))
2494 LIS.createEmptyInterval(PhiReg);
2495 }
2496
2497 // Merge routes from the pipelined loop and the bypassed route before the
2498 // original loop
2499 if (!LoopPhis.empty()) {
2500 for (MachineInstr *Phi : LoopPhis) {
2501 unsigned InitReg, LoopReg;
2502 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2503 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2504 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(),
2505 TII->get(TargetOpcode::PHI), NewInit)
2506 .addReg(InitReg)
2507 .addMBB(Check)
2508 .addReg(NewReg)
2509 .addMBB(Epilog);
2510 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2511 }
2512 }
2513}
2514
2515void ModuloScheduleExpanderMVE::generateProlog(
2516 SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2517 PrologVRMap.clear();
2518 PrologVRMap.resize(Schedule.getNumStages() - 1);
2519 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2520 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2521 ++PrologNum) {
2522 for (MachineInstr *MI : Schedule.getInstructions()) {
2523 if (MI->isPHI())
2524 continue;
2525 int StageNum = Schedule.getStage(MI);
2526 if (StageNum > PrologNum)
2527 continue;
2528 MachineInstr *NewMI = cloneInstr(MI);
2529 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2530 NewMIMap[NewMI] = {PrologNum, StageNum};
2531 Prolog->push_back(NewMI);
2532 }
2533 }
2534
2535 for (auto I : NewMIMap) {
2536 MachineInstr *MI = I.first;
2537 int PrologNum = I.second.first;
2538 int StageNum = I.second.second;
2539 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2540 }
2541
2542 LLVM_DEBUG({
2543 dbgs() << "prolog:\n";
2544 Prolog->dump();
2545 });
2546}
2547
2548void ModuloScheduleExpanderMVE::generateKernel(
2549 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2550 SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2551 KernelVRMap.clear();
2552 KernelVRMap.resize(NumUnroll);
2553 SmallVector<ValueMapTy> PhiVRMap;
2554 PhiVRMap.resize(NumUnroll);
2555 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2556
2557 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2558 for (MachineInstr *MI : Schedule.getInstructions()) {
2559 if (MI->isPHI())
2560 continue;
2561 int StageNum = Schedule.getStage(MI);
2562 MachineInstr *NewMI = cloneInstr(MI);
2563 if (UnrollNum == NumUnroll - 1)
2564 LastStage0Insts[MI] = NewMI;
2565 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2566 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2567 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2568 NewMIMap[NewMI] = {UnrollNum, StageNum};
2569 NewKernel->push_back(NewMI);
2570 }
2571 }
2572
2573 for (auto I : NewMIMap) {
2574 MachineInstr *MI = I.first;
2575 int UnrollNum = I.second.first;
2576 int StageNum = I.second.second;
2577 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2578 }
2579
2580 // If remaining trip count is greater than NumUnroll-1, loop continues
2581 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2582 *Epilog);
2583
2584 LLVM_DEBUG({
2585 dbgs() << "kernel:\n";
2586 NewKernel->dump();
2587 });
2588}
2589
2590void ModuloScheduleExpanderMVE::generateEpilog(
2591 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2592 SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2593 EpilogVRMap.clear();
2594 EpilogVRMap.resize(Schedule.getNumStages() - 1);
2595 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2596 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2597 ++EpilogNum) {
2598 for (MachineInstr *MI : Schedule.getInstructions()) {
2599 if (MI->isPHI())
2600 continue;
2601 int StageNum = Schedule.getStage(MI);
2602 if (StageNum <= EpilogNum)
2603 continue;
2604 MachineInstr *NewMI = cloneInstr(MI);
2605 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2606 NewMIMap[NewMI] = {EpilogNum, StageNum};
2607 Epilog->push_back(NewMI);
2608 }
2609 }
2610
2611 for (auto I : NewMIMap) {
2612 MachineInstr *MI = I.first;
2613 int EpilogNum = I.second.first;
2614 int StageNum = I.second.second;
2615 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2616 }
2617
2618 // If there are remaining iterations, they are executed in the original loop.
2619 // Instructions related to loop control, such as the loop counter comparison,
2620 // are indicated by shouldIgnoreForPipelining() and are assumed to be in stage
2621 // 0; LastStage0Insts therefore holds their copies from the last kernel copy.
2622 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2623
2624 LLVM_DEBUG({
2625 dbgs() << "epilog:\n";
2626 Epilog->dump();
2627 });
2628}
2629
2630 /// Calculate the required number of kernel unrolls and store it in NumUnroll.
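/// Sketch of the rule applied below: for every use whose definition also
/// lives in the loop, the overlap needed is 1, plus 1 if the use goes through
/// a loop-carried phi, plus the stage distance from the definition to the
/// use, minus 1 if the definition appears at or after the use in the schedule
/// order; NumUnroll is the maximum of this over all uses (and at least 1).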
2631void ModuloScheduleExpanderMVE::calcNumUnroll() {
2632 DenseMap<MachineInstr *, unsigned> Inst2Idx;
2633 NumUnroll = 1;
2634 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2635 Inst2Idx[Schedule.getInstructions()[I]] = I;
2636
2637 for (MachineInstr *MI : Schedule.getInstructions()) {
2638 if (MI->isPHI())
2639 continue;
2640 int StageNum = Schedule.getStage(MI);
2641 for (const MachineOperand &MO : MI->uses()) {
2642 if (!MO.isReg() || !MO.getReg().isVirtual())
2643 continue;
2644 MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2645 if (DefMI->getParent() != OrigKernel)
2646 continue;
2647
2648 int NumUnrollLocal = 1;
2649 if (DefMI->isPHI()) {
2650 ++NumUnrollLocal;
2651 // canApply() guarantees that DefMI is not a phi and is an instruction in
2652 // the loop
2653 DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2654 }
2655 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2656 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2657 --NumUnrollLocal;
2658 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2659 }
2660 }
2661 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2662}
2663
2664/// Create new virtual registers for definitions of NewMI and update NewMI.
2665/// If the definitions are referenced after the pipelined loop, phis are
2666/// created to merge with other routes.
2667void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2668 ValueMapTy &VRMap,
2669 bool LastDef) {
2670 for (MachineOperand &MO : NewMI->operands()) {
2671 if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef())
2672 continue;
2673 Register Reg = MO.getReg();
2674 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2675 Register NewReg = MRI.createVirtualRegister(RC);
2676 MO.setReg(NewReg);
2677 VRMap[Reg] = NewReg;
2678 if (LastDef)
2679 mergeRegUsesAfterPipeline(Reg, NewReg);
2680 }
2681}
2682
2683 void ModuloScheduleExpanderMVE::expand() {
2684 OrigKernel = Schedule.getLoop()->getTopBlock();
2685 OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2686 OrigExit = Schedule.getLoop()->getExitBlock();
2687
2688 LLVM_DEBUG(Schedule.dump());
2689
2690 generatePipelinedLoop();
2691}
2692
2693/// Check if ModuloScheduleExpanderMVE can be applied to L
2694 bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
2695 if (!L.getExitBlock()) {
2696 LLVM_DEBUG(
2697 dbgs() << "Can not apply MVE expander: No single exit block.\n";);
2698 return false;
2699 }
2700
2701 MachineBasicBlock *BB = L.getTopBlock();
2702 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2703
2704 // Put some constraints on the operands of the phis to simplify the
2705 // transformation
2706 DenseSet<unsigned> UsedByPhi;
2707 for (MachineInstr &MI : BB->phis()) {
2708 // Registers defined by phis must be used only inside the loop and must never
2709 // be used by phis.
2710 for (MachineOperand &MO : MI.defs())
2711 if (MO.isReg())
2712 for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2713 if (Ref.getParent() != BB || Ref.isPHI()) {
2715 << "Can not apply MVE expander: A phi result is "
2716 "referenced outside of the loop or by phi.\n";);
2717 return false;
2718 }
2719
2720 // A source register from the loop block must be defined inside the loop.
2721 // A register defined inside the loop must be referenced by at most one
2722 // phi.
2723 unsigned InitVal, LoopVal;
2724 getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2725 if (!Register(LoopVal).isVirtual() ||
2726 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2727 LLVM_DEBUG(
2728 dbgs() << "Can not apply MVE expander: A phi source value coming "
2729 "from the loop is not defined in the loop.\n";);
2730 return false;
2731 }
2732 if (UsedByPhi.count(LoopVal)) {
2733 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2734 "loop is referenced by two or more phis.\n";);
2735 return false;
2736 }
2737 UsedByPhi.insert(LoopVal);
2738 }
2739
2740 return true;
2741}
2742
2743//===----------------------------------------------------------------------===//
2744// ModuloScheduleTestPass implementation
2745//===----------------------------------------------------------------------===//
2746// This pass constructs a ModuloSchedule from its module and runs
2747// ModuloScheduleExpander.
2748//
2749// The module is expected to contain a single-block analyzable loop.
2750// The total order of instructions is taken from the loop as-is.
2751// Instructions are expected to be annotated with a PostInstrSymbol.
2752// This PostInstrSymbol must have the following format:
2753// "Stage=%d Cycle=%d".
2754//===----------------------------------------------------------------------===//
2755
2756namespace {
2757class ModuloScheduleTest : public MachineFunctionPass {
2758public:
2759 static char ID;
2760
2761 ModuloScheduleTest() : MachineFunctionPass(ID) {
2762 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
2763 }
2764
2765 bool runOnMachineFunction(MachineFunction &MF) override;
2766 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2767
2768 void getAnalysisUsage(AnalysisUsage &AU) const override {
2769 AU.addRequired<MachineLoopInfoWrapperPass>();
2770 AU.addRequired<LiveIntervalsWrapperPass>();
2771 MachineFunctionPass::getAnalysisUsage(AU);
2772 }
2773};
2774} // namespace
2775
2776char ModuloScheduleTest::ID = 0;
2777
2778INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2779 "Modulo Schedule test pass", false, false)
2780 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
2781 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
2782 INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2783 "Modulo Schedule test pass", false, false)
2784
2785bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2786 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2787 for (auto *L : MLI) {
2788 if (L->getTopBlock() != L->getBottomBlock())
2789 continue;
2790 runOnLoop(MF, *L);
2791 return false;
2792 }
2793 return false;
2794}
2795
2796static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2797 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2798 std::pair<StringRef, StringRef> StageTokenAndValue =
2799 getToken(StageAndCycle.first, "-");
2800 std::pair<StringRef, StringRef> CycleTokenAndValue =
2801 getToken(StageAndCycle.second, "-");
2802 if (StageTokenAndValue.first != "Stage" ||
2803 CycleTokenAndValue.first != "_Cycle") {
2805 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2806 return;
2807 }
2808
2809 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2810 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2811
2812 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2813}
2814
2815void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2816 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2817 MachineBasicBlock *BB = L.getTopBlock();
2818 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2819
2821 std::vector<MachineInstr *> Instrs;
2822 for (MachineInstr &MI : *BB) {
2823 if (MI.isTerminator())
2824 continue;
2825 Instrs.push_back(&MI);
2826 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2827 dbgs() << "Parsing post-instr symbol for " << MI;
2828 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2829 }
2830 }
2831
2832 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2833 std::move(Stage));
2834 ModuloScheduleExpander MSE(
2835 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2836 MSE.expand();
2837 MSE.cleanup();
2838}
2839
2840//===----------------------------------------------------------------------===//
2841// ModuloScheduleTestAnnotater implementation
2842//===----------------------------------------------------------------------===//
2843
2844 void ModuloScheduleTestAnnotater::annotate() {
2845 for (MachineInstr *MI : S.getInstructions()) {
2846 SmallString<32> SymbolName;
2847 raw_svector_ostream OS(SymbolName);
2848 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2849 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(SymbolName);
2850 MI->setPostInstrSymbol(MF, Sym);
2851 }
2852}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
@ Default
Definition: DwarfDebug.cpp:87
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1309
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Symbol * Sym
Definition: ELF_riscv.cpp:479
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
This file provides utility analysis objects describing memory locations.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)
Create a dedicated exit for Loop.
modulo schedule test
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
modulo schedule Modulo Schedule test pass
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static bool rewrite(Function &F)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:213
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:572
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:812
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:722
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:782
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
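Two small, hedged examples of the MachineRegisterInfo queries above: renaming a virtual register wholesale with replaceRegWith, and checking where a register's single SSA definition lives. Both helpers are hypothetical.

#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

static Register renameVReg(MachineRegisterInfo &MRI, Register OldReg) {
  // The replacement must share the old register's class.
  Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
  // Rewrites every def and use of OldReg in the function.
  MRI.replaceRegWith(OldReg, NewReg);
  return NewReg;
}

static bool definedIn(MachineRegisterInfo &MRI, Register Reg,
                      MachineBasicBlock *MBB) {
  // In SSA form a virtual register has at most one defining instruction.
  MachineInstr *DefMI = MRI.getVRegDef(Reg);
  return DefMI && DefMI->getParent() == MBB;
}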
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and inserting prologs and epilogs as required.
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
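A hedged sketch of driving the expander through the entry points listed above; constructing it requires the function, schedule, and liveness analyses, so an already-built instance is assumed here, and the call sequence mirrors typical use rather than every client.

#include "llvm/CodeGen/ModuloSchedule.h"

using namespace llvm;

static void runExpansion(ModuloScheduleExpander &MSE) {
  // Rewrite the loop in place into prologs, a steady-state kernel and
  // epilogs.
  MSE.expand();
  // Drop per-expansion bookkeeping once the CFG has been rewritten.
  MSE.cleanup();
  // The kernel can be optimized away entirely, so check before using it.
  if (MachineBasicBlock *Kernel = MSE.getRewrittenKernel())
    (void)Kernel; // e.g. queue the block for later target-specific fix-ups.
}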
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
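Two tiny helpers (hypothetical, for illustration) showing how the stage accessors above are typically consulted and kept up to date for cloned instructions.

#include "llvm/CodeGen/ModuloSchedule.h"

using namespace llvm;

static bool isInFinalStage(ModuloSchedule &MS, MachineInstr *MI) {
  // Instructions that are not part of the schedule report stage -1.
  int Stage = MS.getStage(MI);
  return Stage != -1 && Stage == MS.getNumStages() - 1;
}

static void copyStage(ModuloSchedule &MS, MachineInstr *Orig,
                      MachineInstr *Clone) {
  // Freshly created clones need an explicit stage for later queries.
  int Stage = MS.getStage(Orig);
  if (Stage != -1)
    MS.setStage(Clone, Stage);
}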
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has a corresponding equivalent register in all other blocks.
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled-out prologue.
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
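A one-line predicate (illustrative only) built from the Register checks above, of the kind used to restrict bookkeeping to virtual registers.

#include "llvm/CodeGen/Register.h"

static bool isTrackableVReg(llvm::Register Reg) {
  // Reject the "no register" sentinel and anything physical; only virtual
  // registers pass.
  return Reg.isValid() && Reg.isVirtual();
}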
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
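The two containers above are commonly paired into a worklist-plus-visited-set walk; the following generic sketch (hypothetical helper, illustrative only) shows that pattern over a register's user instructions.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

static void visitUsers(MachineRegisterInfo &MRI, Register Root) {
  SmallVector<MachineInstr *, 8> Worklist;
  SmallPtrSet<MachineInstr *, 8> Visited;
  for (MachineInstr &UseMI : MRI.use_instructions(Root))
    Worklist.push_back(&UseMI);
  while (!Worklist.empty()) {
    MachineInstr *MI = Worklist.pop_back_val();
    // insert().second is false if MI was already visited.
    if (!Visited.insert(MI).second)
      continue;
    // ... process MI here, pushing further users onto Worklist as needed ...
  }
}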
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enough, produce a PipelinerLoopInfo object.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e.g. it's a switch dispatch or isn't conditional).
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
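The branch-analysis hooks above are normally used together; below is a hedged sketch (a generic pattern, not this file's implementation) that swaps the targets of an analyzable two-way conditional branch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

static bool swapBranchTargets(MachineBasicBlock &MBB,
                              const TargetInstrInfo *TII) {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  // Bail out unless this is an analyzable conditional branch with an
  // explicit false destination.
  if (TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false) || !FBB ||
      Cond.empty())
    return false;
  // reverseBranchCondition returns true when the target cannot reverse it.
  if (TII->reverseBranchCondition(Cond))
    return false;
  TII->removeBranch(MBB);
  TII->insertBranch(MBB, FBB, TBB, Cond, DebugLoc());
  return true;
}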
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
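A hedged one-call example of the standalone peeling helper above; the caller is still responsible for rewriting PHIs and terminators afterwards, which is omitted here.

#include "llvm/CodeGen/ModuloSchedule.h"

using namespace llvm;

static MachineBasicBlock *peelFirstIteration(MachineBasicBlock *LoopBB,
                                             MachineRegisterInfo &MRI,
                                             const TargetInstrInfo *TII) {
  // LPD_Front clones the body so that the clone executes exactly once
  // before control reaches the original loop block.
  return PeelSingleBlockLoop(LPD_Front, LoopBB, MRI, TII);
}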
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:656
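A short sketch of the early-increment pattern above, deleting dead PHIs while iterating a block; MachineBasicBlock::phis() and MachineRegisterInfo::use_empty() are assumed, and the helper is hypothetical.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

static void removeDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI) {
  // Early increment lets us erase the current instruction without
  // invalidating the loop.
  for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis()))
    if (MRI.use_empty(MI.getOperand(0).getReg()))
      MI.eraseFromParent();
}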
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1921
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are...