1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
17#include "llvm/MC/MCContext.h"
18#include "llvm/Support/Debug.h"
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
25static cl::opt<bool> SwapBranchTargetsMVE(
26 "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28
29void ModuloSchedule::print(raw_ostream &OS) {
30 for (MachineInstr *MI : ScheduledInstrs)
31 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
32}
33
34//===----------------------------------------------------------------------===//
35// ModuloScheduleExpander implementation
36//===----------------------------------------------------------------------===//
37
38/// Return the register values for the operands of a Phi instruction.
39/// This function assumes the instruction is a Phi.
40static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
41 unsigned &InitVal, unsigned &LoopVal) {
42 assert(Phi.isPHI() && "Expecting a Phi.");
43
44 InitVal = 0;
45 LoopVal = 0;
46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47 if (Phi.getOperand(i + 1).getMBB() != Loop)
48 InitVal = Phi.getOperand(i).getReg();
49 else
50 LoopVal = Phi.getOperand(i).getReg();
51
52 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
53}
54
55/// Return the Phi register value that comes from the incoming block.
56static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59 return Phi.getOperand(i).getReg();
60 return 0;
61}
62
63/// Return the Phi register value that comes from the loop block.
64static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67 return Phi.getOperand(i).getReg();
68 return 0;
69}
70
71void ModuloScheduleExpander::expand() {
72 BB = Schedule.getLoop()->getTopBlock();
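 // The single-block loop has two predecessors: the preheader and the block
 // itself (its back edge). Pick whichever predecessor is not the loop block.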
73 Preheader = *BB->pred_begin();
74 if (Preheader == BB)
75 Preheader = *std::next(BB->pred_begin());
76
77 // Iterate over the definitions in each instruction, and compute the
78 // stage difference for each use. Keep the maximum value.
79 for (MachineInstr *MI : Schedule.getInstructions()) {
80 int DefStage = Schedule.getStage(MI);
81 for (const MachineOperand &Op : MI->all_defs()) {
82 Register Reg = Op.getReg();
83 unsigned MaxDiff = 0;
84 bool PhiIsSwapped = false;
85 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
86 MachineInstr *UseMI = UseOp.getParent();
87 int UseStage = Schedule.getStage(UseMI);
88 unsigned Diff = 0;
89 if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
91 if (MI->isPHI()) {
92 if (isLoopCarried(*MI))
93 ++Diff;
94 else
95 PhiIsSwapped = true;
96 }
97 MaxDiff = std::max(Diff, MaxDiff);
98 }
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
100 }
101 }
102
103 generatePipelinedLoop();
104}
105
106void ModuloScheduleExpander::generatePipelinedLoop() {
107 LoopInfo = TII->analyzeLoopForPipelining(BB);
108 assert(LoopInfo && "Must be able to analyze loop!");
109
110 // Create a new basic block for the kernel and add it to the CFG.
111 MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
112
113 unsigned MaxStageCount = Schedule.getNumStages() - 1;
114
115 // Remember the registers that are used in different stages. The index is
116 // the iteration, or stage, that the instruction is scheduled in. This is
117 // a map between register names in the original block and the names created
118 // in each stage of the pipelined loop.
119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120
121 // The renaming destination by Phis for the registers across stages.
122 // This map is updated during Phis generation to point to the most recent
123 // renaming destination.
124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125
126 InstrMapTy InstrMap;
127
128 MBBVectorTy PrologBBs;
129
130 // Generate the prolog instructions that set up the pipeline.
131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
132 MF.insert(BB->getIterator(), KernelBB);
133 LIS.insertMBBInMaps(KernelBB);
134
135 // Rearrange the instructions to generate the new, pipelined loop,
136 // and update register names as needed.
137 for (MachineInstr *CI : Schedule.getInstructions()) {
138 if (CI->isPHI())
139 continue;
140 unsigned StageNum = Schedule.getStage(CI);
141 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
143 KernelBB->push_back(NewMI);
144 InstrMap[NewMI] = CI;
145 }
146
147 // Copy any terminator instructions to the new kernel, and update
148 // names as needed.
149 for (MachineInstr &MI : BB->terminators()) {
150 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
151 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
152 KernelBB->push_back(NewMI);
153 InstrMap[NewMI] = &MI;
154 }
155
156 NewKernel = KernelBB;
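 // The kernel inherits the original loop's successors, and its back edge,
 // which still names the original block, is redirected to the kernel itself.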
157 KernelBB->transferSuccessors(BB);
158 KernelBB->replaceSuccessor(BB, KernelBB);
159
160 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
161 InstrMap, MaxStageCount, MaxStageCount, false);
162 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
163 InstrMap, MaxStageCount, MaxStageCount, false);
164
165 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
166
167 MBBVectorTy EpilogBBs;
168 // Generate the epilog instructions to complete the pipeline.
169 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
170 PrologBBs);
171
172 // We need this step because the register allocation doesn't handle some
173 // situations well, so we insert copies to help out.
174 splitLifetimes(KernelBB, EpilogBBs);
175
176 // Remove dead instructions due to loop induction variables.
177 removeDeadInstructions(KernelBB, EpilogBBs);
178
179 // Add branches between prolog and epilog blocks.
180 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
181
182 delete[] VRMap;
183 delete[] VRMapPhi;
184}
185
186void ModuloScheduleExpander::cleanup() {
187 // Remove the original loop since it's no longer referenced.
188 for (auto &I : *BB)
189 LIS.RemoveMachineInstrFromMaps(I);
190 BB->clear();
191 BB->eraseFromParent();
192}
193
194/// Generate the pipeline prolog code.
195void ModuloScheduleExpander::generateProlog(unsigned LastStage,
196 MachineBasicBlock *KernelBB,
197 ValueMapTy *VRMap,
198 MBBVectorTy &PrologBBs) {
199 MachineBasicBlock *PredBB = Preheader;
200 InstrMapTy InstrMap;
201
202 // Generate a basic block for each stage, not including the last stage,
203 // which will be generated in the kernel. Each basic block may contain
204 // instructions from multiple stages/iterations.
205 for (unsigned i = 0; i < LastStage; ++i) {
206 // Create and insert the prolog basic block prior to the original loop
207 // basic block. The original loop is removed later.
208 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
209 PrologBBs.push_back(NewBB);
210 MF.insert(BB->getIterator(), NewBB);
211 NewBB->transferSuccessors(PredBB);
212 PredBB->addSuccessor(NewBB);
213 PredBB = NewBB;
214 LIS.insertMBBInMaps(NewBB);
215
216 // Generate instructions for each appropriate stage. Process instructions
217 // in original program order.
218 for (int StageNum = i; StageNum >= 0; --StageNum) {
219 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
220 BBE = BB->getFirstTerminator();
221 BBI != BBE; ++BBI) {
222 if (Schedule.getStage(&*BBI) == StageNum) {
223 if (BBI->isPHI())
224 continue;
225 MachineInstr *NewMI =
226 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
227 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
228 NewBB->push_back(NewMI);
229 InstrMap[NewMI] = &*BBI;
230 }
231 }
232 }
233 rewritePhiValues(NewBB, i, VRMap, InstrMap);
234 LLVM_DEBUG({
235 dbgs() << "prolog:\n";
236 NewBB->dump();
237 });
238 }
239
240 PredBB->replaceSuccessor(BB, KernelBB);
241
242 // Check if we need to remove the branch from the preheader to the original
243 // loop, and replace it with a branch to the new loop.
244 unsigned numBranches = TII->removeBranch(*Preheader);
245 if (numBranches) {
246 SmallVector<MachineOperand, 0> Cond;
247 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
248 }
249}
250
251/// Generate the pipeline epilog code. The epilog code finishes the iterations
252/// that were started in either the prolog or the kernel. We create a basic
253/// block for each stage that needs to complete.
254void ModuloScheduleExpander::generateEpilog(
255 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
256 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
257 MBBVectorTy &PrologBBs) {
258 // We need to change the branch from the kernel to the first epilog block, so
259 // this call to analyze branch uses the kernel rather than the original BB.
260 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
261 SmallVector<MachineOperand, 4> Cond;
262 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
263 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
264 if (checkBranch)
265 return;
266
267 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
268 if (*LoopExitI == KernelBB)
269 ++LoopExitI;
270 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
271 MachineBasicBlock *LoopExitBB = *LoopExitI;
272
273 MachineBasicBlock *PredBB = KernelBB;
274 MachineBasicBlock *EpilogStart = LoopExitBB;
275 InstrMapTy InstrMap;
276
277 // Generate a basic block for each stage, not including the last stage,
278 // which was generated for the kernel. Each basic block may contain
279 // instructions from multiple stages/iterations.
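 // Epilog blocks continue the stage numbering past the kernel (EpilogStage
 // starts at LastStage + 1) so that value-map lookups made while copying
 // instructions line up with the names created for the prolog and kernel.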
280 int EpilogStage = LastStage + 1;
281 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
282 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
283 EpilogBBs.push_back(NewBB);
284 MF.insert(BB->getIterator(), NewBB);
285
286 PredBB->replaceSuccessor(LoopExitBB, NewBB);
287 NewBB->addSuccessor(LoopExitBB);
288 LIS.insertMBBInMaps(NewBB);
289
290 if (EpilogStart == LoopExitBB)
291 EpilogStart = NewBB;
292
293 // Add instructions to the epilog depending on the current block.
294 // Process instructions in original program order.
295 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
296 for (auto &BBI : *BB) {
297 if (BBI.isPHI())
298 continue;
299 MachineInstr *In = &BBI;
300 if ((unsigned)Schedule.getStage(In) == StageNum) {
301 // Instructions with memoperands in the epilog are updated with
302 // conservative values.
303 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
304 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
305 NewBB->push_back(NewMI);
306 InstrMap[NewMI] = In;
307 }
308 }
309 }
310 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
311 InstrMap, LastStage, EpilogStage, i == 1);
312 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
313 InstrMap, LastStage, EpilogStage, i == 1);
314 PredBB = NewBB;
315
316 LLVM_DEBUG({
317 dbgs() << "epilog:\n";
318 NewBB->dump();
319 });
320 }
321
322 // Fix any Phi nodes in the loop exit block.
323 LoopExitBB->replacePhiUsesWith(BB, PredBB);
324
325 // Create a branch to the new epilog from the kernel.
326 // Remove the original branch and add a new branch to the epilog.
327 TII->removeBranch(*KernelBB);
328 assert((OrigBB == TBB || OrigBB == FBB) &&
329 "Unable to determine looping branch direction");
330 if (OrigBB != TBB)
331 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
332 else
333 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
334 // Add a branch to the loop exit.
335 if (EpilogBBs.size() > 0) {
336 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
337 SmallVector<MachineOperand, 4> Cond1;
338 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
339 }
340}
341
342/// Replace all uses of FromReg that appear outside the specified
343/// basic block with ToReg.
344static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
345 MachineBasicBlock *MBB,
346 MachineRegisterInfo &MRI,
347 LiveIntervals &LIS) {
348 for (MachineOperand &O :
349 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
350 if (O.getParent()->getParent() != MBB)
351 O.setReg(ToReg);
352 if (!LIS.hasInterval(ToReg))
353 LIS.createEmptyInterval(ToReg);
354}
355
356/// Return true if the register has a use that occurs outside the
357/// specified loop.
358static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
359 MachineRegisterInfo &MRI) {
360 for (const MachineOperand &MO : MRI.use_operands(Reg))
361 if (MO.getParent()->getParent() != BB)
362 return true;
363 return false;
364}
365
366/// Generate Phis for the specific block in the generated pipelined code.
367/// This function looks at the Phis from the original code to guide the
368/// creation of new Phis.
369void ModuloScheduleExpander::generateExistingPhis(
370 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
371 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
372 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
373 // Compute the stage number for the initial value of the Phi, which
374 // comes from the prolog. The prolog to use depends on which kernel or
375 // epilog block we're adding the Phi to.
376 unsigned PrologStage = 0;
377 unsigned PrevStage = 0;
378 bool InKernel = (LastStageNum == CurStageNum);
379 if (InKernel) {
380 PrologStage = LastStageNum - 1;
381 PrevStage = CurStageNum;
382 } else {
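 // For the Nth epilog block (N = CurStageNum - LastStageNum), the initial
 // value comes from prolog stage LastStageNum - N and the previous value
 // from stage LastStageNum + N - 1.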
383 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
384 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
385 }
386
387 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
388 BBE = BB->getFirstNonPHI();
389 BBI != BBE; ++BBI) {
390 Register Def = BBI->getOperand(0).getReg();
391
392 unsigned InitVal = 0;
393 unsigned LoopVal = 0;
394 getPhiRegs(*BBI, BB, InitVal, LoopVal);
395
396 unsigned PhiOp1 = 0;
397 // The Phi value from the loop body typically is defined in the loop, but
398 // not always. So, we need to check if the value is defined in the loop.
399 unsigned PhiOp2 = LoopVal;
400 if (auto It = VRMap[LastStageNum].find(LoopVal);
401 It != VRMap[LastStageNum].end())
402 PhiOp2 = It->second;
403
404 int StageScheduled = Schedule.getStage(&*BBI);
405 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
406 unsigned NumStages = getStagesForReg(Def, CurStageNum);
407 if (NumStages == 0) {
408 // We don't need to generate a Phi anymore, but we need to rename any uses
409 // of the Phi value.
410 unsigned NewReg = VRMap[PrevStage][LoopVal];
411 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
412 InitVal, NewReg);
413 if (VRMap[CurStageNum].count(LoopVal))
414 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
415 }
416 // Adjust the number of Phis needed depending on the number of prologs left,
417 // and the distance from where the Phi is first scheduled. The number of
418 // Phis cannot exceed the number of prolog stages. Each stage can
419 // potentially define two values.
420 unsigned MaxPhis = PrologStage + 2;
421 if (!InKernel && (int)PrologStage <= LoopValStage)
422 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
423 unsigned NumPhis = std::min(NumStages, MaxPhis);
424
425 unsigned NewReg = 0;
426 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
427 // In the epilog, we may need to look back one stage to get the correct
428 // Phi name, because the epilog and prolog blocks execute the same stage.
429 // The correct name is from the previous block only when the Phi has
430 // been completely scheduled prior to the epilog, and Phi value is not
431 // needed in multiple stages.
432 int StageDiff = 0;
433 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
434 NumPhis == 1)
435 StageDiff = 1;
436 // Adjust the computations below when the phi and the loop definition
437 // are scheduled in different stages.
438 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
439 StageDiff = StageScheduled - LoopValStage;
440 for (unsigned np = 0; np < NumPhis; ++np) {
441 // If the Phi hasn't been scheduled, then use the initial Phi operand
442 // value. Otherwise, use the scheduled version of the instruction. This
443 // is a little complicated when a Phi references another Phi.
444 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
445 PhiOp1 = InitVal;
446 // Check if the Phi has already been scheduled in a prolog stage.
447 else if (PrologStage >= AccessStage + StageDiff + np &&
448 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
449 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
450 // Check if the Phi has already been scheduled, but the loop instruction
451 // is either another Phi, or doesn't occur in the loop.
452 else if (PrologStage >= AccessStage + StageDiff + np) {
453 // If the Phi references another Phi, we need to examine the other
454 // Phi to get the correct value.
455 PhiOp1 = LoopVal;
456 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
457 int Indirects = 1;
458 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
459 int PhiStage = Schedule.getStage(InstOp1);
460 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
461 PhiOp1 = getInitPhiReg(*InstOp1, BB);
462 else
463 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
464 InstOp1 = MRI.getVRegDef(PhiOp1);
465 int PhiOpStage = Schedule.getStage(InstOp1);
466 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
467 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
468 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
469 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
470 break;
471 }
472 ++Indirects;
473 }
474 } else
475 PhiOp1 = InitVal;
476 // If this references a generated Phi in the kernel, get the Phi operand
477 // from the incoming block.
478 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
479 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
480 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
481
482 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
483 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
484 // In the epilog, a map lookup is needed to get the value from the kernel,
485 // or previous epilog block. How this is done depends on whether the
486 // instruction is scheduled in the previous block.
487 if (!InKernel) {
488 int StageDiffAdj = 0;
489 if (LoopValStage != -1 && StageScheduled > LoopValStage)
490 StageDiffAdj = StageScheduled - LoopValStage;
491 // Use the loop value defined in the kernel, unless the kernel
492 // contains the last definition of the Phi.
493 if (np == 0 && PrevStage == LastStageNum &&
494 (StageScheduled != 0 || LoopValStage != 0) &&
495 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
496 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
497 // Use the value defined by the Phi. We add one because we switch
498 // from looking at the loop value to the Phi definition.
499 else if (np > 0 && PrevStage == LastStageNum &&
500 VRMap[PrevStage - np + 1].count(Def))
501 PhiOp2 = VRMap[PrevStage - np + 1][Def];
502 // Use the loop value defined in the kernel.
503 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
504 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
505 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
506 // Use the value defined by the Phi, unless we're generating the first
507 // epilog and the Phi refers to a Phi in a different stage.
508 else if (VRMap[PrevStage - np].count(Def) &&
509 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
510 (LoopValStage == StageScheduled)))
511 PhiOp2 = VRMap[PrevStage - np][Def];
512 }
513
514 // Check if we can reuse an existing Phi. This occurs when a Phi
515 // references another Phi, and the other Phi is scheduled in an
516 // earlier stage. We can try to reuse an existing Phi up until the last
517 // stage of the current Phi.
518 if (LoopDefIsPhi) {
519 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
520 int LVNumStages = getStagesForPhi(LoopVal);
521 int StageDiff = (StageScheduled - LoopValStage);
522 LVNumStages -= StageDiff;
523 // Make sure the loop value Phi has been processed already.
524 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
525 NewReg = PhiOp2;
526 unsigned ReuseStage = CurStageNum;
527 if (isLoopCarried(*PhiInst))
528 ReuseStage -= LVNumStages;
529 // Check if the Phi to reuse has been generated yet. If not, then
530 // there is nothing to reuse.
531 if (VRMap[ReuseStage - np].count(LoopVal)) {
532 NewReg = VRMap[ReuseStage - np][LoopVal];
533
534 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
535 Def, NewReg);
536 // Update the map with the new Phi name.
537 VRMap[CurStageNum - np][Def] = NewReg;
538 PhiOp2 = NewReg;
539 if (VRMap[LastStageNum - np - 1].count(LoopVal))
540 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
541
542 if (IsLast && np == NumPhis - 1)
543 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
544 continue;
545 }
546 }
547 }
548 if (InKernel && StageDiff > 0 &&
549 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
550 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
551 }
552
553 const TargetRegisterClass *RC = MRI.getRegClass(Def);
554 NewReg = MRI.createVirtualRegister(RC);
555
556 MachineInstrBuilder NewPhi =
557 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
558 TII->get(TargetOpcode::PHI), NewReg);
559 NewPhi.addReg(PhiOp1).addMBB(BB1);
560 NewPhi.addReg(PhiOp2).addMBB(BB2);
561 if (np == 0)
562 InstrMap[NewPhi] = &*BBI;
563
564 // We define the Phis after creating the new pipelined code, so
565 // we need to rename the Phi values in scheduled instructions.
566
567 unsigned PrevReg = 0;
568 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
569 PrevReg = VRMap[PrevStage - np][LoopVal];
570 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
571 NewReg, PrevReg);
572 // If the Phi has been scheduled, use the new name for rewriting.
573 if (VRMap[CurStageNum - np].count(Def)) {
574 unsigned R = VRMap[CurStageNum - np][Def];
575 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
576 NewReg);
577 }
578
579 // Check if we need to rename any uses that occur after the loop. The
580 // register to replace depends on whether the Phi is scheduled in the
581 // epilog.
582 if (IsLast && np == NumPhis - 1)
583 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
584
585 // In the kernel, a dependent Phi uses the value from this Phi.
586 if (InKernel)
587 PhiOp2 = NewReg;
588
589 // Update the map with the new Phi name.
590 VRMap[CurStageNum - np][Def] = NewReg;
591 }
592
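 // For any remaining stages that did not get their own Phi, rewrite the
 // scheduled uses of the original value to the last Phi created above.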
593 while (NumPhis++ < NumStages) {
594 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
595 NewReg, 0);
596 }
597
598 // Check if we need to rename a Phi that has been eliminated due to
599 // scheduling.
600 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
601 replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
602 }
603}
604
605/// Generate Phis for the specified block in the generated pipelined code.
606/// These are new Phis needed because the definition is scheduled after the
607/// use in the pipelined sequence.
608void ModuloScheduleExpander::generatePhis(
609 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
610 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
611 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
612 bool IsLast) {
613 // Compute the stage number that contains the initial Phi value, and
614 // the Phi from the previous stage.
615 unsigned PrologStage = 0;
616 unsigned PrevStage = 0;
617 unsigned StageDiff = CurStageNum - LastStageNum;
618 bool InKernel = (StageDiff == 0);
619 if (InKernel) {
620 PrologStage = LastStageNum - 1;
621 PrevStage = CurStageNum;
622 } else {
623 PrologStage = LastStageNum - StageDiff;
624 PrevStage = LastStageNum + StageDiff - 1;
625 }
626
627 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
628 BBE = BB->instr_end();
629 BBI != BBE; ++BBI) {
630 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
631 MachineOperand &MO = BBI->getOperand(i);
632 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
633 continue;
634
635 int StageScheduled = Schedule.getStage(&*BBI);
636 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
637 Register Def = MO.getReg();
638 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
639 // An instruction that is scheduled in stage 0 and used after the loop
640 // requires a phi in the epilog for the last definition from either
641 // the kernel or prolog.
642 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
643 hasUseAfterLoop(Def, BB, MRI))
644 NumPhis = 1;
645 if (!InKernel && (unsigned)StageScheduled > PrologStage)
646 continue;
647
648 unsigned PhiOp2;
649 if (InKernel) {
650 PhiOp2 = VRMap[PrevStage][Def];
651 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
652 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
653 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
654 }
655 // The number of Phis can't exceed the number of prolog stages. The
656 // prolog stage number is zero based.
657 if (NumPhis > PrologStage + 1 - StageScheduled)
658 NumPhis = PrologStage + 1 - StageScheduled;
659 for (unsigned np = 0; np < NumPhis; ++np) {
660 // Example for
661 // Org:
662 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
663 //
664 // Prolog0 (Stage0):
665 // %Clone0 = ...
666 // Prolog1 (Stage1):
667 // %Clone1 = ...
668 // Kernel (Stage2):
669 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
670 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
671 // %Clone2 = ...
672 // Epilog0 (Stage3):
673 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
674 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
675 // Epilog1 (Stage4):
676 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
677 //
678 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
679 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
680 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
681
682 unsigned PhiOp1 = VRMap[PrologStage][Def];
683 if (np <= PrologStage)
684 PhiOp1 = VRMap[PrologStage - np][Def];
685 if (!InKernel) {
686 if (PrevStage == LastStageNum && np == 0)
687 PhiOp2 = VRMap[LastStageNum][Def];
688 else
689 PhiOp2 = VRMapPhi[PrevStage - np][Def];
690 }
691
692 const TargetRegisterClass *RC = MRI.getRegClass(Def);
693 Register NewReg = MRI.createVirtualRegister(RC);
694
695 MachineInstrBuilder NewPhi =
696 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
697 TII->get(TargetOpcode::PHI), NewReg);
698 NewPhi.addReg(PhiOp1).addMBB(BB1);
699 NewPhi.addReg(PhiOp2).addMBB(BB2);
700 if (np == 0)
701 InstrMap[NewPhi] = &*BBI;
702
703 // Rewrite uses and update the map. The actions depend upon whether
704 // we are generating code for the kernel or epilog blocks.
705 if (InKernel) {
706 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
707 NewReg);
708 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
709 NewReg);
710
711 PhiOp2 = NewReg;
712 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
713 } else {
714 VRMapPhi[CurStageNum - np][Def] = NewReg;
715 if (np == NumPhis - 1)
716 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
717 NewReg);
718 }
719 if (IsLast && np == NumPhis - 1)
720 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
721 }
722 }
723 }
724}
725
726/// Remove instructions that generate values with no uses.
727/// Typically, these are induction variable operations that generate values
728/// used in the loop itself. A dead instruction has a definition with
729/// no uses, or uses that occur in the original loop only.
730void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
731 MBBVectorTy &EpilogBBs) {
732 // For each epilog block, check that the value defined by each instruction
733 // is used. If not, delete it.
734 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
735 for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
736 ME = MBB->instr_rend();
737 MI != ME;) {
738 // From DeadMachineInstructionElim. Don't delete inline assembly.
739 if (MI->isInlineAsm()) {
740 ++MI;
741 continue;
742 }
743 bool SawStore = false;
744 // Check if it's safe to remove the instruction due to side effects.
745 // We can, and want to, remove Phis here.
746 if (!MI->isSafeToMove(SawStore) && !MI->isPHI()) {
747 ++MI;
748 continue;
749 }
750 bool used = true;
751 for (const MachineOperand &MO : MI->all_defs()) {
752 Register reg = MO.getReg();
753 // Assume physical registers are used, unless they are marked dead.
754 if (reg.isPhysical()) {
755 used = !MO.isDead();
756 if (used)
757 break;
758 continue;
759 }
760 unsigned realUses = 0;
761 for (const MachineOperand &U : MRI.use_operands(reg)) {
762 // Check if there are any uses that occur only in the original
763 // loop. If so, that's not a real use.
764 if (U.getParent()->getParent() != BB) {
765 realUses++;
766 used = true;
767 break;
768 }
769 }
770 if (realUses > 0)
771 break;
772 used = false;
773 }
774 if (!used) {
775 LIS.RemoveMachineInstrFromMaps(*MI);
776 MI++->eraseFromParent();
777 continue;
778 }
779 ++MI;
780 }
781 // In the kernel block, check if we can remove a Phi that generates a value
782 // used in an instruction removed in the epilog block.
783 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
784 Register reg = MI.getOperand(0).getReg();
785 if (MRI.use_begin(reg) == MRI.use_end()) {
786 LIS.RemoveMachineInstrFromMaps(MI);
787 MI.eraseFromParent();
788 }
789 }
790}
791
792/// For loop carried definitions, we split the lifetime of a virtual register
793/// that has uses past the definition in the next iteration. A copy with a new
794/// virtual register is inserted before the definition, which helps with
795/// generating a better register assignment.
796///
797/// v1 = phi(a, v2)     v1 = phi(a, v2)
798/// v2 = phi(b, v3)     v2 = phi(b, v3)
799/// v3 = ..             v4 = copy v1
800/// .. = V1             v3 = ..
801///                     .. = v4
802void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
803 MBBVectorTy &EpilogBBs) {
804 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
805 for (auto &PHI : KernelBB->phis()) {
806 Register Def = PHI.getOperand(0).getReg();
807 // Check for any Phi definition that is used as an operand of another Phi
808 // in the same block.
809 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
810 E = MRI.use_instr_end();
811 I != E; ++I) {
812 if (I->isPHI() && I->getParent() == KernelBB) {
813 // Get the loop carried definition.
814 unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
815 if (!LCDef)
816 continue;
817 MachineInstr *MI = MRI.getVRegDef(LCDef);
818 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
819 continue;
820 // Search through the rest of the block looking for uses of the Phi
821 // definition. If one occurs, then split the lifetime.
822 unsigned SplitReg = 0;
823 for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
824 KernelBB->instr_end()))
825 if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
826 // We split the lifetime when we find the first use.
827 if (SplitReg == 0) {
828 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
829 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
830 TII->get(TargetOpcode::COPY), SplitReg)
831 .addReg(Def);
832 }
833 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
834 }
835 if (!SplitReg)
836 continue;
837 // Search through each of the epilog blocks for any uses to be renamed.
838 for (auto &Epilog : EpilogBBs)
839 for (auto &I : *Epilog)
840 if (I.readsRegister(Def, /*TRI=*/nullptr))
841 I.substituteRegister(Def, SplitReg, 0, *TRI);
842 break;
843 }
844 }
845 }
846}
847
848/// Remove the incoming block from the Phis in a basic block.
849static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
850 for (MachineInstr &MI : *BB) {
851 if (!MI.isPHI())
852 break;
853 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
854 if (MI.getOperand(i + 1).getMBB() == Incoming) {
855 MI.removeOperand(i + 1);
856 MI.removeOperand(i);
857 break;
858 }
859 }
860}
861
862/// Create branches from each prolog basic block to the appropriate epilog
863/// block. These edges are needed if the loop ends before reaching the
864/// kernel.
865void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
866 MBBVectorTy &PrologBBs,
867 MachineBasicBlock *KernelBB,
868 MBBVectorTy &EpilogBBs,
869 ValueMapTy *VRMap) {
870 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
871 MachineBasicBlock *LastPro = KernelBB;
872 MachineBasicBlock *LastEpi = KernelBB;
873
874 // Start from the blocks connected to the kernel and work "out"
875 // to the first prolog and the last epilog blocks.
877 unsigned MaxIter = PrologBBs.size() - 1;
878 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
879 // Add branches to the prolog that go to the corresponding
880 // epilog, and the fall-thru prolog/kernel block.
881 MachineBasicBlock *Prolog = PrologBBs[j];
882 MachineBasicBlock *Epilog = EpilogBBs[i];
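 // Ask the target whether the trip count is known to exceed j + 1 iterations;
 // if this cannot be decided statically, Cond receives the runtime condition
 // to branch on.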
883
884 SmallVector<MachineOperand, 4> Cond;
885 std::optional<bool> StaticallyGreater =
886 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
887 unsigned numAdded = 0;
888 if (!StaticallyGreater) {
889 Prolog->addSuccessor(Epilog);
890 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
891 } else if (*StaticallyGreater == false) {
892 Prolog->addSuccessor(Epilog);
893 Prolog->removeSuccessor(LastPro);
894 LastEpi->removeSuccessor(Epilog);
895 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
896 removePhis(Epilog, LastEpi);
897 // Remove the blocks that are no longer referenced.
898 if (LastPro != LastEpi) {
899 LastEpi->clear();
900 LastEpi->eraseFromParent();
901 }
902 if (LastPro == KernelBB) {
903 LoopInfo->disposed(&LIS);
904 NewKernel = nullptr;
905 }
906 LastPro->clear();
907 LastPro->eraseFromParent();
908 } else {
909 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
910 removePhis(Epilog, Prolog);
911 }
912 LastPro = Prolog;
913 LastEpi = Epilog;
914 for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
915 E = Prolog->instr_rend();
916 I != E && numAdded > 0; ++I, --numAdded)
917 updateInstruction(&*I, false, j, 0, VRMap);
918 }
919
920 if (NewKernel) {
921 LoopInfo->setPreheader(PrologBBs[MaxIter]);
922 LoopInfo->adjustTripCount(-(MaxIter + 1));
923 }
924}
925
926/// Return true if we can compute the amount the instruction changes
927/// during each iteration. Set Delta to the amount of the change.
928bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
929 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
930 const MachineOperand *BaseOp;
931 int64_t Offset;
932 bool OffsetIsScalable;
933 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
934 return false;
935
936 // FIXME: This algorithm assumes instructions have fixed-size offsets.
937 if (OffsetIsScalable)
938 return false;
939
940 if (!BaseOp->isReg())
941 return false;
942
943 Register BaseReg = BaseOp->getReg();
944
946 // Check if there is a Phi. If so, get the definition in the loop.
947 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
948 if (BaseDef && BaseDef->isPHI()) {
949 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
950 BaseDef = MRI.getVRegDef(BaseReg);
951 }
952 if (!BaseDef)
953 return false;
954
955 int D = 0;
956 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
957 return false;
958
959 Delta = D;
960 return true;
961}
962
963/// Update the memory operand with a new offset when the pipeliner
964/// generates a new copy of the instruction that refers to a
965/// different memory location.
966void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
967 MachineInstr &OldMI,
968 unsigned Num) {
969 if (Num == 0)
970 return;
971 // If the instruction has memory operands, then adjust the offset
972 // when the instruction appears in different stages.
973 if (NewMI.memoperands_empty())
974 return;
975 SmallVector<MachineMemOperand *, 2> NewMMOs;
976 for (MachineMemOperand *MMO : NewMI.memoperands()) {
977 // TODO: Figure out whether isAtomic is really necessary (see D57601).
978 if (MMO->isVolatile() || MMO->isAtomic() ||
979 (MMO->isInvariant() && MMO->isDereferenceable()) ||
980 (!MMO->getValue())) {
981 NewMMOs.push_back(MMO);
982 continue;
983 }
984 unsigned Delta;
985 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
986 int64_t AdjOffset = Delta * Num;
987 NewMMOs.push_back(
988 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
989 } else {
992 }
993 }
994 NewMI.setMemRefs(MF, NewMMOs);
995}
996
997/// Clone the instruction for the new pipelined loop and update the
998/// memory operands, if needed.
999MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
1000 unsigned CurStageNum,
1001 unsigned InstStageNum) {
1002 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1003 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1004 return NewMI;
1005}
1006
1007/// Clone the instruction for the new pipelined loop. If needed, this
1008/// function updates the instruction using the values saved in the
1009/// InstrChanges structure.
1010MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1011 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1012 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1013 auto It = InstrChanges.find(OldMI);
1014 if (It != InstrChanges.end()) {
1015 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1016 unsigned BasePos, OffsetPos;
1017 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1018 return nullptr;
1019 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1020 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1021 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1022 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1023 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1024 }
1025 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1026 return NewMI;
1027}
1028
1029/// Update the machine instruction with new virtual registers. This
1030/// function may change the definitions and/or uses.
1031void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1032 bool LastDef,
1033 unsigned CurStageNum,
1034 unsigned InstrStageNum,
1035 ValueMapTy *VRMap) {
1036 for (MachineOperand &MO : NewMI->operands()) {
1037 if (!MO.isReg() || !MO.getReg().isVirtual())
1038 continue;
1039 Register reg = MO.getReg();
1040 if (MO.isDef()) {
1041 // Create a new virtual register for the definition.
1042 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1043 Register NewReg = MRI.createVirtualRegister(RC);
1044 MO.setReg(NewReg);
1045 VRMap[CurStageNum][reg] = NewReg;
1046 if (LastDef)
1047 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1048 } else if (MO.isUse()) {
1049 MachineInstr *Def = MRI.getVRegDef(reg);
1050 // Compute the stage that contains the last definition for the instruction.
1051 int DefStageNum = Schedule.getStage(Def);
1052 unsigned StageNum = CurStageNum;
1053 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1054 // Compute the difference in stages between the definition and the use.
1055 unsigned StageDiff = (InstrStageNum - DefStageNum);
1056 // Make an adjustment to get the last definition.
1057 StageNum -= StageDiff;
1058 }
1059 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())
1060 MO.setReg(It->second);
1061 }
1062 }
1063}
1064
1065/// Return the instruction in the loop that defines the register.
1066/// If the definition is a Phi, then follow the Phi operand to
1067/// the instruction in the loop.
1068MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1069 SmallPtrSet<MachineInstr *, 8> Visited;
1070 MachineInstr *Def = MRI.getVRegDef(Reg);
1071 while (Def->isPHI()) {
1072 if (!Visited.insert(Def).second)
1073 break;
1074 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1075 if (Def->getOperand(i + 1).getMBB() == BB) {
1076 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1077 break;
1078 }
1079 }
1080 return Def;
1081}
1082
1083/// Return the new name for the value from the previous stage.
1084unsigned ModuloScheduleExpander::getPrevMapVal(
1085 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1086 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1087 unsigned PrevVal = 0;
1088 if (StageNum > PhiStage) {
1089 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1090 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1091 // The name is defined in the previous stage.
1092 PrevVal = VRMap[StageNum - 1][LoopVal];
1093 else if (VRMap[StageNum].count(LoopVal))
1094 // The previous name is defined in the current stage when the instruction
1095 // order is swapped.
1096 PrevVal = VRMap[StageNum][LoopVal];
1097 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1098 // The loop value hasn't yet been scheduled.
1099 PrevVal = LoopVal;
1100 else if (StageNum == PhiStage + 1)
1101 // The loop value is another phi, which has not been scheduled.
1102 PrevVal = getInitPhiReg(*LoopInst, BB);
1103 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1104 // The loop value is another phi, which has been scheduled.
1105 PrevVal =
1106 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1107 LoopStage, VRMap, BB);
1108 }
1109 return PrevVal;
1110}
1111
1112/// Rewrite the Phi values in the specified block to use the mappings
1113/// from the initial operand. Once the Phi is scheduled, we switch
1114/// to using the loop value instead of the Phi value, so those names
1115/// do not need to be rewritten.
1116void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1117 unsigned StageNum,
1118 ValueMapTy *VRMap,
1119 InstrMapTy &InstrMap) {
1120 for (auto &PHI : BB->phis()) {
1121 unsigned InitVal = 0;
1122 unsigned LoopVal = 0;
1123 getPhiRegs(PHI, BB, InitVal, LoopVal);
1124 Register PhiDef = PHI.getOperand(0).getReg();
1125
1126 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1127 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1128 unsigned NumPhis = getStagesForPhi(PhiDef);
1129 if (NumPhis > StageNum)
1130 NumPhis = StageNum;
1131 for (unsigned np = 0; np <= NumPhis; ++np) {
1132 unsigned NewVal =
1133 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1134 if (!NewVal)
1135 NewVal = InitVal;
1136 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1137 NewVal);
1138 }
1139 }
1140}
1141
1142/// Rewrite a previously scheduled instruction to use the register value
1143/// from the new instruction. Make sure the instruction occurs in the
1144/// basic block, and we don't change the uses in the new instruction.
1145void ModuloScheduleExpander::rewriteScheduledInstr(
1146 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1147 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1148 unsigned PrevReg) {
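 // We are still emitting prolog copies while the current stage index is below
 // the final stage of the schedule.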
1149 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1150 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1151 // Rewrite uses that have been scheduled already to use the new
1152 // Phi register.
1153 for (MachineOperand &UseOp :
1154 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1155 MachineInstr *UseMI = UseOp.getParent();
1156 if (UseMI->getParent() != BB)
1157 continue;
1158 if (UseMI->isPHI()) {
1159 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1160 continue;
1161 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1162 continue;
1163 }
1164 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1165 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1166 MachineInstr *OrigMI = OrigInstr->second;
1167 int StageSched = Schedule.getStage(OrigMI);
1168 int CycleSched = Schedule.getCycle(OrigMI);
1169 unsigned ReplaceReg = 0;
1170 // This is the stage for the scheduled instruction.
1171 if (StagePhi == StageSched && Phi->isPHI()) {
1172 int CyclePhi = Schedule.getCycle(Phi);
1173 if (PrevReg && InProlog)
1174 ReplaceReg = PrevReg;
1175 else if (PrevReg && !isLoopCarried(*Phi) &&
1176 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1177 ReplaceReg = PrevReg;
1178 else
1179 ReplaceReg = NewReg;
1180 }
1181 // The scheduled instruction occurs before the scheduled Phi, and the
1182 // Phi is not loop carried.
1183 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1184 ReplaceReg = NewReg;
1185 if (StagePhi > StageSched && Phi->isPHI())
1186 ReplaceReg = NewReg;
1187 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1188 ReplaceReg = NewReg;
1189 if (ReplaceReg) {
1190 const TargetRegisterClass *NRC =
1191 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1192 if (NRC)
1193 UseOp.setReg(ReplaceReg);
1194 else {
1195 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1196 BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1197 SplitReg)
1198 .addReg(ReplaceReg);
1199 UseOp.setReg(SplitReg);
1200 }
1201 }
1202 }
1203}
1204
1205bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1206 if (!Phi.isPHI())
1207 return false;
1208 int DefCycle = Schedule.getCycle(&Phi);
1209 int DefStage = Schedule.getStage(&Phi);
1210
1211 unsigned InitVal = 0;
1212 unsigned LoopVal = 0;
1213 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1214 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1215 if (!Use || Use->isPHI())
1216 return true;
1217 int LoopCycle = Schedule.getCycle(Use);
1218 int LoopStage = Schedule.getStage(Use);
1219 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1220}
1221
1222//===----------------------------------------------------------------------===//
1223// PeelingModuloScheduleExpander implementation
1224//===----------------------------------------------------------------------===//
1225// This is a reimplementation of ModuloScheduleExpander that works by creating
1226// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1227//===----------------------------------------------------------------------===//
1228
1229namespace {
1230// Remove any dead phis in MBB. Dead phis either have only one block as input
1231// (in which case they are the identity) or have no uses.
1232void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1233 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1234 bool Changed = true;
1235 while (Changed) {
1236 Changed = false;
1237 for (MachineInstr &MI : make_early_inc_range(MBB->phis())) {
1238 assert(MI.isPHI());
1239 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1240 if (LIS)
1241 LIS->RemoveMachineInstrFromMaps(MI);
1242 MI.eraseFromParent();
1243 Changed = true;
1244 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1245 const TargetRegisterClass *ConstrainRegClass =
1246 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1247 MRI.getRegClass(MI.getOperand(0).getReg()));
1248 assert(ConstrainRegClass &&
1249 "Expected a valid constrained register class!");
1250 (void)ConstrainRegClass;
1251 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1252 MI.getOperand(1).getReg());
1253 if (LIS)
1254 LIS->RemoveMachineInstrFromMaps(MI);
1255 MI.eraseFromParent();
1256 Changed = true;
1257 }
1258 }
1259 }
1260}
1261
1262/// Rewrites the kernel block in-place to adhere to the given schedule.
1263/// KernelRewriter holds all of the state required to perform the rewriting.
1264class KernelRewriter {
1265 ModuloSchedule &S;
1266 MachineBasicBlock *BB;
1267 MachineBasicBlock *PreheaderBB, *ExitBB;
1268 MachineRegisterInfo &MRI;
1269 const TargetInstrInfo *TII;
1270 LiveIntervals *LIS;
1271
1272 // Map from register class to canonical undef register for that class.
1273 DenseMap<const TargetRegisterClass *, Register> Undefs;
1274 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1275 // this map is only used when InitReg is non-undef.
1276 DenseMap<std::pair<Register, Register>, Register> Phis;
1277 // Map from LoopReg to phi register where the InitReg is undef.
1278 DenseMap<Register, Register> UndefPhis;
1279
1280 // Reg is used by MI. Return the new register MI should use to adhere to the
1281 // schedule. Insert phis as necessary.
1282 Register remapUse(Register Reg, MachineInstr &MI);
1283 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1284 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1285 // or will be chosen so as to share another phi.
1286 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1287 const TargetRegisterClass *RC = nullptr);
1288 // Create an undef register of the given register class.
1289 Register undef(const TargetRegisterClass *RC);
1290
1291public:
1292 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1293 LiveIntervals *LIS = nullptr);
1294 void rewrite();
1295};
1296} // namespace
1297
1298KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1299 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1300 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1301 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1302 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1303 PreheaderBB = *BB->pred_begin();
1304 if (PreheaderBB == BB)
1305 PreheaderBB = *std::next(BB->pred_begin());
1306}
1307
1308void KernelRewriter::rewrite() {
1309 // Rearrange the loop to be in schedule order. Note that the schedule may
1310 // contain instructions that are not owned by the loop block (InstrChanges and
1311 // friends), so we gracefully handle unowned instructions and delete any
1312 // instructions that weren't in the schedule.
1313 auto InsertPt = BB->getFirstTerminator();
1314 MachineInstr *FirstMI = nullptr;
1315 for (MachineInstr *MI : S.getInstructions()) {
1316 if (MI->isPHI())
1317 continue;
1318 if (MI->getParent())
1319 MI->removeFromParent();
1320 BB->insert(InsertPt, MI);
1321 if (!FirstMI)
1322 FirstMI = MI;
1323 }
1324 assert(FirstMI && "Failed to find first MI in schedule");
1325
1326 // At this point all of the scheduled instructions are between FirstMI
1327 // and the end of the block. Kill from the first non-phi to FirstMI.
1328 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1329 if (LIS)
1330 LIS->RemoveMachineInstrFromMaps(*I);
1331 (I++)->eraseFromParent();
1332 }
1333
1334 // Now remap every instruction in the loop.
1335 for (MachineInstr &MI : *BB) {
1336 if (MI.isPHI() || MI.isTerminator())
1337 continue;
1338 for (MachineOperand &MO : MI.uses()) {
1339 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1340 continue;
1341 Register Reg = remapUse(MO.getReg(), MI);
1342 MO.setReg(Reg);
1343 }
1344 }
1345 EliminateDeadPhis(BB, MRI, LIS);
1346
1347 // Ensure a phi exists for all instructions that are either referenced by
1348 // an illegal phi or by an instruction outside the loop. This allows us to
1349 // treat remaps of these values the same as "normal" values that come from
1350 // loop-carried phis.
1351 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1352 if (MI->isPHI()) {
1353 Register R = MI->getOperand(0).getReg();
1354 phi(R);
1355 continue;
1356 }
1357
1358 for (MachineOperand &Def : MI->defs()) {
1359 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1360 if (MI.getParent() != BB) {
1361 phi(Def.getReg());
1362 break;
1363 }
1364 }
1365 }
1366 }
1367}
1368
1369Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1370 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
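 // A register without a unique virtual-register definition cannot be
 // remapped; leave such uses untouched.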
1371 if (!Producer)
1372 return Reg;
1373
1374 int ConsumerStage = S.getStage(&MI);
1375 if (!Producer->isPHI()) {
1376 // Non-phi producers are simple to remap. Insert as many phis as the
1377 // difference between the consumer and producer stages.
1378 if (Producer->getParent() != BB)
1379 // Producer was not inside the loop. Use the register as-is.
1380 return Reg;
1381 int ProducerStage = S.getStage(Producer);
1382 assert(ConsumerStage != -1 &&
1383 "In-loop consumer should always be scheduled!");
1384 assert(ConsumerStage >= ProducerStage);
1385 unsigned StageDiff = ConsumerStage - ProducerStage;
1386
1387 for (unsigned I = 0; I < StageDiff; ++I)
1388 Reg = phi(Reg);
1389 return Reg;
1390 }
1391
1392 // First, dive through the phi chain to find the defaults for the generated
1393 // phis.
1394 SmallVector<std::optional<Register>, 4> Defaults;
1395 Register LoopReg = Reg;
1396 auto LoopProducer = Producer;
1397 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1398 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1399 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1400 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1401 assert(LoopProducer);
1402 }
1403 int LoopProducerStage = S.getStage(LoopProducer);
1404
1405 std::optional<Register> IllegalPhiDefault;
1406
1407 if (LoopProducerStage == -1) {
1408 // Do nothing.
1409 } else if (LoopProducerStage > ConsumerStage) {
1410 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1411 // In addition, Consumer's cycle must be scheduled after Producer in the
1412 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1413 // functions.
1414#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1415 int LoopProducerCycle = S.getCycle(LoopProducer);
1416 int ConsumerCycle = S.getCycle(&MI);
1417#endif
1418 assert(LoopProducerCycle <= ConsumerCycle);
1419 assert(LoopProducerStage == ConsumerStage + 1);
1420 // Peel off the first phi from Defaults and insert a phi between producer
1421 // and consumer. This phi will not be at the front of the block so we
1422 // consider it illegal. It will only exist during the rewrite process; it
1423 // needs to exist while we peel off prologs because these could take the
1424 // default value. After that we can replace all uses with the loop producer
1425 // value.
1426 IllegalPhiDefault = Defaults.front();
1427 Defaults.erase(Defaults.begin());
1428 } else {
1429 assert(ConsumerStage >= LoopProducerStage);
1430 int StageDiff = ConsumerStage - LoopProducerStage;
1431 if (StageDiff > 0) {
1432 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1433 << " to " << (Defaults.size() + StageDiff) << "\n");
1434 // If we need more phis than we have defaults for, pad out with undefs for
1435 // the earliest phis, which are at the end of the defaults chain (the
1436 // chain is in reverse order).
1437 Defaults.resize(Defaults.size() + StageDiff,
1438 Defaults.empty() ? std::optional<Register>()
1439 : Defaults.back());
1440 }
1441 }
1442
1443 // Now we know the number of stages to jump back, insert the phi chain.
1444 auto DefaultI = Defaults.rbegin();
1445 while (DefaultI != Defaults.rend())
1446 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1447
1448 if (IllegalPhiDefault) {
1449 // The consumer optionally consumes LoopProducer in the same iteration
1450 // (because the producer is scheduled at an earlier cycle than the consumer)
1451 // or the initial value. To facilitate this we create an illegal block here
1452 // by embedding a phi in the middle of the block. We will fix this up
1453 // immediately prior to pruning.
1454 auto RC = MRI.getRegClass(Reg);
1455 Register R = MRI.createVirtualRegister(RC);
1456 MachineInstr *IllegalPhi =
1457 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1458 .addReg(*IllegalPhiDefault)
1459 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1460 .addReg(LoopReg)
1461 .addMBB(BB); // Block choice is arbitrary and has no effect.
1462 // Illegal phi should belong to the producer stage so that it can be
1463 // filtered correctly during peeling.
1464 S.setStage(IllegalPhi, LoopProducerStage);
1465 return R;
1466 }
1467
1468 return LoopReg;
1469}
1470
1471Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1472 const TargetRegisterClass *RC) {
1473 // If the init register is not undef, try and find an existing phi.
1474 if (InitReg) {
1475 auto I = Phis.find({LoopReg, *InitReg});
1476 if (I != Phis.end())
1477 return I->second;
1478 } else {
1479 for (auto &KV : Phis) {
1480 if (KV.first.first == LoopReg)
1481 return KV.second;
1482 }
1483 }
1484
1485 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1486 // find a phi that takes undef as input.
1487 auto I = UndefPhis.find(LoopReg);
1488 if (I != UndefPhis.end()) {
1489 Register R = I->second;
1490 if (!InitReg)
1491 // Found a phi taking undef as input, and this input is undef so return
1492 // without any more changes.
1493 return R;
1494 // Found a phi taking undef as input, so rewrite it to take InitReg.
1495 MachineInstr *MI = MRI.getVRegDef(R);
1496 MI->getOperand(1).setReg(*InitReg);
1497 Phis.insert({{LoopReg, *InitReg}, R});
1498 const TargetRegisterClass *ConstrainRegClass =
1499 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1500 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1501 (void)ConstrainRegClass;
1502 UndefPhis.erase(I);
1503 return R;
1504 }
1505
1506 // Failed to find any existing phi to reuse, so create a new one.
1507 if (!RC)
1508 RC = MRI.getRegClass(LoopReg);
1509 Register R = MRI.createVirtualRegister(RC);
1510 if (InitReg) {
1511 const TargetRegisterClass *ConstrainRegClass =
1512 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1513 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1514 (void)ConstrainRegClass;
1515 }
1516 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1517 .addReg(InitReg ? *InitReg : undef(RC))
1518 .addMBB(PreheaderBB)
1519 .addReg(LoopReg)
1520 .addMBB(BB);
1521 if (!InitReg)
1522 UndefPhis[LoopReg] = R;
1523 else
1524 Phis[{LoopReg, *InitReg}] = R;
1525 return R;
1526}
1527
1528Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1529 Register &R = Undefs[RC];
1530 if (R == 0) {
1531 // Create an IMPLICIT_DEF that defines this register if we need it.
1532 // All uses of this should be removed by the time we have finished unrolling
1533 // prologs and epilogs.
1534 R = MRI.createVirtualRegister(RC);
1535 auto *InsertBB = &PreheaderBB->getParent()->front();
1536 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1537 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1538 }
1539 return R;
1540}
1541
1542namespace {
1543/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1544/// the operand are discovered, such as how many in-loop PHIs it has to jump
1545 /// through and what the default (initial) values of those phis are.
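/// For example (hypothetical registers, not from the original source), given
/// the kernel phis
///   %p1 = PHI [ %init1, %pre ], [ %x,  %bb ]
///   %p2 = PHI [ %init2, %pre ], [ %p1, %bb ]
/// a use of %p2 inside %bb has a distance of two and PhiDefaults = {%init2, %init1}.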
1546 class KernelOperandInfo {
1547 MachineBasicBlock *BB;
1548 MachineRegisterInfo &MRI;
1549 SmallVector<Register, 4> PhiDefaults;
1550 MachineOperand *Source;
1551 MachineOperand *Target;
1552
1553public:
1554 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1555 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1556 : MRI(MRI) {
1557 Source = MO;
1558 BB = MO->getParent()->getParent();
1559 while (isRegInLoop(MO)) {
1560 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1561 if (MI->isFullCopy()) {
1562 MO = &MI->getOperand(1);
1563 continue;
1564 }
1565 if (!MI->isPHI())
1566 break;
1567 // If this is an illegal phi, don't count it in distance.
1568 if (IllegalPhis.count(MI)) {
1569 MO = &MI->getOperand(3);
1570 continue;
1571 }
1572
1573 Register Default = getInitPhiReg(*MI, BB);
1574 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1575 : &MI->getOperand(3);
1576 PhiDefaults.push_back(Default);
1577 }
1578 Target = MO;
1579 }
1580
1581 bool operator==(const KernelOperandInfo &Other) const {
1582 return PhiDefaults.size() == Other.PhiDefaults.size();
1583 }
1584
1585 void print(raw_ostream &OS) const {
1586 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1587 << *Source->getParent();
1588 }
1589
1590private:
1591 bool isRegInLoop(MachineOperand *MO) {
1592 return MO->isReg() && MO->getReg().isVirtual() &&
1593 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1594 }
1595};
1596} // namespace
1597
1598 MachineBasicBlock *
1599 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1600 MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1601 if (LPD == LPD_Front)
1602 PeeledFront.push_back(NewBB);
1603 else
1604 PeeledBack.push_front(NewBB);
1605 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1606 ++I, ++NI) {
1607 CanonicalMIs[&*I] = &*I;
1608 CanonicalMIs[&*NI] = &*I;
1609 BlockMIs[{NewBB, &*I}] = &*NI;
1610 BlockMIs[{BB, &*I}] = &*I;
1611 }
1612 return NewBB;
1613}
1614
1615 void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
1616 int MinStage) {
1617 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1618 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1619 MachineInstr *MI = &*I++;
1620 int Stage = getStage(MI);
1621 if (Stage == -1 || Stage >= MinStage)
1622 continue;
1623
1624 for (MachineOperand &DefMO : MI->defs()) {
1625 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1626 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1627 // Only PHIs can use values from this block by construction.
1628 // Match with the equivalent PHI in B.
1629 assert(UseMI.isPHI());
1630 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1631 MI->getParent());
1632 Subs.emplace_back(&UseMI, Reg);
1633 }
1634 for (auto &Sub : Subs)
1635 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1637 }
1638 if (LIS)
1639 LIS->RemoveMachineInstrFromMaps(*MI);
1640 MI->eraseFromParent();
1641 }
1642}
1643
1644 void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1645 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1646 auto InsertPt = DestBB->getFirstNonPHI();
1647 DenseMap<Register, Register> Remaps;
1648 for (MachineInstr &MI : llvm::make_early_inc_range(
1649 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1650 if (MI.isPHI()) {
1651 // This is an illegal PHI. If we move any instructions using an illegal
1652 // PHI, we need to create a legal Phi.
1653 if (getStage(&MI) != Stage) {
1654 // The legal Phi is not necessary if the illegal phi's stage
1655 // is being moved.
1656 Register PhiR = MI.getOperand(0).getReg();
1657 auto RC = MRI.getRegClass(PhiR);
1658 Register NR = MRI.createVirtualRegister(RC);
1659 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1660 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1661 .addReg(PhiR)
1662 .addMBB(SourceBB);
1663 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1664 CanonicalMIs[NI] = CanonicalMIs[&MI];
1665 Remaps[PhiR] = NR;
1666 }
1667 }
1668 if (getStage(&MI) != Stage)
1669 continue;
1670 MI.removeFromParent();
1671 DestBB->insert(InsertPt, &MI);
1672 auto *KernelMI = CanonicalMIs[&MI];
1673 BlockMIs[{DestBB, KernelMI}] = &MI;
1674 BlockMIs.erase({SourceBB, KernelMI});
1675 }
1676 SmallVector<MachineInstr *, 4> PhiToDelete;
1677 for (MachineInstr &MI : DestBB->phis()) {
1678 assert(MI.getNumOperands() == 3);
1679 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1680 // If the instruction referenced by the phi is moved inside the block
1681 // we don't need the phi anymore.
1682 if (getStage(Def) == Stage) {
1683 Register PhiReg = MI.getOperand(0).getReg();
1684 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1685 /*TRI=*/nullptr) != -1);
1686 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1687 MI.getOperand(0).setReg(PhiReg);
1688 PhiToDelete.push_back(&MI);
1689 }
1690 }
1691 for (auto *P : PhiToDelete)
1692 P->eraseFromParent();
1693 InsertPt = DestBB->getFirstNonPHI();
1694 // Helper to clone Phi instructions into the destination block. We clone Phi
1695 // greedily to avoid combinatorial explosion of Phi instructions.
1696 auto clonePhi = [&](MachineInstr *Phi) {
1697 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1698 DestBB->insert(InsertPt, NewMI);
1699 Register OrigR = Phi->getOperand(0).getReg();
1700 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1701 NewMI->getOperand(0).setReg(R);
1702 NewMI->getOperand(1).setReg(OrigR);
1703 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1704 Remaps[OrigR] = R;
1705 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1706 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1707 PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
1708 return R;
1709 };
1710 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1711 for (MachineOperand &MO : I->uses()) {
1712 if (!MO.isReg())
1713 continue;
1714 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())
1715 MO.setReg(It->second);
1716 else {
1717 // If we are using a phi from the source block we need to add a new phi
1718 // pointing to the old one.
1719 MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1720 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1721 Register R = clonePhi(Use);
1722 MO.setReg(R);
1723 }
1724 }
1725 }
1726 }
1727}
1728
1729 Register
1730 PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
1731 MachineInstr *Phi) {
1732 unsigned distance = PhiNodeLoopIteration[Phi];
1733 MachineInstr *CanonicalUse = CanonicalPhi;
1734 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1735 for (unsigned I = 0; I < distance; ++I) {
1736 assert(CanonicalUse->isPHI());
1737 assert(CanonicalUse->getNumOperands() == 5);
1738 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1739 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1740 std::swap(LoopRegIdx, InitRegIdx);
1741 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1742 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1743 }
1744 return CanonicalUseReg;
1745}
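// Illustrative note: if PhiNodeLoopIteration[Phi] is 2, the walk above follows
// the loop-carried operand of CanonicalPhi twice and returns the register that
// feeds the second phi in that chain, i.e. the value from two iterations
// earlier in the canonical kernel.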
1746
1747 void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1748 BitVector LS(Schedule.getNumStages(), true);
1749 BitVector AS(Schedule.getNumStages(), true);
1750 LiveStages[BB] = LS;
1751 AvailableStages[BB] = AS;
1752
1753 // Peel out the prologs.
1754 LS.reset();
1755 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1756 LS[I] = true;
1757 Prologs.push_back(peelKernel(LPD_Front));
1758 LiveStages[Prologs.back()] = LS;
1759 AvailableStages[Prologs.back()] = LS;
1760 }
1761
1762 // Create a block that will end up as the new loop exiting block (dominated by
1763 // all prologs and epilogs). It will only contain PHIs, in the same order as
1764 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1765 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1766 // property that any value defined in BB but used outside of BB is used by a
1767 // PHI in the exiting block.
1768 ExitingBB = CreateLCSSAExitingBlock();
1769 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1770 // Push out the epilogs, again in reverse order.
1771 // We can't assume anything about the minimum loop trip count at this point,
1772 // so emit a fairly complex epilog.
1773
1774 // We first peel number of stages minus one epilogue. Then we remove dead
1775 // stages and reorder instructions based on their stage. If we have 3 stages
1776 // we generate first:
1777 // E0[3, 2, 1]
1778 // E1[3', 2']
1779 // E2[3'']
1780 // And then we move instructions based on their stages to have:
1781 // E0[3]
1782 // E1[2, 3']
1783 // E2[1, 2', 3'']
1784 // The transformation is legal because we only move instructions past
1785 // instructions of a previous loop iteration.
1786 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1787 Epilogs.push_back(peelKernel(LPD_Back));
1788 MachineBasicBlock *B = Epilogs.back();
1789 filterInstructions(B, Schedule.getNumStages() - I);
1790 // Keep track of which iteration each phi belongs to. We need it to know
1791 // what version of the variable to use during prologue/epilogue stitching.
1792 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1793 for (MachineInstr &Phi : B->phis())
1794 PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
1795 }
1796 for (size_t I = 0; I < Epilogs.size(); I++) {
1797 LS.reset();
1798 for (size_t J = I; J < Epilogs.size(); J++) {
1799 int Iteration = J;
1800 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1801 // Move stage one block at a time so that Phi nodes are updated correctly.
1802 for (size_t K = Iteration; K > I; K--)
1803 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1804 LS[Stage] = true;
1805 }
1806 LiveStages[Epilogs[I]] = LS;
1807 AvailableStages[Epilogs[I]] = AS;
1808 }
1809
1810 // Now we've defined all the prolog and epilog blocks as a fallthrough
1811 // sequence, add the edges that will be followed if the loop trip count is
1812 // lower than the number of stages (connecting prologs directly with epilogs).
1813 auto PI = Prologs.begin();
1814 auto EI = Epilogs.begin();
1815 assert(Prologs.size() == Epilogs.size());
1816 for (; PI != Prologs.end(); ++PI, ++EI) {
1817 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1818 (*PI)->addSuccessor(*EI);
1819 for (MachineInstr &MI : (*EI)->phis()) {
1820 Register Reg = MI.getOperand(1).getReg();
1821 MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1822 if (Use && Use->getParent() == Pred) {
1823 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1824 if (CanonicalUse->isPHI()) {
1825 // If the use comes from a phi we need to skip as many phi as the
1826 // distance between the epilogue and the kernel. Trace through the phi
1827 // chain to find the right value.
1828 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1829 }
1830 Reg = getEquivalentRegisterIn(Reg, *PI);
1831 }
1832 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1833 MI.addOperand(MachineOperand::CreateMBB(*PI));
1834 }
1835 }
1836
1837 // Create a list of all blocks in order.
1838 SmallVector<MachineBasicBlock *, 8> Blocks;
1839 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1840 Blocks.push_back(BB);
1841 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1842
1843 // Iterate in reverse order over all instructions, remapping as we go.
1844 for (MachineBasicBlock *B : reverse(Blocks)) {
1845 for (auto I = B->instr_rbegin();
1846 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1847 MachineBasicBlock::reverse_instr_iterator MI = I++;
1848 rewriteUsesOf(&*MI);
1849 }
1850 }
1851 for (auto *MI : IllegalPhisToDelete) {
1852 if (LIS)
1853 LIS->RemoveMachineInstrFromMaps(*MI);
1854 MI->eraseFromParent();
1855 }
1856 IllegalPhisToDelete.clear();
1857
1858 // Now all remapping has been done, we're free to optimize the generated code.
1859 for (MachineBasicBlock *B : reverse(Blocks))
1860 EliminateDeadPhis(B, MRI, LIS);
1861 EliminateDeadPhis(ExitingBB, MRI, LIS);
1862}
1863
1864 MachineBasicBlock *
1865 PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1866 MachineBasicBlock *Exit = *BB->succ_begin();
1867 if (Exit == BB)
1868 Exit = *std::next(BB->succ_begin());
1869
1870 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1871 MF.insert(std::next(BB->getIterator()), NewBB);
1872
1873 // Clone all phis in BB into NewBB and rewrite.
1874 for (MachineInstr &MI : BB->phis()) {
1875 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1876 Register OldR = MI.getOperand(3).getReg();
1877 Register R = MRI.createVirtualRegister(RC);
1878 SmallVector<MachineInstr *, 4> Uses;
1879 for (MachineInstr &Use : MRI.use_instructions(OldR))
1880 if (Use.getParent() != BB)
1881 Uses.push_back(&Use);
1882 for (MachineInstr *Use : Uses)
1883 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1884 *MRI.getTargetRegisterInfo());
1885 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1886 .addReg(OldR)
1887 .addMBB(BB);
1888 BlockMIs[{NewBB, &MI}] = NI;
1889 CanonicalMIs[NI] = &MI;
1890 }
1891 BB->replaceSuccessor(Exit, NewBB);
1892 Exit->replacePhiUsesWith(BB, NewBB);
1893 NewBB->addSuccessor(Exit);
1894
1895 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1896 SmallVector<MachineOperand, 4> Cond;
1897 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1898 (void)CanAnalyzeBr;
1899 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1900 TII->removeBranch(*BB);
1901 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1902 Cond, DebugLoc());
1903 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1904 return NewBB;
1905}
1906
1907 Register
1908 PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
1909 MachineBasicBlock *BB) {
1910 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1911 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
1912 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1913}
1914
1915 void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
1916 if (MI->isPHI()) {
1917 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1918 // and it is produced by this block.
1919 Register PhiR = MI->getOperand(0).getReg();
1920 Register R = MI->getOperand(3).getReg();
1921 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1922 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1923 R = MI->getOperand(1).getReg();
1924 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1925 MRI.replaceRegWith(PhiR, R);
1926 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1927 // later to figure out how to remap registers.
1928 MI->getOperand(0).setReg(PhiR);
1929 IllegalPhisToDelete.push_back(MI);
1930 return;
1931 }
1932
1933 int Stage = getStage(MI);
1934 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1935 LiveStages[MI->getParent()].test(Stage))
1936 // Instruction is live, no rewriting to do.
1937 return;
1938
1939 for (MachineOperand &DefMO : MI->defs()) {
1940 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1941 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1942 // Only PHIs can use values from this block by construction.
1943 // Match with the equivalent PHI in B.
1944 assert(UseMI.isPHI());
1945 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1946 MI->getParent());
1947 Subs.emplace_back(&UseMI, Reg);
1948 }
1949 for (auto &Sub : Subs)
1950 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1951 *MRI.getTargetRegisterInfo());
1952 }
1953 if (LIS)
1954 LIS->RemoveMachineInstrFromMaps(*MI);
1955 MI->eraseFromParent();
1956}
1957
1958 void PeelingModuloScheduleExpander::fixupBranches() {
1959 // Work outwards from the kernel.
1960 bool KernelDisposed = false;
1961 int TC = Schedule.getNumStages() - 1;
1962 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1963 ++PI, ++EI, --TC) {
1964 MachineBasicBlock *Prolog = *PI;
1965 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1966 MachineBasicBlock *Epilog = *EI;
1967 SmallVector<MachineOperand, 4> Cond;
1968 TII->removeBranch(*Prolog);
1969 std::optional<bool> StaticallyGreater =
1970 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1971 if (!StaticallyGreater) {
1972 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1973 // Dynamically branch based on Cond.
1974 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1975 } else if (*StaticallyGreater == false) {
1976 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1977 // Prolog never falls through; branch to epilog and orphan interior
1978 // blocks. Leave it to unreachable-block-elim to clean up.
1979 Prolog->removeSuccessor(Fallthrough);
1980 for (MachineInstr &P : Fallthrough->phis()) {
1981 P.removeOperand(2);
1982 P.removeOperand(1);
1983 }
1984 TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
1985 KernelDisposed = true;
1986 } else {
1987 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1988 // Prolog always falls through; remove incoming values in epilog.
1989 Prolog->removeSuccessor(Epilog);
1990 for (MachineInstr &P : Epilog->phis()) {
1991 P.removeOperand(4);
1992 P.removeOperand(3);
1993 }
1994 }
1995 }
1996
1997 if (!KernelDisposed) {
1998 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1999 LoopInfo->setPreheader(Prologs.back());
2000 } else {
2001 LoopInfo->disposed();
2002 }
2003}
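// Illustrative example: with 3 stages there are 2 prolog/epilog pairs. The
// prolog adjacent to the kernel is guarded by "TC > 2" and the outer prolog by
// "TC > 1"; when createTripCountGreaterCondition proves a guard statically
// true the fallthrough edge is kept unconditionally, and when it proves it
// statically false the prolog branches straight to its matching epilog.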
2004
2005 void PeelingModuloScheduleExpander::rewriteKernel() {
2006 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2007 KR.rewrite();
2008}
2009
2016
2017 rewriteKernel();
2018 peelPrologAndEpilogs();
2019 fixupBranches();
2020}
2021
2025
2026 // Dump the schedule before we invalidate and remap all its instructions.
2027 // Stash it in a string so we can print it if we found an error.
2028 std::string ScheduleDump;
2029 raw_string_ostream OS(ScheduleDump);
2030 Schedule.print(OS);
2031 OS.flush();
2032
2033 // First, run the normal ModuloScheduleExpander. We don't support any
2034 // InstrChanges.
2035 assert(LIS && "Requires LiveIntervals!");
2036 ModuloScheduleExpander MSE(MF, Schedule, *LIS,
2037 /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2038 MSE.expand();
2039 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2040 if (!ExpandedKernel) {
2041 // The expander optimized away the kernel. We can't do any useful checking.
2042 MSE.cleanup();
2043 return;
2044 }
2045 // Before running the KernelRewriter, re-add BB into the CFG.
2046 Preheader->addSuccessor(BB);
2047
2048 // Now run the new expansion algorithm.
2049 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2050 KR.rewrite();
2052
2053 // Collect all illegal phis that the new algorithm created. We'll give these
2054 // to KernelOperandInfo.
2055 SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2056 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2057 if (NI->isPHI())
2058 IllegalPhis.insert(&*NI);
2059 }
2060
2061 // Co-iterate across both kernels. We expect them to be identical apart from
2062 // phis and full COPYs (we look through both).
2063 SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
2064 auto OI = ExpandedKernel->begin();
2065 auto NI = BB->begin();
2066 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2067 while (OI->isPHI() || OI->isFullCopy())
2068 ++OI;
2069 while (NI->isPHI() || NI->isFullCopy())
2070 ++NI;
2071 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2072 // Analyze every operand separately.
2073 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2074 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2075 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2076 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2077 }
2078
2079 bool Failed = false;
2080 for (auto &OldAndNew : KOIs) {
2081 if (OldAndNew.first == OldAndNew.second)
2082 continue;
2083 Failed = true;
2084 errs() << "Modulo kernel validation error: [\n";
2085 errs() << " [golden] ";
2086 OldAndNew.first.print(errs());
2087 errs() << " ";
2088 OldAndNew.second.print(errs());
2089 errs() << "]\n";
2090 }
2091
2092 if (Failed) {
2093 errs() << "Golden reference kernel:\n";
2094 ExpandedKernel->print(errs());
2095 errs() << "New kernel:\n";
2096 BB->print(errs());
2097 errs() << ScheduleDump;
2098 report_fatal_error(
2099 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2100 }
2101
2102 // Cleanup by removing BB from the CFG again as the original
2103 // ModuloScheduleExpander intended.
2104 Preheader->removeSuccessor(BB);
2105 MSE.cleanup();
2106}
2107
2108MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2109 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2110
2111 // TODO: Offset information needs to be corrected.
2112 NewMI->dropMemRefs(MF);
2113
2114 return NewMI;
2115}
2116
2117/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2118 /// If it is already a dedicated exit, return it. Otherwise, insert a new
2119/// block between them and return the new block.
2121 MachineBasicBlock *Exit) {
2122 if (Exit->pred_size() == 1)
2123 return Exit;
2124
2125 MachineFunction *MF = Loop->getParent();
2126 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2127
2128 MachineBasicBlock *NewExit =
2129 MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2130 MF->insert(Loop->getIterator(), NewExit);
2131
2132 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2133 SmallVector<MachineOperand, 4> Cond;
2134 TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2135 if (TBB == Loop)
2136 FBB = NewExit;
2137 else if (FBB == Loop)
2138 TBB = NewExit;
2139 else
2140 llvm_unreachable("unexpected loop structure");
2141 TII->removeBranch(*Loop);
2142 TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2143 Loop->replaceSuccessor(Exit, NewExit);
2144 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2145 NewExit->addSuccessor(Exit);
2146
2147 Exit->replacePhiUsesWith(Loop, NewExit);
2148
2149 return NewExit;
2150}
2151
2152/// Insert branch code into the end of MBB. It branches to GreaterThan if the
2153/// remaining trip count for instructions in LastStage0Insts is greater than
2154/// RequiredTC, and to Otherwise otherwise.
2155void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2156 int RequiredTC,
2157 InstrMapTy &LastStage0Insts,
2158 MachineBasicBlock &GreaterThan,
2159 MachineBasicBlock &Otherwise) {
2160 SmallVector<MachineOperand, 4> Cond;
2161 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2162 LastStage0Insts);
2163
2165 // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2166 // FBB for optimal performance.
2167 if (TII->reverseBranchCondition(Cond))
2168 llvm_unreachable("can not reverse branch condition");
2169 TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2170 } else {
2171 TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2172 }
2173}
2174
2175/// Generate a pipelined loop that is unrolled by using MVE algorithm and any
2176/// other necessary blocks. The control flow is modified to execute the
2177/// pipelined loop if the trip count satisfies the condition, otherwise the
2178/// original loop. The original loop is also used to execute the remainder
2179/// iterations which occur due to unrolling.
2180void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2181 // The control flow for pipelining with MVE:
2182 //
2183 // OrigPreheader:
2184 // // The block that is originally the loop preheader
2185 // goto Check
2186 //
2187 // Check:
2188 // // Check whether the trip count satisfies the requirements to pipeline.
2189 // if (LoopCounter > NumStages + NumUnroll - 2)
2190 // // The minimum number of iterations to pipeline =
2191 // // iterations executed in prolog/epilog (NumStages-1) +
2192 // // iterations executed in one kernel run (NumUnroll)
2193 // goto Prolog
2194 // // fallback to the original loop
2195 // goto NewPreheader
2196 //
2197 // Prolog:
2198 // // All prolog stages. There are no direct branches to the epilogue.
2199 // goto NewKernel
2200 //
2201 // NewKernel:
2202 // // NumUnroll copies of the kernel
2203 // if (LoopCounter > MVE-1)
2204 // goto NewKernel
2205 // goto Epilog
2206 //
2207 // Epilog:
2208 // // All epilog stages.
2209 // if (LoopCounter > 0)
2210 // // The remainder is executed in the original loop
2211 // goto NewPreheader
2212 // goto NewExit
2213 //
2214 // NewPreheader:
2215 // // Newly created preheader for the original loop.
2216 // // The initial values of the phis in the loop are merged from two paths.
2217 // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2218 // goto OrigKernel
2219 //
2220 // OrigKernel:
2221 // // The original loop block.
2222 // if (LoopCounter != 0)
2223 // goto OrigKernel
2224 // goto NewExit
2225 //
2226 // NewExit:
2227 // // Newly created dedicated exit for the original loop.
2228 // // Merge values which are referenced after the loop
2229 // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2230 // goto OrigExit
2231 //
2232 // OrigExit:
2233 // // The block that is originally the loop exit.
2234 // // If it is already a dedicated exit, NewExit is not created.
2235
2236 // An example of where each stage is executed:
2237 // Assume #Stages 3, #MVE 4, #Iterations 12
2238 // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2239 // -------------------------------------------------
2240 // Stage 0 Prolog#0
2241 // Stage 1 0 Prolog#1
2242 // Stage 2 1 0 Kernel Unroll#0 Iter#0
2243 // Stage 2 1 0 Kernel Unroll#1 Iter#0
2244 // Stage 2 1 0 Kernel Unroll#2 Iter#0
2245 // Stage 2 1 0 Kernel Unroll#3 Iter#0
2246 // Stage 2 1 0 Kernel Unroll#0 Iter#1
2247 // Stage 2 1 0 Kernel Unroll#1 Iter#1
2248 // Stage 2 1 0 Kernel Unroll#2 Iter#1
2249 // Stage 2 1 0 Kernel Unroll#3 Iter#1
2250 // Stage 2 1 Epilog#0
2251 // Stage 2 Epilog#1
2252 // Stage 0-2 OrigKernel
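 // In the example above (illustrative reading of the table), the trip-count
 // check requires LoopCounter > 3 + 4 - 2 = 5: iterations 0-9 flow through the
 // Prolog, two kernel runs of NumUnroll = 4, and the Epilog, while the
 // remaining iterations 10-11 are executed by the original loop.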
2253
2254 LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2255 assert(LoopInfo && "Must be able to analyze loop!");
2256
2257 calcNumUnroll();
2258
2259 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2260 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2261 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2262 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2263 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2264
2265 MF.insert(OrigKernel->getIterator(), Check);
2266 MF.insert(OrigKernel->getIterator(), Prolog);
2267 MF.insert(OrigKernel->getIterator(), NewKernel);
2268 MF.insert(OrigKernel->getIterator(), Epilog);
2269 MF.insert(OrigKernel->getIterator(), NewPreheader);
2270
2271 NewExit = createDedicatedExit(OrigKernel, OrigExit);
2272
2273 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2274 TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2275
2276 OrigPreheader->addSuccessor(Check);
2277 TII->removeBranch(*OrigPreheader);
2278 TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2279
2280 Check->addSuccessor(Prolog);
2281 Check->addSuccessor(NewPreheader);
2282
2283 Prolog->addSuccessor(NewKernel);
2284
2285 NewKernel->addSuccessor(NewKernel);
2286 NewKernel->addSuccessor(Epilog);
2287
2288 Epilog->addSuccessor(NewPreheader);
2289 Epilog->addSuccessor(NewExit);
2290
2291 InstrMapTy LastStage0Insts;
2292 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2293 LastStage0Insts, *Prolog, *NewPreheader);
2294
2295 // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2296 // register#
2297 SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2298 generateProlog(PrologVRMap);
2299 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2300 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2301}
2302
2303/// Replace MI's use operands according to the maps.
2304void ModuloScheduleExpanderMVE::updateInstrUse(
2305 MachineInstr *MI, int StageNum, int PhaseNum,
2306 SmallVectorImpl<ValueMapTy> &CurVRMap,
2307 SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2308 // If MI is in the prolog/kernel/epilog block, CurVRMap is
2309 // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2310 // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2311 // Refer to the appropriate map according to the stage difference between
2312 // MI and the definition of an operand.
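 // Illustrative example: for a kernel use in stage 2 whose def is in stage 0
 // of the same iteration, DiffStage is 2. In unrolled copy PhaseNum >= 2 the
 // value comes from CurVRMap[PhaseNum - 2]; in copies 0 and 1 it is taken from
 // PrevVRMap, i.e. the phis carrying values from the previous kernel iteration.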
2313
2314 for (MachineOperand &UseMO : MI->uses()) {
2315 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2316 continue;
2317 int DiffStage = 0;
2318 Register OrigReg = UseMO.getReg();
2319 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2320 if (!DefInst || DefInst->getParent() != OrigKernel)
2321 continue;
2322 unsigned InitReg = 0;
2323 unsigned DefReg = OrigReg;
2324 if (DefInst->isPHI()) {
2325 ++DiffStage;
2326 unsigned LoopReg;
2327 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2328 // LoopReg is guaranteed to be defined within the loop by canApply()
2329 DefReg = LoopReg;
2330 DefInst = MRI.getVRegDef(LoopReg);
2331 }
2332 unsigned DefStageNum = Schedule.getStage(DefInst);
2333 DiffStage += StageNum - DefStageNum;
2334 Register NewReg;
2335 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2336 // NewReg is defined in a previous phase of the same block
2337 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2338 else if (!PrevVRMap)
2339 // Since this is the first iteration, refer to the initial register of the
2340 // loop
2341 NewReg = InitReg;
2342 else
2343 // Cases where DiffStage is larger than PhaseNum.
2344 // If MI is in the kernel block, the value is defined by the previous
2345 // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2346 // value is defined in the kernel block and KernelVRMap is referenced.
2347 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2348
2349 const TargetRegisterClass *NRC =
2350 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2351 if (NRC)
2352 UseMO.setReg(NewReg);
2353 else {
2354 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2355 BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
2356 SplitReg)
2357 .addReg(NewReg);
2358 UseMO.setReg(SplitReg);
2359 }
2360 }
2361}
2362
2363/// Return a phi if Reg is referenced by the phi.
2364 /// canApply() guarantees that at most one such phi exists.
2365 static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
2366 for (MachineInstr &Phi : Loop->phis()) {
2367 unsigned InitVal, LoopVal;
2368 getPhiRegs(Phi, Loop, InitVal, LoopVal);
2369 if (LoopVal == Reg)
2370 return &Phi;
2371 }
2372 return nullptr;
2373}
2374
2375/// Generate phis for registers defined by OrigMI.
2376void ModuloScheduleExpanderMVE::generatePhi(
2377 MachineInstr *OrigMI, int UnrollNum,
2378 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2379 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2380 SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2381 int StageNum = Schedule.getStage(OrigMI);
2382 bool UsePrologReg;
2383 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2384 UsePrologReg = true;
2385 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2386 UsePrologReg = false;
2387 else
2388 return;
2389
2390 // Examples that show which stages are merged by phi.
2391 // Meaning of the symbol following the stage number:
2392 // a/b: Stages with the same letter are merged (UsePrologReg == true)
2393 // +: Merged with the initial value (UsePrologReg == false)
2394 // *: No phis required
2395 //
2396 // #Stages 3, #MVE 4
2397 // Iter 0 1 2 3 4 5 6 7 8
2398 // -----------------------------------------
2399 // Stage 0a Prolog#0
2400 // Stage 1a 0b Prolog#1
2401 // Stage 2* 1* 0* Kernel Unroll#0
2402 // Stage 2* 1* 0+ Kernel Unroll#1
2403 // Stage 2* 1+ 0a Kernel Unroll#2
2404 // Stage 2+ 1a 0b Kernel Unroll#3
2405 //
2406 // #Stages 3, #MVE 2
2407 // Iter 0 1 2 3 4 5 6 7 8
2408 // -----------------------------------------
2409 // Stage 0a Prolog#0
2410 // Stage 1a 0b Prolog#1
2411 // Stage 2* 1+ 0a Kernel Unroll#0
2412 // Stage 2+ 1a 0b Kernel Unroll#1
2413 //
2414 // #Stages 3, #MVE 1
2415 // Iter 0 1 2 3 4 5 6 7 8
2416 // -----------------------------------------
2417 // Stage 0* Prolog#0
2418 // Stage 1a 0b Prolog#1
2419 // Stage 2+ 1a 0b Kernel Unroll#0
2420
2421 for (MachineOperand &DefMO : OrigMI->defs()) {
2422 if (!DefMO.isReg() || DefMO.isDead())
2423 continue;
2424 Register OrigReg = DefMO.getReg();
2425 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2426 if (NewReg == KernelVRMap[UnrollNum].end())
2427 continue;
2428 Register CorrespondReg;
2429 if (UsePrologReg) {
2430 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2431 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2432 } else {
2433 MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2434 if (!Phi)
2435 continue;
2436 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2437 }
2438
2439 assert(CorrespondReg.isValid());
2440 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2441 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2442 TII->get(TargetOpcode::PHI), PhiReg)
2443 .addReg(NewReg->second)
2444 .addMBB(NewKernel)
2445 .addReg(CorrespondReg)
2446 .addMBB(Prolog);
2447 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2448 }
2449}
2450
2451static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2452 MachineBasicBlock *NewMBB) {
2453 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2454 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2455 Phi.getOperand(Idx).setReg(NewReg);
2456 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2457 return;
2458 }
2459 }
2460}
2461
2462/// Generate phis that merge values from multiple routes
2463void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2464 Register NewReg) {
2465 SmallVector<MachineOperand *> UsesAfterLoop;
2466 SmallVector<MachineInstr *> LoopPhis;
2467 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2468 E = MRI.use_end();
2469 I != E; ++I) {
2470 MachineOperand &O = *I;
2471 if (O.getParent()->getParent() != OrigKernel &&
2472 O.getParent()->getParent() != Prolog &&
2473 O.getParent()->getParent() != NewKernel &&
2474 O.getParent()->getParent() != Epilog)
2475 UsesAfterLoop.push_back(&O);
2476 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2477 LoopPhis.push_back(O.getParent());
2478 }
2479
2480 // Merge the route that only executes the pipelined loop (when there are no
2481 // remaining iterations) with the route that executes the original loop.
2482 if (!UsesAfterLoop.empty()) {
2483 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2484 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2485 TII->get(TargetOpcode::PHI), PhiReg)
2486 .addReg(OrigReg)
2487 .addMBB(OrigKernel)
2488 .addReg(NewReg)
2489 .addMBB(Epilog);
2490
2491 for (MachineOperand *MO : UsesAfterLoop)
2492 MO->setReg(PhiReg);
2493
2494 if (!LIS.hasInterval(PhiReg))
2495 LIS.createEmptyInterval(PhiReg);
2496 }
2497
2498 // Merge the route coming from the pipelined loop with the bypass route, in
2499 // the new preheader in front of the original loop
2500 if (!LoopPhis.empty()) {
2501 for (MachineInstr *Phi : LoopPhis) {
2502 unsigned InitReg, LoopReg;
2503 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2504 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2505 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(),
2506 TII->get(TargetOpcode::PHI), NewInit)
2507 .addReg(InitReg)
2508 .addMBB(Check)
2509 .addReg(NewReg)
2510 .addMBB(Epilog);
2511 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2512 }
2513 }
2514}
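// Illustrative sketch (hypothetical registers): for a value %v defined in the
// original loop and also used after it, the code above inserts in NewExit
//   %merged = PHI [ %v, OrigKernel ], [ %v.pipelined, Epilog ]
// and, for each loop phi fed by %v, a NewPreheader phi
//   %newinit = PHI [ %init, Check ], [ %v.pipelined, Epilog ]
// which becomes that phi's incoming value for the remainder loop.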
2515
2516void ModuloScheduleExpanderMVE::generateProlog(
2517 SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2518 PrologVRMap.clear();
2519 PrologVRMap.resize(Schedule.getNumStages() - 1);
2520 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2521 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2522 ++PrologNum) {
2523 for (MachineInstr *MI : Schedule.getInstructions()) {
2524 if (MI->isPHI())
2525 continue;
2526 int StageNum = Schedule.getStage(MI);
2527 if (StageNum > PrologNum)
2528 continue;
2529 MachineInstr *NewMI = cloneInstr(MI);
2530 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2531 NewMIMap[NewMI] = {PrologNum, StageNum};
2532 Prolog->push_back(NewMI);
2533 }
2534 }
2535
2536 for (auto I : NewMIMap) {
2537 MachineInstr *MI = I.first;
2538 int PrologNum = I.second.first;
2539 int StageNum = I.second.second;
2540 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2541 }
2542
2543 LLVM_DEBUG({
2544 dbgs() << "prolog:\n";
2545 Prolog->dump();
2546 });
2547}
2548
2549void ModuloScheduleExpanderMVE::generateKernel(
2550 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2551 SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2552 KernelVRMap.clear();
2553 KernelVRMap.resize(NumUnroll);
2554 SmallVector<ValueMapTy> PhiVRMap;
2555 PhiVRMap.resize(NumUnroll);
2556 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2557
2558 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2559 for (MachineInstr *MI : Schedule.getInstructions()) {
2560 if (MI->isPHI())
2561 continue;
2562 int StageNum = Schedule.getStage(MI);
2563 MachineInstr *NewMI = cloneInstr(MI);
2564 if (UnrollNum == NumUnroll - 1)
2565 LastStage0Insts[MI] = NewMI;
2566 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2567 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2568 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2569 NewMIMap[NewMI] = {UnrollNum, StageNum};
2570 NewKernel->push_back(NewMI);
2571 }
2572 }
2573
2574 for (auto I : NewMIMap) {
2575 MachineInstr *MI = I.first;
2576 int UnrollNum = I.second.first;
2577 int StageNum = I.second.second;
2578 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2579 }
2580
2581 // If remaining trip count is greater than NumUnroll-1, loop continues
2582 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2583 *Epilog);
2584
2585 LLVM_DEBUG({
2586 dbgs() << "kernel:\n";
2587 NewKernel->dump();
2588 });
2589}
2590
2591void ModuloScheduleExpanderMVE::generateEpilog(
2592 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2593 SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2594 EpilogVRMap.clear();
2595 EpilogVRMap.resize(Schedule.getNumStages() - 1);
2596 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2597 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2598 ++EpilogNum) {
2599 for (MachineInstr *MI : Schedule.getInstructions()) {
2600 if (MI->isPHI())
2601 continue;
2602 int StageNum = Schedule.getStage(MI);
2603 if (StageNum <= EpilogNum)
2604 continue;
2605 MachineInstr *NewMI = cloneInstr(MI);
2606 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2607 NewMIMap[NewMI] = {EpilogNum, StageNum};
2608 Epilog->push_back(NewMI);
2609 }
2610 }
2611
2612 for (auto I : NewMIMap) {
2613 MachineInstr *MI = I.first;
2614 int EpilogNum = I.second.first;
2615 int StageNum = I.second.second;
2616 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2617 }
2618
2619 // If there are remaining iterations, they are executed in the original loop.
2620 // Instructions related to loop control, such as the loop counter comparison,
2621 // are indicated by shouldIgnoreForPipelining() and are assumed to be in stage
2622 // 0, so LastStage0Insts holds their copies from the last unrolled kernel iteration.
2623 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2624
2625 LLVM_DEBUG({
2626 dbgs() << "epilog:\n";
2627 Epilog->dump();
2628 });
2629}
2630
2631 /// Calculate the number of unrolls required and store it in NumUnroll.
2632 void ModuloScheduleExpanderMVE::calcNumUnroll() {
2633 DenseMap<MachineInstr *, unsigned> Inst2Idx;
2634 NumUnroll = 1;
2635 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2636 Inst2Idx[Schedule.getInstructions()[I]] = I;
2637
2638 for (MachineInstr *MI : Schedule.getInstructions()) {
2639 if (MI->isPHI())
2640 continue;
2641 int StageNum = Schedule.getStage(MI);
2642 for (const MachineOperand &MO : MI->uses()) {
2643 if (!MO.isReg() || !MO.getReg().isVirtual())
2644 continue;
2645 MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2646 if (DefMI->getParent() != OrigKernel)
2647 continue;
2648
2649 int NumUnrollLocal = 1;
2650 if (DefMI->isPHI()) {
2651 ++NumUnrollLocal;
2652 // canApply() guarantees that DefMI is not a phi and is an instruction in
2653 // the loop
2654 DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2655 }
2656 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2657 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2658 --NumUnrollLocal;
2659 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2660 }
2661 }
2662 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2663}
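// Worked example (illustrative): if a stage-2 instruction uses a value defined
// by a stage-0 instruction of the same iteration, and the use appears after
// the def in the schedule order, NumUnrollLocal = 1 + (2 - 0) = 3, so the
// kernel must be unrolled at least three times for the def's copy to exist in
// the same unrolled block.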
2664
2665/// Create new virtual registers for definitions of NewMI and update NewMI.
2666/// If the definitions are referenced after the pipelined loop, phis are
2667/// created to merge with other routes.
2668void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2669 ValueMapTy &VRMap,
2670 bool LastDef) {
2671 for (MachineOperand &MO : NewMI->all_defs()) {
2672 if (!MO.getReg().isVirtual())
2673 continue;
2674 Register Reg = MO.getReg();
2675 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2676 Register NewReg = MRI.createVirtualRegister(RC);
2677 MO.setReg(NewReg);
2678 VRMap[Reg] = NewReg;
2679 if (LastDef)
2680 mergeRegUsesAfterPipeline(Reg, NewReg);
2681 }
2682}
2683
2684 void ModuloScheduleExpanderMVE::expand() {
2685 OrigKernel = Schedule.getLoop()->getTopBlock();
2686 OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2687 OrigExit = Schedule.getLoop()->getExitBlock();
2688
2689 LLVM_DEBUG(Schedule.dump());
2690
2691 generatePipelinedLoop();
2692}
2693
2694 /// Check if ModuloScheduleExpanderMVE can be applied to L.
2695 bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
2696 if (!L.getExitBlock()) {
2697 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2698 return false;
2699 }
2700
2701 MachineBasicBlock *BB = L.getTopBlock();
2702 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2703
2704 // Put some constraints on the operands of the phis to simplify the
2705 // transformation
2706 DenseSet<unsigned> UsedByPhi;
2707 for (MachineInstr &MI : BB->phis()) {
2708 // Registers defined by phis must be used only inside the loop and must
2709 // never be used by phis.
2710 for (MachineOperand &MO : MI.defs())
2711 if (MO.isReg())
2712 for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2713 if (Ref.getParent() != BB || Ref.isPHI()) {
2714 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2715 "referenced outside of the loop or by phi.\n");
2716 return false;
2717 }
2718
2719 // A source register from the loop block must be defined inside the loop.
2720 // A register defined inside the loop must be referenced by only one phi at
2721 // most.
2722 unsigned InitVal, LoopVal;
2723 getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2724 if (!Register(LoopVal).isVirtual() ||
2725 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2726 LLVM_DEBUG(
2727 dbgs() << "Can not apply MVE expander: A phi source value coming "
2728 "from the loop is not defined in the loop.\n");
2729 return false;
2730 }
2731 if (UsedByPhi.count(LoopVal)) {
2732 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2733 "loop is referenced by two or more phis.\n");
2734 return false;
2735 }
2736 UsedByPhi.insert(LoopVal);
2737 }
2738
2739 return true;
2740}
2741
2742//===----------------------------------------------------------------------===//
2743// ModuloScheduleTestPass implementation
2744//===----------------------------------------------------------------------===//
2745// This pass constructs a ModuloSchedule from its module and runs
2746// ModuloScheduleExpander.
2747//
2748// The module is expected to contain a single-block analyzable loop.
2749// The total order of instructions is taken from the loop as-is.
2750// Instructions are expected to be annotated with a PostInstrSymbol.
2751// This PostInstrSymbol must have the following format:
2752// "Stage=%d Cycle=%d".
2753//===----------------------------------------------------------------------===//
2754
2755namespace {
2756class ModuloScheduleTest : public MachineFunctionPass {
2757public:
2758 static char ID;
2759
2760 ModuloScheduleTest() : MachineFunctionPass(ID) {
2761 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
2762 }
2763
2764 bool runOnMachineFunction(MachineFunction &MF) override;
2765 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2766
2767 void getAnalysisUsage(AnalysisUsage &AU) const override {
2768 AU.addRequired<MachineLoopInfoWrapperPass>();
2769 AU.addRequired<LiveIntervalsWrapperPass>();
2770 MachineFunctionPass::getAnalysisUsage(AU);
2771 }
2772};
2773} // namespace
2774
2775char ModuloScheduleTest::ID = 0;
2776
2777INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2778 "Modulo Schedule test pass", false, false)
2781INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2782 "Modulo Schedule test pass", false, false)
2783
2784bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2785 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2786 for (auto *L : MLI) {
2787 if (L->getTopBlock() != L->getBottomBlock())
2788 continue;
2789 runOnLoop(MF, *L);
2790 return false;
2791 }
2792 return false;
2793}
2794
2795static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2796 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2797 std::pair<StringRef, StringRef> StageTokenAndValue =
2798 getToken(StageAndCycle.first, "-");
2799 std::pair<StringRef, StringRef> CycleTokenAndValue =
2800 getToken(StageAndCycle.second, "-");
2801 if (StageTokenAndValue.first != "Stage" ||
2802 CycleTokenAndValue.first != "_Cycle") {
2804 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2805 return;
2806 }
2807
2808 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2809 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2810
2811 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2812}
2813
2814void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2815 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2816 MachineBasicBlock *BB = L.getTopBlock();
2817 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2818
2819 DenseMap<MachineInstr *, int> Cycle, Stage;
2820 std::vector<MachineInstr *> Instrs;
2821 for (MachineInstr &MI : *BB) {
2822 if (MI.isTerminator())
2823 continue;
2824 Instrs.push_back(&MI);
2825 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2826 dbgs() << "Parsing post-instr symbol for " << MI;
2827 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2828 }
2829 }
2830
2831 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2832 std::move(Stage));
2833 ModuloScheduleExpander MSE(
2834 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2835 MSE.expand();
2836 MSE.cleanup();
2837}
2838
2839//===----------------------------------------------------------------------===//
2840// ModuloScheduleTestAnnotater implementation
2841//===----------------------------------------------------------------------===//
2842
2843 void ModuloScheduleTestAnnotater::annotate() {
2844 for (MachineInstr *MI : S.getInstructions()) {
2845 SmallString<128> SS;
2846 raw_svector_ostream OS(SS);
2847 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2848 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2849 MI->setPostInstrSymbol(MF, Sym);
2850 }
2851}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
@ Default
Definition: DwarfDebug.cpp:87
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Symbol * Sym
Definition: ELF_riscv.cpp:479
global merge Global merge function pass
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
This file provides utility analysis objects describing memory locations.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)
Create a dedicated exit for Loop.
modulo schedule test
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:212
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:71
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:349
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:580
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:820
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:693
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:730
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:790
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:501
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
Definition: MachineInstr.h:764
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
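A small sketch, under assumed names (MRI, OldReg, EpilogBB), of combining these accessors to create a replacement register and re-route only the uses that live in one block; a definition (e.g. a COPY) for the new register would still have to be inserted separately:
Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
// setReg moves each operand between use lists, so walk the uses with an
// early-increment range to keep the iteration valid.
for (MachineOperand &UseOp : make_early_inc_range(MRI.use_operands(OldReg)))
  if (UseOp.getParent()->getParent() == EpilogBB)
    UseOp.setReg(NewReg);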
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
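The three entry points above are normally driven back to back once a modulo schedule has been found. A hedged sketch of that call sequence, assuming MF, MS, LIS and the InstrChanges map are supplied by the caller:
ModuloScheduleExpander MSE(MF, MS, LIS, std::move(InstrChanges));
MSE.expand();   // rewrite the loop into prolog, kernel and epilog blocks
MSE.cleanup();  // erase instructions that expansion left dead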
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
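An illustrative helper, not part of this file, that uses only the accessors above to count how many instructions land in each stage of a schedule named Schedule (an assumed variable):
SmallVector<unsigned, 8> PerStage(Schedule.getNumStages(), 0);
for (MachineInstr *MI : Schedule.getInstructions())
  if (int Stage = Schedule.getStage(MI); Stage >= 0)
    ++PerStage[Stage];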
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has a c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue, keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
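A hedged sketch of how this peeling-based expander is driven from the outside; MF, MS and LIS are assumed to be in scope, and the constructor shape shown here is an assumption rather than a guarantee:
PeelingModuloScheduleExpander PMSE(MF, MS, &LIS);
PMSE.expand();  // peels prologs/epilogs off the kernel instead of cloning stage by stage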
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:115
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
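A minimal sketch of the usual analyzeBranch / removeBranch / insertBranch sequence for redirecting a block's terminator; TII, MBB and NewTarget are assumed names, not identifiers from this file:
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
// analyzeBranch returns true when the terminator cannot be understood.
if (!TII->analyzeBranch(*MBB, TBB, FBB, Cond)) {
  TII->removeBranch(*MBB);
  if (Cond.empty())
    TII->insertUnconditionalBranch(*MBB, NewTarget, DebugLoc());
  else
    TII->insertBranch(*MBB, TBB, NewTarget, Cond, DebugLoc());
}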
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:95
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
constexpr double phi
Definition: MathExtras.h:61
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single-block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1841
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Incoming for a lane mask phi as a machine instruction; incoming register Reg and incoming block Block are...