LLVM 22.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
17#include "llvm/MC/MCContext.h"
18#include "llvm/Support/Debug.h"
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
26 "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28
30 for (MachineInstr *MI : ScheduledInstrs)
31 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
32}
33
34//===----------------------------------------------------------------------===//
35// ModuloScheduleExpander implementation
36//===----------------------------------------------------------------------===//
37
38/// Return the register values for the operands of a Phi instruction.
39/// This function assume the instruction is a Phi.
41 Register &InitVal, Register &LoopVal) {
42 assert(Phi.isPHI() && "Expecting a Phi.");
43
44 InitVal = Register();
45 LoopVal = Register();
46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47 if (Phi.getOperand(i + 1).getMBB() != Loop)
48 InitVal = Phi.getOperand(i).getReg();
49 else
50 LoopVal = Phi.getOperand(i).getReg();
51
52 assert(InitVal && LoopVal && "Unexpected Phi structure.");
53}
54
55/// Return the Phi register value that comes from the incoming block.
57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59 return Phi.getOperand(i).getReg();
60 return Register();
61}
62
63/// Return the Phi register value that comes the loop block.
65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67 return Phi.getOperand(i).getReg();
68 return Register();
69}
70
72 BB = Schedule.getLoop()->getTopBlock();
73 Preheader = *BB->pred_begin();
74 if (Preheader == BB)
75 Preheader = *std::next(BB->pred_begin());
76
77 // Iterate over the definitions in each instruction, and compute the
78 // stage difference for each use. Keep the maximum value.
79 for (MachineInstr *MI : Schedule.getInstructions()) {
80 int DefStage = Schedule.getStage(MI);
81 for (const MachineOperand &Op : MI->all_defs()) {
82 Register Reg = Op.getReg();
83 unsigned MaxDiff = 0;
84 bool PhiIsSwapped = false;
85 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
86 MachineInstr *UseMI = UseOp.getParent();
87 int UseStage = Schedule.getStage(UseMI);
88 unsigned Diff = 0;
89 if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
91 if (MI->isPHI()) {
92 if (isLoopCarried(*MI))
93 ++Diff;
94 else
95 PhiIsSwapped = true;
96 }
97 MaxDiff = std::max(Diff, MaxDiff);
98 }
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
100 }
101 }
102
103 generatePipelinedLoop();
104}
105
106void ModuloScheduleExpander::generatePipelinedLoop() {
107 LoopInfo = TII->analyzeLoopForPipelining(BB);
108 assert(LoopInfo && "Must be able to analyze loop!");
109
110 // Create a new basic block for the kernel and add it to the CFG.
112
113 unsigned MaxStageCount = Schedule.getNumStages() - 1;
114
115 // Remember the registers that are used in different stages. The index is
116 // the iteration, or stage, that the instruction is scheduled in. This is
117 // a map between register names in the original block and the names created
118 // in each stage of the pipelined loop.
119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120
121 // The renaming destination by Phis for the registers across stages.
122 // This map is updated during Phis generation to point to the most recent
123 // renaming destination.
124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125
126 InstrMapTy InstrMap;
127
129
130 // Generate the prolog instructions that set up the pipeline.
131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
132 MF.insert(BB->getIterator(), KernelBB);
133 LIS.insertMBBInMaps(KernelBB);
134
135 // Rearrange the instructions to generate the new, pipelined loop,
136 // and update register names as needed.
137 for (MachineInstr *CI : Schedule.getInstructions()) {
138 if (CI->isPHI())
139 continue;
140 unsigned StageNum = Schedule.getStage(CI);
141 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
143 KernelBB->push_back(NewMI);
144 LIS.InsertMachineInstrInMaps(*NewMI);
145 InstrMap[NewMI] = CI;
146 }
147
148 // Copy any terminator instructions to the new kernel, and update
149 // names as needed.
150 for (MachineInstr &MI : BB->terminators()) {
151 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
152 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
153 KernelBB->push_back(NewMI);
154 LIS.InsertMachineInstrInMaps(*NewMI);
155 InstrMap[NewMI] = &MI;
156 }
157
158 NewKernel = KernelBB;
159 KernelBB->transferSuccessors(BB);
160 KernelBB->replaceSuccessor(BB, KernelBB);
161
162 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
163 InstrMap, MaxStageCount, MaxStageCount, false);
164 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
165 InstrMap, MaxStageCount, MaxStageCount, false);
166
167 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
168
169 SmallVector<MachineBasicBlock *, 4> EpilogBBs;
170 // Generate the epilog instructions to complete the pipeline.
171 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
172 PrologBBs);
173
174 // We need this step because the register allocation doesn't handle some
175 // situations well, so we insert copies to help out.
176 splitLifetimes(KernelBB, EpilogBBs);
177
178 // Remove dead instructions due to loop induction variables.
179 removeDeadInstructions(KernelBB, EpilogBBs);
180
181 // Add branches between prolog and epilog blocks.
182 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
183
184 delete[] VRMap;
185 delete[] VRMapPhi;
186}
187
189 // Remove the original loop since it's no longer referenced.
190 for (auto &I : *BB)
191 LIS.RemoveMachineInstrFromMaps(I);
192 BB->clear();
193 BB->eraseFromParent();
194}
195
196/// Generate the pipeline prolog code.
197void ModuloScheduleExpander::generateProlog(unsigned LastStage,
198 MachineBasicBlock *KernelBB,
199 ValueMapTy *VRMap,
200 MBBVectorTy &PrologBBs) {
201 MachineBasicBlock *PredBB = Preheader;
202 InstrMapTy InstrMap;
203
204 // Generate a basic block for each stage, not including the last stage,
205 // which will be generated in the kernel. Each basic block may contain
206 // instructions from multiple stages/iterations.
207 for (unsigned i = 0; i < LastStage; ++i) {
208 // Create and insert the prolog basic block prior to the original loop
209 // basic block. The original loop is removed later.
211 PrologBBs.push_back(NewBB);
212 MF.insert(BB->getIterator(), NewBB);
213 NewBB->transferSuccessors(PredBB);
214 PredBB->addSuccessor(NewBB);
215 PredBB = NewBB;
216 LIS.insertMBBInMaps(NewBB);
217
218 // Generate instructions for each appropriate stage. Process instructions
219 // in original program order.
220 for (int StageNum = i; StageNum >= 0; --StageNum) {
222 BBE = BB->getFirstTerminator();
223 BBI != BBE; ++BBI) {
224 if (Schedule.getStage(&*BBI) == StageNum) {
225 if (BBI->isPHI())
226 continue;
227 MachineInstr *NewMI =
228 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
229 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
230 NewBB->push_back(NewMI);
231 LIS.InsertMachineInstrInMaps(*NewMI);
232 InstrMap[NewMI] = &*BBI;
233 }
234 }
235 }
236 rewritePhiValues(NewBB, i, VRMap, InstrMap);
237 LLVM_DEBUG({
238 dbgs() << "prolog:\n";
239 NewBB->dump();
240 });
241 }
242
243 PredBB->replaceSuccessor(BB, KernelBB);
244
245 // Check if we need to remove the branch from the preheader to the original
246 // loop, and replace it with a branch to the new loop.
247 unsigned numBranches = TII->removeBranch(*Preheader);
248 if (numBranches) {
250 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
251 }
252}
253
254/// Generate the pipeline epilog code. The epilog code finishes the iterations
255/// that were started in either the prolog or the kernel. We create a basic
256/// block for each stage that needs to complete.
257void ModuloScheduleExpander::generateEpilog(
258 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
259 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
260 MBBVectorTy &PrologBBs) {
261 // We need to change the branch from the kernel to the first epilog block, so
262 // this call to analyze branch uses the kernel rather than the original BB.
263 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
265 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
266 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
267 if (checkBranch)
268 return;
269
270 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
271 if (*LoopExitI == KernelBB)
272 ++LoopExitI;
273 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
274 MachineBasicBlock *LoopExitBB = *LoopExitI;
275
276 MachineBasicBlock *PredBB = KernelBB;
277 MachineBasicBlock *EpilogStart = LoopExitBB;
278 InstrMapTy InstrMap;
279
280 // Generate a basic block for each stage, not including the last stage,
281 // which was generated for the kernel. Each basic block may contain
282 // instructions from multiple stages/iterations.
283 int EpilogStage = LastStage + 1;
284 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
285 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
286 EpilogBBs.push_back(NewBB);
287 MF.insert(BB->getIterator(), NewBB);
288
289 PredBB->replaceSuccessor(LoopExitBB, NewBB);
290 NewBB->addSuccessor(LoopExitBB);
291 LIS.insertMBBInMaps(NewBB);
292
293 if (EpilogStart == LoopExitBB)
294 EpilogStart = NewBB;
295
296 // Add instructions to the epilog depending on the current block.
297 // Process instructions in original program order.
298 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
299 for (auto &BBI : *BB) {
300 if (BBI.isPHI())
301 continue;
302 MachineInstr *In = &BBI;
303 if ((unsigned)Schedule.getStage(In) == StageNum) {
304 // Instructions with memoperands in the epilog are updated with
305 // conservative values.
306 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
307 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
308 NewBB->push_back(NewMI);
309 LIS.InsertMachineInstrInMaps(*NewMI);
310 InstrMap[NewMI] = In;
311 }
312 }
313 }
314 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
315 InstrMap, LastStage, EpilogStage, i == 1);
316 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
317 InstrMap, LastStage, EpilogStage, i == 1);
318 PredBB = NewBB;
319
320 LLVM_DEBUG({
321 dbgs() << "epilog:\n";
322 NewBB->dump();
323 });
324 }
325
326 // Fix any Phi nodes in the loop exit block.
327 LoopExitBB->replacePhiUsesWith(BB, PredBB);
328
329 // Create a branch to the new epilog from the kernel.
330 // Remove the original branch and add a new branch to the epilog.
331 TII->removeBranch(*KernelBB);
332 assert((OrigBB == TBB || OrigBB == FBB) &&
333 "Unable to determine looping branch direction");
334 if (OrigBB != TBB)
335 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
336 else
337 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
338 // Add a branch to the loop exit.
339 if (EpilogBBs.size() > 0) {
340 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
342 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
343 }
344}
345
346/// Replace all uses of FromReg that appear outside the specified
347/// basic block with ToReg.
348static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
351 for (MachineOperand &O :
352 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
353 if (O.getParent()->getParent() != MBB)
354 O.setReg(ToReg);
355}
356
357/// Return true if the register has a use that occurs outside the
358/// specified loop.
361 for (const MachineOperand &MO : MRI.use_operands(Reg))
362 if (MO.getParent()->getParent() != BB)
363 return true;
364 return false;
365}
366
367/// Generate Phis for the specific block in the generated pipelined code.
368/// This function looks at the Phis from the original code to guide the
369/// creation of new Phis.
370void ModuloScheduleExpander::generateExistingPhis(
372 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
373 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
374 // Compute the stage number for the initial value of the Phi, which
375 // comes from the prolog. The prolog to use depends on to which kernel/
376 // epilog that we're adding the Phi.
377 unsigned PrologStage = 0;
378 unsigned PrevStage = 0;
379 bool InKernel = (LastStageNum == CurStageNum);
380 if (InKernel) {
381 PrologStage = LastStageNum - 1;
382 PrevStage = CurStageNum;
383 } else {
384 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
385 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
386 }
387
388 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
389 BBE = BB->getFirstNonPHI();
390 BBI != BBE; ++BBI) {
391 Register Def = BBI->getOperand(0).getReg();
392
393 Register InitVal;
394 Register LoopVal;
395 getPhiRegs(*BBI, BB, InitVal, LoopVal);
396
397 Register PhiOp1;
398 // The Phi value from the loop body typically is defined in the loop, but
399 // not always. So, we need to check if the value is defined in the loop.
400 Register PhiOp2 = LoopVal;
401 if (auto It = VRMap[LastStageNum].find(LoopVal);
402 It != VRMap[LastStageNum].end())
403 PhiOp2 = It->second;
404
405 int StageScheduled = Schedule.getStage(&*BBI);
406 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
407 unsigned NumStages = getStagesForReg(Def, CurStageNum);
408 if (NumStages == 0) {
409 // We don't need to generate a Phi anymore, but we need to rename any uses
410 // of the Phi value.
411 Register NewReg = VRMap[PrevStage][LoopVal];
412 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
413 InitVal, NewReg);
414 auto It = VRMap[CurStageNum].find(LoopVal);
415 if (It != VRMap[CurStageNum].end()) {
416 Register Reg = It->second;
417 VRMap[CurStageNum][Def] = Reg;
418 }
419 }
420 // Adjust the number of Phis needed depending on the number of prologs left,
421 // and the distance from where the Phi is first scheduled. The number of
422 // Phis cannot exceed the number of prolog stages. Each stage can
423 // potentially define two values.
424 unsigned MaxPhis = PrologStage + 2;
425 if (!InKernel && (int)PrologStage <= LoopValStage)
426 MaxPhis = std::max((int)MaxPhis - LoopValStage, 1);
427 unsigned NumPhis = std::min(NumStages, MaxPhis);
428
429 Register NewReg;
430 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
431 // In the epilog, we may need to look back one stage to get the correct
432 // Phi name, because the epilog and prolog blocks execute the same stage.
433 // The correct name is from the previous block only when the Phi has
434 // been completely scheduled prior to the epilog, and Phi value is not
435 // needed in multiple stages.
436 int StageDiff = 0;
437 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
438 NumPhis == 1)
439 StageDiff = 1;
440 // Adjust the computations below when the phi and the loop definition
441 // are scheduled in different stages.
442 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
443 StageDiff = StageScheduled - LoopValStage;
444 for (unsigned np = 0; np < NumPhis; ++np) {
445 // If the Phi hasn't been scheduled, then use the initial Phi operand
446 // value. Otherwise, use the scheduled version of the instruction. This
447 // is a little complicated when a Phi references another Phi.
448 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
449 PhiOp1 = InitVal;
450 // Check if the Phi has already been scheduled in a prolog stage.
451 else if (PrologStage >= AccessStage + StageDiff + np &&
452 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
453 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
454 // Check if the Phi has already been scheduled, but the loop instruction
455 // is either another Phi, or doesn't occur in the loop.
456 else if (PrologStage >= AccessStage + StageDiff + np) {
457 // If the Phi references another Phi, we need to examine the other
458 // Phi to get the correct value.
459 PhiOp1 = LoopVal;
460 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
461 int Indirects = 1;
462 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
463 int PhiStage = Schedule.getStage(InstOp1);
464 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
465 PhiOp1 = getInitPhiReg(*InstOp1, BB);
466 else
467 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
468 InstOp1 = MRI.getVRegDef(PhiOp1);
469 int PhiOpStage = Schedule.getStage(InstOp1);
470 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
471 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np) {
472 auto &M = VRMap[PrologStage - StageAdj - Indirects - np];
473 if (auto It = M.find(PhiOp1); It != M.end()) {
474 PhiOp1 = It->second;
475 break;
476 }
477 }
478 ++Indirects;
479 }
480 } else
481 PhiOp1 = InitVal;
482 // If this references a generated Phi in the kernel, get the Phi operand
483 // from the incoming block.
484 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
485 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
486 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
487
488 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
489 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
490 // In the epilog, a map lookup is needed to get the value from the kernel,
491 // or previous epilog block. How is does this depends on if the
492 // instruction is scheduled in the previous block.
493 if (!InKernel) {
494 int StageDiffAdj = 0;
495 if (LoopValStage != -1 && StageScheduled > LoopValStage)
496 StageDiffAdj = StageScheduled - LoopValStage;
497 // Use the loop value defined in the kernel, unless the kernel
498 // contains the last definition of the Phi.
499 if (np == 0 && PrevStage == LastStageNum &&
500 (StageScheduled != 0 || LoopValStage != 0) &&
501 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
502 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
503 // Use the value defined by the Phi. We add one because we switch
504 // from looking at the loop value to the Phi definition.
505 else if (np > 0 && PrevStage == LastStageNum &&
506 VRMap[PrevStage - np + 1].count(Def))
507 PhiOp2 = VRMap[PrevStage - np + 1][Def];
508 // Use the loop value defined in the kernel.
509 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
510 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
511 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
512 // Use the value defined by the Phi, unless we're generating the first
513 // epilog and the Phi refers to a Phi in a different stage.
514 else if (VRMap[PrevStage - np].count(Def) &&
515 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
516 (LoopValStage == StageScheduled)))
517 PhiOp2 = VRMap[PrevStage - np][Def];
518 }
519
520 // Check if we can reuse an existing Phi. This occurs when a Phi
521 // references another Phi, and the other Phi is scheduled in an
522 // earlier stage. We can try to reuse an existing Phi up until the last
523 // stage of the current Phi.
524 if (LoopDefIsPhi) {
525 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
526 int LVNumStages = getStagesForPhi(LoopVal);
527 int StageDiff = (StageScheduled - LoopValStage);
528 LVNumStages -= StageDiff;
529 // Make sure the loop value Phi has been processed already.
530 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
531 NewReg = PhiOp2;
532 unsigned ReuseStage = CurStageNum;
533 if (isLoopCarried(*PhiInst))
534 ReuseStage -= LVNumStages;
535 // Check if the Phi to reuse has been generated yet. If not, then
536 // there is nothing to reuse.
537 if (VRMap[ReuseStage - np].count(LoopVal)) {
538 NewReg = VRMap[ReuseStage - np][LoopVal];
539
540 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
541 Def, NewReg);
542 // Update the map with the new Phi name.
543 VRMap[CurStageNum - np][Def] = NewReg;
544 PhiOp2 = NewReg;
545 if (VRMap[LastStageNum - np - 1].count(LoopVal))
546 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
547
548 if (IsLast && np == NumPhis - 1)
549 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
550 continue;
551 }
552 }
553 }
554 if (InKernel && StageDiff > 0 &&
555 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
556 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
557 }
558
559 const TargetRegisterClass *RC = MRI.getRegClass(Def);
560 NewReg = MRI.createVirtualRegister(RC);
561
562 MachineInstrBuilder NewPhi =
563 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
564 TII->get(TargetOpcode::PHI), NewReg);
565 NewPhi.addReg(PhiOp1).addMBB(BB1);
566 NewPhi.addReg(PhiOp2).addMBB(BB2);
567 LIS.InsertMachineInstrInMaps(*NewPhi);
568 if (np == 0)
569 InstrMap[NewPhi] = &*BBI;
570
571 // We define the Phis after creating the new pipelined code, so
572 // we need to rename the Phi values in scheduled instructions.
573
574 Register PrevReg;
575 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
576 PrevReg = VRMap[PrevStage - np][LoopVal];
577 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
578 NewReg, PrevReg);
579 // If the Phi has been scheduled, use the new name for rewriting.
580 if (VRMap[CurStageNum - np].count(Def)) {
581 Register R = VRMap[CurStageNum - np][Def];
582 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
583 NewReg);
584 }
585
586 // Check if we need to rename any uses that occurs after the loop. The
587 // register to replace depends on whether the Phi is scheduled in the
588 // epilog.
589 if (IsLast && np == NumPhis - 1)
590 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
591
592 // In the kernel, a dependent Phi uses the value from this Phi.
593 if (InKernel)
594 PhiOp2 = NewReg;
595
596 // Update the map with the new Phi name.
597 VRMap[CurStageNum - np][Def] = NewReg;
598 }
599
600 while (NumPhis++ < NumStages) {
601 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
602 NewReg, 0);
603 }
604
605 // Check if we need to rename a Phi that has been eliminated due to
606 // scheduling.
607 if (NumStages == 0 && IsLast) {
608 auto &CurStageMap = VRMap[CurStageNum];
609 auto It = CurStageMap.find(LoopVal);
610 if (It != CurStageMap.end())
611 replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
612 }
613 }
614}
615
616/// Generate Phis for the specified block in the generated pipelined code.
617/// These are new Phis needed because the definition is scheduled after the
618/// use in the pipelined sequence.
619void ModuloScheduleExpander::generatePhis(
621 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
622 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
623 bool IsLast) {
624 // Compute the stage number that contains the initial Phi value, and
625 // the Phi from the previous stage.
626 unsigned PrologStage = 0;
627 unsigned PrevStage = 0;
628 unsigned StageDiff = CurStageNum - LastStageNum;
629 bool InKernel = (StageDiff == 0);
630 if (InKernel) {
631 PrologStage = LastStageNum - 1;
632 PrevStage = CurStageNum;
633 } else {
634 PrologStage = LastStageNum - StageDiff;
635 PrevStage = LastStageNum + StageDiff - 1;
636 }
637
638 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
639 BBE = BB->instr_end();
640 BBI != BBE; ++BBI) {
641 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
642 MachineOperand &MO = BBI->getOperand(i);
643 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
644 continue;
645
646 int StageScheduled = Schedule.getStage(&*BBI);
647 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
648 Register Def = MO.getReg();
649 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
650 // An instruction scheduled in stage 0 and is used after the loop
651 // requires a phi in the epilog for the last definition from either
652 // the kernel or prolog.
653 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
654 hasUseAfterLoop(Def, BB, MRI))
655 NumPhis = 1;
656 if (!InKernel && (unsigned)StageScheduled > PrologStage)
657 continue;
658
659 Register PhiOp2;
660 if (InKernel) {
661 PhiOp2 = VRMap[PrevStage][Def];
662 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
663 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
664 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
665 }
666 // The number of Phis can't exceed the number of prolog stages. The
667 // prolog stage number is zero based.
668 if (NumPhis > PrologStage + 1 - StageScheduled)
669 NumPhis = PrologStage + 1 - StageScheduled;
670 for (unsigned np = 0; np < NumPhis; ++np) {
671 // Example for
672 // Org:
673 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
674 //
675 // Prolog0 (Stage0):
676 // %Clone0 = ...
677 // Prolog1 (Stage1):
678 // %Clone1 = ...
679 // Kernel (Stage2):
680 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
681 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
682 // %Clone2 = ...
683 // Epilog0 (Stage3):
684 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
685 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
686 // Epilog1 (Stage4):
687 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
688 //
689 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
690 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
691 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
692
693 Register PhiOp1 = VRMap[PrologStage][Def];
694 if (np <= PrologStage)
695 PhiOp1 = VRMap[PrologStage - np][Def];
696 if (!InKernel) {
697 if (PrevStage == LastStageNum && np == 0)
698 PhiOp2 = VRMap[LastStageNum][Def];
699 else
700 PhiOp2 = VRMapPhi[PrevStage - np][Def];
701 }
702
703 const TargetRegisterClass *RC = MRI.getRegClass(Def);
704 Register NewReg = MRI.createVirtualRegister(RC);
705
706 MachineInstrBuilder NewPhi =
707 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
708 TII->get(TargetOpcode::PHI), NewReg);
709 NewPhi.addReg(PhiOp1).addMBB(BB1);
710 NewPhi.addReg(PhiOp2).addMBB(BB2);
711 LIS.InsertMachineInstrInMaps(*NewPhi);
712 if (np == 0)
713 InstrMap[NewPhi] = &*BBI;
714
715 // Rewrite uses and update the map. The actions depend upon whether
716 // we generating code for the kernel or epilog blocks.
717 if (InKernel) {
718 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
719 NewReg);
720 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
721 NewReg);
722
723 PhiOp2 = NewReg;
724 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
725 } else {
726 VRMapPhi[CurStageNum - np][Def] = NewReg;
727 if (np == NumPhis - 1)
728 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
729 NewReg);
730 }
731 if (IsLast && np == NumPhis - 1)
732 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
733 }
734 }
735 }
736}
737
738/// Remove instructions that generate values with no uses.
739/// Typically, these are induction variable operations that generate values
740/// used in the loop itself. A dead instruction has a definition with
741/// no uses, or uses that occur in the original loop only.
742void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
743 MBBVectorTy &EpilogBBs) {
744 // For each epilog block, check that the value defined by each instruction
745 // is used. If not, delete it.
746 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
748 ME = MBB->instr_rend();
749 MI != ME;) {
750 // From DeadMachineInstructionElem. Don't delete inline assembly.
751 if (MI->isInlineAsm()) {
752 ++MI;
753 continue;
754 }
755 bool SawStore = false;
756 // Check if it's safe to remove the instruction due to side effects.
757 // We can, and want to, remove Phis here.
758 if (!MI->isSafeToMove(SawStore) && !MI->isPHI()) {
759 ++MI;
760 continue;
761 }
762 bool used = true;
763 for (const MachineOperand &MO : MI->all_defs()) {
764 Register reg = MO.getReg();
765 // Assume physical registers are used, unless they are marked dead.
766 if (reg.isPhysical()) {
767 used = !MO.isDead();
768 if (used)
769 break;
770 continue;
771 }
772 unsigned realUses = 0;
773 for (const MachineOperand &U : MRI.use_operands(reg)) {
774 // Check if there are any uses that occur only in the original
775 // loop. If so, that's not a real use.
776 if (U.getParent()->getParent() != BB) {
777 realUses++;
778 used = true;
779 break;
780 }
781 }
782 if (realUses > 0)
783 break;
784 used = false;
785 }
786 if (!used) {
787 LIS.RemoveMachineInstrFromMaps(*MI);
788 MI++->eraseFromParent();
789 continue;
790 }
791 ++MI;
792 }
793 // In the kernel block, check if we can remove a Phi that generates a value
794 // used in an instruction removed in the epilog block.
795 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
796 Register reg = MI.getOperand(0).getReg();
797 if (MRI.use_begin(reg) == MRI.use_end()) {
798 LIS.RemoveMachineInstrFromMaps(MI);
799 MI.eraseFromParent();
800 }
801 }
802}
803
804/// For loop carried definitions, we split the lifetime of a virtual register
805/// that has uses past the definition in the next iteration. A copy with a new
806/// virtual register is inserted before the definition, which helps with
807/// generating a better register assignment.
808///
809/// v1 = phi(a, v2) v1 = phi(a, v2)
810/// v2 = phi(b, v3) v2 = phi(b, v3)
811/// v3 = .. v4 = copy v1
812/// .. = V1 v3 = ..
813/// .. = v4
814void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
815 MBBVectorTy &EpilogBBs) {
816 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
817 for (auto &PHI : KernelBB->phis()) {
818 Register Def = PHI.getOperand(0).getReg();
819 // Check for any Phi definition that used as an operand of another Phi
820 // in the same block.
821 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
822 E = MRI.use_instr_end();
823 I != E; ++I) {
824 if (I->isPHI() && I->getParent() == KernelBB) {
825 // Get the loop carried definition.
826 Register LCDef = getLoopPhiReg(PHI, KernelBB);
827 if (!LCDef)
828 continue;
829 MachineInstr *MI = MRI.getVRegDef(LCDef);
830 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
831 continue;
832 // Search through the rest of the block looking for uses of the Phi
833 // definition. If one occurs, then split the lifetime.
834 Register SplitReg;
836 KernelBB->instr_end()))
837 if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
838 // We split the lifetime when we find the first use.
839 if (!SplitReg) {
840 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
841 MachineInstr *newCopy =
842 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
843 TII->get(TargetOpcode::COPY), SplitReg)
844 .addReg(Def);
845 LIS.InsertMachineInstrInMaps(*newCopy);
846 }
847 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
848 }
849 if (!SplitReg)
850 continue;
851 // Search through each of the epilog blocks for any uses to be renamed.
852 for (auto &Epilog : EpilogBBs)
853 for (auto &I : *Epilog)
854 if (I.readsRegister(Def, /*TRI=*/nullptr))
855 I.substituteRegister(Def, SplitReg, 0, *TRI);
856 break;
857 }
858 }
859 }
860}
861
862/// Remove the incoming block from the Phis in a basic block.
864 for (MachineInstr &MI : *BB) {
865 if (!MI.isPHI())
866 break;
867 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
868 if (MI.getOperand(i + 1).getMBB() == Incoming) {
869 MI.removeOperand(i + 1);
870 MI.removeOperand(i);
871 break;
872 }
873 }
874}
875
876/// Create branches from each prolog basic block to the appropriate epilog
877/// block. These edges are needed if the loop ends before reaching the
878/// kernel.
879void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
880 MBBVectorTy &PrologBBs,
881 MachineBasicBlock *KernelBB,
882 MBBVectorTy &EpilogBBs,
883 ValueMapTy *VRMap) {
884 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
885 MachineBasicBlock *LastPro = KernelBB;
886 MachineBasicBlock *LastEpi = KernelBB;
887
888 // Start from the blocks connected to the kernel and work "out"
889 // to the first prolog and the last epilog blocks.
890 unsigned MaxIter = PrologBBs.size() - 1;
891 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
892 // Add branches to the prolog that go to the corresponding
893 // epilog, and the fall-thru prolog/kernel block.
894 MachineBasicBlock *Prolog = PrologBBs[j];
895 MachineBasicBlock *Epilog = EpilogBBs[i];
896
898 std::optional<bool> StaticallyGreater =
899 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
900 unsigned numAdded = 0;
901 if (!StaticallyGreater) {
902 Prolog->addSuccessor(Epilog);
903 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
904 } else if (*StaticallyGreater == false) {
905 Prolog->addSuccessor(Epilog);
906 Prolog->removeSuccessor(LastPro);
907 LastEpi->removeSuccessor(Epilog);
908 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
909 removePhis(Epilog, LastEpi);
910 // Remove the blocks that are no longer referenced.
911 if (LastPro != LastEpi) {
912 for (auto &MI : *LastEpi)
913 LIS.RemoveMachineInstrFromMaps(MI);
914 LastEpi->clear();
915 LastEpi->eraseFromParent();
916 }
917 if (LastPro == KernelBB) {
918 LoopInfo->disposed(&LIS);
919 NewKernel = nullptr;
920 }
921 for (auto &MI : *LastPro)
922 LIS.RemoveMachineInstrFromMaps(MI);
923 LastPro->clear();
924 LastPro->eraseFromParent();
925 } else {
926 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
928 }
929 LastPro = Prolog;
930 LastEpi = Epilog;
932 E = Prolog->instr_rend();
933 I != E && numAdded > 0; ++I, --numAdded)
934 updateInstruction(&*I, false, j, 0, VRMap);
935 }
936
937 if (NewKernel) {
938 LoopInfo->setPreheader(PrologBBs[MaxIter]);
939 LoopInfo->adjustTripCount(-(MaxIter + 1));
940 }
941}
942
943/// Return true if we can compute the amount the instruction changes
944/// during each iteration. Set Delta to the amount of the change.
945bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
946 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
947 const MachineOperand *BaseOp;
948 int64_t Offset;
949 bool OffsetIsScalable;
950 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
951 return false;
952
953 // FIXME: This algorithm assumes instructions have fixed-size offsets.
954 if (OffsetIsScalable)
955 return false;
956
957 if (!BaseOp->isReg())
958 return false;
959
960 Register BaseReg = BaseOp->getReg();
961
962 MachineRegisterInfo &MRI = MF.getRegInfo();
963 // Check if there is a Phi. If so, get the definition in the loop.
964 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
965 if (BaseDef && BaseDef->isPHI()) {
966 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
967 BaseDef = MRI.getVRegDef(BaseReg);
968 }
969 if (!BaseDef)
970 return false;
971
972 int D = 0;
973 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
974 return false;
975
976 Delta = D;
977 return true;
978}
979
980/// Update the memory operand with a new offset when the pipeliner
981/// generates a new copy of the instruction that refers to a
982/// different memory location.
983void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
984 MachineInstr &OldMI,
985 unsigned Num) {
986 if (Num == 0)
987 return;
988 // If the instruction has memory operands, then adjust the offset
989 // when the instruction appears in different stages.
990 if (NewMI.memoperands_empty())
991 return;
993 for (MachineMemOperand *MMO : NewMI.memoperands()) {
994 // TODO: Figure out whether isAtomic is really necessary (see D57601).
995 if (MMO->isVolatile() || MMO->isAtomic() ||
996 (MMO->isInvariant() && MMO->isDereferenceable()) ||
997 (!MMO->getValue())) {
998 NewMMOs.push_back(MMO);
999 continue;
1000 }
1001 unsigned Delta;
1002 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
1003 int64_t AdjOffset = Delta * Num;
1004 NewMMOs.push_back(
1005 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
1006 } else {
1007 NewMMOs.push_back(MF.getMachineMemOperand(
1009 }
1010 }
1011 NewMI.setMemRefs(MF, NewMMOs);
1012}
1013
1014/// Clone the instruction for the new pipelined loop and update the
1015/// memory operands, if needed.
1016MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
1017 unsigned CurStageNum,
1018 unsigned InstStageNum) {
1019 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1020 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1021 return NewMI;
1022}
1023
1024/// Clone the instruction for the new pipelined loop. If needed, this
1025/// function updates the instruction using the values saved in the
1026/// InstrChanges structure.
1027MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1028 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1029 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1030 auto It = InstrChanges.find(OldMI);
1031 if (It != InstrChanges.end()) {
1032 std::pair<Register, int64_t> RegAndOffset = It->second;
1033 unsigned BasePos, OffsetPos;
1034 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1035 return nullptr;
1036 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1037 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1038 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1039 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1040 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1041 }
1042 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1043 return NewMI;
1044}
1045
1046/// Update the machine instruction with new virtual registers. This
1047/// function may change the definitions and/or uses.
1048void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1049 bool LastDef,
1050 unsigned CurStageNum,
1051 unsigned InstrStageNum,
1052 ValueMapTy *VRMap) {
1053 for (MachineOperand &MO : NewMI->operands()) {
1054 if (!MO.isReg() || !MO.getReg().isVirtual())
1055 continue;
1056 Register reg = MO.getReg();
1057 if (MO.isDef()) {
1058 // Create a new virtual register for the definition.
1059 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1060 Register NewReg = MRI.createVirtualRegister(RC);
1061 MO.setReg(NewReg);
1062 VRMap[CurStageNum][reg] = NewReg;
1063 if (LastDef)
1064 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
1065 } else if (MO.isUse()) {
1066 MachineInstr *Def = MRI.getVRegDef(reg);
1067 // Compute the stage that contains the last definition for instruction.
1068 int DefStageNum = Schedule.getStage(Def);
1069 unsigned StageNum = CurStageNum;
1070 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1071 // Compute the difference in stages between the defintion and the use.
1072 unsigned StageDiff = (InstrStageNum - DefStageNum);
1073 // Make an adjustment to get the last definition.
1074 StageNum -= StageDiff;
1075 }
1076 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())
1077 MO.setReg(It->second);
1078 }
1079 }
1080}
1081
1082/// Return the instruction in the loop that defines the register.
1083/// If the definition is a Phi, then follow the Phi operand to
1084/// the instruction in the loop.
1085MachineInstr *ModuloScheduleExpander::findDefInLoop(Register Reg) {
1086 SmallPtrSet<MachineInstr *, 8> Visited;
1087 MachineInstr *Def = MRI.getVRegDef(Reg);
1088 while (Def->isPHI()) {
1089 if (!Visited.insert(Def).second)
1090 break;
1091 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1092 if (Def->getOperand(i + 1).getMBB() == BB) {
1093 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1094 break;
1095 }
1096 }
1097 return Def;
1098}
1099
1100/// Return the new name for the value from the previous stage.
1101Register ModuloScheduleExpander::getPrevMapVal(
1102 unsigned StageNum, unsigned PhiStage, Register LoopVal, unsigned LoopStage,
1103 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1104 Register PrevVal;
1105 if (StageNum > PhiStage) {
1106 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1107 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1108 // The name is defined in the previous stage.
1109 PrevVal = VRMap[StageNum - 1][LoopVal];
1110 else if (VRMap[StageNum].count(LoopVal))
1111 // The previous name is defined in the current stage when the instruction
1112 // order is swapped.
1113 PrevVal = VRMap[StageNum][LoopVal];
1114 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1115 // The loop value hasn't yet been scheduled.
1116 PrevVal = LoopVal;
1117 else if (StageNum == PhiStage + 1)
1118 // The loop value is another phi, which has not been scheduled.
1119 PrevVal = getInitPhiReg(*LoopInst, BB);
1120 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1121 // The loop value is another phi, which has been scheduled.
1122 PrevVal =
1123 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1124 LoopStage, VRMap, BB);
1125 }
1126 return PrevVal;
1127}
1128
1129/// Rewrite the Phi values in the specified block to use the mappings
1130/// from the initial operand. Once the Phi is scheduled, we switch
1131/// to using the loop value instead of the Phi value, so those names
1132/// do not need to be rewritten.
1133void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1134 unsigned StageNum,
1135 ValueMapTy *VRMap,
1136 InstrMapTy &InstrMap) {
1137 for (auto &PHI : BB->phis()) {
1138 Register InitVal;
1139 Register LoopVal;
1140 getPhiRegs(PHI, BB, InitVal, LoopVal);
1141 Register PhiDef = PHI.getOperand(0).getReg();
1142
1143 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1144 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1145 unsigned NumPhis = getStagesForPhi(PhiDef);
1146 if (NumPhis > StageNum)
1147 NumPhis = StageNum;
1148 for (unsigned np = 0; np <= NumPhis; ++np) {
1149 Register NewVal =
1150 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1151 if (!NewVal)
1152 NewVal = InitVal;
1153 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1154 NewVal);
1155 }
1156 }
1157}
1158
1159/// Rewrite a previously scheduled instruction to use the register value
1160/// from the new instruction. Make sure the instruction occurs in the
1161/// basic block, and we don't change the uses in the new instruction.
1162void ModuloScheduleExpander::rewriteScheduledInstr(
1163 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1164 unsigned PhiNum, MachineInstr *Phi, Register OldReg, Register NewReg,
1165 Register PrevReg) {
1166 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1167 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1168 // Rewrite uses that have been scheduled already to use the new
1169 // Phi register.
1170 for (MachineOperand &UseOp :
1171 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1172 MachineInstr *UseMI = UseOp.getParent();
1173 if (UseMI->getParent() != BB)
1174 continue;
1175 if (UseMI->isPHI()) {
1176 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1177 continue;
1178 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1179 continue;
1180 }
1181 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1182 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1183 MachineInstr *OrigMI = OrigInstr->second;
1184 int StageSched = Schedule.getStage(OrigMI);
1185 int CycleSched = Schedule.getCycle(OrigMI);
1186 Register ReplaceReg;
1187 // This is the stage for the scheduled instruction.
1188 if (StagePhi == StageSched && Phi->isPHI()) {
1189 int CyclePhi = Schedule.getCycle(Phi);
1190 if (PrevReg && InProlog)
1191 ReplaceReg = PrevReg;
1192 else if (PrevReg && !isLoopCarried(*Phi) &&
1193 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1194 ReplaceReg = PrevReg;
1195 else
1196 ReplaceReg = NewReg;
1197 }
1198 // The scheduled instruction occurs before the scheduled Phi, and the
1199 // Phi is not loop carried.
1200 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1201 ReplaceReg = NewReg;
1202 if (StagePhi > StageSched && Phi->isPHI())
1203 ReplaceReg = NewReg;
1204 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1205 ReplaceReg = NewReg;
1206 if (ReplaceReg) {
1207 const TargetRegisterClass *NRC =
1208 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1209 if (NRC)
1210 UseOp.setReg(ReplaceReg);
1211 else {
1212 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1213 MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
1214 TII->get(TargetOpcode::COPY), SplitReg)
1215 .addReg(ReplaceReg);
1216 UseOp.setReg(SplitReg);
1217 LIS.InsertMachineInstrInMaps(*newCopy);
1218 }
1219 }
1220 }
1221}
1222
1223bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1224 if (!Phi.isPHI())
1225 return false;
1226 int DefCycle = Schedule.getCycle(&Phi);
1227 int DefStage = Schedule.getStage(&Phi);
1228
1229 Register InitVal;
1230 Register LoopVal;
1231 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1232 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1233 if (!Use || Use->isPHI())
1234 return true;
1235 int LoopCycle = Schedule.getCycle(Use);
1236 int LoopStage = Schedule.getStage(Use);
1237 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1238}
1239
1240//===----------------------------------------------------------------------===//
1241// PeelingModuloScheduleExpander implementation
1242//===----------------------------------------------------------------------===//
1243// This is a reimplementation of ModuloScheduleExpander that works by creating
1244// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1245//===----------------------------------------------------------------------===//
1246
1247namespace {
1248// Remove any dead phis in MBB. Dead phis either have only one block as input
1249// (in which case they are the identity) or have no uses.
1250void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1251 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1252 bool Changed = true;
1253 while (Changed) {
1254 Changed = false;
1256 assert(MI.isPHI());
1257 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1258 if (LIS)
1260 MI.eraseFromParent();
1261 Changed = true;
1262 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1263 const TargetRegisterClass *ConstrainRegClass =
1264 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1265 MRI.getRegClass(MI.getOperand(0).getReg()));
1266 assert(ConstrainRegClass &&
1267 "Expected a valid constrained register class!");
1268 (void)ConstrainRegClass;
1269 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1270 MI.getOperand(1).getReg());
1271 if (LIS)
1273 MI.eraseFromParent();
1274 Changed = true;
1275 }
1276 }
1277 }
1278}
1279
1280/// Rewrites the kernel block in-place to adhere to the given schedule.
1281/// KernelRewriter holds all of the state required to perform the rewriting.
1282class KernelRewriter {
1283 ModuloSchedule &S;
1284 MachineBasicBlock *BB;
1285 MachineBasicBlock *PreheaderBB, *ExitBB;
1286 MachineRegisterInfo &MRI;
1287 const TargetInstrInfo *TII;
1288 LiveIntervals *LIS;
1289
1290 // Map from register class to canonical undef register for that class.
1291 DenseMap<const TargetRegisterClass *, Register> Undefs;
1292 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1293 // this map is only used when InitReg is non-undef.
1294 DenseMap<std::pair<Register, Register>, Register> Phis;
1295 // Map from LoopReg to phi register where the InitReg is undef.
1296 DenseMap<Register, Register> UndefPhis;
1297
1298 // Reg is used by MI. Return the new register MI should use to adhere to the
1299 // schedule. Insert phis as necessary.
1300 Register remapUse(Register Reg, MachineInstr &MI);
1301 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1302 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1303 // or will be chosen so as to share another phi.
1304 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1305 const TargetRegisterClass *RC = nullptr);
1306 // Create an undef register of the given register class.
1307 Register undef(const TargetRegisterClass *RC);
1308
1309public:
1310 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1311 LiveIntervals *LIS = nullptr);
1312 void rewrite();
1313};
1314} // namespace
1315
1316KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1317 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1318 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1319 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1320 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1321 PreheaderBB = *BB->pred_begin();
1322 if (PreheaderBB == BB)
1323 PreheaderBB = *std::next(BB->pred_begin());
1324}
1325
1326void KernelRewriter::rewrite() {
1327 // Rearrange the loop to be in schedule order. Note that the schedule may
1328 // contain instructions that are not owned by the loop block (InstrChanges and
1329 // friends), so we gracefully handle unowned instructions and delete any
1330 // instructions that weren't in the schedule.
1331 auto InsertPt = BB->getFirstTerminator();
1332 MachineInstr *FirstMI = nullptr;
1333 for (MachineInstr *MI : S.getInstructions()) {
1334 if (MI->isPHI())
1335 continue;
1336 if (MI->getParent())
1337 MI->removeFromParent();
1338 BB->insert(InsertPt, MI);
1339 if (!FirstMI)
1340 FirstMI = MI;
1341 }
1342 assert(FirstMI && "Failed to find first MI in schedule");
1343
1344 // At this point all of the scheduled instructions are between FirstMI
1345 // and the end of the block. Kill from the first non-phi to FirstMI.
1346 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1347 if (LIS)
1349 (I++)->eraseFromParent();
1350 }
1351
1352 // Now remap every instruction in the loop.
1353 for (MachineInstr &MI : *BB) {
1354 if (MI.isPHI() || MI.isTerminator())
1355 continue;
1356 for (MachineOperand &MO : MI.uses()) {
1357 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1358 continue;
1359 Register Reg = remapUse(MO.getReg(), MI);
1360 MO.setReg(Reg);
1361 }
1362 }
1363 EliminateDeadPhis(BB, MRI, LIS);
1364
1365 // Ensure a phi exists for all instructions that are either referenced by
1366 // an illegal phi or by an instruction outside the loop. This allows us to
1367 // treat remaps of these values the same as "normal" values that come from
1368 // loop-carried phis.
1369 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1370 if (MI->isPHI()) {
1371 Register R = MI->getOperand(0).getReg();
1372 phi(R);
1373 continue;
1374 }
1375
1376 for (MachineOperand &Def : MI->defs()) {
1377 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1378 if (MI.getParent() != BB) {
1379 phi(Def.getReg());
1380 break;
1381 }
1382 }
1383 }
1384 }
1385}
1386
1387Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1388 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1389 if (!Producer)
1390 return Reg;
1391
1392 int ConsumerStage = S.getStage(&MI);
1393 if (!Producer->isPHI()) {
1394 // Non-phi producers are simple to remap. Insert as many phis as the
1395 // difference between the consumer and producer stages.
1396 if (Producer->getParent() != BB)
1397 // Producer was not inside the loop. Use the register as-is.
1398 return Reg;
1399 int ProducerStage = S.getStage(Producer);
1400 assert(ConsumerStage != -1 &&
1401 "In-loop consumer should always be scheduled!");
1402 assert(ConsumerStage >= ProducerStage);
1403 unsigned StageDiff = ConsumerStage - ProducerStage;
1404
1405 for (unsigned I = 0; I < StageDiff; ++I)
1406 Reg = phi(Reg);
1407 return Reg;
1408 }
1409
1410 // First, dive through the phi chain to find the defaults for the generated
1411 // phis.
1413 Register LoopReg = Reg;
1414 auto LoopProducer = Producer;
1415 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1416 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1417 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1418 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1419 assert(LoopProducer);
1420 }
1421 int LoopProducerStage = S.getStage(LoopProducer);
1422
1423 std::optional<Register> IllegalPhiDefault;
1424
1425 if (LoopProducerStage == -1) {
1426 // Do nothing.
1427 } else if (LoopProducerStage > ConsumerStage) {
1428 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1429 // In addition, Consumer's cycle must be scheduled after Producer in the
1430 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1431 // functions.
1432#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1433 int LoopProducerCycle = S.getCycle(LoopProducer);
1434 int ConsumerCycle = S.getCycle(&MI);
1435#endif
1436 assert(LoopProducerCycle <= ConsumerCycle);
1437 assert(LoopProducerStage == ConsumerStage + 1);
1438 // Peel off the first phi from Defaults and insert a phi between producer
1439 // and consumer. This phi will not be at the front of the block so we
1440 // consider it illegal. It will only exist during the rewrite process; it
1441 // needs to exist while we peel off prologs because these could take the
1442 // default value. After that we can replace all uses with the loop producer
1443 // value.
1444 IllegalPhiDefault = Defaults.front();
1445 Defaults.erase(Defaults.begin());
1446 } else {
1447 assert(ConsumerStage >= LoopProducerStage);
1448 int StageDiff = ConsumerStage - LoopProducerStage;
1449 if (StageDiff > 0) {
1450 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1451 << " to " << (Defaults.size() + StageDiff) << "\n");
1452 // If we need more phis than we have defaults for, pad out with undefs for
1453 // the earliest phis, which are at the end of the defaults chain (the
1454 // chain is in reverse order).
1455 Defaults.resize(Defaults.size() + StageDiff,
1456 Defaults.empty() ? std::optional<Register>()
1457 : Defaults.back());
1458 }
1459 }
1460
1461 // Now we know the number of stages to jump back, insert the phi chain.
1462 auto DefaultI = Defaults.rbegin();
1463 while (DefaultI != Defaults.rend())
1464 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1465
1466 if (IllegalPhiDefault) {
1467 // The consumer optionally consumes LoopProducer in the same iteration
1468 // (because the producer is scheduled at an earlier cycle than the consumer)
1469 // or the initial value. To facilitate this we create an illegal block here
1470 // by embedding a phi in the middle of the block. We will fix this up
1471 // immediately prior to pruning.
1472 auto RC = MRI.getRegClass(Reg);
1473 Register R = MRI.createVirtualRegister(RC);
1474 MachineInstr *IllegalPhi =
1475 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1476 .addReg(*IllegalPhiDefault)
1477 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1478 .addReg(LoopReg)
1479 .addMBB(BB); // Block choice is arbitrary and has no effect.
1480 // Illegal phi should belong to the producer stage so that it can be
1481 // filtered correctly during peeling.
1482 S.setStage(IllegalPhi, LoopProducerStage);
1483 return R;
1484 }
1485
1486 return LoopReg;
1487}
1488
1489Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1490 const TargetRegisterClass *RC) {
1491 // If the init register is not undef, try and find an existing phi.
1492 if (InitReg) {
1493 auto I = Phis.find({LoopReg, *InitReg});
1494 if (I != Phis.end())
1495 return I->second;
1496 } else {
1497 for (auto &KV : Phis) {
1498 if (KV.first.first == LoopReg)
1499 return KV.second;
1500 }
1501 }
1502
1503 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1504 // find a phi that takes undef as input.
1505 auto I = UndefPhis.find(LoopReg);
1506 if (I != UndefPhis.end()) {
1507 Register R = I->second;
1508 if (!InitReg)
1509 // Found a phi taking undef as input, and this input is undef so return
1510 // without any more changes.
1511 return R;
1512 // Found a phi taking undef as input, so rewrite it to take InitReg.
1513 MachineInstr *MI = MRI.getVRegDef(R);
1514 MI->getOperand(1).setReg(*InitReg);
1515 Phis.insert({{LoopReg, *InitReg}, R});
1516 const TargetRegisterClass *ConstrainRegClass =
1517 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1518 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1519 (void)ConstrainRegClass;
1520 UndefPhis.erase(I);
1521 return R;
1522 }
1523
1524 // Failed to find any existing phi to reuse, so create a new one.
1525 if (!RC)
1526 RC = MRI.getRegClass(LoopReg);
1527 Register R = MRI.createVirtualRegister(RC);
1528 if (InitReg) {
1529 const TargetRegisterClass *ConstrainRegClass =
1530 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1531 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1532 (void)ConstrainRegClass;
1533 }
1534 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1535 .addReg(InitReg ? *InitReg : undef(RC))
1536 .addMBB(PreheaderBB)
1537 .addReg(LoopReg)
1538 .addMBB(BB);
1539 if (!InitReg)
1540 UndefPhis[LoopReg] = R;
1541 else
1542 Phis[{LoopReg, *InitReg}] = R;
1543 return R;
1544}
1545
1546Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1547 Register &R = Undefs[RC];
1548 if (R == 0) {
1549 // Create an IMPLICIT_DEF that defines this register if we need it.
1550 // All uses of this should be removed by the time we have finished unrolling
1551 // prologs and epilogs.
1552 R = MRI.createVirtualRegister(RC);
1553 auto *InsertBB = &PreheaderBB->getParent()->front();
1554 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1555 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1556 }
1557 return R;
1558}
1559
1560namespace {
1561/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1562/// the operand are discovered, such as how many in-loop PHIs it has to jump
1563/// through and defaults for these phis.
1564class KernelOperandInfo {
1565 MachineBasicBlock *BB;
1566 MachineRegisterInfo &MRI;
1567 SmallVector<Register, 4> PhiDefaults;
1568 MachineOperand *Source;
1569 MachineOperand *Target;
1570
1571public:
1572 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1573 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1574 : MRI(MRI) {
1575 Source = MO;
1576 BB = MO->getParent()->getParent();
1577 while (isRegInLoop(MO)) {
1578 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1579 if (MI->isFullCopy()) {
1580 MO = &MI->getOperand(1);
1581 continue;
1582 }
1583 if (!MI->isPHI())
1584 break;
1585 // If this is an illegal phi, don't count it in distance.
1586 if (IllegalPhis.count(MI)) {
1587 MO = &MI->getOperand(3);
1588 continue;
1589 }
1590
1592 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1593 : &MI->getOperand(3);
1594 PhiDefaults.push_back(Default);
1595 }
1596 Target = MO;
1597 }
1598
1599 bool operator==(const KernelOperandInfo &Other) const {
1600 return PhiDefaults.size() == Other.PhiDefaults.size();
1601 }
1602
1603 void print(raw_ostream &OS) const {
1604 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1605 << *Source->getParent();
1606 }
1607
1608private:
1609 bool isRegInLoop(MachineOperand *MO) {
1610 return MO->isReg() && MO->getReg().isVirtual() &&
1611 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1612 }
1613};
1614} // namespace
1615
1616MachineBasicBlock *
1619 if (LPD == LPD_Front)
1620 PeeledFront.push_back(NewBB);
1621 else
1622 PeeledBack.push_front(NewBB);
1623 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1624 ++I, ++NI) {
1625 CanonicalMIs[&*I] = &*I;
1626 CanonicalMIs[&*NI] = &*I;
1627 BlockMIs[{NewBB, &*I}] = &*NI;
1628 BlockMIs[{BB, &*I}] = &*I;
1629 }
1630 return NewBB;
1631}
1632
1634 int MinStage) {
1635 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1636 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1637 MachineInstr *MI = &*I++;
1638 int Stage = getStage(MI);
1639 if (Stage == -1 || Stage >= MinStage)
1640 continue;
1641
1642 for (MachineOperand &DefMO : MI->defs()) {
1644 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1645 // Only PHIs can use values from this block by construction.
1646 // Match with the equivalent PHI in B.
1647 assert(UseMI.isPHI());
1648 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1649 MI->getParent());
1650 Subs.emplace_back(&UseMI, Reg);
1651 }
1652 for (auto &Sub : Subs)
1653 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1654 *MRI.getTargetRegisterInfo());
1655 }
1656 if (LIS)
1657 LIS->RemoveMachineInstrFromMaps(*MI);
1658 MI->eraseFromParent();
1659 }
1660}
1661
1663 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1664 auto InsertPt = DestBB->getFirstNonPHI();
1667 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1668 if (MI.isPHI()) {
1669 // This is an illegal PHI. If we move any instructions using an illegal
1670 // PHI, we need to create a legal Phi.
1671 if (getStage(&MI) != Stage) {
1672 // The legal Phi is not necessary if the illegal phi's stage
1673 // is being moved.
1674 Register PhiR = MI.getOperand(0).getReg();
1675 auto RC = MRI.getRegClass(PhiR);
1676 Register NR = MRI.createVirtualRegister(RC);
1677 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1678 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1679 .addReg(PhiR)
1680 .addMBB(SourceBB);
1681 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1683 Remaps[PhiR] = NR;
1684 }
1685 }
1686 if (getStage(&MI) != Stage)
1687 continue;
1688 MI.removeFromParent();
1689 DestBB->insert(InsertPt, &MI);
1690 auto *KernelMI = CanonicalMIs[&MI];
1691 BlockMIs[{DestBB, KernelMI}] = &MI;
1692 BlockMIs.erase({SourceBB, KernelMI});
1693 }
1695 for (MachineInstr &MI : DestBB->phis()) {
1696 assert(MI.getNumOperands() == 3);
1697 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1698 // If the instruction referenced by the phi is moved inside the block
1699 // we don't need the phi anymore.
1700 if (getStage(Def) == Stage) {
1701 Register PhiReg = MI.getOperand(0).getReg();
1702 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1703 /*TRI=*/nullptr) != -1);
1704 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1705 MI.getOperand(0).setReg(PhiReg);
1706 PhiToDelete.push_back(&MI);
1707 }
1708 }
1709 for (auto *P : PhiToDelete)
1710 P->eraseFromParent();
1711 InsertPt = DestBB->getFirstNonPHI();
1712 // Helper to clone Phi instructions into the destination block. We clone Phi
1713 // greedily to avoid combinatorial explosion of Phi instructions.
1714 auto clonePhi = [&](MachineInstr *Phi) {
1715 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1716 DestBB->insert(InsertPt, NewMI);
1717 Register OrigR = Phi->getOperand(0).getReg();
1718 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1719 NewMI->getOperand(0).setReg(R);
1720 NewMI->getOperand(1).setReg(OrigR);
1721 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1722 Remaps[OrigR] = R;
1723 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1724 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1726 return R;
1727 };
1728 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1729 for (MachineOperand &MO : I->uses()) {
1730 if (!MO.isReg())
1731 continue;
1732 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())
1733 MO.setReg(It->second);
1734 else {
1735 // If we are using a phi from the source block we need to add a new phi
1736 // pointing to the old one.
1737 MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1738 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1739 Register R = clonePhi(Use);
1740 MO.setReg(R);
1741 }
1742 }
1743 }
1744 }
1745}
1746
1749 MachineInstr *Phi) {
1750 unsigned distance = PhiNodeLoopIteration[Phi];
1751 MachineInstr *CanonicalUse = CanonicalPhi;
1752 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1753 for (unsigned I = 0; I < distance; ++I) {
1754 assert(CanonicalUse->isPHI());
1755 assert(CanonicalUse->getNumOperands() == 5);
1756 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1757 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1758 std::swap(LoopRegIdx, InitRegIdx);
1759 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1760 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1761 }
1762 return CanonicalUseReg;
1763}
1764
1766 BitVector LS(Schedule.getNumStages(), true);
1767 BitVector AS(Schedule.getNumStages(), true);
1768 LiveStages[BB] = LS;
1769 AvailableStages[BB] = AS;
1770
1771 // Peel out the prologs.
1772 LS.reset();
1773 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1774 LS[I] = true;
1775 Prologs.push_back(peelKernel(LPD_Front));
1776 LiveStages[Prologs.back()] = LS;
1777 AvailableStages[Prologs.back()] = LS;
1778 }
1779
1780 // Create a block that will end up as the new loop exiting block (dominated by
1781 // all prologs and epilogs). It will only contain PHIs, in the same order as
1782 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1783 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1784 // property that any value deffed in BB but used outside of BB is used by a
1785 // PHI in the exiting block.
1787 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1788 // Push out the epilogs, again in reverse order.
1789 // We can't assume anything about the minumum loop trip count at this point,
1790 // so emit a fairly complex epilog.
1791
1792 // We first peel number of stages minus one epilogue. Then we remove dead
1793 // stages and reorder instructions based on their stage. If we have 3 stages
1794 // we generate first:
1795 // E0[3, 2, 1]
1796 // E1[3', 2']
1797 // E2[3'']
1798 // And then we move instructions based on their stages to have:
1799 // E0[3]
1800 // E1[2, 3']
1801 // E2[1, 2', 3'']
1802 // The transformation is legal because we only move instructions past
1803 // instructions of a previous loop iteration.
1804 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1805 Epilogs.push_back(peelKernel(LPD_Back));
1806 MachineBasicBlock *B = Epilogs.back();
1807 filterInstructions(B, Schedule.getNumStages() - I);
1808 // Keep track at which iteration each phi belongs to. We need it to know
1809 // what version of the variable to use during prologue/epilogue stitching.
1810 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1811 for (MachineInstr &Phi : B->phis())
1812 PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
1813 }
1814 for (size_t I = 0; I < Epilogs.size(); I++) {
1815 LS.reset();
1816 for (size_t J = I; J < Epilogs.size(); J++) {
1817 int Iteration = J;
1818 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1819 // Move stage one block at a time so that Phi nodes are updated correctly.
1820 for (size_t K = Iteration; K > I; K--)
1821 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1822 LS[Stage] = true;
1823 }
1824 LiveStages[Epilogs[I]] = LS;
1825 AvailableStages[Epilogs[I]] = AS;
1826 }
1827
1828 // Now we've defined all the prolog and epilog blocks as a fallthrough
1829 // sequence, add the edges that will be followed if the loop trip count is
1830 // lower than the number of stages (connecting prologs directly with epilogs).
1831 auto PI = Prologs.begin();
1832 auto EI = Epilogs.begin();
1833 assert(Prologs.size() == Epilogs.size());
1834 for (; PI != Prologs.end(); ++PI, ++EI) {
1835 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1836 (*PI)->addSuccessor(*EI);
1837 for (MachineInstr &MI : (*EI)->phis()) {
1838 Register Reg = MI.getOperand(1).getReg();
1839 MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1840 if (Use && Use->getParent() == Pred) {
1841 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1842 if (CanonicalUse->isPHI()) {
1843 // If the use comes from a phi we need to skip as many phi as the
1844 // distance between the epilogue and the kernel. Trace through the phi
1845 // chain to find the right value.
1846 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1847 }
1848 Reg = getEquivalentRegisterIn(Reg, *PI);
1849 }
1850 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1851 MI.addOperand(MachineOperand::CreateMBB(*PI));
1852 }
1853 }
1854
1855 // Create a list of all blocks in order.
1858 Blocks.push_back(BB);
1860
1861 // Iterate in reverse order over all instructions, remapping as we go.
1862 for (MachineBasicBlock *B : reverse(Blocks)) {
1863 for (auto I = B->instr_rbegin();
1864 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1866 rewriteUsesOf(&*MI);
1867 }
1868 }
1869 for (auto *MI : IllegalPhisToDelete) {
1870 if (LIS)
1871 LIS->RemoveMachineInstrFromMaps(*MI);
1872 MI->eraseFromParent();
1873 }
1874 IllegalPhisToDelete.clear();
1875
1876 // Now all remapping has been done, we're free to optimize the generated code.
1877 for (MachineBasicBlock *B : reverse(Blocks))
1878 EliminateDeadPhis(B, MRI, LIS);
1879 EliminateDeadPhis(ExitingBB, MRI, LIS);
1880}
1881
1883 MachineFunction &MF = *BB->getParent();
1884 MachineBasicBlock *Exit = *BB->succ_begin();
1885 if (Exit == BB)
1886 Exit = *std::next(BB->succ_begin());
1887
1888 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1889 MF.insert(std::next(BB->getIterator()), NewBB);
1890
1891 // Clone all phis in BB into NewBB and rewrite.
1892 for (MachineInstr &MI : BB->phis()) {
1893 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1894 Register OldR = MI.getOperand(3).getReg();
1895 Register R = MRI.createVirtualRegister(RC);
1897 for (MachineInstr &Use : MRI.use_instructions(OldR))
1898 if (Use.getParent() != BB)
1899 Uses.push_back(&Use);
1900 for (MachineInstr *Use : Uses)
1901 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1902 *MRI.getTargetRegisterInfo());
1903 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1904 .addReg(OldR)
1905 .addMBB(BB);
1906 BlockMIs[{NewBB, &MI}] = NI;
1907 CanonicalMIs[NI] = &MI;
1908 }
1909 BB->replaceSuccessor(Exit, NewBB);
1910 Exit->replacePhiUsesWith(BB, NewBB);
1911 NewBB->addSuccessor(Exit);
1912
1913 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1915 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1916 (void)CanAnalyzeBr;
1917 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1918 TII->removeBranch(*BB);
1919 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1920 Cond, DebugLoc());
1921 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1922 return NewBB;
1923}
1924
1928 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1929 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
1930 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1931}
1932
1934 if (MI->isPHI()) {
1935 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1936 // and it is produced by this block.
1937 Register PhiR = MI->getOperand(0).getReg();
1938 Register R = MI->getOperand(3).getReg();
1939 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1940 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1941 R = MI->getOperand(1).getReg();
1942 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1943 MRI.replaceRegWith(PhiR, R);
1944 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1945 // later to figure out how to remap registers.
1946 MI->getOperand(0).setReg(PhiR);
1947 IllegalPhisToDelete.push_back(MI);
1948 return;
1949 }
1950
1951 int Stage = getStage(MI);
1952 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1953 LiveStages[MI->getParent()].test(Stage))
1954 // Instruction is live, no rewriting to do.
1955 return;
1956
1957 for (MachineOperand &DefMO : MI->defs()) {
1959 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1960 // Only PHIs can use values from this block by construction.
1961 // Match with the equivalent PHI in B.
1962 assert(UseMI.isPHI());
1963 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1964 MI->getParent());
1965 Subs.emplace_back(&UseMI, Reg);
1966 }
1967 for (auto &Sub : Subs)
1968 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1969 *MRI.getTargetRegisterInfo());
1970 }
1971 if (LIS)
1972 LIS->RemoveMachineInstrFromMaps(*MI);
1973 MI->eraseFromParent();
1974}
1975
1977 // Work outwards from the kernel.
1978 bool KernelDisposed = false;
1979 int TC = Schedule.getNumStages() - 1;
1980 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1981 ++PI, ++EI, --TC) {
1983 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1986 TII->removeBranch(*Prolog);
1987 std::optional<bool> StaticallyGreater =
1988 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1989 if (!StaticallyGreater) {
1990 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1991 // Dynamically branch based on Cond.
1992 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1993 } else if (*StaticallyGreater == false) {
1994 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1995 // Prolog never falls through; branch to epilog and orphan interior
1996 // blocks. Leave it to unreachable-block-elim to clean up.
1997 Prolog->removeSuccessor(Fallthrough);
1998 for (MachineInstr &P : Fallthrough->phis()) {
1999 P.removeOperand(2);
2000 P.removeOperand(1);
2001 }
2002 TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
2003 KernelDisposed = true;
2004 } else {
2005 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
2006 // Prolog always falls through; remove incoming values in epilog.
2007 Prolog->removeSuccessor(Epilog);
2008 for (MachineInstr &P : Epilog->phis()) {
2009 P.removeOperand(4);
2010 P.removeOperand(3);
2011 }
2012 }
2013 }
2014
2015 if (!KernelDisposed) {
2016 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
2017 LoopInfo->setPreheader(Prologs.back());
2018 } else {
2019 LoopInfo->disposed();
2020 }
2021}
2022
2024 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2025 KR.rewrite();
2026}
2027
2029 BB = Schedule.getLoop()->getTopBlock();
2030 Preheader = Schedule.getLoop()->getLoopPreheader();
2031 LLVM_DEBUG(Schedule.dump());
2032 LoopInfo = TII->analyzeLoopForPipelining(BB);
2034
2035 rewriteKernel();
2037 fixupBranches();
2038}
2039
2041 BB = Schedule.getLoop()->getTopBlock();
2042 Preheader = Schedule.getLoop()->getLoopPreheader();
2043
2044 // Dump the schedule before we invalidate and remap all its instructions.
2045 // Stash it in a string so we can print it if we found an error.
2046 std::string ScheduleDump;
2047 raw_string_ostream OS(ScheduleDump);
2048 Schedule.print(OS);
2049 OS.flush();
2050
2051 // First, run the normal ModuleScheduleExpander. We don't support any
2052 // InstrChanges.
2053 assert(LIS && "Requires LiveIntervals!");
2056 MSE.expand();
2057 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2058 if (!ExpandedKernel) {
2059 // The expander optimized away the kernel. We can't do any useful checking.
2060 MSE.cleanup();
2061 return;
2062 }
2063 // Before running the KernelRewriter, re-add BB into the CFG.
2064 Preheader->addSuccessor(BB);
2065
2066 // Now run the new expansion algorithm.
2067 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2068 KR.rewrite();
2070
2071 // Collect all illegal phis that the new algorithm created. We'll give these
2072 // to KernelOperandInfo.
2074 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2075 if (NI->isPHI())
2076 IllegalPhis.insert(&*NI);
2077 }
2078
2079 // Co-iterate across both kernels. We expect them to be identical apart from
2080 // phis and full COPYs (we look through both).
2082 auto OI = ExpandedKernel->begin();
2083 auto NI = BB->begin();
2084 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2085 while (OI->isPHI() || OI->isFullCopy())
2086 ++OI;
2087 while (NI->isPHI() || NI->isFullCopy())
2088 ++NI;
2089 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2090 // Analyze every operand separately.
2091 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2092 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2093 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2094 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2095 }
2096
2097 bool Failed = false;
2098 for (auto &OldAndNew : KOIs) {
2099 if (OldAndNew.first == OldAndNew.second)
2100 continue;
2101 Failed = true;
2102 errs() << "Modulo kernel validation error: [\n";
2103 errs() << " [golden] ";
2104 OldAndNew.first.print(errs());
2105 errs() << " ";
2106 OldAndNew.second.print(errs());
2107 errs() << "]\n";
2108 }
2109
2110 if (Failed) {
2111 errs() << "Golden reference kernel:\n";
2112 ExpandedKernel->print(errs());
2113 errs() << "New kernel:\n";
2114 BB->print(errs());
2115 errs() << ScheduleDump;
2117 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2118 }
2119
2120 // Cleanup by removing BB from the CFG again as the original
2121 // ModuloScheduleExpander intended.
2122 Preheader->removeSuccessor(BB);
2123 MSE.cleanup();
2124}
2125
2126MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2127 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2128
2129 // TODO: Offset information needs to be corrected.
2130 NewMI->dropMemRefs(MF);
2131
2132 return NewMI;
2133}
2134
2135/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2136/// If it is already dedicated exit, return it. Otherwise, insert a new
2137/// block between them and return the new block.
2139 MachineBasicBlock *Exit,
2140 LiveIntervals &LIS) {
2141 if (Exit->pred_size() == 1)
2142 return Exit;
2143
2144 MachineFunction *MF = Loop->getParent();
2146
2147 MachineBasicBlock *NewExit =
2148 MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2149 MF->insert(Loop->getIterator(), NewExit);
2150 LIS.insertMBBInMaps(NewExit);
2151
2152 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2154 TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2155 if (TBB == Loop)
2156 FBB = NewExit;
2157 else if (FBB == Loop)
2158 TBB = NewExit;
2159 else
2160 llvm_unreachable("unexpected loop structure");
2161 TII->removeBranch(*Loop);
2162 TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2163 Loop->replaceSuccessor(Exit, NewExit);
2164 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2165 NewExit->addSuccessor(Exit);
2166
2167 Exit->replacePhiUsesWith(Loop, NewExit);
2168
2169 return NewExit;
2170}
2171
2172/// Insert branch code into the end of MBB. It branches to GreaterThan if the
2173/// remaining trip count for instructions in LastStage0Insts is greater than
2174/// RequiredTC, and to Otherwise otherwise.
2175void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2176 int RequiredTC,
2177 InstrMapTy &LastStage0Insts,
2178 MachineBasicBlock &GreaterThan,
2179 MachineBasicBlock &Otherwise) {
2181 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2182 LastStage0Insts);
2183
2185 // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2186 // FBB for optimal performance.
2188 llvm_unreachable("can not reverse branch condition");
2189 TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2190 } else {
2191 TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2192 }
2193}
2194
2195/// Generate a pipelined loop that is unrolled by using MVE algorithm and any
2196/// other necessary blocks. The control flow is modified to execute the
2197/// pipelined loop if the trip count satisfies the condition, otherwise the
2198/// original loop. The original loop is also used to execute the remainder
2199/// iterations which occur due to unrolling.
2200void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2201 // The control flow for pipelining with MVE:
2202 //
2203 // OrigPreheader:
2204 // // The block that is originally the loop preheader
2205 // goto Check
2206 //
2207 // Check:
2208 // // Check whether the trip count satisfies the requirements to pipeline.
2209 // if (LoopCounter > NumStages + NumUnroll - 2)
2210 // // The minimum number of iterations to pipeline =
2211 // // iterations executed in prolog/epilog (NumStages-1) +
2212 // // iterations executed in one kernel run (NumUnroll)
2213 // goto Prolog
2214 // // fallback to the original loop
2215 // goto NewPreheader
2216 //
2217 // Prolog:
2218 // // All prolog stages. There are no direct branches to the epilogue.
2219 // goto NewKernel
2220 //
2221 // NewKernel:
2222 // // NumUnroll copies of the kernel
2223 // if (LoopCounter > MVE-1)
2224 // goto NewKernel
2225 // goto Epilog
2226 //
2227 // Epilog:
2228 // // All epilog stages.
2229 // if (LoopCounter > 0)
2230 // // The remainder is executed in the original loop
2231 // goto NewPreheader
2232 // goto NewExit
2233 //
2234 // NewPreheader:
2235 // // Newly created preheader for the original loop.
2236 // // The initial values of the phis in the loop are merged from two paths.
2237 // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2238 // goto OrigKernel
2239 //
2240 // OrigKernel:
2241 // // The original loop block.
2242 // if (LoopCounter != 0)
2243 // goto OrigKernel
2244 // goto NewExit
2245 //
2246 // NewExit:
2247 // // Newly created dedicated exit for the original loop.
2248 // // Merge values which are referenced after the loop
2249 // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2250 // goto OrigExit
2251 //
2252 // OrigExit:
2253 // // The block that is originally the loop exit.
2254 // // If it is already deicated exit, NewExit is not created.
2255
2256 // An example of where each stage is executed:
2257 // Assume #Stages 3, #MVE 4, #Iterations 12
2258 // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2259 // -------------------------------------------------
2260 // Stage 0 Prolog#0
2261 // Stage 1 0 Prolog#1
2262 // Stage 2 1 0 Kernel Unroll#0 Iter#0
2263 // Stage 2 1 0 Kernel Unroll#1 Iter#0
2264 // Stage 2 1 0 Kernel Unroll#2 Iter#0
2265 // Stage 2 1 0 Kernel Unroll#3 Iter#0
2266 // Stage 2 1 0 Kernel Unroll#0 Iter#1
2267 // Stage 2 1 0 Kernel Unroll#1 Iter#1
2268 // Stage 2 1 0 Kernel Unroll#2 Iter#1
2269 // Stage 2 1 0 Kernel Unroll#3 Iter#1
2270 // Stage 2 1 Epilog#0
2271 // Stage 2 Epilog#1
2272 // Stage 0-2 OrigKernel
2273
2274 LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2275 assert(LoopInfo && "Must be able to analyze loop!");
2276
2277 calcNumUnroll();
2278
2279 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2280 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2281 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2282 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2283 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2284
2285 MF.insert(OrigKernel->getIterator(), Check);
2287 MF.insert(OrigKernel->getIterator(), Prolog);
2289 MF.insert(OrigKernel->getIterator(), NewKernel);
2290 LIS.insertMBBInMaps(NewKernel);
2291 MF.insert(OrigKernel->getIterator(), Epilog);
2293 MF.insert(OrigKernel->getIterator(), NewPreheader);
2294 LIS.insertMBBInMaps(NewPreheader);
2295
2296 NewExit = createDedicatedExit(OrigKernel, OrigExit, LIS);
2297
2298 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2299 TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2300
2301 OrigPreheader->addSuccessor(Check);
2302 TII->removeBranch(*OrigPreheader);
2303 TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2304
2305 Check->addSuccessor(Prolog);
2306 Check->addSuccessor(NewPreheader);
2307
2308 Prolog->addSuccessor(NewKernel);
2309
2310 NewKernel->addSuccessor(NewKernel);
2311 NewKernel->addSuccessor(Epilog);
2312
2313 Epilog->addSuccessor(NewPreheader);
2314 Epilog->addSuccessor(NewExit);
2315
2316 InstrMapTy LastStage0Insts;
2317 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2318 LastStage0Insts, *Prolog, *NewPreheader);
2319
2320 // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2321 // register#
2322 SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2323 generateProlog(PrologVRMap);
2324 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2325 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2326}
2327
2328/// Replace MI's use operands according to the maps.
2329void ModuloScheduleExpanderMVE::updateInstrUse(
2330 MachineInstr *MI, int StageNum, int PhaseNum,
2331 SmallVectorImpl<ValueMapTy> &CurVRMap,
2332 SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2333 // If MI is in the prolog/kernel/epilog block, CurVRMap is
2334 // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2335 // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2336 // Refer to the appropriate map according to the stage difference between
2337 // MI and the definition of an operand.
2338
2339 for (MachineOperand &UseMO : MI->uses()) {
2340 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2341 continue;
2342 int DiffStage = 0;
2343 Register OrigReg = UseMO.getReg();
2344 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2345 if (!DefInst || DefInst->getParent() != OrigKernel)
2346 continue;
2347 Register InitReg;
2348 Register DefReg = OrigReg;
2349 if (DefInst->isPHI()) {
2350 ++DiffStage;
2351 Register LoopReg;
2352 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2353 // LoopReg is guaranteed to be defined within the loop by canApply()
2354 DefReg = LoopReg;
2355 DefInst = MRI.getVRegDef(LoopReg);
2356 }
2357 unsigned DefStageNum = Schedule.getStage(DefInst);
2358 DiffStage += StageNum - DefStageNum;
2359 Register NewReg;
2360 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2361 // NewReg is defined in a previous phase of the same block
2362 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2363 else if (!PrevVRMap)
2364 // Since this is the first iteration, refer the initial register of the
2365 // loop
2366 NewReg = InitReg;
2367 else
2368 // Cases where DiffStage is larger than PhaseNum.
2369 // If MI is in the kernel block, the value is defined by the previous
2370 // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2371 // value is defined in the kernel block and KernelVRMap is referenced.
2372 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2373
2374 const TargetRegisterClass *NRC =
2375 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2376 if (NRC)
2377 UseMO.setReg(NewReg);
2378 else {
2379 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2380 MachineInstr *NewCopy = BuildMI(*OrigKernel, MI, MI->getDebugLoc(),
2381 TII->get(TargetOpcode::COPY), SplitReg)
2382 .addReg(NewReg);
2383 LIS.InsertMachineInstrInMaps(*NewCopy);
2384 UseMO.setReg(SplitReg);
2385 }
2386 }
2387}
2388
2389/// Return a phi if Reg is referenced by the phi.
2390/// canApply() guarantees that at most only one such phi exists.
2392 for (MachineInstr &Phi : Loop->phis()) {
2393 Register InitVal, LoopVal;
2394 getPhiRegs(Phi, Loop, InitVal, LoopVal);
2395 if (LoopVal == Reg)
2396 return &Phi;
2397 }
2398 return nullptr;
2399}
2400
2401/// Generate phis for registers defined by OrigMI.
2402void ModuloScheduleExpanderMVE::generatePhi(
2403 MachineInstr *OrigMI, int UnrollNum,
2404 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2405 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2406 SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2407 int StageNum = Schedule.getStage(OrigMI);
2408 bool UsePrologReg;
2409 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2410 UsePrologReg = true;
2411 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2412 UsePrologReg = false;
2413 else
2414 return;
2415
2416 // Examples that show which stages are merged by phi.
2417 // Meaning of the symbol following the stage number:
2418 // a/b: Stages with the same letter are merged (UsePrologReg == true)
2419 // +: Merged with the initial value (UsePrologReg == false)
2420 // *: No phis required
2421 //
2422 // #Stages 3, #MVE 4
2423 // Iter 0 1 2 3 4 5 6 7 8
2424 // -----------------------------------------
2425 // Stage 0a Prolog#0
2426 // Stage 1a 0b Prolog#1
2427 // Stage 2* 1* 0* Kernel Unroll#0
2428 // Stage 2* 1* 0+ Kernel Unroll#1
2429 // Stage 2* 1+ 0a Kernel Unroll#2
2430 // Stage 2+ 1a 0b Kernel Unroll#3
2431 //
2432 // #Stages 3, #MVE 2
2433 // Iter 0 1 2 3 4 5 6 7 8
2434 // -----------------------------------------
2435 // Stage 0a Prolog#0
2436 // Stage 1a 0b Prolog#1
2437 // Stage 2* 1+ 0a Kernel Unroll#0
2438 // Stage 2+ 1a 0b Kernel Unroll#1
2439 //
2440 // #Stages 3, #MVE 1
2441 // Iter 0 1 2 3 4 5 6 7 8
2442 // -----------------------------------------
2443 // Stage 0* Prolog#0
2444 // Stage 1a 0b Prolog#1
2445 // Stage 2+ 1a 0b Kernel Unroll#0
2446
2447 for (MachineOperand &DefMO : OrigMI->defs()) {
2448 if (!DefMO.isReg() || DefMO.isDead())
2449 continue;
2450 Register OrigReg = DefMO.getReg();
2451 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2452 if (NewReg == KernelVRMap[UnrollNum].end())
2453 continue;
2454 Register CorrespondReg;
2455 if (UsePrologReg) {
2456 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2457 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2458 } else {
2459 MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2460 if (!Phi)
2461 continue;
2462 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2463 }
2464
2465 assert(CorrespondReg.isValid());
2466 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2467 MachineInstr *NewPhi =
2468 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2469 TII->get(TargetOpcode::PHI), PhiReg)
2470 .addReg(NewReg->second)
2471 .addMBB(NewKernel)
2472 .addReg(CorrespondReg)
2473 .addMBB(Prolog);
2474 LIS.InsertMachineInstrInMaps(*NewPhi);
2475 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2476 }
2477}
2478
2479static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2480 MachineBasicBlock *NewMBB) {
2481 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2482 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2483 Phi.getOperand(Idx).setReg(NewReg);
2484 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2485 return;
2486 }
2487 }
2488}
2489
2490/// Generate phis that merge values from multiple routes
2491void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2492 Register NewReg) {
2493 SmallVector<MachineOperand *> UsesAfterLoop;
2495 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2496 E = MRI.use_end();
2497 I != E; ++I) {
2498 MachineOperand &O = *I;
2499 if (O.getParent()->getParent() != OrigKernel &&
2500 O.getParent()->getParent() != Prolog &&
2501 O.getParent()->getParent() != NewKernel &&
2502 O.getParent()->getParent() != Epilog)
2503 UsesAfterLoop.push_back(&O);
2504 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2505 LoopPhis.push_back(O.getParent());
2506 }
2507
2508 // Merge the route that only execute the pipelined loop (when there are no
2509 // remaining iterations) with the route that execute the original loop.
2510 if (!UsesAfterLoop.empty()) {
2511 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2512 MachineInstr *NewPhi =
2513 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2514 TII->get(TargetOpcode::PHI), PhiReg)
2515 .addReg(OrigReg)
2516 .addMBB(OrigKernel)
2517 .addReg(NewReg)
2518 .addMBB(Epilog);
2519 LIS.InsertMachineInstrInMaps(*NewPhi);
2520
2521 for (MachineOperand *MO : UsesAfterLoop)
2522 MO->setReg(PhiReg);
2523
2524 // The interval of OrigReg is invalid and should be recalculated when
2525 // LiveInterval::getInterval() is called.
2526 if (LIS.hasInterval(OrigReg))
2527 LIS.removeInterval(OrigReg);
2528 }
2529
2530 // Merge routes from the pipelined loop and the bypassed route before the
2531 // original loop
2532 if (!LoopPhis.empty()) {
2533 for (MachineInstr *Phi : LoopPhis) {
2534 Register InitReg, LoopReg;
2535 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2536 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2537 MachineInstr *NewPhi =
2538 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(),
2539 Phi->getDebugLoc(), TII->get(TargetOpcode::PHI), NewInit)
2540 .addReg(InitReg)
2541 .addMBB(Check)
2542 .addReg(NewReg)
2543 .addMBB(Epilog);
2544 LIS.InsertMachineInstrInMaps(*NewPhi);
2545 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2546 }
2547 }
2548}
2549
2550void ModuloScheduleExpanderMVE::generateProlog(
2551 SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2552 PrologVRMap.clear();
2553 PrologVRMap.resize(Schedule.getNumStages() - 1);
2554 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2555 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2556 ++PrologNum) {
2557 for (MachineInstr *MI : Schedule.getInstructions()) {
2558 if (MI->isPHI())
2559 continue;
2560 int StageNum = Schedule.getStage(MI);
2561 if (StageNum > PrologNum)
2562 continue;
2563 MachineInstr *NewMI = cloneInstr(MI);
2564 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2565 NewMIMap[NewMI] = {PrologNum, StageNum};
2566 Prolog->push_back(NewMI);
2567 LIS.InsertMachineInstrInMaps(*NewMI);
2568 }
2569 }
2570
2571 for (auto I : NewMIMap) {
2572 MachineInstr *MI = I.first;
2573 int PrologNum = I.second.first;
2574 int StageNum = I.second.second;
2575 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2576 }
2577
2578 LLVM_DEBUG({
2579 dbgs() << "prolog:\n";
2580 Prolog->dump();
2581 });
2582}
2583
2584void ModuloScheduleExpanderMVE::generateKernel(
2585 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2586 SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2587 KernelVRMap.clear();
2588 KernelVRMap.resize(NumUnroll);
2589 SmallVector<ValueMapTy> PhiVRMap;
2590 PhiVRMap.resize(NumUnroll);
2591 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2592 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2593 for (MachineInstr *MI : Schedule.getInstructions()) {
2594 if (MI->isPHI())
2595 continue;
2596 int StageNum = Schedule.getStage(MI);
2597 MachineInstr *NewMI = cloneInstr(MI);
2598 if (UnrollNum == NumUnroll - 1)
2599 LastStage0Insts[MI] = NewMI;
2600 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2601 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2602 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2603 NewMIMap[NewMI] = {UnrollNum, StageNum};
2604 NewKernel->push_back(NewMI);
2605 LIS.InsertMachineInstrInMaps(*NewMI);
2606 }
2607 }
2608
2609 for (auto I : NewMIMap) {
2610 MachineInstr *MI = I.first;
2611 int UnrollNum = I.second.first;
2612 int StageNum = I.second.second;
2613 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2614 }
2615
2616 // If remaining trip count is greater than NumUnroll-1, loop continues
2617 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2618 *Epilog);
2619
2620 LLVM_DEBUG({
2621 dbgs() << "kernel:\n";
2622 NewKernel->dump();
2623 });
2624}
2625
2626void ModuloScheduleExpanderMVE::generateEpilog(
2627 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2628 SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2629 EpilogVRMap.clear();
2630 EpilogVRMap.resize(Schedule.getNumStages() - 1);
2631 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2632 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2633 ++EpilogNum) {
2634 for (MachineInstr *MI : Schedule.getInstructions()) {
2635 if (MI->isPHI())
2636 continue;
2637 int StageNum = Schedule.getStage(MI);
2638 if (StageNum <= EpilogNum)
2639 continue;
2640 MachineInstr *NewMI = cloneInstr(MI);
2641 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2642 NewMIMap[NewMI] = {EpilogNum, StageNum};
2643 Epilog->push_back(NewMI);
2644 LIS.InsertMachineInstrInMaps(*NewMI);
2645 }
2646 }
2647
2648 for (auto I : NewMIMap) {
2649 MachineInstr *MI = I.first;
2650 int EpilogNum = I.second.first;
2651 int StageNum = I.second.second;
2652 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2653 }
2654
2655 // If there are remaining iterations, they are executed in the original loop.
2656 // Instructions related to loop control, such as loop counter comparison,
2657 // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
2658 // in stage 0. Thus, the map is for the last one in the kernel.
2659 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2660
2661 LLVM_DEBUG({
2662 dbgs() << "epilog:\n";
2663 Epilog->dump();
2664 });
2665}
2666
2667/// Calculate the number of unroll required and set it to NumUnroll
2668void ModuloScheduleExpanderMVE::calcNumUnroll() {
2669 DenseMap<MachineInstr *, unsigned> Inst2Idx;
2670 NumUnroll = 1;
2671 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2672 Inst2Idx[Schedule.getInstructions()[I]] = I;
2673
2674 for (MachineInstr *MI : Schedule.getInstructions()) {
2675 if (MI->isPHI())
2676 continue;
2677 int StageNum = Schedule.getStage(MI);
2678 for (const MachineOperand &MO : MI->uses()) {
2679 if (!MO.isReg() || !MO.getReg().isVirtual())
2680 continue;
2681 MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2682 if (DefMI->getParent() != OrigKernel)
2683 continue;
2684
2685 int NumUnrollLocal = 1;
2686 if (DefMI->isPHI()) {
2687 ++NumUnrollLocal;
2688 // canApply() guarantees that DefMI is not phi and is an instruction in
2689 // the loop
2690 DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2691 }
2692 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2693 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2694 --NumUnrollLocal;
2695 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2696 }
2697 }
2698 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2699}
2700
2701/// Create new virtual registers for definitions of NewMI and update NewMI.
2702/// If the definitions are referenced after the pipelined loop, phis are
2703/// created to merge with other routes.
2704void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2705 ValueMapTy &VRMap,
2706 bool LastDef) {
2707 for (MachineOperand &MO : NewMI->all_defs()) {
2708 if (!MO.getReg().isVirtual())
2709 continue;
2710 Register Reg = MO.getReg();
2711 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2712 Register NewReg = MRI.createVirtualRegister(RC);
2713 MO.setReg(NewReg);
2714 VRMap[Reg] = NewReg;
2715 if (LastDef)
2716 mergeRegUsesAfterPipeline(Reg, NewReg);
2717 }
2718}
2719
2721 OrigKernel = Schedule.getLoop()->getTopBlock();
2722 OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2723 OrigExit = Schedule.getLoop()->getExitBlock();
2724
2725 LLVM_DEBUG(Schedule.dump());
2726
2727 generatePipelinedLoop();
2728}
2729
2730/// Check if ModuloScheduleExpanderMVE can be applied to L
2732 if (!L.getExitBlock()) {
2733 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2734 return false;
2735 }
2736
2737 MachineBasicBlock *BB = L.getTopBlock();
2739
2740 // Put some constraints on the operands of the phis to simplify the
2741 // transformation
2742 DenseSet<Register> UsedByPhi;
2743 for (MachineInstr &MI : BB->phis()) {
2744 // Registers defined by phis must be used only inside the loop and be never
2745 // used by phis.
2746 for (MachineOperand &MO : MI.defs())
2747 if (MO.isReg())
2748 for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2749 if (Ref.getParent() != BB || Ref.isPHI()) {
2750 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2751 "referenced outside of the loop or by phi.\n");
2752 return false;
2753 }
2754
2755 // A source register from the loop block must be defined inside the loop.
2756 // A register defined inside the loop must be referenced by only one phi at
2757 // most.
2758 Register InitVal, LoopVal;
2759 getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2760 if (!Register(LoopVal).isVirtual() ||
2761 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2762 LLVM_DEBUG(
2763 dbgs() << "Can not apply MVE expander: A phi source value coming "
2764 "from the loop is not defined in the loop.\n");
2765 return false;
2766 }
2767 if (UsedByPhi.count(LoopVal)) {
2768 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2769 "loop is referenced by two or more phis.\n");
2770 return false;
2771 }
2772 UsedByPhi.insert(LoopVal);
2773 }
2774
2775 return true;
2776}
2777
2778//===----------------------------------------------------------------------===//
2779// ModuloScheduleTestPass implementation
2780//===----------------------------------------------------------------------===//
2781// This pass constructs a ModuloSchedule from its module and runs
2782// ModuloScheduleExpander.
2783//
2784// The module is expected to contain a single-block analyzable loop.
2785// The total order of instructions is taken from the loop as-is.
2786// Instructions are expected to be annotated with a PostInstrSymbol.
2787// This PostInstrSymbol must have the following format:
2788// "Stage=%d Cycle=%d".
2789//===----------------------------------------------------------------------===//
2790
2791namespace {
2792class ModuloScheduleTest : public MachineFunctionPass {
2793public:
2794 static char ID;
2795
2796 ModuloScheduleTest() : MachineFunctionPass(ID) {
2798 }
2799
2800 bool runOnMachineFunction(MachineFunction &MF) override;
2801 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2802
2803 void getAnalysisUsage(AnalysisUsage &AU) const override {
2807 }
2808};
2809} // namespace
2810
2811char ModuloScheduleTest::ID = 0;
2812
2813INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2814 "Modulo Schedule test pass", false, false)
2817INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2818 "Modulo Schedule test pass", false, false)
2819
2820bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2821 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2822 for (auto *L : MLI) {
2823 if (L->getTopBlock() != L->getBottomBlock())
2824 continue;
2825 runOnLoop(MF, *L);
2826 return false;
2827 }
2828 return false;
2829}
2830
2831static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2832 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2833 std::pair<StringRef, StringRef> StageTokenAndValue =
2834 getToken(StageAndCycle.first, "-");
2835 std::pair<StringRef, StringRef> CycleTokenAndValue =
2836 getToken(StageAndCycle.second, "-");
2837 if (StageTokenAndValue.first != "Stage" ||
2838 CycleTokenAndValue.first != "_Cycle") {
2840 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2841 return;
2842 }
2843
2844 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2845 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2846
2847 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2848}
2849
2850void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2851 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2852 MachineBasicBlock *BB = L.getTopBlock();
2853 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2854
2855 DenseMap<MachineInstr *, int> Cycle, Stage;
2856 std::vector<MachineInstr *> Instrs;
2857 for (MachineInstr &MI : *BB) {
2858 if (MI.isTerminator())
2859 continue;
2860 Instrs.push_back(&MI);
2861 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2862 dbgs() << "Parsing post-instr symbol for " << MI;
2863 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2864 }
2865 }
2866
2867 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2868 std::move(Stage));
2869 ModuloScheduleExpander MSE(
2870 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2871 MSE.expand();
2872 MSE.cleanup();
2873}
2874
2875//===----------------------------------------------------------------------===//
2876// ModuloScheduleTestAnnotater implementation
2877//===----------------------------------------------------------------------===//
2878
2880 for (MachineInstr *MI : S.getInstructions()) {
2882 raw_svector_ostream OS(SV);
2883 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2884 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2885 MI->setPostInstrSymbol(MF, Sym);
2886 }
2887}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static bool hasUseAfterLoop(Register Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit, LiveIntervals &LIS)
Create a dedicated exit for Loop.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static Register getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
MachineInstr unsigned OpIdx
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
bool erase(const KeyT &Val)
Definition DenseMap.h:303
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:269
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
LLVM_ABI void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineInstrBundleIterator< MachineInstr > iterator
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
unsigned getNumOperands() const
Retuns the total number of operands.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
LLVM_ABI void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
mop_range operands()
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
defusechain_iterator< true, false, false, true, false > use_iterator
use_iterator/use_begin/use_end - Walk all uses of the specified register.
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
DenseMap< MachineInstr *, std::pair< Register, int64_t > > InstrChangesTy
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopInfo
Target loop info before kernel peeling.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetInstrInfo * getInstrInfo() const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:174
self_iterator getIterator()
Definition ilist_node.h:134
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
initializer< Ty > init(const Ty &Val)
constexpr double phi
Definition MathExtras.h:61
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
iterator end() const
Definition BasicBlock.h:89
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1733
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1665
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2118
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:626
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
LLVM_ABI void initializeModuloScheduleTestPass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:400
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ Other
Any other memory.
Definition ModRef.h:68
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1936
DWARFExpression::Operation Op
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...