1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
18#include "llvm/MC/MCContext.h"
19#include "llvm/Support/Debug.h"
22
23#define DEBUG_TYPE "pipeliner"
24using namespace llvm;
25
27 "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
28 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
29
30void ModuloSchedule::print(raw_ostream &OS) {
31 for (MachineInstr *MI : ScheduledInstrs)
32 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
33}
34
35//===----------------------------------------------------------------------===//
36// ModuloScheduleExpander implementation
37//===----------------------------------------------------------------------===//
38
39/// Return the register values for the operands of a Phi instruction.
40/// This function assumes the instruction is a Phi.
41static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
42 Register &InitVal, Register &LoopVal) {
43 assert(Phi.isPHI() && "Expecting a Phi.");
44
45 InitVal = Register();
46 LoopVal = Register();
47 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
48 if (Phi.getOperand(i + 1).getMBB() != Loop)
49 InitVal = Phi.getOperand(i).getReg();
50 else
51 LoopVal = Phi.getOperand(i).getReg();
52
53 assert(InitVal && LoopVal && "Unexpected Phi structure.");
54}
55
56/// Return the Phi register value that comes from the incoming block.
57static Register getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
58 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
59 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
60 return Phi.getOperand(i).getReg();
61 return Register();
62}
63
64/// Return the Phi register value that comes from the loop block.
65static Register getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
66 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
67 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
68 return Phi.getOperand(i).getReg();
69 return Register();
70}
71
72void ModuloScheduleExpander::expand() {
73 BB = Schedule.getLoop()->getTopBlock();
74 Preheader = *BB->pred_begin();
75 if (Preheader == BB)
76 Preheader = *std::next(BB->pred_begin());
77
78 // Iterate over the definitions in each instruction, and compute the
79 // stage difference for each use. Keep the maximum value.
80 for (MachineInstr *MI : Schedule.getInstructions()) {
81 int DefStage = Schedule.getStage(MI);
82 for (const MachineOperand &Op : MI->all_defs()) {
83 Register Reg = Op.getReg();
84 unsigned MaxDiff = 0;
85 bool PhiIsSwapped = false;
86 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
87 MachineInstr *UseMI = UseOp.getParent();
88 int UseStage = Schedule.getStage(UseMI);
89 unsigned Diff = 0;
90 if (UseStage != -1 && UseStage >= DefStage)
91 Diff = UseStage - DefStage;
92 if (MI->isPHI()) {
93 if (isLoopCarried(*MI))
94 ++Diff;
95 else
96 PhiIsSwapped = true;
97 }
98 MaxDiff = std::max(Diff, MaxDiff);
99 }
100 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
101 }
102 }
103
104 generatePipelinedLoop();
105}
106
107void ModuloScheduleExpander::generatePipelinedLoop() {
108 LoopInfo = TII->analyzeLoopForPipelining(BB);
109 assert(LoopInfo && "Must be able to analyze loop!");
110
111 // Create a new basic block for the kernel and add it to the CFG.
112 MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
113
114 unsigned MaxStageCount = Schedule.getNumStages() - 1;
115
116 // Remember the registers that are used in different stages. The index is
117 // the iteration, or stage, that the instruction is scheduled in. This is
118 // a map between register names in the original block and the names created
119 // in each stage of the pipelined loop.
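 // Illustrative example: with MaxStageCount = 2, a register %r from the
 // original loop may receive a new name in each prolog stage and in the
 // kernel; VRMap[Stage][%r] then yields the name to use for that stage
 // (register names here are hypothetical).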
120 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
121
122 // The renaming destinations created by Phis for registers across stages.
123 // This map is updated during Phi generation to point to the most recent
124 // renaming destination.
125 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
126
127 InstrMapTy InstrMap;
128
129 SmallVector<MachineBasicBlock *, 4> PrologBBs;
130
131 // Generate the prolog instructions that set up the pipeline.
132 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
133 MF.insert(BB->getIterator(), KernelBB);
134 LIS.insertMBBInMaps(KernelBB);
135
136 // Rearrange the instructions to generate the new, pipelined loop,
137 // and update register names as needed.
138 for (MachineInstr *CI : Schedule.getInstructions()) {
139 if (CI->isPHI())
140 continue;
141 unsigned StageNum = Schedule.getStage(CI);
142 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
143 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
144 KernelBB->push_back(NewMI);
145 LIS.InsertMachineInstrInMaps(*NewMI);
146 InstrMap[NewMI] = CI;
147 }
148
149 // Copy any terminator instructions to the new kernel, and update
150 // names as needed.
151 for (MachineInstr &MI : BB->terminators()) {
152 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
153 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
154 KernelBB->push_back(NewMI);
155 LIS.InsertMachineInstrInMaps(*NewMI);
156 InstrMap[NewMI] = &MI;
157 }
158
159 NewKernel = KernelBB;
160 KernelBB->transferSuccessors(BB);
161 KernelBB->replaceSuccessor(BB, KernelBB);
162
163 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
164 InstrMap, MaxStageCount, MaxStageCount, false);
165 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
166 InstrMap, MaxStageCount, MaxStageCount, false);
167
168 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
169
170 SmallVector<MachineBasicBlock *, 4> EpilogBBs;
171 // Generate the epilog instructions to complete the pipeline.
172 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
173 PrologBBs);
174
175 // We need this step because the register allocation doesn't handle some
176 // situations well, so we insert copies to help out.
177 splitLifetimes(KernelBB, EpilogBBs);
178
179 // Remove dead instructions due to loop induction variables.
180 removeDeadInstructions(KernelBB, EpilogBBs);
181
182 // Add branches between prolog and epilog blocks.
183 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
184
185 delete[] VRMap;
186 delete[] VRMapPhi;
187}
188
190 // Remove the original loop since it's no longer referenced.
191 for (auto &I : *BB)
192 LIS.RemoveMachineInstrFromMaps(I);
193 BB->clear();
194 BB->eraseFromParent();
195}
196
197/// Generate the pipeline prolog code.
198void ModuloScheduleExpander::generateProlog(unsigned LastStage,
199 MachineBasicBlock *KernelBB,
200 ValueMapTy *VRMap,
201 MBBVectorTy &PrologBBs) {
202 MachineBasicBlock *PredBB = Preheader;
203 InstrMapTy InstrMap;
204
205 // Generate a basic block for each stage, not including the last stage,
206 // which will be generated in the kernel. Each basic block may contain
207 // instructions from multiple stages/iterations.
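 // For example (illustrative), with three stages the first prolog block
 // executes stage 0 of iteration 0, and the second prolog block executes
 // stage 1 of iteration 0 together with stage 0 of iteration 1.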
208 for (unsigned i = 0; i < LastStage; ++i) {
209 // Create and insert the prolog basic block prior to the original loop
210 // basic block. The original loop is removed later.
211 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
212 PrologBBs.push_back(NewBB);
213 MF.insert(BB->getIterator(), NewBB);
214 NewBB->transferSuccessors(PredBB);
215 PredBB->addSuccessor(NewBB);
216 PredBB = NewBB;
217 LIS.insertMBBInMaps(NewBB);
218
219 // Generate instructions for each appropriate stage. Process instructions
220 // in original program order.
221 for (int StageNum = i; StageNum >= 0; --StageNum) {
222 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
223 BBE = BB->getFirstTerminator();
224 BBI != BBE; ++BBI) {
225 if (Schedule.getStage(&*BBI) == StageNum) {
226 if (BBI->isPHI())
227 continue;
228 MachineInstr *NewMI =
229 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
230 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
231 NewBB->push_back(NewMI);
232 LIS.InsertMachineInstrInMaps(*NewMI);
233 InstrMap[NewMI] = &*BBI;
234 }
235 }
236 }
237 rewritePhiValues(NewBB, i, VRMap, InstrMap);
238 LLVM_DEBUG({
239 dbgs() << "prolog:\n";
240 NewBB->dump();
241 });
242 }
243
244 PredBB->replaceSuccessor(BB, KernelBB);
245
246 // Check if we need to remove the branch from the preheader to the original
247 // loop, and replace it with a branch to the new loop.
248 unsigned numBranches = TII->removeBranch(*Preheader);
249 if (numBranches) {
250 SmallVector<MachineOperand, 4> Cond;
251 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
252 }
253}
254
255/// Generate the pipeline epilog code. The epilog code finishes the iterations
256/// that were started in either the prolog or the kernel. We create a basic
257/// block for each stage that needs to complete.
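/// For example (illustrative), with three stages the epilog block closest to
/// the kernel runs only the final stage to finish the next-to-last iteration,
/// and the following epilog block runs the last two stages to finish the
/// final iteration.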
258void ModuloScheduleExpander::generateEpilog(
259 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
260 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
261 MBBVectorTy &PrologBBs) {
262 // We need to change the branch from the kernel to the first epilog block, so
263 // this call to analyze branch uses the kernel rather than the original BB.
264 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
265 SmallVector<MachineOperand, 4> Cond;
266 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
267 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
268 if (checkBranch)
269 return;
270
271 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
272 if (*LoopExitI == KernelBB)
273 ++LoopExitI;
274 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
275 MachineBasicBlock *LoopExitBB = *LoopExitI;
276
277 MachineBasicBlock *PredBB = KernelBB;
278 MachineBasicBlock *EpilogStart = LoopExitBB;
279 InstrMapTy InstrMap;
280
281 // Generate a basic block for each stage, not including the last stage,
282 // which was generated for the kernel. Each basic block may contain
283 // instructions from multiple stages/iterations.
284 int EpilogStage = LastStage + 1;
285 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
286 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
287 EpilogBBs.push_back(NewBB);
288 MF.insert(BB->getIterator(), NewBB);
289
290 PredBB->replaceSuccessor(LoopExitBB, NewBB);
291 NewBB->addSuccessor(LoopExitBB);
292 LIS.insertMBBInMaps(NewBB);
293
294 if (EpilogStart == LoopExitBB)
295 EpilogStart = NewBB;
296
297 // Add instructions to the epilog depending on the current block.
298 // Process instructions in original program order.
299 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
300 for (auto &BBI : *BB) {
301 if (BBI.isPHI())
302 continue;
303 MachineInstr *In = &BBI;
304 if ((unsigned)Schedule.getStage(In) == StageNum) {
305 // Instructions with memoperands in the epilog are updated with
306 // conservative values.
307 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
308 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
309 NewBB->push_back(NewMI);
310 LIS.InsertMachineInstrInMaps(*NewMI);
311 InstrMap[NewMI] = In;
312 }
313 }
314 }
315 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
316 InstrMap, LastStage, EpilogStage, i == 1);
317 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
318 InstrMap, LastStage, EpilogStage, i == 1);
319 PredBB = NewBB;
320
321 LLVM_DEBUG({
322 dbgs() << "epilog:\n";
323 NewBB->dump();
324 });
325 }
326
327 // Fix any Phi nodes in the loop exit block.
328 LoopExitBB->replacePhiUsesWith(BB, PredBB);
329
330 // Create a branch to the new epilog from the kernel.
331 // Remove the original branch and add a new branch to the epilog.
332 TII->removeBranch(*KernelBB);
333 assert((OrigBB == TBB || OrigBB == FBB) &&
334 "Unable to determine looping branch direction");
335 if (OrigBB != TBB)
336 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
337 else
338 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
339 // Add a branch to the loop exit.
340 if (EpilogBBs.size() > 0) {
341 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
342 SmallVector<MachineOperand, 4> Cond1;
343 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
344 }
345}
346
347/// Replace all uses of FromReg that appear outside the specified
348/// basic block with ToReg.
349static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
350 MachineBasicBlock *MBB,
351 MachineRegisterInfo &MRI) {
352 for (MachineOperand &O :
353 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
354 if (O.getParent()->getParent() != MBB)
355 O.setReg(ToReg);
356}
357
358/// Return true if the register has a use that occurs outside the
359/// specified loop.
360static bool hasUseAfterLoop(Register Reg, MachineBasicBlock *BB,
361 MachineRegisterInfo &MRI) {
362 for (const MachineOperand &MO : MRI.use_operands(Reg))
363 if (MO.getParent()->getParent() != BB)
364 return true;
365 return false;
366}
367
368/// Generate Phis for the specific block in the generated pipelined code.
369/// This function looks at the Phis from the original code to guide the
370/// creation of new Phis.
371void ModuloScheduleExpander::generateExistingPhis(
372 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
373 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
374 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
375 // Compute the stage number for the initial value of the Phi, which
376 // comes from the prolog. The prolog to use depends on which kernel or
377 // epilog block we're adding the Phi to.
378 unsigned PrologStage = 0;
379 unsigned PrevStage = 0;
380 bool InKernel = (LastStageNum == CurStageNum);
381 if (InKernel) {
382 PrologStage = LastStageNum - 1;
383 PrevStage = CurStageNum;
384 } else {
385 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
386 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
387 }
388
389 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
390 BBE = BB->getFirstNonPHI();
391 BBI != BBE; ++BBI) {
392 Register Def = BBI->getOperand(0).getReg();
393
394 Register InitVal;
395 Register LoopVal;
396 getPhiRegs(*BBI, BB, InitVal, LoopVal);
397
398 Register PhiOp1;
399 // The Phi value from the loop body typically is defined in the loop, but
400 // not always. So, we need to check if the value is defined in the loop.
401 Register PhiOp2 = LoopVal;
402 if (auto It = VRMap[LastStageNum].find(LoopVal);
403 It != VRMap[LastStageNum].end())
404 PhiOp2 = It->second;
405
406 int StageScheduled = Schedule.getStage(&*BBI);
407 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
408 unsigned NumStages = getStagesForReg(Def, CurStageNum);
409 if (NumStages == 0) {
410 // We don't need to generate a Phi anymore, but we need to rename any uses
411 // of the Phi value.
412 Register NewReg = VRMap[PrevStage][LoopVal];
413 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
414 InitVal, NewReg);
415 auto It = VRMap[CurStageNum].find(LoopVal);
416 if (It != VRMap[CurStageNum].end()) {
417 Register Reg = It->second;
418 VRMap[CurStageNum][Def] = Reg;
419 }
420 }
421 // Adjust the number of Phis needed depending on the number of prologs left,
422 // and the distance from where the Phi is first scheduled. The number of
423 // Phis cannot exceed the number of prolog stages. Each stage can
424 // potentially define two values.
425 unsigned MaxPhis = PrologStage + 2;
426 if (!InKernel && (int)PrologStage <= LoopValStage)
427 MaxPhis = std::max((int)MaxPhis - LoopValStage, 1);
428 unsigned NumPhis = std::min(NumStages, MaxPhis);
429
430 Register NewReg;
431 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
432 // In the epilog, we may need to look back one stage to get the correct
433 // Phi name, because the epilog and prolog blocks execute the same stage.
434 // The correct name is from the previous block only when the Phi has
435 // been completely scheduled prior to the epilog, and the Phi value is not
436 // needed in multiple stages.
437 int StageDiff = 0;
438 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
439 NumPhis == 1)
440 StageDiff = 1;
441 // Adjust the computations below when the phi and the loop definition
442 // are scheduled in different stages.
443 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
444 StageDiff = StageScheduled - LoopValStage;
445 for (unsigned np = 0; np < NumPhis; ++np) {
446 // If the Phi hasn't been scheduled, then use the initial Phi operand
447 // value. Otherwise, use the scheduled version of the instruction. This
448 // is a little complicated when a Phi references another Phi.
449 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
450 PhiOp1 = InitVal;
451 // Check if the Phi has already been scheduled in a prolog stage.
452 else if (PrologStage >= AccessStage + StageDiff + np &&
453 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
454 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
455 // Check if the Phi has already been scheduled, but the loop instruction
456 // is either another Phi, or doesn't occur in the loop.
457 else if (PrologStage >= AccessStage + StageDiff + np) {
458 // If the Phi references another Phi, we need to examine the other
459 // Phi to get the correct value.
460 PhiOp1 = LoopVal;
461 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
462 int Indirects = 1;
463 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
464 int PhiStage = Schedule.getStage(InstOp1);
465 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
466 PhiOp1 = getInitPhiReg(*InstOp1, BB);
467 else
468 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
469 InstOp1 = MRI.getVRegDef(PhiOp1);
470 int PhiOpStage = Schedule.getStage(InstOp1);
471 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
472 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np) {
473 auto &M = VRMap[PrologStage - StageAdj - Indirects - np];
474 if (auto It = M.find(PhiOp1); It != M.end()) {
475 PhiOp1 = It->second;
476 break;
477 }
478 }
479 ++Indirects;
480 }
481 } else
482 PhiOp1 = InitVal;
483 // If this references a generated Phi in the kernel, get the Phi operand
484 // from the incoming block.
485 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
486 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
487 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
488
489 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
490 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
491 // In the epilog, a map lookup is needed to get the value from the kernel,
492 // or previous epilog block. How this is done depends on whether the
493 // instruction is scheduled in the previous block.
494 if (!InKernel) {
495 int StageDiffAdj = 0;
496 if (LoopValStage != -1 && StageScheduled > LoopValStage)
497 StageDiffAdj = StageScheduled - LoopValStage;
498 // Use the loop value defined in the kernel, unless the kernel
499 // contains the last definition of the Phi.
500 if (np == 0 && PrevStage == LastStageNum &&
501 (StageScheduled != 0 || LoopValStage != 0) &&
502 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
503 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
504 // Use the value defined by the Phi. We add one because we switch
505 // from looking at the loop value to the Phi definition.
506 else if (np > 0 && PrevStage == LastStageNum &&
507 VRMap[PrevStage - np + 1].count(Def))
508 PhiOp2 = VRMap[PrevStage - np + 1][Def];
509 // Use the loop value defined in the kernel.
510 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
511 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
512 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
513 // Use the value defined by the Phi, unless we're generating the first
514 // epilog and the Phi refers to a Phi in a different stage.
515 else if (VRMap[PrevStage - np].count(Def) &&
516 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
517 (LoopValStage == StageScheduled)))
518 PhiOp2 = VRMap[PrevStage - np][Def];
519 }
520
521 // Check if we can reuse an existing Phi. This occurs when a Phi
522 // references another Phi, and the other Phi is scheduled in an
523 // earlier stage. We can try to reuse an existing Phi up until the last
524 // stage of the current Phi.
525 if (LoopDefIsPhi) {
526 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
527 int LVNumStages = getStagesForPhi(LoopVal);
528 int StageDiff = (StageScheduled - LoopValStage);
529 LVNumStages -= StageDiff;
530 // Make sure the loop value Phi has been processed already.
531 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
532 NewReg = PhiOp2;
533 unsigned ReuseStage = CurStageNum;
534 if (isLoopCarried(*PhiInst))
535 ReuseStage -= LVNumStages;
536 // Check if the Phi to reuse has been generated yet. If not, then
537 // there is nothing to reuse.
538 if (VRMap[ReuseStage - np].count(LoopVal)) {
539 NewReg = VRMap[ReuseStage - np][LoopVal];
540
541 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
542 Def, NewReg);
543 // Update the map with the new Phi name.
544 VRMap[CurStageNum - np][Def] = NewReg;
545 PhiOp2 = NewReg;
546 if (VRMap[LastStageNum - np - 1].count(LoopVal))
547 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
548
549 if (IsLast && np == NumPhis - 1)
550 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
551 continue;
552 }
553 }
554 }
555 if (InKernel && StageDiff > 0 &&
556 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
557 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
558 }
559
560 const TargetRegisterClass *RC = MRI.getRegClass(Def);
561 NewReg = MRI.createVirtualRegister(RC);
562
563 MachineInstrBuilder NewPhi =
564 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
565 TII->get(TargetOpcode::PHI), NewReg);
566 NewPhi.addReg(PhiOp1).addMBB(BB1);
567 NewPhi.addReg(PhiOp2).addMBB(BB2);
568 LIS.InsertMachineInstrInMaps(*NewPhi);
569 if (np == 0)
570 InstrMap[NewPhi] = &*BBI;
571
572 // We define the Phis after creating the new pipelined code, so
573 // we need to rename the Phi values in scheduled instructions.
574
575 Register PrevReg;
576 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
577 PrevReg = VRMap[PrevStage - np][LoopVal];
578 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
579 NewReg, PrevReg);
580 // If the Phi has been scheduled, use the new name for rewriting.
581 if (VRMap[CurStageNum - np].count(Def)) {
582 Register R = VRMap[CurStageNum - np][Def];
583 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
584 NewReg);
585 }
586
587 // Check if we need to rename any uses that occur after the loop. The
588 // register to replace depends on whether the Phi is scheduled in the
589 // epilog.
590 if (IsLast && np == NumPhis - 1)
591 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
592
593 // In the kernel, a dependent Phi uses the value from this Phi.
594 if (InKernel)
595 PhiOp2 = NewReg;
596
597 // Update the map with the new Phi name.
598 VRMap[CurStageNum - np][Def] = NewReg;
599 }
600
601 while (NumPhis++ < NumStages) {
602 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
603 NewReg, 0);
604 }
605
606 // Check if we need to rename a Phi that has been eliminated due to
607 // scheduling.
608 if (NumStages == 0 && IsLast) {
609 auto &CurStageMap = VRMap[CurStageNum];
610 auto It = CurStageMap.find(LoopVal);
611 if (It != CurStageMap.end())
612 replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
613 }
614 }
615}
616
617/// Generate Phis for the specified block in the generated pipelined code.
618/// These are new Phis needed because the definition is scheduled after the
619/// use in the pipelined sequence.
620void ModuloScheduleExpander::generatePhis(
621 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
622 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
623 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
624 bool IsLast) {
625 // Compute the stage number that contains the initial Phi value, and
626 // the Phi from the previous stage.
627 unsigned PrologStage = 0;
628 unsigned PrevStage = 0;
629 unsigned StageDiff = CurStageNum - LastStageNum;
630 bool InKernel = (StageDiff == 0);
631 if (InKernel) {
632 PrologStage = LastStageNum - 1;
633 PrevStage = CurStageNum;
634 } else {
635 PrologStage = LastStageNum - StageDiff;
636 PrevStage = LastStageNum + StageDiff - 1;
637 }
638
639 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
640 BBE = BB->instr_end();
641 BBI != BBE; ++BBI) {
642 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
643 MachineOperand &MO = BBI->getOperand(i);
644 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
645 continue;
646
647 int StageScheduled = Schedule.getStage(&*BBI);
648 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
649 Register Def = MO.getReg();
650 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
651 // An instruction scheduled in stage 0 that is used after the loop
652 // requires a phi in the epilog for the last definition from either
653 // the kernel or prolog.
654 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
655 hasUseAfterLoop(Def, BB, MRI))
656 NumPhis = 1;
657 if (!InKernel && (unsigned)StageScheduled > PrologStage)
658 continue;
659
660 Register PhiOp2;
661 if (InKernel) {
662 PhiOp2 = VRMap[PrevStage][Def];
663 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
664 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
665 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
666 }
667 // The number of Phis can't exceed the number of prolog stages. The
668 // prolog stage number is zero based.
669 if (NumPhis > PrologStage + 1 - StageScheduled)
670 NumPhis = PrologStage + 1 - StageScheduled;
671 for (unsigned np = 0; np < NumPhis; ++np) {
672 // Example for
673 // Org:
674 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
675 //
676 // Prolog0 (Stage0):
677 // %Clone0 = ...
678 // Prolog1 (Stage1):
679 // %Clone1 = ...
680 // Kernel (Stage2):
681 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
682 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
683 // %Clone2 = ...
684 // Epilog0 (Stage3):
685 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
686 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
687 // Epilog1 (Stage4):
688 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
689 //
690 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
691 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
692 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
693
694 Register PhiOp1 = VRMap[PrologStage][Def];
695 if (np <= PrologStage)
696 PhiOp1 = VRMap[PrologStage - np][Def];
697 if (!InKernel) {
698 if (PrevStage == LastStageNum && np == 0)
699 PhiOp2 = VRMap[LastStageNum][Def];
700 else
701 PhiOp2 = VRMapPhi[PrevStage - np][Def];
702 }
703
704 const TargetRegisterClass *RC = MRI.getRegClass(Def);
705 Register NewReg = MRI.createVirtualRegister(RC);
706
707 MachineInstrBuilder NewPhi =
708 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
709 TII->get(TargetOpcode::PHI), NewReg);
710 NewPhi.addReg(PhiOp1).addMBB(BB1);
711 NewPhi.addReg(PhiOp2).addMBB(BB2);
712 LIS.InsertMachineInstrInMaps(*NewPhi);
713 if (np == 0)
714 InstrMap[NewPhi] = &*BBI;
715
716 // Rewrite uses and update the map. The actions depend upon whether
717 // we are generating code for the kernel or epilog blocks.
718 if (InKernel) {
719 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
720 NewReg);
721 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
722 NewReg);
723
724 PhiOp2 = NewReg;
725 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
726 } else {
727 VRMapPhi[CurStageNum - np][Def] = NewReg;
728 if (np == NumPhis - 1)
729 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
730 NewReg);
731 }
732 if (IsLast && np == NumPhis - 1)
733 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
734 }
735 }
736 }
737}
738
739/// Remove instructions that generate values with no uses.
740/// Typically, these are induction variable operations that generate values
741/// used in the loop itself. A dead instruction has a definition with
742/// no uses, or uses that occur in the original loop only.
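/// For example (illustrative), a cloned induction-variable increment in the
/// last epilog block defines a value that nothing outside the original loop
/// reads, so it is deleted here.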
743void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
744 MBBVectorTy &EpilogBBs) {
745 // For each epilog block, check that the value defined by each instruction
746 // is used. If not, delete it.
747 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
748 for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
749 ME = MBB->instr_rend();
750 MI != ME;) {
752 // From DeadMachineInstructionElim. Don't delete inline assembly.
752 if (MI->isInlineAsm()) {
753 ++MI;
754 continue;
755 }
756 bool SawStore = false;
757 // Check if it's safe to remove the instruction due to side effects.
758 // We can, and want to, remove Phis here.
759 if (!MI->isSafeToMove(SawStore) && !MI->isPHI()) {
760 ++MI;
761 continue;
762 }
763 bool used = true;
764 for (const MachineOperand &MO : MI->all_defs()) {
765 Register reg = MO.getReg();
766 // Assume physical registers are used, unless they are marked dead.
767 if (reg.isPhysical()) {
768 used = !MO.isDead();
769 if (used)
770 break;
771 continue;
772 }
773 unsigned realUses = 0;
774 for (const MachineOperand &U : MRI.use_operands(reg)) {
775 // Check if there are any uses that occur only in the original
776 // loop. If so, that's not a real use.
777 if (U.getParent()->getParent() != BB) {
778 realUses++;
779 used = true;
780 break;
781 }
782 }
783 if (realUses > 0)
784 break;
785 used = false;
786 }
787 if (!used) {
788 LIS.RemoveMachineInstrFromMaps(*MI);
789 MI++->eraseFromParent();
790 continue;
791 }
792 ++MI;
793 }
794 // In the kernel block, check if we can remove a Phi that generates a value
795 // used in an instruction removed in the epilog block.
796 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
797 Register reg = MI.getOperand(0).getReg();
798 if (MRI.use_begin(reg) == MRI.use_end()) {
799 LIS.RemoveMachineInstrFromMaps(MI);
800 MI.eraseFromParent();
801 }
802 }
803}
804
805/// For loop carried definitions, we split the lifetime of a virtual register
806/// that has uses past the definition in the next iteration. A copy with a new
807/// virtual register is inserted before the definition, which helps with
808/// generating a better register assignment.
809///
810///   v1 = phi(a, v2)     v1 = phi(a, v2)
811///   v2 = phi(b, v3)     v2 = phi(b, v3)
812///   v3 = ..             v4 = copy v1
813///   .. = v1             v3 = ..
814///                       .. = v4
815void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
816 MBBVectorTy &EpilogBBs) {
817 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
818 for (auto &PHI : KernelBB->phis()) {
819 Register Def = PHI.getOperand(0).getReg();
820 // Check for any Phi definition that is used as an operand of another Phi
821 // in the same block.
822 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
823 E = MRI.use_instr_end();
824 I != E; ++I) {
825 if (I->isPHI() && I->getParent() == KernelBB) {
826 // Get the loop carried definition.
827 Register LCDef = getLoopPhiReg(PHI, KernelBB);
828 if (!LCDef)
829 continue;
830 MachineInstr *MI = MRI.getVRegDef(LCDef);
831 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
832 continue;
833 // Search through the rest of the block looking for uses of the Phi
834 // definition. If one occurs, then split the lifetime.
835 Register SplitReg;
836 for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
837 KernelBB->instr_end()))
838 if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
839 // We split the lifetime when we find the first use.
840 if (!SplitReg) {
841 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
842 MachineInstr *newCopy =
843 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
844 TII->get(TargetOpcode::COPY), SplitReg)
845 .addReg(Def);
846 LIS.InsertMachineInstrInMaps(*newCopy);
847 }
848 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
849 }
850 if (!SplitReg)
851 continue;
852 // Search through each of the epilog blocks for any uses to be renamed.
853 for (auto &Epilog : EpilogBBs)
854 for (auto &I : *Epilog)
855 if (I.readsRegister(Def, /*TRI=*/nullptr))
856 I.substituteRegister(Def, SplitReg, 0, *TRI);
857 break;
858 }
859 }
860 }
861}
862
863/// Create branches from each prolog basic block to the appropriate epilog
864/// block. These edges are needed if the loop ends before reaching the
865/// kernel.
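/// For example (illustrative), with prologs P0 -> P1 -> Kernel and epilogs
/// Kernel -> E0 -> E1 -> Exit, P1 gets a conditional branch to E0 and P0 one
/// to E1, so a short trip count can bypass the kernel and still finish the
/// iterations the prologs started.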
866void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
867 MBBVectorTy &PrologBBs,
868 MachineBasicBlock *KernelBB,
869 MBBVectorTy &EpilogBBs,
870 ValueMapTy *VRMap) {
871 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
872 MachineBasicBlock *LastPro = KernelBB;
873 MachineBasicBlock *LastEpi = KernelBB;
874
875 // Start from the blocks connected to the kernel and work "out"
876 // to the first prolog and the last epilog blocks.
877 unsigned MaxIter = PrologBBs.size() - 1;
878 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
879 // Add branches to the prolog that go to the corresponding
880 // epilog, and the fall-thru prolog/kernel block.
881 MachineBasicBlock *Prolog = PrologBBs[j];
882 MachineBasicBlock *Epilog = EpilogBBs[i];
883
885 std::optional<bool> StaticallyGreater =
886 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
887 unsigned numAdded = 0;
888 if (!StaticallyGreater) {
889 Prolog->addSuccessor(Epilog);
890 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
891 } else if (*StaticallyGreater == false) {
892 Prolog->addSuccessor(Epilog);
893 Prolog->removeSuccessor(LastPro);
894 LastEpi->removeSuccessor(Epilog);
895 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
896 Epilog->removePHIsIncomingValuesForPredecessor(*LastEpi);
897 // Remove the blocks that are no longer referenced.
898 if (LastPro != LastEpi) {
899 for (auto &MI : *LastEpi)
900 LIS.RemoveMachineInstrFromMaps(MI);
901 LastEpi->clear();
902 LastEpi->eraseFromParent();
903 }
904 if (LastPro == KernelBB) {
905 LoopInfo->disposed(&LIS);
906 NewKernel = nullptr;
907 }
908 for (auto &MI : *LastPro)
909 LIS.RemoveMachineInstrFromMaps(MI);
910 LastPro->clear();
911 LastPro->eraseFromParent();
912 } else {
913 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
914 Epilog->removePHIsIncomingValuesForPredecessor(*Prolog);
915 }
916 LastPro = Prolog;
917 LastEpi = Epilog;
918 for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
919 E = Prolog->instr_rend();
920 I != E && numAdded > 0; ++I, --numAdded)
921 updateInstruction(&*I, false, j, 0, VRMap);
922 }
923
924 if (NewKernel) {
925 LoopInfo->setPreheader(PrologBBs[MaxIter]);
926 LoopInfo->adjustTripCount(-(MaxIter + 1));
927 }
928}
929
930/// Return true if we can compute the amount the instruction changes
931/// during each iteration. Set Delta to the amount of the change.
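/// For example (illustrative), for a load whose base register is advanced by
/// 8 on every iteration through a loop-carried phi, Delta is set to 8.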
932bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
933 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
934 const MachineOperand *BaseOp;
935 int64_t Offset;
936 bool OffsetIsScalable;
937 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
938 return false;
939
940 // FIXME: This algorithm assumes instructions have fixed-size offsets.
941 if (OffsetIsScalable)
942 return false;
943
944 if (!BaseOp->isReg())
945 return false;
946
947 Register BaseReg = BaseOp->getReg();
948
949 MachineRegisterInfo &MRI = MF.getRegInfo();
950 // Check if there is a Phi. If so, get the definition in the loop.
951 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
952 if (BaseDef && BaseDef->isPHI()) {
953 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
954 BaseDef = MRI.getVRegDef(BaseReg);
955 }
956 if (!BaseDef)
957 return false;
958
959 int D = 0;
960 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
961 return false;
962
963 Delta = D;
964 return true;
965}
966
967/// Update the memory operand with a new offset when the pipeliner
968/// generates a new copy of the instruction that refers to a
969/// different memory location.
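/// For example (illustrative), if computeDelta reports a per-iteration stride
/// of 4 bytes, the copy of the instruction emitted Num stages later has its
/// memory operand offset adjusted by 4 * Num.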
970void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
971 MachineInstr &OldMI,
972 unsigned Num) {
973 if (Num == 0)
974 return;
975 // If the instruction has memory operands, then adjust the offset
976 // when the instruction appears in different stages.
977 if (NewMI.memoperands_empty())
978 return;
979 SmallVector<MachineMemOperand *, 2> NewMMOs;
980 for (MachineMemOperand *MMO : NewMI.memoperands()) {
981 // TODO: Figure out whether isAtomic is really necessary (see D57601).
982 if (MMO->isVolatile() || MMO->isAtomic() ||
983 (MMO->isInvariant() && MMO->isDereferenceable()) ||
984 (!MMO->getValue())) {
985 NewMMOs.push_back(MMO);
986 continue;
987 }
988 unsigned Delta;
989 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
990 int64_t AdjOffset = Delta * Num;
991 NewMMOs.push_back(
992 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
993 } else {
994 NewMMOs.push_back(MF.getMachineMemOperand(
995 MMO, 0, LocationSize::beforeOrAfterPointer()));
996 }
997 }
998 NewMI.setMemRefs(MF, NewMMOs);
999}
1000
1001/// Clone the instruction for the new pipelined loop and update the
1002/// memory operands, if needed.
1003MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
1004 unsigned CurStageNum,
1005 unsigned InstStageNum) {
1006 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1007 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1008 return NewMI;
1009}
1010
1011/// Clone the instruction for the new pipelined loop. If needed, this
1012/// function updates the instruction using the values saved in the
1013/// InstrChanges structure.
1014MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1015 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1016 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1017 auto It = InstrChanges.find(OldMI);
1018 if (It != InstrChanges.end()) {
1019 std::pair<Register, int64_t> RegAndOffset = It->second;
1020 unsigned BasePos, OffsetPos;
1021 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1022 return nullptr;
1023 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1024 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1025 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1026 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1027 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1028 }
1029 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1030 return NewMI;
1031}
1032
1033/// Update the machine instruction with new virtual registers. This
1034/// function may change the definitions and/or uses.
1035void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1036 bool LastDef,
1037 unsigned CurStageNum,
1038 unsigned InstrStageNum,
1039 ValueMapTy *VRMap) {
1040 for (MachineOperand &MO : NewMI->operands()) {
1041 if (!MO.isReg() || !MO.getReg().isVirtual())
1042 continue;
1043 Register reg = MO.getReg();
1044 if (MO.isDef()) {
1045 // Create a new virtual register for the definition.
1046 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1047 Register NewReg = MRI.createVirtualRegister(RC);
1048 MO.setReg(NewReg);
1049 VRMap[CurStageNum][reg] = NewReg;
1050 if (LastDef)
1051 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
1052 } else if (MO.isUse()) {
1053 MachineInstr *Def = MRI.getVRegDef(reg);
1054 // Compute the stage that contains the last definition for the instruction.
1055 int DefStageNum = Schedule.getStage(Def);
1056 unsigned StageNum = CurStageNum;
1057 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1058 // Compute the difference in stages between the definition and the use.
1059 unsigned StageDiff = (InstrStageNum - DefStageNum);
1060 // Make an adjustment to get the last definition.
1061 StageNum -= StageDiff;
1062 }
1063 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())
1064 MO.setReg(It->second);
1065 }
1066 }
1067}
1068
1069/// Return the instruction in the loop that defines the register.
1070/// If the definition is a Phi, then follow the Phi operand to
1071/// the instruction in the loop.
1072MachineInstr *ModuloScheduleExpander::findDefInLoop(Register Reg) {
1073 SmallPtrSet<MachineInstr *, 8> Visited;
1074 MachineInstr *Def = MRI.getVRegDef(Reg);
1075 while (Def->isPHI()) {
1076 if (!Visited.insert(Def).second)
1077 break;
1078 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1079 if (Def->getOperand(i + 1).getMBB() == BB) {
1080 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1081 break;
1082 }
1083 }
1084 return Def;
1085}
1086
1087/// Return the new name for the value from the previous stage.
1088Register ModuloScheduleExpander::getPrevMapVal(
1089 unsigned StageNum, unsigned PhiStage, Register LoopVal, unsigned LoopStage,
1090 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1091 Register PrevVal;
1092 if (StageNum > PhiStage) {
1093 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1094 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1095 // The name is defined in the previous stage.
1096 PrevVal = VRMap[StageNum - 1][LoopVal];
1097 else if (VRMap[StageNum].count(LoopVal))
1098 // The previous name is defined in the current stage when the instruction
1099 // order is swapped.
1100 PrevVal = VRMap[StageNum][LoopVal];
1101 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1102 // The loop value hasn't yet been scheduled.
1103 PrevVal = LoopVal;
1104 else if (StageNum == PhiStage + 1)
1105 // The loop value is another phi, which has not been scheduled.
1106 PrevVal = getInitPhiReg(*LoopInst, BB);
1107 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1108 // The loop value is another phi, which has been scheduled.
1109 PrevVal =
1110 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1111 LoopStage, VRMap, BB);
1112 }
1113 return PrevVal;
1114}
1115
1116/// Rewrite the Phi values in the specified block to use the mappings
1117/// from the initial operand. Once the Phi is scheduled, we switch
1118/// to using the loop value instead of the Phi value, so those names
1119/// do not need to be rewritten.
1120void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1121 unsigned StageNum,
1122 ValueMapTy *VRMap,
1123 InstrMapTy &InstrMap) {
1124 for (auto &PHI : BB->phis()) {
1125 Register InitVal;
1126 Register LoopVal;
1127 getPhiRegs(PHI, BB, InitVal, LoopVal);
1128 Register PhiDef = PHI.getOperand(0).getReg();
1129
1130 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1131 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1132 unsigned NumPhis = getStagesForPhi(PhiDef);
1133 if (NumPhis > StageNum)
1134 NumPhis = StageNum;
1135 for (unsigned np = 0; np <= NumPhis; ++np) {
1136 Register NewVal =
1137 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1138 if (!NewVal)
1139 NewVal = InitVal;
1140 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1141 NewVal);
1142 }
1143 }
1144}
1145
1146/// Rewrite a previously scheduled instruction to use the register value
1147/// from the new instruction. Make sure the instruction occurs in the
1148/// basic block, and we don't change the uses in the new instruction.
1149void ModuloScheduleExpander::rewriteScheduledInstr(
1150 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1151 unsigned PhiNum, MachineInstr *Phi, Register OldReg, Register NewReg,
1152 Register PrevReg) {
1153 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1154 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1155 // Rewrite uses that have been scheduled already to use the new
1156 // Phi register.
1157 for (MachineOperand &UseOp :
1158 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1159 MachineInstr *UseMI = UseOp.getParent();
1160 if (UseMI->getParent() != BB)
1161 continue;
1162 if (UseMI->isPHI()) {
1163 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1164 continue;
1165 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1166 continue;
1167 }
1168 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1169 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1170 MachineInstr *OrigMI = OrigInstr->second;
1171 int StageSched = Schedule.getStage(OrigMI);
1172 int CycleSched = Schedule.getCycle(OrigMI);
1173 Register ReplaceReg;
1174 // This is the stage for the scheduled instruction.
1175 if (StagePhi == StageSched && Phi->isPHI()) {
1176 int CyclePhi = Schedule.getCycle(Phi);
1177 if (PrevReg && InProlog)
1178 ReplaceReg = PrevReg;
1179 else if (PrevReg && !isLoopCarried(*Phi) &&
1180 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1181 ReplaceReg = PrevReg;
1182 else
1183 ReplaceReg = NewReg;
1184 }
1185 // The scheduled instruction occurs before the scheduled Phi, and the
1186 // Phi is not loop carried.
1187 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1188 ReplaceReg = NewReg;
1189 if (StagePhi > StageSched && Phi->isPHI())
1190 ReplaceReg = NewReg;
1191 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1192 ReplaceReg = NewReg;
1193 if (ReplaceReg) {
1194 const TargetRegisterClass *NRC =
1195 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1196 if (NRC)
1197 UseOp.setReg(ReplaceReg);
1198 else {
1199 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1200 MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
1201 TII->get(TargetOpcode::COPY), SplitReg)
1202 .addReg(ReplaceReg);
1203 UseOp.setReg(SplitReg);
1204 LIS.InsertMachineInstrInMaps(*newCopy);
1205 }
1206 }
1207 }
1208}
1209
1210bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1211 if (!Phi.isPHI())
1212 return false;
1213 int DefCycle = Schedule.getCycle(&Phi);
1214 int DefStage = Schedule.getStage(&Phi);
1215
1216 Register InitVal;
1217 Register LoopVal;
1218 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1219 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1220 if (!Use || Use->isPHI())
1221 return true;
1222 int LoopCycle = Schedule.getCycle(Use);
1223 int LoopStage = Schedule.getStage(Use);
1224 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1225}
1226
1227//===----------------------------------------------------------------------===//
1228// PeelingModuloScheduleExpander implementation
1229//===----------------------------------------------------------------------===//
1230// This is a reimplementation of ModuloScheduleExpander that works by creating
1231// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1232//===----------------------------------------------------------------------===//
1233
1234namespace {
1235// Remove any dead phis in MBB. Dead phis either have only one block as input
1236// (in which case they are the identity) or have no uses.
1237void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1238 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1239 bool Changed = true;
1240 while (Changed) {
1241 Changed = false;
1242 for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
1243 assert(MI.isPHI());
1244 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1245 if (LIS)
1246 LIS->RemoveMachineInstrFromMaps(MI);
1247 MI.eraseFromParent();
1248 Changed = true;
1249 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1250 const TargetRegisterClass *ConstrainRegClass =
1251 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1252 MRI.getRegClass(MI.getOperand(0).getReg()));
1253 assert(ConstrainRegClass &&
1254 "Expected a valid constrained register class!");
1255 (void)ConstrainRegClass;
1256 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1257 MI.getOperand(1).getReg());
1258 if (LIS)
1259 LIS->RemoveMachineInstrFromMaps(MI);
1260 MI.eraseFromParent();
1261 Changed = true;
1262 }
1263 }
1264 }
1265}
1266
1267/// Rewrites the kernel block in-place to adhere to the given schedule.
1268/// KernelRewriter holds all of the state required to perform the rewriting.
1269class KernelRewriter {
1270 ModuloSchedule &S;
1271 MachineBasicBlock *BB;
1272 MachineBasicBlock *PreheaderBB, *ExitBB;
1273 MachineRegisterInfo &MRI;
1274 const TargetInstrInfo *TII;
1275 LiveIntervals *LIS;
1276
1277 // Map from register class to canonical undef register for that class.
1278 DenseMap<const TargetRegisterClass *, Register> Undefs;
1279 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1280 // this map is only used when InitReg is non-undef.
1281 DenseMap<std::pair<Register, Register>, Register> Phis;
1282 // Map from LoopReg to phi register where the InitReg is undef.
1283 DenseMap<Register, Register> UndefPhis;
1284
1285 // Reg is used by MI. Return the new register MI should use to adhere to the
1286 // schedule. Insert phis as necessary.
1287 Register remapUse(Register Reg, MachineInstr &MI);
1288 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1289 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1290 // or will be chosen so as to share another phi.
1291 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1292 const TargetRegisterClass *RC = nullptr);
1293 // Create an undef register of the given register class.
1294 Register undef(const TargetRegisterClass *RC);
1295
1296public:
1297 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1298 LiveIntervals *LIS = nullptr);
1299 void rewrite();
1300};
1301} // namespace
1302
1303KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1304 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1305 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1306 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1307 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1308 PreheaderBB = *BB->pred_begin();
1309 if (PreheaderBB == BB)
1310 PreheaderBB = *std::next(BB->pred_begin());
1311}
1312
1313void KernelRewriter::rewrite() {
1314 // Rearrange the loop to be in schedule order. Note that the schedule may
1315 // contain instructions that are not owned by the loop block (InstrChanges and
1316 // friends), so we gracefully handle unowned instructions and delete any
1317 // instructions that weren't in the schedule.
1318 auto InsertPt = BB->getFirstTerminator();
1319 MachineInstr *FirstMI = nullptr;
1320 for (MachineInstr *MI : S.getInstructions()) {
1321 if (MI->isPHI())
1322 continue;
1323 if (MI->getParent())
1324 MI->removeFromParent();
1325 BB->insert(InsertPt, MI);
1326 if (!FirstMI)
1327 FirstMI = MI;
1328 }
1329 assert(FirstMI && "Failed to find first MI in schedule");
1330
1331 // At this point all of the scheduled instructions are between FirstMI
1332 // and the end of the block. Kill from the first non-phi to FirstMI.
1333 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1334 if (LIS)
1335 LIS->RemoveMachineInstrFromMaps(*I);
1336 (I++)->eraseFromParent();
1337 }
1338
1339 // Now remap every instruction in the loop.
1340 for (MachineInstr &MI : *BB) {
1341 if (MI.isPHI() || MI.isTerminator())
1342 continue;
1343 for (MachineOperand &MO : MI.uses()) {
1344 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1345 continue;
1346 Register Reg = remapUse(MO.getReg(), MI);
1347 MO.setReg(Reg);
1348 }
1349 }
1350 EliminateDeadPhis(BB, MRI, LIS);
1351
1352 // Ensure a phi exists for all instructions that are either referenced by
1353 // an illegal phi or by an instruction outside the loop. This allows us to
1354 // treat remaps of these values the same as "normal" values that come from
1355 // loop-carried phis.
1356 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1357 if (MI->isPHI()) {
1358 Register R = MI->getOperand(0).getReg();
1359 phi(R);
1360 continue;
1361 }
1362
1363 for (MachineOperand &Def : MI->defs()) {
1364 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1365 if (MI.getParent() != BB) {
1366 phi(Def.getReg());
1367 break;
1368 }
1369 }
1370 }
1371 }
1372}
1373
1374Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1375 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1376 if (!Producer)
1377 return Reg;
1378
1379 int ConsumerStage = S.getStage(&MI);
1380 if (!Producer->isPHI()) {
1381 // Non-phi producers are simple to remap. Insert as many phis as the
1382 // difference between the consumer and producer stages.
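    // For example (illustrative), a value produced in stage 0 and consumed in
    // stage 2 is routed through two loop-carried phis, so the consumer reads
    // the value computed two kernel iterations earlier.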
1383 if (Producer->getParent() != BB)
1384 // Producer was not inside the loop. Use the register as-is.
1385 return Reg;
1386 int ProducerStage = S.getStage(Producer);
1387 assert(ConsumerStage != -1 &&
1388 "In-loop consumer should always be scheduled!");
1389 assert(ConsumerStage >= ProducerStage);
1390 unsigned StageDiff = ConsumerStage - ProducerStage;
1391
1392 for (unsigned I = 0; I < StageDiff; ++I)
1393 Reg = phi(Reg);
1394 return Reg;
1395 }
1396
1397 // First, dive through the phi chain to find the defaults for the generated
1398 // phis.
1399 SmallVector<std::optional<Register>, 4> Defaults;
1400 Register LoopReg = Reg;
1401 auto LoopProducer = Producer;
1402 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1403 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1404 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1405 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1406 assert(LoopProducer);
1407 }
1408 int LoopProducerStage = S.getStage(LoopProducer);
1409
1410 std::optional<Register> IllegalPhiDefault;
1411
1412 if (LoopProducerStage == -1) {
1413 // Do nothing.
1414 } else if (LoopProducerStage > ConsumerStage) {
1415 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1416 // In addition, Consumer's cycle must be scheduled after Producer in the
1417 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1418 // functions.
1419#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1420 int LoopProducerCycle = S.getCycle(LoopProducer);
1421 int ConsumerCycle = S.getCycle(&MI);
1422#endif
1423 assert(LoopProducerCycle <= ConsumerCycle);
1424 assert(LoopProducerStage == ConsumerStage + 1);
1425 // Peel off the first phi from Defaults and insert a phi between producer
1426 // and consumer. This phi will not be at the front of the block so we
1427 // consider it illegal. It will only exist during the rewrite process; it
1428 // needs to exist while we peel off prologs because these could take the
1429 // default value. After that we can replace all uses with the loop producer
1430 // value.
1431 IllegalPhiDefault = Defaults.front();
1432 Defaults.erase(Defaults.begin());
1433 } else {
1434 assert(ConsumerStage >= LoopProducerStage);
1435 int StageDiff = ConsumerStage - LoopProducerStage;
1436 if (StageDiff > 0) {
1437 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1438 << " to " << (Defaults.size() + StageDiff) << "\n");
1439 // If we need more phis than we have defaults for, pad out with undefs for
1440 // the earliest phis, which are at the end of the defaults chain (the
1441 // chain is in reverse order).
1442 Defaults.resize(Defaults.size() + StageDiff,
1443 Defaults.empty() ? std::optional<Register>()
1444 : Defaults.back());
1445 }
1446 }
1447
1448 // Now we know the number of stages to jump back, insert the phi chain.
1449 auto DefaultI = Defaults.rbegin();
1450 while (DefaultI != Defaults.rend())
1451 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1452
1453 if (IllegalPhiDefault) {
1454 // The consumer optionally consumes LoopProducer in the same iteration
1455 // (because the producer is scheduled at an earlier cycle than the consumer)
1456 // or the initial value. To facilitate this we create an illegal block here
1457 // by embedding a phi in the middle of the block. We will fix this up
1458 // immediately prior to pruning.
1459 auto RC = MRI.getRegClass(Reg);
1460 Register R = MRI.createVirtualRegister(RC);
1461 MachineInstr *IllegalPhi =
1462 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1463 .addReg(*IllegalPhiDefault)
1464 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1465 .addReg(LoopReg)
1466 .addMBB(BB); // Block choice is arbitrary and has no effect.
1467 // Illegal phi should belong to the producer stage so that it can be
1468 // filtered correctly during peeling.
1469 S.setStage(IllegalPhi, LoopProducerStage);
1470 return R;
1471 }
1472
1473 return LoopReg;
1474}
1475
1476Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1477 const TargetRegisterClass *RC) {
1478 // If the init register is not undef, try and find an existing phi.
1479 if (InitReg) {
1480 auto I = Phis.find({LoopReg, *InitReg});
1481 if (I != Phis.end())
1482 return I->second;
1483 } else {
1484 for (auto &KV : Phis) {
1485 if (KV.first.first == LoopReg)
1486 return KV.second;
1487 }
1488 }
1489
1490 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1491 // find a phi that takes undef as input.
1492 auto I = UndefPhis.find(LoopReg);
1493 if (I != UndefPhis.end()) {
1494 Register R = I->second;
1495 if (!InitReg)
1496 // Found a phi taking undef as input, and this input is undef so return
1497 // without any more changes.
1498 return R;
1499 // Found a phi taking undef as input, so rewrite it to take InitReg.
1500 MachineInstr *MI = MRI.getVRegDef(R);
1501 MI->getOperand(1).setReg(*InitReg);
1502 Phis.insert({{LoopReg, *InitReg}, R});
1503 const TargetRegisterClass *ConstrainRegClass =
1504 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1505 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1506 (void)ConstrainRegClass;
1507 UndefPhis.erase(I);
1508 return R;
1509 }
1510
1511 // Failed to find any existing phi to reuse, so create a new one.
1512 if (!RC)
1513 RC = MRI.getRegClass(LoopReg);
1514 Register R = MRI.createVirtualRegister(RC);
1515 if (InitReg) {
1516 const TargetRegisterClass *ConstrainRegClass =
1517 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1518 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1519 (void)ConstrainRegClass;
1520 }
1521 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1522 .addReg(InitReg ? *InitReg : undef(RC))
1523 .addMBB(PreheaderBB)
1524 .addReg(LoopReg)
1525 .addMBB(BB);
1526 if (!InitReg)
1527 UndefPhis[LoopReg] = R;
1528 else
1529 Phis[{LoopReg, *InitReg}] = R;
1530 return R;
1531}
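// Reuse behaviour sketched with hypothetical registers: a second call
// phi(%loop, %init) returns the phi created by the first call; phi(%loop,
// std::nullopt) returns any phi already keyed on %loop; and a phi created
// with an undef input is rewritten in place the first time a concrete %init
// is supplied for the same %loop.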
1532
1533Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1534 Register &R = Undefs[RC];
1535 if (R == 0) {
1536 // Create an IMPLICIT_DEF that defines this register if we need it.
1537 // All uses of this should be removed by the time we have finished unrolling
1538 // prologs and epilogs.
1539 R = MRI.createVirtualRegister(RC);
1540 auto *InsertBB = &PreheaderBB->getParent()->front();
1541 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1542 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1543 }
1544 return R;
1545}
1546
1547namespace {
1548/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1549/// the operand are discovered, such as how many in-loop PHIs it has to jump
1550/// through and defaults for these phis.
1551class KernelOperandInfo {
1552 MachineBasicBlock *BB;
1553 MachineRegisterInfo &MRI;
1554 SmallVector<Register, 4> PhiDefaults;
1555 MachineOperand *Source;
1556 MachineOperand *Target;
1557
1558public:
1559 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1560 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1561 : MRI(MRI) {
1562 Source = MO;
1563 BB = MO->getParent()->getParent();
1564 while (isRegInLoop(MO)) {
1565 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1566 if (MI->isFullCopy()) {
1567 MO = &MI->getOperand(1);
1568 continue;
1569 }
1570 if (!MI->isPHI())
1571 break;
1572 // If this is an illegal phi, don't count it in distance.
1573 if (IllegalPhis.count(MI)) {
1574 MO = &MI->getOperand(3);
1575 continue;
1576 }
1577
1578 Register Default = getInitPhiReg(*MI, BB);
1579 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1580 : &MI->getOperand(3);
1581 PhiDefaults.push_back(Default);
1582 }
1583 Target = MO;
1584 }
1585
1586 bool operator==(const KernelOperandInfo &Other) const {
1587 return PhiDefaults.size() == Other.PhiDefaults.size();
1588 }
1589
1590 void print(raw_ostream &OS) const {
1591 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1592 << *Source->getParent();
1593 }
1594
1595private:
1596 bool isRegInLoop(MachineOperand *MO) {
1597 return MO->isReg() && MO->getReg().isVirtual() &&
1598 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1599 }
1600};
1601} // namespace
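// A hypothetical example of the "distance" this class computes: if the kernel
// contains
//   %p1 = PHI %init1, <preheader>, %x, <kernel>
//   %p2 = PHI %init2, <preheader>, %p1, <kernel>
//   ... = ADD %p2, ...
// then the ADD operand has distance 2 and PhiDefaults = {%init2, %init1}.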
1602
1603MachineBasicBlock *
1604PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1605 MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1606 if (LPD == LPD_Front)
1607 PeeledFront.push_back(NewBB);
1608 else
1609 PeeledBack.push_front(NewBB);
1610 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1611 ++I, ++NI) {
1612 CanonicalMIs[&*I] = &*I;
1613 CanonicalMIs[&*NI] = &*I;
1614 BlockMIs[{NewBB, &*I}] = &*NI;
1615 BlockMIs[{BB, &*I}] = &*I;
1616 }
1617 return NewBB;
1618}
1619
1620void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
1621 int MinStage) {
1622 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1623 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1624 MachineInstr *MI = &*I++;
1625 int Stage = getStage(MI);
1626 if (Stage == -1 || Stage >= MinStage)
1627 continue;
1628
1629 for (MachineOperand &DefMO : MI->defs()) {
1630 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1631 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1632 // Only PHIs can use values from this block by construction.
1633 // Match with the equivalent PHI in B.
1634 assert(UseMI.isPHI());
1635 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1636 MI->getParent());
1637 Subs.emplace_back(&UseMI, Reg);
1638 }
1639 for (auto &Sub : Subs)
1640 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1641 *MRI.getTargetRegisterInfo());
1642 }
1643 if (LIS)
1644 LIS->RemoveMachineInstrFromMaps(*MI);
1645 MI->eraseFromParent();
1646 }
1647}
1648
1649void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1650 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1651 auto InsertPt = DestBB->getFirstNonPHI();
1652 DenseMap<Register, Register> Remaps;
1653 for (MachineInstr &MI : llvm::make_early_inc_range(
1654 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1655 if (MI.isPHI()) {
1656 // This is an illegal PHI. If we move any instructions using an illegal
1657 // PHI, we need to create a legal Phi.
1658 if (getStage(&MI) != Stage) {
1659 // The legal Phi is not necessary if the illegal phi's stage
1660 // is being moved.
1661 Register PhiR = MI.getOperand(0).getReg();
1662 auto RC = MRI.getRegClass(PhiR);
1663 Register NR = MRI.createVirtualRegister(RC);
1664 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1665 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1666 .addReg(PhiR)
1667 .addMBB(SourceBB);
1668 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1670 Remaps[PhiR] = NR;
1671 }
1672 }
1673 if (getStage(&MI) != Stage)
1674 continue;
1675 MI.removeFromParent();
1676 DestBB->insert(InsertPt, &MI);
1677 auto *KernelMI = CanonicalMIs[&MI];
1678 BlockMIs[{DestBB, KernelMI}] = &MI;
1679 BlockMIs.erase({SourceBB, KernelMI});
1680 }
1681 SmallVector<MachineInstr *, 4> PhiToDelete;
1682 for (MachineInstr &MI : DestBB->phis()) {
1683 assert(MI.getNumOperands() == 3);
1684 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1685 // If the instruction referenced by the phi is moved inside the block
1686 // we don't need the phi anymore.
1687 if (getStage(Def) == Stage) {
1688 Register PhiReg = MI.getOperand(0).getReg();
1689 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1690 /*TRI=*/nullptr) != -1);
1691 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1692 MI.getOperand(0).setReg(PhiReg);
1693 PhiToDelete.push_back(&MI);
1694 }
1695 }
1696 for (auto *P : PhiToDelete)
1697 P->eraseFromParent();
1698 InsertPt = DestBB->getFirstNonPHI();
1699 // Helper to clone Phi instructions into the destination block. We clone Phi
1700 // greedily to avoid combinatorial explosion of Phi instructions.
1701 auto clonePhi = [&](MachineInstr *Phi) {
1702 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1703 DestBB->insert(InsertPt, NewMI);
1704 Register OrigR = Phi->getOperand(0).getReg();
1705 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1706 NewMI->getOperand(0).setReg(R);
1707 NewMI->getOperand(1).setReg(OrigR);
1708 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1709 Remaps[OrigR] = R;
1710 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1711 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1713 return R;
1714 };
1715 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1716 for (MachineOperand &MO : I->uses()) {
1717 if (!MO.isReg())
1718 continue;
1719 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())
1720 MO.setReg(It->second);
1721 else {
1722 // If we are using a phi from the source block we need to add a new phi
1723 // pointing to the old one.
1724 MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1725 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1726 Register R = clonePhi(Use);
1727 MO.setReg(R);
1728 }
1729 }
1730 }
1731 }
1732}
1733
1734Register
1735PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
1736 MachineInstr *Phi) {
1737 unsigned distance = PhiNodeLoopIteration[Phi];
1738 MachineInstr *CanonicalUse = CanonicalPhi;
1739 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1740 for (unsigned I = 0; I < distance; ++I) {
1741 assert(CanonicalUse->isPHI());
1742 assert(CanonicalUse->getNumOperands() == 5);
1743 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1744 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1745 std::swap(LoopRegIdx, InitRegIdx);
1746 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1747 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1748 }
1749 return CanonicalUseReg;
1750}
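// For example (made-up registers), with distance == 2 and
//   %c0 = PHI %i0, <pred>, %c1, <kernel>    ; CanonicalPhi
//   %c1 = PHI %i1, <pred>, %c2, <kernel>
// the walk follows the loop-carried operand twice and returns %c2.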
1751
1752void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1753 BitVector LS(Schedule.getNumStages(), true);
1754 BitVector AS(Schedule.getNumStages(), true);
1755 LiveStages[BB] = LS;
1756 AvailableStages[BB] = AS;
1757
1758 // Peel out the prologs.
1759 LS.reset();
1760 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1761 LS[I] = true;
1762 Prologs.push_back(peelKernel(LPD_Front));
1763 LiveStages[Prologs.back()] = LS;
1764 AvailableStages[Prologs.back()] = LS;
1765 }
1766
1767 // Create a block that will end up as the new loop exiting block (dominated by
1768 // all prologs and epilogs). It will only contain PHIs, in the same order as
1769 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1770 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1771 // property that any value defined in BB but used outside of BB is used by a
1772 // PHI in the exiting block.
1773 MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
1774 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
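// Concretely (hypothetical %v): a loop-carried value %v (the loop operand of
// one of BB's phis) that is also consumed after the loop is, from this point
// on, reached through a single-input
//   %v.lcssa = PHI %v, <BB>
// in the exiting block, which later remapping can redirect per peeled copy.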
1775 // Push out the epilogs, again in reverse order.
1776 // We can't assume anything about the minimum loop trip count at this point,
1777 // so emit a fairly complex epilog.
1778
1779 // We first peel number of stages minus one epilogue. Then we remove dead
1780 // stages and reorder instructions based on their stage. If we have 3 stages
1781 // we generate first:
1782 // E0[3, 2, 1]
1783 // E1[3', 2']
1784 // E2[3'']
1785 // And then we move instructions based on their stages to have:
1786 // E0[3]
1787 // E1[2, 3']
1788 // E2[1, 2', 3'']
1789 // The transformation is legal because we only move instructions past
1790 // instructions of a previous loop iteration.
1791 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1792 Epilogs.push_back(peelKernel(LPD_Back));
1793 MachineBasicBlock *B = Epilogs.back();
1794 filterInstructions(B, Schedule.getNumStages() - I);
1795 // Keep track of which iteration each phi belongs to. We need it to know
1796 // what version of the variable to use during prologue/epilogue stitching.
1797 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1798 for (MachineInstr &Phi : B->phis())
1799 PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
1800 }
1801 for (size_t I = 0; I < Epilogs.size(); I++) {
1802 LS.reset();
1803 for (size_t J = I; J < Epilogs.size(); J++) {
1804 int Iteration = J;
1805 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1806 // Move stage one block at a time so that Phi nodes are updated correctly.
1807 for (size_t K = Iteration; K > I; K--)
1808 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1809 LS[Stage] = true;
1810 }
1811 LiveStages[Epilogs[I]] = LS;
1812 AvailableStages[Epilogs[I]] = AS;
1813 }
1814
1815 // Now we've defined all the prolog and epilog blocks as a fallthrough
1816 // sequence, add the edges that will be followed if the loop trip count is
1817 // lower than the number of stages (connecting prologs directly with epilogs).
1818 auto PI = Prologs.begin();
1819 auto EI = Epilogs.begin();
1820 assert(Prologs.size() == Epilogs.size());
1821 for (; PI != Prologs.end(); ++PI, ++EI) {
1822 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1823 (*PI)->addSuccessor(*EI);
1824 for (MachineInstr &MI : (*EI)->phis()) {
1825 Register Reg = MI.getOperand(1).getReg();
1826 MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1827 if (Use && Use->getParent() == Pred) {
1828 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1829 if (CanonicalUse->isPHI()) {
1830 // If the use comes from a phi we need to skip as many phi as the
1831 // distance between the epilogue and the kernel. Trace through the phi
1832 // chain to find the right value.
1833 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1834 }
1835 Reg = getEquivalentRegisterIn(Reg, *PI);
1836 }
1837 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1838 MI.addOperand(MachineOperand::CreateMBB(*PI));
1839 }
1840 }
1841
1842 // Create a list of all blocks in order.
1843 SmallVector<MachineBasicBlock *, 4> Blocks;
1844 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1845 Blocks.push_back(BB);
1846 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1847
1848 // Iterate in reverse order over all instructions, remapping as we go.
1849 for (MachineBasicBlock *B : reverse(Blocks)) {
1850 for (auto I = B->instr_rbegin();
1851 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1852 MachineBasicBlock::reverse_instr_iterator MI = I++;
1853 rewriteUsesOf(&*MI);
1854 }
1855 }
1856 for (auto *MI : IllegalPhisToDelete) {
1857 if (LIS)
1858 LIS->RemoveMachineInstrFromMaps(*MI);
1859 MI->eraseFromParent();
1860 }
1861 IllegalPhisToDelete.clear();
1862
1863 // Now all remapping has been done, we're free to optimize the generated code.
1864 for (MachineBasicBlock *B : reverse(Blocks))
1865 EliminateDeadPhis(B, MRI, LIS);
1866 EliminateDeadPhis(ExitingBB, MRI, LIS);
1867}
1868
1869MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1870 MachineFunction &MF = *BB->getParent();
1871 MachineBasicBlock *Exit = *BB->succ_begin();
1872 if (Exit == BB)
1873 Exit = *std::next(BB->succ_begin());
1874
1875 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1876 MF.insert(std::next(BB->getIterator()), NewBB);
1877
1878 // Clone all phis in BB into NewBB and rewrite.
1879 for (MachineInstr &MI : BB->phis()) {
1880 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1881 Register OldR = MI.getOperand(3).getReg();
1882 Register R = MRI.createVirtualRegister(RC);
1883 SmallVector<MachineInstr *, 4> Uses;
1884 for (MachineInstr &Use : MRI.use_instructions(OldR))
1885 if (Use.getParent() != BB)
1886 Uses.push_back(&Use);
1887 for (MachineInstr *Use : Uses)
1888 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1889 *MRI.getTargetRegisterInfo());
1890 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1891 .addReg(OldR)
1892 .addMBB(BB);
1893 BlockMIs[{NewBB, &MI}] = NI;
1894 CanonicalMIs[NI] = &MI;
1895 }
1896 BB->replaceSuccessor(Exit, NewBB);
1897 Exit->replacePhiUsesWith(BB, NewBB);
1898 NewBB->addSuccessor(Exit);
1899
1900 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1901 SmallVector<MachineOperand, 4> Cond;
1902 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1903 (void)CanAnalyzeBr;
1904 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1905 TII->removeBranch(*BB);
1906 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1907 Cond, DebugLoc());
1908 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1909 return NewBB;
1910}
1911
1912Register
1913PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
1914 MachineBasicBlock *BB) {
1915 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1916 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
1917 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1918}
1919
1920void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
1921 if (MI->isPHI()) {
1922 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1923 // and it is produced by this block.
1924 Register PhiR = MI->getOperand(0).getReg();
1925 Register R = MI->getOperand(3).getReg();
1926 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1927 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1928 R = MI->getOperand(1).getReg();
1929 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1930 MRI.replaceRegWith(PhiR, R);
1931 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1932 // later to figure out how to remap registers.
1933 MI->getOperand(0).setReg(PhiR);
1934 IllegalPhisToDelete.push_back(MI);
1935 return;
1936 }
1937
1938 int Stage = getStage(MI);
1939 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1940 LiveStages[MI->getParent()].test(Stage))
1941 // Instruction is live, no rewriting to do.
1942 return;
1943
1944 for (MachineOperand &DefMO : MI->defs()) {
1945 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1946 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1947 // Only PHIs can use values from this block by construction.
1948 // Match with the equivalent PHI in B.
1949 assert(UseMI.isPHI());
1950 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1951 MI->getParent());
1952 Subs.emplace_back(&UseMI, Reg);
1953 }
1954 for (auto &Sub : Subs)
1955 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1956 *MRI.getTargetRegisterInfo());
1957 }
1958 if (LIS)
1959 LIS->RemoveMachineInstrFromMaps(*MI);
1960 MI->eraseFromParent();
1961}
1962
1963void PeelingModuloScheduleExpander::fixupBranches() {
1964 // Work outwards from the kernel.
1965 bool KernelDisposed = false;
1966 int TC = Schedule.getNumStages() - 1;
1967 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1968 ++PI, ++EI, --TC) {
1969 MachineBasicBlock *Prolog = *PI;
1970 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1971 MachineBasicBlock *Epilog = *EI;
1972 SmallVector<MachineOperand, 4> Cond;
1973 TII->removeBranch(*Prolog);
1974 std::optional<bool> StaticallyGreater =
1975 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1976 if (!StaticallyGreater) {
1977 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1978 // Dynamically branch based on Cond.
1979 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1980 } else if (*StaticallyGreater == false) {
1981 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1982 // Prolog never falls through; branch to epilog and orphan interior
1983 // blocks. Leave it to unreachable-block-elim to clean up.
1984 Prolog->removeSuccessor(Fallthrough);
1985 for (MachineInstr &P : Fallthrough->phis()) {
1986 P.removeOperand(2);
1987 P.removeOperand(1);
1988 }
1989 TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
1990 KernelDisposed = true;
1991 } else {
1992 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1993 // Prolog always falls through; remove incoming values in epilog.
1994 Prolog->removeSuccessor(Epilog);
1995 for (MachineInstr &P : Epilog->phis()) {
1996 P.removeOperand(4);
1997 P.removeOperand(3);
1998 }
1999 }
2000 }
2001
2002 if (!KernelDisposed) {
2003 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
2004 LoopInfo->setPreheader(Prologs.back());
2005 } else {
2006 LoopInfo->disposed();
2007 }
2008}
2009
2010void PeelingModuloScheduleExpander::rewriteKernel() {
2011 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2012 KR.rewrite();
2013}
2014
2015void PeelingModuloScheduleExpander::expand() {
2016 BB = Schedule.getLoop()->getTopBlock();
2017 Preheader = Schedule.getLoop()->getLoopPreheader();
2018 LLVM_DEBUG(Schedule.dump());
2019 LoopInfo = TII->analyzeLoopForPipelining(BB);
2021
2022 rewriteKernel();
2023 peelPrologAndEpilogs();
2024 fixupBranches();
2025}
2026
2027void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
2028 BB = Schedule.getLoop()->getTopBlock();
2029 Preheader = Schedule.getLoop()->getLoopPreheader();
2030
2031 // Dump the schedule before we invalidate and remap all its instructions.
2032 // Stash it in a string so we can print it if we found an error.
2033 std::string ScheduleDump;
2034 raw_string_ostream OS(ScheduleDump);
2035 Schedule.print(OS);
2036
2037 // First, run the normal ModuloScheduleExpander. We don't support any
2038 // InstrChanges.
2039 assert(LIS && "Requires LiveIntervals!");
2040 ModuloScheduleExpander MSE(MF, Schedule, *LIS,
2041 /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2042 MSE.expand();
2043 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2044 if (!ExpandedKernel) {
2045 // The expander optimized away the kernel. We can't do any useful checking.
2046 MSE.cleanup();
2047 return;
2048 }
2049 // Before running the KernelRewriter, re-add BB into the CFG.
2050 Preheader->addSuccessor(BB);
2051
2052 // Now run the new expansion algorithm.
2053 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2054 KR.rewrite();
2055 peelPrologAndEpilogs();
2056
2057 // Collect all illegal phis that the new algorithm created. We'll give these
2058 // to KernelOperandInfo.
2059 SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2060 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2061 if (NI->isPHI())
2062 IllegalPhis.insert(&*NI);
2063 }
2064
2065 // Co-iterate across both kernels. We expect them to be identical apart from
2066 // phis and full COPYs (we look through both).
2067 SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
2068 auto OI = ExpandedKernel->begin();
2069 auto NI = BB->begin();
2070 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2071 while (OI->isPHI() || OI->isFullCopy())
2072 ++OI;
2073 while (NI->isPHI() || NI->isFullCopy())
2074 ++NI;
2075 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2076 // Analyze every operand separately.
2077 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2078 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2079 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2080 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2081 }
2082
2083 bool Failed = false;
2084 for (auto &OldAndNew : KOIs) {
2085 if (OldAndNew.first == OldAndNew.second)
2086 continue;
2087 Failed = true;
2088 errs() << "Modulo kernel validation error: [\n";
2089 errs() << " [golden] ";
2090 OldAndNew.first.print(errs());
2091 errs() << " ";
2092 OldAndNew.second.print(errs());
2093 errs() << "]\n";
2094 }
2095
2096 if (Failed) {
2097 errs() << "Golden reference kernel:\n";
2098 ExpandedKernel->print(errs());
2099 errs() << "New kernel:\n";
2100 BB->print(errs());
2101 errs() << ScheduleDump;
2102 report_fatal_error(
2103 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2104 }
2105
2106 // Cleanup by removing BB from the CFG again as the original
2107 // ModuloScheduleExpander intended.
2108 Preheader->removeSuccessor(BB);
2109 MSE.cleanup();
2110}
2111
2112MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2113 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2114
2115 // TODO: Offset information needs to be corrected.
2116 NewMI->dropMemRefs(MF);
2117
2118 return NewMI;
2119}
2120
2121/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2122/// If it is already a dedicated exit, return it. Otherwise, insert a new
2123/// block between them and return the new block.
2124static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop,
2125 MachineBasicBlock *Exit,
2126 LiveIntervals &LIS) {
2127 if (Exit->pred_size() == 1)
2128 return Exit;
2129
2130 MachineFunction *MF = Loop->getParent();
2131 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2132
2133 MachineBasicBlock *NewExit =
2134 MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2135 MF->insert(Loop->getIterator(), NewExit);
2136 LIS.insertMBBInMaps(NewExit);
2137
2138 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2139 SmallVector<MachineOperand, 4> Cond;
2140 TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2141 if (TBB == Loop)
2142 FBB = NewExit;
2143 else if (FBB == Loop)
2144 TBB = NewExit;
2145 else
2146 llvm_unreachable("unexpected loop structure");
2147 TII->removeBranch(*Loop);
2148 TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2149 Loop->replaceSuccessor(Exit, NewExit);
2150 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2151 NewExit->addSuccessor(Exit);
2152
2153 Exit->replacePhiUsesWith(Loop, NewExit);
2154
2155 return NewExit;
2156}
2157
2158/// Insert branch code into the end of MBB. It branches to GreaterThan if the
2159/// remaining trip count for instructions in LastStage0Insts is greater than
2160/// RequiredTC, and to Otherwise otherwise.
2161void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2162 int RequiredTC,
2163 InstrMapTy &LastStage0Insts,
2164 MachineBasicBlock &GreaterThan,
2165 MachineBasicBlock &Otherwise) {
2166 SmallVector<MachineOperand, 4> Cond;
2167 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2168 LastStage0Insts);
2169
2170 if (SwapBranchTargetsMVE) {
2171 // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2172 // FBB for optimal performance.
2173 if (TII->reverseBranchCondition(Cond))
2174 llvm_unreachable("can not reverse branch condition");
2175 TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2176 } else {
2177 TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2178 }
2179}
2180
2181/// Generate a pipelined loop that is unrolled by using MVE algorithm and any
2182/// other necessary blocks. The control flow is modified to execute the
2183/// pipelined loop if the trip count satisfies the condition, otherwise the
2184/// original loop. The original loop is also used to execute the remainder
2185/// iterations which occur due to unrolling.
2186void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2187 // The control flow for pipelining with MVE:
2188 //
2189 // OrigPreheader:
2190 // // The block that is originally the loop preheader
2191 // goto Check
2192 //
2193 // Check:
2194 // // Check whether the trip count satisfies the requirements to pipeline.
2195 // if (LoopCounter > NumStages + NumUnroll - 2)
2196 // // The minimum number of iterations to pipeline =
2197 // // iterations executed in prolog/epilog (NumStages-1) +
2198 // // iterations executed in one kernel run (NumUnroll)
2199 // goto Prolog
2200 // // fallback to the original loop
2201 // goto NewPreheader
2202 //
2203 // Prolog:
2204 // // All prolog stages. There are no direct branches to the epilogue.
2205 // goto NewKernel
2206 //
2207 // NewKernel:
2208 // // NumUnroll copies of the kernel
2209 // if (LoopCounter > MVE-1)
2210 // goto NewKernel
2211 // goto Epilog
2212 //
2213 // Epilog:
2214 // // All epilog stages.
2215 // if (LoopCounter > 0)
2216 // // The remainder is executed in the original loop
2217 // goto NewPreheader
2218 // goto NewExit
2219 //
2220 // NewPreheader:
2221 // // Newly created preheader for the original loop.
2222 // // The initial values of the phis in the loop are merged from two paths.
2223 // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2224 // goto OrigKernel
2225 //
2226 // OrigKernel:
2227 // // The original loop block.
2228 // if (LoopCounter != 0)
2229 // goto OrigKernel
2230 // goto NewExit
2231 //
2232 // NewExit:
2233 // // Newly created dedicated exit for the original loop.
2234 // // Merge values which are referenced after the loop
2235 // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2236 // goto OrigExit
2237 //
2238 // OrigExit:
2239 // // The block that is originally the loop exit.
2240 // // If it is already a dedicated exit, NewExit is not created.
2241
2242 // An example of where each stage is executed:
2243 // Assume #Stages 3, #MVE 4, #Iterations 12
2244 // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2245 // -------------------------------------------------
2246 // Stage 0 Prolog#0
2247 // Stage 1 0 Prolog#1
2248 // Stage 2 1 0 Kernel Unroll#0 Iter#0
2249 // Stage 2 1 0 Kernel Unroll#1 Iter#0
2250 // Stage 2 1 0 Kernel Unroll#2 Iter#0
2251 // Stage 2 1 0 Kernel Unroll#3 Iter#0
2252 // Stage 2 1 0 Kernel Unroll#0 Iter#1
2253 // Stage 2 1 0 Kernel Unroll#1 Iter#1
2254 // Stage 2 1 0 Kernel Unroll#2 Iter#1
2255 // Stage 2 1 0 Kernel Unroll#3 Iter#1
2256 // Stage 2 1 Epilog#0
2257 // Stage 2 Epilog#1
2258 // Stage 0-2 OrigKernel
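// Putting numbers on the Check guard for this example: the pipelined path is
// taken when LoopCounter > NumStages + NumUnroll - 2 = 3 + 4 - 2 = 5, i.e. at
// least 6 iterations are required; with 12 iterations, 10 run through
// Prolog/NewKernel/Epilog and the remaining 2 through OrigKernel, as shown
// above.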
2259
2260 LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2261 assert(LoopInfo && "Must be able to analyze loop!");
2262
2263 calcNumUnroll();
2264
2265 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2266 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2267 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2268 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2269 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2270
2271 MF.insert(OrigKernel->getIterator(), Check);
2273 MF.insert(OrigKernel->getIterator(), Prolog);
2275 MF.insert(OrigKernel->getIterator(), NewKernel);
2276 LIS.insertMBBInMaps(NewKernel);
2277 MF.insert(OrigKernel->getIterator(), Epilog);
2279 MF.insert(OrigKernel->getIterator(), NewPreheader);
2280 LIS.insertMBBInMaps(NewPreheader);
2281
2282 NewExit = createDedicatedExit(OrigKernel, OrigExit, LIS);
2283
2284 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2285 TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2286
2287 OrigPreheader->addSuccessor(Check);
2288 TII->removeBranch(*OrigPreheader);
2289 TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2290
2291 Check->addSuccessor(Prolog);
2292 Check->addSuccessor(NewPreheader);
2293
2294 Prolog->addSuccessor(NewKernel);
2295
2296 NewKernel->addSuccessor(NewKernel);
2297 NewKernel->addSuccessor(Epilog);
2298
2299 Epilog->addSuccessor(NewPreheader);
2300 Epilog->addSuccessor(NewExit);
2301
2302 InstrMapTy LastStage0Insts;
2303 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2304 LastStage0Insts, *Prolog, *NewPreheader);
2305
2306 // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2307 // register#
2308 SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2309 generateProlog(PrologVRMap);
2310 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2311 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2312}
2313
2314/// Replace MI's use operands according to the maps.
2315void ModuloScheduleExpanderMVE::updateInstrUse(
2316 MachineInstr *MI, int StageNum, int PhaseNum,
2317 SmallVectorImpl<ValueMapTy> &CurVRMap,
2318 SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2319 // If MI is in the prolog/kernel/epilog block, CurVRMap is
2320 // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2321 // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2322 // Refer to the appropriate map according to the stage difference between
2323 // MI and the definition of an operand.
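// A concrete reading with hypothetical numbers: an instruction in kernel
// phase 2 whose operand is defined one stage earlier has DiffStage == 1 and
// reads KernelVRMap[2 - 1]; if DiffStage exceeds PhaseNum, the value comes
// from the previous kernel iteration (PhiVRMap), or from KernelVRMap when MI
// is in the epilog.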
2324
2325 for (MachineOperand &UseMO : MI->uses()) {
2326 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2327 continue;
2328 int DiffStage = 0;
2329 Register OrigReg = UseMO.getReg();
2330 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2331 if (!DefInst || DefInst->getParent() != OrigKernel)
2332 continue;
2333 Register InitReg;
2334 Register DefReg = OrigReg;
2335 if (DefInst->isPHI()) {
2336 ++DiffStage;
2337 Register LoopReg;
2338 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2339 // LoopReg is guaranteed to be defined within the loop by canApply()
2340 DefReg = LoopReg;
2341 DefInst = MRI.getVRegDef(LoopReg);
2342 }
2343 unsigned DefStageNum = Schedule.getStage(DefInst);
2344 DiffStage += StageNum - DefStageNum;
2345 Register NewReg;
2346 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2347 // NewReg is defined in a previous phase of the same block
2348 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2349 else if (!PrevVRMap)
2350 // Since this is the first iteration, refer the initial register of the
2351 // loop
2352 NewReg = InitReg;
2353 else
2354 // Cases where DiffStage is larger than PhaseNum.
2355 // If MI is in the kernel block, the value is defined by the previous
2356 // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2357 // value is defined in the kernel block and KernelVRMap is referenced.
2358 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2359
2360 const TargetRegisterClass *NRC =
2361 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2362 if (NRC)
2363 UseMO.setReg(NewReg);
2364 else {
2365 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2366 MachineInstr *NewCopy = BuildMI(*OrigKernel, MI, MI->getDebugLoc(),
2367 TII->get(TargetOpcode::COPY), SplitReg)
2368 .addReg(NewReg);
2369 LIS.InsertMachineInstrInMaps(*NewCopy);
2370 UseMO.setReg(SplitReg);
2371 }
2372 }
2373}
2374
2375/// Return a phi if Reg is referenced by the phi.
2376/// canApply() guarantees that at most one such phi exists.
2377static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
2378 for (MachineInstr &Phi : Loop->phis()) {
2379 Register InitVal, LoopVal;
2380 getPhiRegs(Phi, Loop, InitVal, LoopVal);
2381 if (LoopVal == Reg)
2382 return &Phi;
2383 }
2384 return nullptr;
2385}
2386
2387/// Generate phis for registers defined by OrigMI.
2388void ModuloScheduleExpanderMVE::generatePhi(
2389 MachineInstr *OrigMI, int UnrollNum,
2390 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2391 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2392 SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2393 int StageNum = Schedule.getStage(OrigMI);
2394 bool UsePrologReg;
2395 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2396 UsePrologReg = true;
2397 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2398 UsePrologReg = false;
2399 else
2400 return;
2401
2402 // Examples that show which stages are merged by phi.
2403 // Meaning of the symbol following the stage number:
2404 // a/b: Stages with the same letter are merged (UsePrologReg == true)
2405 // +: Merged with the initial value (UsePrologReg == false)
2406 // *: No phis required
2407 //
2408 // #Stages 3, #MVE 4
2409 // Iter 0 1 2 3 4 5 6 7 8
2410 // -----------------------------------------
2411 // Stage 0a Prolog#0
2412 // Stage 1a 0b Prolog#1
2413 // Stage 2* 1* 0* Kernel Unroll#0
2414 // Stage 2* 1* 0+ Kernel Unroll#1
2415 // Stage 2* 1+ 0a Kernel Unroll#2
2416 // Stage 2+ 1a 0b Kernel Unroll#3
2417 //
2418 // #Stages 3, #MVE 2
2419 // Iter 0 1 2 3 4 5 6 7 8
2420 // -----------------------------------------
2421 // Stage 0a Prolog#0
2422 // Stage 1a 0b Prolog#1
2423 // Stage 2* 1+ 0a Kernel Unroll#0
2424 // Stage 2+ 1a 0b Kernel Unroll#1
2425 //
2426 // #Stages 3, #MVE 1
2427 // Iter 0 1 2 3 4 5 6 7 8
2428 // -----------------------------------------
2429 // Stage 0* Prolog#0
2430 // Stage 1a 0b Prolog#1
2431 // Stage 2+ 1a 0b Kernel Unroll#0
2432
2433 for (MachineOperand &DefMO : OrigMI->defs()) {
2434 if (!DefMO.isReg() || DefMO.isDead())
2435 continue;
2436 Register OrigReg = DefMO.getReg();
2437 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2438 if (NewReg == KernelVRMap[UnrollNum].end())
2439 continue;
2440 Register CorrespondReg;
2441 if (UsePrologReg) {
2442 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2443 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2444 } else {
2445 MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2446 if (!Phi)
2447 continue;
2448 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2449 }
2450
2451 assert(CorrespondReg.isValid());
2452 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2453 MachineInstr *NewPhi =
2454 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2455 TII->get(TargetOpcode::PHI), PhiReg)
2456 .addReg(NewReg->second)
2457 .addMBB(NewKernel)
2458 .addReg(CorrespondReg)
2459 .addMBB(Prolog);
2460 LIS.InsertMachineInstrInMaps(*NewPhi);
2461 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2462 }
2463}
2464
2465static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2466 MachineBasicBlock *NewMBB) {
2467 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2468 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2469 Phi.getOperand(Idx).setReg(NewReg);
2470 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2471 return;
2472 }
2473 }
2474}
2475
2476/// Generate phis that merge values from multiple routes
2477void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2478 Register NewReg) {
2479 SmallVector<MachineOperand *> UsesAfterLoop;
2480 SmallVector<MachineInstr *> LoopPhis;
2481 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2482 E = MRI.use_end();
2483 I != E; ++I) {
2484 MachineOperand &O = *I;
2485 if (O.getParent()->getParent() != OrigKernel &&
2486 O.getParent()->getParent() != Prolog &&
2487 O.getParent()->getParent() != NewKernel &&
2488 O.getParent()->getParent() != Epilog)
2489 UsesAfterLoop.push_back(&O);
2490 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2491 LoopPhis.push_back(O.getParent());
2492 }
2493
2494 // Merge the route that only executes the pipelined loop (when there are no
2495 // remaining iterations) with the route that executes the original loop.
2496 if (!UsesAfterLoop.empty()) {
2497 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2498 MachineInstr *NewPhi =
2499 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2500 TII->get(TargetOpcode::PHI), PhiReg)
2501 .addReg(OrigReg)
2502 .addMBB(OrigKernel)
2503 .addReg(NewReg)
2504 .addMBB(Epilog);
2505 LIS.InsertMachineInstrInMaps(*NewPhi);
2506
2507 for (MachineOperand *MO : UsesAfterLoop)
2508 MO->setReg(PhiReg);
2509
2510 // The interval of OrigReg is invalid and should be recalculated when
2511 // LiveInterval::getInterval() is called.
2512 if (LIS.hasInterval(OrigReg))
2513 LIS.removeInterval(OrigReg);
2514 }
2515
2516 // Merge routes from the pipelined loop and the bypassed route before the
2517 // original loop
2518 if (!LoopPhis.empty()) {
2519 for (MachineInstr *Phi : LoopPhis) {
2520 Register InitReg, LoopReg;
2521 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2522 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2523 MachineInstr *NewPhi =
2524 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(),
2525 Phi->getDebugLoc(), TII->get(TargetOpcode::PHI), NewInit)
2526 .addReg(InitReg)
2527 .addMBB(Check)
2528 .addReg(NewReg)
2529 .addMBB(Epilog);
2530 LIS.InsertMachineInstrInMaps(*NewPhi);
2531 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2532 }
2533 }
2534}
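// Net effect with hypothetical registers: a value %v of the original loop
// that is live past the loop gets
//   %m = PHI %v, <OrigKernel>, %v.pipelined, <Epilog>
// in NewExit and out-of-loop uses are redirected to %m, while each loop phi
// receives a new initial value merged in NewPreheader from the Check and
// Epilog paths.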
2535
2536void ModuloScheduleExpanderMVE::generateProlog(
2537 SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2538 PrologVRMap.clear();
2539 PrologVRMap.resize(Schedule.getNumStages() - 1);
2540 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2541 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2542 ++PrologNum) {
2543 for (MachineInstr *MI : Schedule.getInstructions()) {
2544 if (MI->isPHI())
2545 continue;
2546 int StageNum = Schedule.getStage(MI);
2547 if (StageNum > PrologNum)
2548 continue;
2549 MachineInstr *NewMI = cloneInstr(MI);
2550 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2551 NewMIMap[NewMI] = {PrologNum, StageNum};
2552 Prolog->push_back(NewMI);
2553 LIS.InsertMachineInstrInMaps(*NewMI);
2554 }
2555 }
2556
2557 for (auto I : NewMIMap) {
2558 MachineInstr *MI = I.first;
2559 int PrologNum = I.second.first;
2560 int StageNum = I.second.second;
2561 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2562 }
2563
2564 LLVM_DEBUG({
2565 dbgs() << "prolog:\n";
2566 Prolog->dump();
2567 });
2568}
2569
2570void ModuloScheduleExpanderMVE::generateKernel(
2571 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2572 SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2573 KernelVRMap.clear();
2574 KernelVRMap.resize(NumUnroll);
2575 SmallVector<ValueMapTy> PhiVRMap;
2576 PhiVRMap.resize(NumUnroll);
2577 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2578 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2579 for (MachineInstr *MI : Schedule.getInstructions()) {
2580 if (MI->isPHI())
2581 continue;
2582 int StageNum = Schedule.getStage(MI);
2583 MachineInstr *NewMI = cloneInstr(MI);
2584 if (UnrollNum == NumUnroll - 1)
2585 LastStage0Insts[MI] = NewMI;
2586 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2587 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2588 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2589 NewMIMap[NewMI] = {UnrollNum, StageNum};
2590 NewKernel->push_back(NewMI);
2591 LIS.InsertMachineInstrInMaps(*NewMI);
2592 }
2593 }
2594
2595 for (auto I : NewMIMap) {
2596 MachineInstr *MI = I.first;
2597 int UnrollNum = I.second.first;
2598 int StageNum = I.second.second;
2599 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2600 }
2601
2602 // If the remaining trip count is greater than NumUnroll-1, the loop continues
2603 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2604 *Epilog);
2605
2606 LLVM_DEBUG({
2607 dbgs() << "kernel:\n";
2608 NewKernel->dump();
2609 });
2610}
2611
2612void ModuloScheduleExpanderMVE::generateEpilog(
2613 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2614 SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2615 EpilogVRMap.clear();
2616 EpilogVRMap.resize(Schedule.getNumStages() - 1);
2617 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2618 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2619 ++EpilogNum) {
2620 for (MachineInstr *MI : Schedule.getInstructions()) {
2621 if (MI->isPHI())
2622 continue;
2623 int StageNum = Schedule.getStage(MI);
2624 if (StageNum <= EpilogNum)
2625 continue;
2626 MachineInstr *NewMI = cloneInstr(MI);
2627 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2628 NewMIMap[NewMI] = {EpilogNum, StageNum};
2629 Epilog->push_back(NewMI);
2630 LIS.InsertMachineInstrInMaps(*NewMI);
2631 }
2632 }
2633
2634 for (auto I : NewMIMap) {
2635 MachineInstr *MI = I.first;
2636 int EpilogNum = I.second.first;
2637 int StageNum = I.second.second;
2638 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2639 }
2640
2641 // If there are remaining iterations, they are executed in the original loop.
2642 // Instructions related to loop control, such as loop counter comparison,
2643 // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
2644 // in stage 0. Thus, the map is for the last one in the kernel.
2645 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2646
2647 LLVM_DEBUG({
2648 dbgs() << "epilog:\n";
2649 Epilog->dump();
2650 });
2651}
2652
2653/// Calculate the number of kernel unrolls required and store it in NumUnroll.
2654void ModuloScheduleExpanderMVE::calcNumUnroll() {
2655 DenseMap<MachineInstr *, unsigned> Inst2Idx;
2656 NumUnroll = 1;
2657 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2658 Inst2Idx[Schedule.getInstructions()[I]] = I;
2659
2660 for (MachineInstr *MI : Schedule.getInstructions()) {
2661 if (MI->isPHI())
2662 continue;
2663 int StageNum = Schedule.getStage(MI);
2664 for (const MachineOperand &MO : MI->uses()) {
2665 if (!MO.isReg() || !MO.getReg().isVirtual())
2666 continue;
2667 MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2668 if (DefMI->getParent() != OrigKernel)
2669 continue;
2670
2671 int NumUnrollLocal = 1;
2672 if (DefMI->isPHI()) {
2673 ++NumUnrollLocal;
2674 // canApply() guarantees that DefMI is not a phi and is an instruction in
2675 // the loop
2676 DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2677 }
2678 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2679 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2680 --NumUnrollLocal;
2681 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2682 }
2683 }
2684 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2685}
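// Worked example with made-up stages: a use in stage 2 reaching its definition
// in stage 0 through one phi gives NumUnrollLocal = 1 + 1 + (2 - 0) = 4,
// reduced to 3 when the use does not come after the definition in the schedule
// order; NumUnroll is the maximum over all such uses.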
2686
2687/// Create new virtual registers for definitions of NewMI and update NewMI.
2688/// If the definitions are referenced after the pipelined loop, phis are
2689/// created to merge with other routes.
2690void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2691 ValueMapTy &VRMap,
2692 bool LastDef) {
2693 for (MachineOperand &MO : NewMI->all_defs()) {
2694 if (!MO.getReg().isVirtual())
2695 continue;
2696 Register Reg = MO.getReg();
2697 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2698 Register NewReg = MRI.createVirtualRegister(RC);
2699 MO.setReg(NewReg);
2700 VRMap[Reg] = NewReg;
2701 if (LastDef)
2702 mergeRegUsesAfterPipeline(Reg, NewReg);
2703 }
2704}
2705
2706void ModuloScheduleExpanderMVE::expand() {
2707 OrigKernel = Schedule.getLoop()->getTopBlock();
2708 OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2709 OrigExit = Schedule.getLoop()->getExitBlock();
2710
2711 LLVM_DEBUG(Schedule.dump());
2712
2713 generatePipelinedLoop();
2714}
2715
2716/// Check if ModuloScheduleExpanderMVE can be applied to L
2717bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
2718 if (!L.getExitBlock()) {
2719 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2720 return false;
2721 }
2722
2723 MachineBasicBlock *BB = L.getTopBlock();
2724 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2725
2726 // Put some constraints on the operands of the phis to simplify the
2727 // transformation
2728 DenseSet<Register> UsedByPhi;
2729 for (MachineInstr &MI : BB->phis()) {
2730 // Registers defined by phis must be used only inside the loop and never
2731 // used by phis.
2732 for (MachineOperand &MO : MI.defs())
2733 if (MO.isReg())
2734 for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2735 if (Ref.getParent() != BB || Ref.isPHI()) {
2736 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2737 "referenced outside of the loop or by phi.\n");
2738 return false;
2739 }
2740
2741 // A source register from the loop block must be defined inside the loop.
2742 // A register defined inside the loop must be referenced by only one phi at
2743 // most.
2744 Register InitVal, LoopVal;
2745 getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2746 if (!Register(LoopVal).isVirtual() ||
2747 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2748 LLVM_DEBUG(
2749 dbgs() << "Can not apply MVE expander: A phi source value coming "
2750 "from the loop is not defined in the loop.\n");
2751 return false;
2752 }
2753 if (UsedByPhi.count(LoopVal)) {
2754 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2755 "loop is referenced by two or more phis.\n");
2756 return false;
2757 }
2758 UsedByPhi.insert(LoopVal);
2759 }
2760
2761 return true;
2762}
2763
2764//===----------------------------------------------------------------------===//
2765// ModuloScheduleTestPass implementation
2766//===----------------------------------------------------------------------===//
2767// This pass constructs a ModuloSchedule from its module and runs
2768// ModuloScheduleExpander.
2769//
2770// The module is expected to contain a single-block analyzable loop.
2771// The total order of instructions is taken from the loop as-is.
2772// Instructions are expected to be annotated with a PostInstrSymbol.
2773// This PostInstrSymbol must have the following format:
2774// "Stage-%d_Cycle-%d".
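// For example, an instruction scheduled in stage 1 at cycle 4 is annotated
// with the symbol "Stage-1_Cycle-4" (this matches what the annotater at the
// end of this file emits).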
2775//===----------------------------------------------------------------------===//
2776
2777namespace {
2778class ModuloScheduleTest : public MachineFunctionPass {
2779public:
2780 static char ID;
2781
2782 ModuloScheduleTest() : MachineFunctionPass(ID) {
2783 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
2784 }
2785
2786 bool runOnMachineFunction(MachineFunction &MF) override;
2787 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2788
2789 void getAnalysisUsage(AnalysisUsage &AU) const override {
2790 AU.addRequired<MachineLoopInfoWrapperPass>();
2791 AU.addRequired<LiveIntervalsWrapperPass>();
2792 MachineFunctionPass::getAnalysisUsage(AU);
2793 }
2794};
2795} // namespace
2796
2797char ModuloScheduleTest::ID = 0;
2798
2799INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2800 "Modulo Schedule test pass", false, false)
2801INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
2802INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
2803INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2804 "Modulo Schedule test pass", false, false)
2805
2806bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2807 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2808 for (auto *L : MLI) {
2809 if (L->getTopBlock() != L->getBottomBlock())
2810 continue;
2811 runOnLoop(MF, *L);
2812 return false;
2813 }
2814 return false;
2815}
2816
2817static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2818 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2819 std::pair<StringRef, StringRef> StageTokenAndValue =
2820 getToken(StageAndCycle.first, "-");
2821 std::pair<StringRef, StringRef> CycleTokenAndValue =
2822 getToken(StageAndCycle.second, "-");
2823 if (StageTokenAndValue.first != "Stage" ||
2824 CycleTokenAndValue.first != "_Cycle") {
2825 llvm_unreachable(
2826 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2827 return;
2828 }
2829
2830 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2831 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2832
2833 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2834}
2835
2836void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2837 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2838 MachineBasicBlock *BB = L.getTopBlock();
2839 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2840
2841 DenseMap<MachineInstr *, int> Cycle, Stage;
2842 std::vector<MachineInstr *> Instrs;
2843 for (MachineInstr &MI : *BB) {
2844 if (MI.isTerminator())
2845 continue;
2846 Instrs.push_back(&MI);
2847 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2848 dbgs() << "Parsing post-instr symbol for " << MI;
2849 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2850 }
2851 }
2852
2853 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2854 std::move(Stage));
2855 ModuloScheduleExpander MSE(
2856 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2857 MSE.expand();
2858 MSE.cleanup();
2859}
2860
2861//===----------------------------------------------------------------------===//
2862// ModuloScheduleTestAnnotater implementation
2863//===----------------------------------------------------------------------===//
2864
2865void ModuloScheduleTestAnnotater::annotate() {
2866 for (MachineInstr *MI : S.getInstructions()) {
2867 SmallVector<char, 16> SV;
2868 raw_svector_ostream OS(SV);
2869 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2870 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2871 MI->setPostInstrSymbol(MF, Sym);
2872 }
2873}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static bool hasUseAfterLoop(Register Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit, LiveIntervals &LIS)
Create a dedicated exit for Loop.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static Register getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
MachineInstr unsigned OpIdx
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
LLVM_ABI void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineInstrBundleIterator< MachineInstr > iterator
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
unsigned getNumOperands() const
Retuns the total number of operands.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
LLVM_ABI void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
mop_range operands()
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
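A small sketch showing how the MachineInstr operand ranges above are typically consumed (hypothetical helper, not part of this file):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Register.h"

// Hypothetical helper: collect every virtual register defined by MI.
// all_defs() visits both explicit and implicit register definitions.
static void collectVirtualDefs(const llvm::MachineInstr &MI,
                               llvm::SmallVectorImpl<llvm::Register> &Defs) {
  for (const llvm::MachineOperand &MO : MI.all_defs())
    if (MO.getReg().isVirtual())
      Defs.push_back(MO.getReg());
}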
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
defusechain_iterator< true, false, false, true, false > use_iterator
use_iterator/use_begin/use_end - Walk all uses of the specified register.
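The use-list walkers above are commonly consumed through use_operands; a minimal sketch (hypothetical helper, not part of this file):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Hypothetical helper: count the uses of Reg that sit inside LoopBB by walking
// the register's use list maintained by MachineRegisterInfo.
static unsigned countUsesInBlock(llvm::MachineRegisterInfo &MRI,
                                 llvm::Register Reg,
                                 const llvm::MachineBasicBlock *LoopBB) {
  unsigned NumUses = 0;
  for (llvm::MachineOperand &UseOp : MRI.use_operands(Reg))
    if (UseOp.getParent()->getParent() == LoopBB)
      ++NumUses;
  return NumUses;
}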
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
DenseMap< MachineInstr *, std::pair< Register, int64_t > > InstrChangesTy
void annotate()
Performs the annotation.
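Putting the entry points above together, here is a hedged sketch of the usual driver sequence. The constructor argument list shown is an approximation of the declaration in ModuloSchedule.h and should be verified against the LLVM version in use:

#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/ModuloSchedule.h"
#include <utility>

// Assumed constructor shape (MF, schedule, live intervals, instruction
// changes); check ModuloSchedule.h before relying on it.
void runExpander(llvm::MachineFunction &MF, llvm::ModuloSchedule &MS,
                 llvm::LiveIntervals &LIS,
                 llvm::ModuloScheduleExpander::InstrChangesTy Changes) {
  llvm::ModuloScheduleExpander MSE(MF, MS, LIS, std::move(Changes));
  MSE.expand();   // rewrite the loop into prolog/kernel/epilog form
  MSE.cleanup();  // drop instructions made dead by the rewrite
}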
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
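As an illustration of the per-instruction queries above (hypothetical helper, not part of this file):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ModuloSchedule.h"

// Hypothetical helper: gather the instructions assigned to a given stage.
// getStage() returns -1 for instructions that are not in the schedule.
static void instructionsInStage(llvm::ModuloSchedule &MS, int Stage,
                                llvm::SmallVectorImpl<llvm::MachineInstr *> &Out) {
  for (llvm::MachineInstr *MI : MS.getInstructions())
    if (MS.getStage(MI) == Stage)
      Out.push_back(MI);
}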
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopInfo
Target loop info before kernel peeling.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue, keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
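A trivial sketch of the Register predicates above (hypothetical helper):

#include "llvm/CodeGen/Register.h"

// Hypothetical helper: name the register namespace a Register belongs to.
static const char *describeReg(llvm::Register Reg) {
  if (!Reg.isValid())
    return "no register";
  return Reg.isVirtual() ? "virtual" : "physical";
}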
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
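The SmallPtrSet/SmallVector pair above is most often used for order-preserving de-duplication; a minimal sketch (hypothetical helper):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper: record MI the first time it is seen, keeping insertion
// order in Order. insert().second is true only for newly inserted pointers.
static void appendUnique(llvm::MachineInstr *MI,
                         llvm::SmallPtrSetImpl<llvm::MachineInstr *> &Seen,
                         llvm::SmallVectorImpl<llvm::MachineInstr *> &Order) {
  if (Seen.insert(MI).second)
    Order.push_back(MI);
}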
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetInstrInfo * getInstrInfo() const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
initializer< Ty > init(const Ty &Val)
constexpr double phi
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
iterator end() const
Definition BasicBlock.h:89
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
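make_early_inc_range is the standard idiom for erasing machine instructions while iterating over a block; a hedged sketch with a deliberately toy liveness test follows (hypothetical helper, not a real dead-code pass):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Hypothetical helper: erase single-operand instructions whose only result has
// no uses. The early-increment range keeps iteration valid across
// eraseFromParent().
static void eraseUnusedSingleDefs(llvm::MachineBasicBlock &MBB,
                                  llvm::MachineRegisterInfo &MRI) {
  for (llvm::MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
    if (MI.getNumOperands() != 1)
      continue;
    const llvm::MachineOperand &MO = MI.getOperand(0);
    // Toy condition only: a real pass would also check for side effects.
    if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual() &&
        MRI.use_empty(MO.getReg()))
      MI.eraseFromParent();
  }
}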
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
LLVM_ABI void initializeModuloScheduleTestPass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ Other
Any other memory.
Definition ModRef.h:68
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
DWARFExpression::Operation Op
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
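A hedged wrapper showing how the peel-direction enumerators combine with PeelSingleBlockLoop (documented earlier in this list); the wrapper name peelFront is hypothetical:

#include "llvm/CodeGen/ModuloSchedule.h"

// Hypothetical wrapper: clone one iteration in front of a single-block loop.
// Passing LPD_Back would instead peel the last iteration behind the loop.
static llvm::MachineBasicBlock *peelFront(llvm::MachineBasicBlock *Loop,
                                          llvm::MachineRegisterInfo &MRI,
                                          const llvm::TargetInstrInfo *TII) {
  return llvm::PeelSingleBlockLoop(llvm::LPD_Front, Loop, MRI, TII);
}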
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872