Bug Summary

File: llvm/lib/CodeGen/ModuloSchedule.cpp
Warning: line 507, column 13
Value stored to 'NewReg' is never read
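
This finding comes from the deadcode.DeadStores checker (enabled via -analyzer-checker=deadcode in the invocation below). As a minimal, hypothetical sketch of the pattern this checker reports (the function and variable names below are invented, not taken from ModuloSchedule.cpp):

    // Hypothetical example: the checker flags the first store to 'Reg' because
    // every path below writes 'Reg' again before the stored value is read.
    unsigned pickReg(bool Reuse, unsigned Cached, unsigned Fresh) {
      unsigned Reg;
      Reg = Cached;        // "Value stored to 'Reg' is never read"
      if (Reuse) {
        Reg = Cached + 1;  // overwritten on this path ...
        return Reg;
      }
      Reg = Fresh;         // ... and on the fall-through path
      return Reg;
    }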

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ModuloSchedule.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/CodeGen -I /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/lib/CodeGen -I include -I /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-16-232930-107970-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/lib/CodeGen/ModuloSchedule.cpp
1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/CodeGen/ModuloSchedule.h"
10#include "llvm/ADT/StringExtras.h"
11#include "llvm/Analysis/MemoryLocation.h"
12#include "llvm/CodeGen/LiveIntervals.h"
13#include "llvm/CodeGen/MachineInstrBuilder.h"
14#include "llvm/CodeGen/MachineRegisterInfo.h"
15#include "llvm/InitializePasses.h"
16#include "llvm/MC/MCContext.h"
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/raw_ostream.h"
20
21#define DEBUG_TYPE "pipeliner"
22using namespace llvm;
23
24void ModuloSchedule::print(raw_ostream &OS) {
25 for (MachineInstr *MI : ScheduledInstrs)
26 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
27}
28
29//===----------------------------------------------------------------------===//
30// ModuloScheduleExpander implementation
31//===----------------------------------------------------------------------===//
32
33/// Return the register values for the operands of a Phi instruction.
34/// This function assumes the instruction is a Phi.
35static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
36 unsigned &InitVal, unsigned &LoopVal) {
37 assert(Phi.isPHI() && "Expecting a Phi.");
38
39 InitVal = 0;
40 LoopVal = 0;
41 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
42 if (Phi.getOperand(i + 1).getMBB() != Loop)
43 InitVal = Phi.getOperand(i).getReg();
44 else
45 LoopVal = Phi.getOperand(i).getReg();
46
47 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
48}
49
50/// Return the Phi register value that comes from the incoming block.
51static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
52 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
53 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
54 return Phi.getOperand(i).getReg();
55 return 0;
56}
57
58/// Return the Phi register value that comes from the loop block.
59static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
60 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
61 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
62 return Phi.getOperand(i).getReg();
63 return 0;
64}
65
66void ModuloScheduleExpander::expand() {
67 BB = Schedule.getLoop()->getTopBlock();
68 Preheader = *BB->pred_begin();
69 if (Preheader == BB)
70 Preheader = *std::next(BB->pred_begin());
71
72 // Iterate over the definitions in each instruction, and compute the
73 // stage difference for each use. Keep the maximum value.
74 for (MachineInstr *MI : Schedule.getInstructions()) {
75 int DefStage = Schedule.getStage(MI);
76 for (const MachineOperand &Op : MI->operands()) {
77 if (!Op.isReg() || !Op.isDef())
78 continue;
79
80 Register Reg = Op.getReg();
81 unsigned MaxDiff = 0;
82 bool PhiIsSwapped = false;
83 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
84 MachineInstr *UseMI = UseOp.getParent();
85 int UseStage = Schedule.getStage(UseMI);
86 unsigned Diff = 0;
87 if (UseStage != -1 && UseStage >= DefStage)
88 Diff = UseStage - DefStage;
89 if (MI->isPHI()) {
90 if (isLoopCarried(*MI))
91 ++Diff;
92 else
93 PhiIsSwapped = true;
94 }
95 MaxDiff = std::max(Diff, MaxDiff);
96 }
97 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
98 }
99 }
100
101 generatePipelinedLoop();
102}
103
104void ModuloScheduleExpander::generatePipelinedLoop() {
105 LoopInfo = TII->analyzeLoopForPipelining(BB);
106 assert(LoopInfo && "Must be able to analyze loop!");
107
108 // Create a new basic block for the kernel and add it to the CFG.
109 MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
110
111 unsigned MaxStageCount = Schedule.getNumStages() - 1;
112
113 // Remember the registers that are used in different stages. The index is
114 // the iteration, or stage, that the instruction is scheduled in. This is
115 // a map between register names in the original block and the names created
116 // in each stage of the pipelined loop.
117 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
118 InstrMapTy InstrMap;
119
120 SmallVector<MachineBasicBlock *, 4> PrologBBs;
121
122 // Generate the prolog instructions that set up the pipeline.
123 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
124 MF.insert(BB->getIterator(), KernelBB);
125
126 // Rearrange the instructions to generate the new, pipelined loop,
127 // and update register names as needed.
128 for (MachineInstr *CI : Schedule.getInstructions()) {
129 if (CI->isPHI())
130 continue;
131 unsigned StageNum = Schedule.getStage(CI);
132 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
133 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
134 KernelBB->push_back(NewMI);
135 InstrMap[NewMI] = CI;
136 }
137
138 // Copy any terminator instructions to the new kernel, and update
139 // names as needed.
140 for (MachineInstr &MI : BB->terminators()) {
141 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
142 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
143 KernelBB->push_back(NewMI);
144 InstrMap[NewMI] = &MI;
145 }
146
147 NewKernel = KernelBB;
148 KernelBB->transferSuccessors(BB);
149 KernelBB->replaceSuccessor(BB, KernelBB);
150
151 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
152 InstrMap, MaxStageCount, MaxStageCount, false);
153 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
154 MaxStageCount, MaxStageCount, false);
155
156 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
157
158 SmallVector<MachineBasicBlock *, 4> EpilogBBs;
159 // Generate the epilog instructions to complete the pipeline.
160 generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
161
162 // We need this step because the register allocation doesn't handle some
163 // situations well, so we insert copies to help out.
164 splitLifetimes(KernelBB, EpilogBBs);
165
166 // Remove dead instructions due to loop induction variables.
167 removeDeadInstructions(KernelBB, EpilogBBs);
168
169 // Add branches between prolog and epilog blocks.
170 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
171
172 delete[] VRMap;
173}
174
175void ModuloScheduleExpander::cleanup() {
176 // Remove the original loop since it's no longer referenced.
177 for (auto &I : *BB)
178 LIS.RemoveMachineInstrFromMaps(I);
179 BB->clear();
180 BB->eraseFromParent();
181}
182
183/// Generate the pipeline prolog code.
184void ModuloScheduleExpander::generateProlog(unsigned LastStage,
185 MachineBasicBlock *KernelBB,
186 ValueMapTy *VRMap,
187 MBBVectorTy &PrologBBs) {
188 MachineBasicBlock *PredBB = Preheader;
189 InstrMapTy InstrMap;
190
191 // Generate a basic block for each stage, not including the last stage,
192 // which will be generated in the kernel. Each basic block may contain
193 // instructions from multiple stages/iterations.
194 for (unsigned i = 0; i < LastStage; ++i) {
195 // Create and insert the prolog basic block prior to the original loop
196 // basic block. The original loop is removed later.
197 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
198 PrologBBs.push_back(NewBB);
199 MF.insert(BB->getIterator(), NewBB);
200 NewBB->transferSuccessors(PredBB);
201 PredBB->addSuccessor(NewBB);
202 PredBB = NewBB;
203
204 // Generate instructions for each appropriate stage. Process instructions
205 // in original program order.
206 for (int StageNum = i; StageNum >= 0; --StageNum) {
207 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
208 BBE = BB->getFirstTerminator();
209 BBI != BBE; ++BBI) {
210 if (Schedule.getStage(&*BBI) == StageNum) {
211 if (BBI->isPHI())
212 continue;
213 MachineInstr *NewMI =
214 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
215 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
216 NewBB->push_back(NewMI);
217 InstrMap[NewMI] = &*BBI;
218 }
219 }
220 }
221 rewritePhiValues(NewBB, i, VRMap, InstrMap);
222 LLVM_DEBUG({
223 dbgs() << "prolog:\n";
224 NewBB->dump();
225 });
226 }
227
228 PredBB->replaceSuccessor(BB, KernelBB);
229
230 // Check if we need to remove the branch from the preheader to the original
231 // loop, and replace it with a branch to the new loop.
232 unsigned numBranches = TII->removeBranch(*Preheader);
233 if (numBranches) {
234 SmallVector<MachineOperand, 0> Cond;
235 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
236 }
237}
238
239/// Generate the pipeline epilog code. The epilog code finishes the iterations
240/// that were started in either the prolog or the kernel. We create a basic
241/// block for each stage that needs to complete.
242void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
243 MachineBasicBlock *KernelBB,
244 ValueMapTy *VRMap,
245 MBBVectorTy &EpilogBBs,
246 MBBVectorTy &PrologBBs) {
247 // We need to change the branch from the kernel to the first epilog block, so
248 // this call to analyze branch uses the kernel rather than the original BB.
249 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
250 SmallVector<MachineOperand, 4> Cond;
251 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
252 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
253 if (checkBranch)
254 return;
255
256 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
257 if (*LoopExitI == KernelBB)
258 ++LoopExitI;
259 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
260 MachineBasicBlock *LoopExitBB = *LoopExitI;
261
262 MachineBasicBlock *PredBB = KernelBB;
263 MachineBasicBlock *EpilogStart = LoopExitBB;
264 InstrMapTy InstrMap;
265
266 // Generate a basic block for each stage, not including the last stage,
267 // which was generated for the kernel. Each basic block may contain
268 // instructions from multiple stages/iterations.
269 int EpilogStage = LastStage + 1;
270 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
271 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
272 EpilogBBs.push_back(NewBB);
273 MF.insert(BB->getIterator(), NewBB);
274
275 PredBB->replaceSuccessor(LoopExitBB, NewBB);
276 NewBB->addSuccessor(LoopExitBB);
277
278 if (EpilogStart == LoopExitBB)
279 EpilogStart = NewBB;
280
281 // Add instructions to the epilog depending on the current block.
282 // Process instructions in original program order.
283 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
284 for (auto &BBI : *BB) {
285 if (BBI.isPHI())
286 continue;
287 MachineInstr *In = &BBI;
288 if ((unsigned)Schedule.getStage(In) == StageNum) {
289 // Instructions with memoperands in the epilog are updated with
290 // conservative values.
291 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
292 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
293 NewBB->push_back(NewMI);
294 InstrMap[NewMI] = In;
295 }
296 }
297 }
298 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
299 InstrMap, LastStage, EpilogStage, i == 1);
300 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
301 LastStage, EpilogStage, i == 1);
302 PredBB = NewBB;
303
304 LLVM_DEBUG({
305 dbgs() << "epilog:\n";
306 NewBB->dump();
307 });
308 }
309
310 // Fix any Phi nodes in the loop exit block.
311 LoopExitBB->replacePhiUsesWith(BB, PredBB);
312
313 // Create a branch to the new epilog from the kernel.
314 // Remove the original branch and add a new branch to the epilog.
315 TII->removeBranch(*KernelBB);
316 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
317 // Add a branch to the loop exit.
318 if (EpilogBBs.size() > 0) {
319 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
320 SmallVector<MachineOperand, 4> Cond1;
321 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
322 }
323}
324
325/// Replace all uses of FromReg that appear outside the specified
326/// basic block with ToReg.
327static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
328 MachineBasicBlock *MBB,
329 MachineRegisterInfo &MRI,
330 LiveIntervals &LIS) {
331 for (MachineOperand &O :
332 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
333 if (O.getParent()->getParent() != MBB)
334 O.setReg(ToReg);
335 if (!LIS.hasInterval(ToReg))
336 LIS.createEmptyInterval(ToReg);
337}
338
339/// Return true if the register has a use that occurs outside the
340/// specified loop.
341static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
342 MachineRegisterInfo &MRI) {
343 for (const MachineOperand &MO : MRI.use_operands(Reg))
344 if (MO.getParent()->getParent() != BB)
345 return true;
346 return false;
347}
348
349/// Generate Phis for the specific block in the generated pipelined code.
350/// This function looks at the Phis from the original code to guide the
351/// creation of new Phis.
352void ModuloScheduleExpander::generateExistingPhis(
353 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
354 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
355 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
356 // Compute the stage number for the initial value of the Phi, which
357 // comes from the prolog. The prolog to use depends on which kernel or
358 // epilog block we're adding the Phi to.
359 unsigned PrologStage = 0;
360 unsigned PrevStage = 0;
361 bool InKernel = (LastStageNum == CurStageNum);
362 if (InKernel) {
363 PrologStage = LastStageNum - 1;
364 PrevStage = CurStageNum;
365 } else {
366 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
367 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
368 }
369
370 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
371 BBE = BB->getFirstNonPHI();
372 BBI != BBE; ++BBI) {
373 Register Def = BBI->getOperand(0).getReg();
374
375 unsigned InitVal = 0;
376 unsigned LoopVal = 0;
377 getPhiRegs(*BBI, BB, InitVal, LoopVal);
378
379 unsigned PhiOp1 = 0;
380 // The Phi value from the loop body typically is defined in the loop, but
381 // not always. So, we need to check if the value is defined in the loop.
382 unsigned PhiOp2 = LoopVal;
383 if (VRMap[LastStageNum].count(LoopVal))
384 PhiOp2 = VRMap[LastStageNum][LoopVal];
385
386 int StageScheduled = Schedule.getStage(&*BBI);
387 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
388 unsigned NumStages = getStagesForReg(Def, CurStageNum);
389 if (NumStages == 0) {
390 // We don't need to generate a Phi anymore, but we need to rename any uses
391 // of the Phi value.
392 unsigned NewReg = VRMap[PrevStage][LoopVal];
393 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
394 InitVal, NewReg);
395 if (VRMap[CurStageNum].count(LoopVal))
396 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
397 }
398 // Adjust the number of Phis needed depending on the number of prologs left,
399 // and the distance from where the Phi is first scheduled. The number of
400 // Phis cannot exceed the number of prolog stages. Each stage can
401 // potentially define two values.
402 unsigned MaxPhis = PrologStage + 2;
403 if (!InKernel && (int)PrologStage <= LoopValStage)
404 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
405 unsigned NumPhis = std::min(NumStages, MaxPhis);
406
407 unsigned NewReg = 0;
408 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
409 // In the epilog, we may need to look back one stage to get the correct
410 // Phi name, because the epilog and prolog blocks execute the same stage.
411 // The correct name is from the previous block only when the Phi has
412 // been completely scheduled prior to the epilog, and the Phi value is not
413 // needed in multiple stages.
414 int StageDiff = 0;
415 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
416 NumPhis == 1)
417 StageDiff = 1;
418 // Adjust the computations below when the phi and the loop definition
419 // are scheduled in different stages.
420 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
421 StageDiff = StageScheduled - LoopValStage;
422 for (unsigned np = 0; np < NumPhis; ++np) {
423 // If the Phi hasn't been scheduled, then use the initial Phi operand
424 // value. Otherwise, use the scheduled version of the instruction. This
425 // is a little complicated when a Phi references another Phi.
426 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
427 PhiOp1 = InitVal;
428 // Check if the Phi has already been scheduled in a prolog stage.
429 else if (PrologStage >= AccessStage + StageDiff + np &&
430 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
431 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
432 // Check if the Phi has already been scheduled, but the loop instruction
433 // is either another Phi, or doesn't occur in the loop.
434 else if (PrologStage >= AccessStage + StageDiff + np) {
435 // If the Phi references another Phi, we need to examine the other
436 // Phi to get the correct value.
437 PhiOp1 = LoopVal;
438 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
439 int Indirects = 1;
440 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
441 int PhiStage = Schedule.getStage(InstOp1);
442 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
443 PhiOp1 = getInitPhiReg(*InstOp1, BB);
444 else
445 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
446 InstOp1 = MRI.getVRegDef(PhiOp1);
447 int PhiOpStage = Schedule.getStage(InstOp1);
448 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
449 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
450 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
451 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
452 break;
453 }
454 ++Indirects;
455 }
456 } else
457 PhiOp1 = InitVal;
458 // If this references a generated Phi in the kernel, get the Phi operand
459 // from the incoming block.
460 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
461 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
462 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
463
464 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
465 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
466 // In the epilog, a map lookup is needed to get the value from the kernel,
467 // or previous epilog block. How this is done depends on whether the
468 // instruction is scheduled in the previous block.
469 if (!InKernel) {
470 int StageDiffAdj = 0;
471 if (LoopValStage != -1 && StageScheduled > LoopValStage)
472 StageDiffAdj = StageScheduled - LoopValStage;
473 // Use the loop value defined in the kernel, unless the kernel
474 // contains the last definition of the Phi.
475 if (np == 0 && PrevStage == LastStageNum &&
476 (StageScheduled != 0 || LoopValStage != 0) &&
477 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
478 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
479 // Use the value defined by the Phi. We add one because we switch
480 // from looking at the loop value to the Phi definition.
481 else if (np > 0 && PrevStage == LastStageNum &&
482 VRMap[PrevStage - np + 1].count(Def))
483 PhiOp2 = VRMap[PrevStage - np + 1][Def];
484 // Use the loop value defined in the kernel.
485 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
486 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
487 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
488 // Use the value defined by the Phi, unless we're generating the first
489 // epilog and the Phi refers to a Phi in a different stage.
490 else if (VRMap[PrevStage - np].count(Def) &&
491 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
492 (LoopValStage == StageScheduled)))
493 PhiOp2 = VRMap[PrevStage - np][Def];
494 }
495
496 // Check if we can reuse an existing Phi. This occurs when a Phi
497 // references another Phi, and the other Phi is scheduled in an
498 // earlier stage. We can try to reuse an existing Phi up until the last
499 // stage of the current Phi.
500 if (LoopDefIsPhi) {
501 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
502 int LVNumStages = getStagesForPhi(LoopVal);
503 int StageDiff = (StageScheduled - LoopValStage);
504 LVNumStages -= StageDiff;
505 // Make sure the loop value Phi has been processed already.
506 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
507 NewReg = PhiOp2;
Value stored to 'NewReg' is never read
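(On every path from this assignment, 'NewReg' is written again, either from VRMap at line 514 or by MRI.createVirtualRegister at line 536, before it is read, so the value taken from 'PhiOp2' here never reaches a use.)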
508 unsigned ReuseStage = CurStageNum;
509 if (isLoopCarried(*PhiInst))
510 ReuseStage -= LVNumStages;
511 // Check if the Phi to reuse has been generated yet. If not, then
512 // there is nothing to reuse.
513 if (VRMap[ReuseStage - np].count(LoopVal)) {
514 NewReg = VRMap[ReuseStage - np][LoopVal];
515
516 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
517 Def, NewReg);
518 // Update the map with the new Phi name.
519 VRMap[CurStageNum - np][Def] = NewReg;
520 PhiOp2 = NewReg;
521 if (VRMap[LastStageNum - np - 1].count(LoopVal))
522 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
523
524 if (IsLast && np == NumPhis - 1)
525 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
526 continue;
527 }
528 }
529 }
530 if (InKernel && StageDiff > 0 &&
531 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
532 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
533 }
534
535 const TargetRegisterClass *RC = MRI.getRegClass(Def);
536 NewReg = MRI.createVirtualRegister(RC);
537
538 MachineInstrBuilder NewPhi =
539 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
540 TII->get(TargetOpcode::PHI), NewReg);
541 NewPhi.addReg(PhiOp1).addMBB(BB1);
542 NewPhi.addReg(PhiOp2).addMBB(BB2);
543 if (np == 0)
544 InstrMap[NewPhi] = &*BBI;
545
546 // We define the Phis after creating the new pipelined code, so
547 // we need to rename the Phi values in scheduled instructions.
548
549 unsigned PrevReg = 0;
550 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
551 PrevReg = VRMap[PrevStage - np][LoopVal];
552 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
553 NewReg, PrevReg);
554 // If the Phi has been scheduled, use the new name for rewriting.
555 if (VRMap[CurStageNum - np].count(Def)) {
556 unsigned R = VRMap[CurStageNum - np][Def];
557 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
558 NewReg);
559 }
560
561 // Check if we need to rename any uses that occur after the loop. The
562 // register to replace depends on whether the Phi is scheduled in the
563 // epilog.
564 if (IsLast && np == NumPhis - 1)
565 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
566
567 // In the kernel, a dependent Phi uses the value from this Phi.
568 if (InKernel)
569 PhiOp2 = NewReg;
570
571 // Update the map with the new Phi name.
572 VRMap[CurStageNum - np][Def] = NewReg;
573 }
574
575 while (NumPhis++ < NumStages) {
576 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
577 NewReg, 0);
578 }
579
580 // Check if we need to rename a Phi that has been eliminated due to
581 // scheduling.
582 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
583 replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
584 }
585}
586
587/// Generate Phis for the specified block in the generated pipelined code.
588/// These are new Phis needed because the definition is scheduled after the
589/// use in the pipelined sequence.
590void ModuloScheduleExpander::generatePhis(
591 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
592 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
593 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
594 // Compute the stage number that contains the initial Phi value, and
595 // the Phi from the previous stage.
596 unsigned PrologStage = 0;
597 unsigned PrevStage = 0;
598 unsigned StageDiff = CurStageNum - LastStageNum;
599 bool InKernel = (StageDiff == 0);
600 if (InKernel) {
601 PrologStage = LastStageNum - 1;
602 PrevStage = CurStageNum;
603 } else {
604 PrologStage = LastStageNum - StageDiff;
605 PrevStage = LastStageNum + StageDiff - 1;
606 }
607
608 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
609 BBE = BB->instr_end();
610 BBI != BBE; ++BBI) {
611 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
612 MachineOperand &MO = BBI->getOperand(i);
613 if (!MO.isReg() || !MO.isDef() ||
614 !Register::isVirtualRegister(MO.getReg()))
615 continue;
616
617 int StageScheduled = Schedule.getStage(&*BBI);
618 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
619 Register Def = MO.getReg();
620 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
621 // An instruction that is scheduled in stage 0 and used after the loop
622 // requires a phi in the epilog for the last definition from either
623 // the kernel or prolog.
624 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
625 hasUseAfterLoop(Def, BB, MRI))
626 NumPhis = 1;
627 if (!InKernel && (unsigned)StageScheduled > PrologStage)
628 continue;
629
630 unsigned PhiOp2 = VRMap[PrevStage][Def];
631 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
632 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
633 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
634 // The number of Phis can't exceed the number of prolog stages. The
635 // prolog stage number is zero based.
636 if (NumPhis > PrologStage + 1 - StageScheduled)
637 NumPhis = PrologStage + 1 - StageScheduled;
638 for (unsigned np = 0; np < NumPhis; ++np) {
639 unsigned PhiOp1 = VRMap[PrologStage][Def];
640 if (np <= PrologStage)
641 PhiOp1 = VRMap[PrologStage - np][Def];
642 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
643 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
644 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
645 if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
646 PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
647 }
648 if (!InKernel)
649 PhiOp2 = VRMap[PrevStage - np][Def];
650
651 const TargetRegisterClass *RC = MRI.getRegClass(Def);
652 Register NewReg = MRI.createVirtualRegister(RC);
653
654 MachineInstrBuilder NewPhi =
655 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
656 TII->get(TargetOpcode::PHI), NewReg);
657 NewPhi.addReg(PhiOp1).addMBB(BB1);
658 NewPhi.addReg(PhiOp2).addMBB(BB2);
659 if (np == 0)
660 InstrMap[NewPhi] = &*BBI;
661
662 // Rewrite uses and update the map. The actions depend upon whether
663 // we are generating code for the kernel or epilog blocks.
664 if (InKernel) {
665 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
666 NewReg);
667 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
668 NewReg);
669
670 PhiOp2 = NewReg;
671 VRMap[PrevStage - np - 1][Def] = NewReg;
672 } else {
673 VRMap[CurStageNum - np][Def] = NewReg;
674 if (np == NumPhis - 1)
675 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
676 NewReg);
677 }
678 if (IsLast && np == NumPhis - 1)
679 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
680 }
681 }
682 }
683}
684
685/// Remove instructions that generate values with no uses.
686/// Typically, these are induction variable operations that generate values
687/// used in the loop itself. A dead instruction has a definition with
688/// no uses, or uses that occur in the original loop only.
689void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
690 MBBVectorTy &EpilogBBs) {
691 // For each epilog block, check that the value defined by each instruction
692 // is used. If not, delete it.
693 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
694 for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
695 ME = MBB->instr_rend();
696 MI != ME;) {
697 // From DeadMachineInstructionElem. Don't delete inline assembly.
698 if (MI->isInlineAsm()) {
699 ++MI;
700 continue;
701 }
702 bool SawStore = false;
703 // Check if it's safe to remove the instruction due to side effects.
704 // We can, and want to, remove Phis here.
705 if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
706 ++MI;
707 continue;
708 }
709 bool used = true;
710 for (const MachineOperand &MO : MI->operands()) {
711 if (!MO.isReg() || !MO.isDef())
712 continue;
713 Register reg = MO.getReg();
714 // Assume physical registers are used, unless they are marked dead.
715 if (Register::isPhysicalRegister(reg)) {
716 used = !MO.isDead();
717 if (used)
718 break;
719 continue;
720 }
721 unsigned realUses = 0;
722 for (const MachineOperand &U : MRI.use_operands(reg)) {
723 // Check if there are any uses that occur only in the original
724 // loop. If so, that's not a real use.
725 if (U.getParent()->getParent() != BB) {
726 realUses++;
727 used = true;
728 break;
729 }
730 }
731 if (realUses > 0)
732 break;
733 used = false;
734 }
735 if (!used) {
736 LIS.RemoveMachineInstrFromMaps(*MI);
737 MI++->eraseFromParent();
738 continue;
739 }
740 ++MI;
741 }
742 // In the kernel block, check if we can remove a Phi that generates a value
743 // used in an instruction removed in the epilog block.
744 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
745 Register reg = MI.getOperand(0).getReg();
746 if (MRI.use_begin(reg) == MRI.use_end()) {
747 LIS.RemoveMachineInstrFromMaps(MI);
748 MI.eraseFromParent();
749 }
750 }
751}
752
753/// For loop carried definitions, we split the lifetime of a virtual register
754/// that has uses past the definition in the next iteration. A copy with a new
755/// virtual register is inserted before the definition, which helps with
756/// generating a better register assignment.
757///
758/// v1 = phi(a, v2) v1 = phi(a, v2)
759/// v2 = phi(b, v3) v2 = phi(b, v3)
760/// v3 = .. v4 = copy v1
761/// .. = V1 v3 = ..
762/// .. = v4
763void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
764 MBBVectorTy &EpilogBBs) {
765 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
766 for (auto &PHI : KernelBB->phis()) {
767 Register Def = PHI.getOperand(0).getReg();
768 // Check for any Phi definition that is used as an operand of another Phi
769 // in the same block.
770 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
771 E = MRI.use_instr_end();
772 I != E; ++I) {
773 if (I->isPHI() && I->getParent() == KernelBB) {
774 // Get the loop carried definition.
775 unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
776 if (!LCDef)
777 continue;
778 MachineInstr *MI = MRI.getVRegDef(LCDef);
779 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
780 continue;
781 // Search through the rest of the block looking for uses of the Phi
782 // definition. If one occurs, then split the lifetime.
783 unsigned SplitReg = 0;
784 for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
785 KernelBB->instr_end()))
786 if (BBJ.readsRegister(Def)) {
787 // We split the lifetime when we find the first use.
788 if (SplitReg == 0) {
789 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
790 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
791 TII->get(TargetOpcode::COPY), SplitReg)
792 .addReg(Def);
793 }
794 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
795 }
796 if (!SplitReg)
797 continue;
798 // Search through each of the epilog blocks for any uses to be renamed.
799 for (auto &Epilog : EpilogBBs)
800 for (auto &I : *Epilog)
801 if (I.readsRegister(Def))
802 I.substituteRegister(Def, SplitReg, 0, *TRI);
803 break;
804 }
805 }
806 }
807}
808
809/// Remove the incoming block from the Phis in a basic block.
810static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
811 for (MachineInstr &MI : *BB) {
812 if (!MI.isPHI())
813 break;
814 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
815 if (MI.getOperand(i + 1).getMBB() == Incoming) {
816 MI.RemoveOperand(i + 1);
817 MI.RemoveOperand(i);
818 break;
819 }
820 }
821}
822
823/// Create branches from each prolog basic block to the appropriate epilog
824/// block. These edges are needed if the loop ends before reaching the
825/// kernel.
826void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
827 MBBVectorTy &PrologBBs,
828 MachineBasicBlock *KernelBB,
829 MBBVectorTy &EpilogBBs,
830 ValueMapTy *VRMap) {
831 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
832 MachineBasicBlock *LastPro = KernelBB;
833 MachineBasicBlock *LastEpi = KernelBB;
834
835 // Start from the blocks connected to the kernel and work "out"
836 // to the first prolog and the last epilog blocks.
837 SmallVector<MachineInstr *, 4> PrevInsts;
838 unsigned MaxIter = PrologBBs.size() - 1;
839 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
840 // Add branches to the prolog that go to the corresponding
841 // epilog, and the fall-thru prolog/kernel block.
842 MachineBasicBlock *Prolog = PrologBBs[j];
843 MachineBasicBlock *Epilog = EpilogBBs[i];
844
845 SmallVector<MachineOperand, 4> Cond;
846 Optional<bool> StaticallyGreater =
847 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
848 unsigned numAdded = 0;
849 if (!StaticallyGreater.hasValue()) {
850 Prolog->addSuccessor(Epilog);
851 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
852 } else if (*StaticallyGreater == false) {
853 Prolog->addSuccessor(Epilog);
854 Prolog->removeSuccessor(LastPro);
855 LastEpi->removeSuccessor(Epilog);
856 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
857 removePhis(Epilog, LastEpi);
858 // Remove the blocks that are no longer referenced.
859 if (LastPro != LastEpi) {
860 LastEpi->clear();
861 LastEpi->eraseFromParent();
862 }
863 if (LastPro == KernelBB) {
864 LoopInfo->disposed();
865 NewKernel = nullptr;
866 }
867 LastPro->clear();
868 LastPro->eraseFromParent();
869 } else {
870 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
871 removePhis(Epilog, Prolog);
872 }
873 LastPro = Prolog;
874 LastEpi = Epilog;
875 for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
876 E = Prolog->instr_rend();
877 I != E && numAdded > 0; ++I, --numAdded)
878 updateInstruction(&*I, false, j, 0, VRMap);
879 }
880
881 if (NewKernel) {
882 LoopInfo->setPreheader(PrologBBs[MaxIter]);
883 LoopInfo->adjustTripCount(-(MaxIter + 1));
884 }
885}
886
887/// Return true if we can compute the amount the instruction changes
888/// during each iteration. Set Delta to the amount of the change.
889bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
890 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
891 const MachineOperand *BaseOp;
892 int64_t Offset;
893 bool OffsetIsScalable;
894 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
895 return false;
896
897 // FIXME: This algorithm assumes instructions have fixed-size offsets.
898 if (OffsetIsScalable)
899 return false;
900
901 if (!BaseOp->isReg())
902 return false;
903
904 Register BaseReg = BaseOp->getReg();
905
906 MachineRegisterInfo &MRI = MF.getRegInfo();
907 // Check if there is a Phi. If so, get the definition in the loop.
908 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
909 if (BaseDef && BaseDef->isPHI()) {
910 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
911 BaseDef = MRI.getVRegDef(BaseReg);
912 }
913 if (!BaseDef)
914 return false;
915
916 int D = 0;
917 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
918 return false;
919
920 Delta = D;
921 return true;
922}
923
924/// Update the memory operand with a new offset when the pipeliner
925/// generates a new copy of the instruction that refers to a
926/// different memory location.
927void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
928 MachineInstr &OldMI,
929 unsigned Num) {
930 if (Num == 0)
931 return;
932 // If the instruction has memory operands, then adjust the offset
933 // when the instruction appears in different stages.
934 if (NewMI.memoperands_empty())
935 return;
936 SmallVector<MachineMemOperand *, 2> NewMMOs;
937 for (MachineMemOperand *MMO : NewMI.memoperands()) {
938 // TODO: Figure out whether isAtomic is really necessary (see D57601).
939 if (MMO->isVolatile() || MMO->isAtomic() ||
940 (MMO->isInvariant() && MMO->isDereferenceable()) ||
941 (!MMO->getValue())) {
942 NewMMOs.push_back(MMO);
943 continue;
944 }
945 unsigned Delta;
946 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
947 int64_t AdjOffset = Delta * Num;
948 NewMMOs.push_back(
949 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
950 } else {
951 NewMMOs.push_back(
952 MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
953 }
954 }
955 NewMI.setMemRefs(MF, NewMMOs);
956}
957
958/// Clone the instruction for the new pipelined loop and update the
959/// memory operands, if needed.
960MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
961 unsigned CurStageNum,
962 unsigned InstStageNum) {
963 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
964 // Check for tied operands in inline asm instructions. This should be handled
965 // elsewhere, but I'm not sure of the best solution.
966 if (OldMI->isInlineAsm())
967 for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
968 const auto &MO = OldMI->getOperand(i);
969 if (MO.isReg() && MO.isUse())
970 break;
971 unsigned UseIdx;
972 if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
973 NewMI->tieOperands(i, UseIdx);
974 }
975 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
976 return NewMI;
977}
978
979/// Clone the instruction for the new pipelined loop. If needed, this
980/// function updates the instruction using the values saved in the
981/// InstrChanges structure.
982MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
983 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
984 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
985 auto It = InstrChanges.find(OldMI);
986 if (It != InstrChanges.end()) {
987 std::pair<unsigned, int64_t> RegAndOffset = It->second;
988 unsigned BasePos, OffsetPos;
989 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
990 return nullptr;
991 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
992 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
993 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
994 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
995 NewMI->getOperand(OffsetPos).setImm(NewOffset);
996 }
997 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
998 return NewMI;
999}
1000
1001/// Update the machine instruction with new virtual registers. This
1002/// function may change the definitions and/or uses.
1003void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1004 bool LastDef,
1005 unsigned CurStageNum,
1006 unsigned InstrStageNum,
1007 ValueMapTy *VRMap) {
1008 for (MachineOperand &MO : NewMI->operands()) {
1009 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
1010 continue;
1011 Register reg = MO.getReg();
1012 if (MO.isDef()) {
1013 // Create a new virtual register for the definition.
1014 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1015 Register NewReg = MRI.createVirtualRegister(RC);
1016 MO.setReg(NewReg);
1017 VRMap[CurStageNum][reg] = NewReg;
1018 if (LastDef)
1019 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1020 } else if (MO.isUse()) {
1021 MachineInstr *Def = MRI.getVRegDef(reg);
1022 // Compute the stage that contains the last definition for the instruction.
1023 int DefStageNum = Schedule.getStage(Def);
1024 unsigned StageNum = CurStageNum;
1025 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1026 // Compute the difference in stages between the definition and the use.
1027 unsigned StageDiff = (InstrStageNum - DefStageNum);
1028 // Make an adjustment to get the last definition.
1029 StageNum -= StageDiff;
1030 }
1031 if (VRMap[StageNum].count(reg))
1032 MO.setReg(VRMap[StageNum][reg]);
1033 }
1034 }
1035}
1036
1037/// Return the instruction in the loop that defines the register.
1038/// If the definition is a Phi, then follow the Phi operand to
1039/// the instruction in the loop.
1040MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1041 SmallPtrSet<MachineInstr *, 8> Visited;
1042 MachineInstr *Def = MRI.getVRegDef(Reg);
1043 while (Def->isPHI()) {
1044 if (!Visited.insert(Def).second)
1045 break;
1046 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1047 if (Def->getOperand(i + 1).getMBB() == BB) {
1048 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1049 break;
1050 }
1051 }
1052 return Def;
1053}
1054
1055/// Return the new name for the value from the previous stage.
1056unsigned ModuloScheduleExpander::getPrevMapVal(
1057 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1058 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1059 unsigned PrevVal = 0;
1060 if (StageNum > PhiStage) {
1061 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1062 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1063 // The name is defined in the previous stage.
1064 PrevVal = VRMap[StageNum - 1][LoopVal];
1065 else if (VRMap[StageNum].count(LoopVal))
1066 // The previous name is defined in the current stage when the instruction
1067 // order is swapped.
1068 PrevVal = VRMap[StageNum][LoopVal];
1069 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1070 // The loop value hasn't yet been scheduled.
1071 PrevVal = LoopVal;
1072 else if (StageNum == PhiStage + 1)
1073 // The loop value is another phi, which has not been scheduled.
1074 PrevVal = getInitPhiReg(*LoopInst, BB);
1075 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1076 // The loop value is another phi, which has been scheduled.
1077 PrevVal =
1078 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1079 LoopStage, VRMap, BB);
1080 }
1081 return PrevVal;
1082}
1083
1084/// Rewrite the Phi values in the specified block to use the mappings
1085/// from the initial operand. Once the Phi is scheduled, we switch
1086/// to using the loop value instead of the Phi value, so those names
1087/// do not need to be rewritten.
1088void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1089 unsigned StageNum,
1090 ValueMapTy *VRMap,
1091 InstrMapTy &InstrMap) {
1092 for (auto &PHI : BB->phis()) {
1093 unsigned InitVal = 0;
1094 unsigned LoopVal = 0;
1095 getPhiRegs(PHI, BB, InitVal, LoopVal);
1096 Register PhiDef = PHI.getOperand(0).getReg();
1097
1098 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1099 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1100 unsigned NumPhis = getStagesForPhi(PhiDef);
1101 if (NumPhis > StageNum)
1102 NumPhis = StageNum;
1103 for (unsigned np = 0; np <= NumPhis; ++np) {
1104 unsigned NewVal =
1105 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1106 if (!NewVal)
1107 NewVal = InitVal;
1108 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1109 NewVal);
1110 }
1111 }
1112}
1113
1114/// Rewrite a previously scheduled instruction to use the register value
1115/// from the new instruction. Make sure the instruction occurs in the
1116/// basic block, and we don't change the uses in the new instruction.
1117void ModuloScheduleExpander::rewriteScheduledInstr(
1118 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1119 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1120 unsigned PrevReg) {
1121 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1122 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1123 // Rewrite uses that have been scheduled already to use the new
1124 // Phi register.
1125 for (MachineOperand &UseOp :
1126 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1127 MachineInstr *UseMI = UseOp.getParent();
1128 if (UseMI->getParent() != BB)
1129 continue;
1130 if (UseMI->isPHI()) {
1131 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1132 continue;
1133 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1134 continue;
1135 }
1136 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1137 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1138 MachineInstr *OrigMI = OrigInstr->second;
1139 int StageSched = Schedule.getStage(OrigMI);
1140 int CycleSched = Schedule.getCycle(OrigMI);
1141 unsigned ReplaceReg = 0;
1142 // This is the stage for the scheduled instruction.
1143 if (StagePhi == StageSched && Phi->isPHI()) {
1144 int CyclePhi = Schedule.getCycle(Phi);
1145 if (PrevReg && InProlog)
1146 ReplaceReg = PrevReg;
1147 else if (PrevReg && !isLoopCarried(*Phi) &&
1148 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1149 ReplaceReg = PrevReg;
1150 else
1151 ReplaceReg = NewReg;
1152 }
1153 // The scheduled instruction occurs before the scheduled Phi, and the
1154 // Phi is not loop carried.
1155 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1156 ReplaceReg = NewReg;
1157 if (StagePhi > StageSched && Phi->isPHI())
1158 ReplaceReg = NewReg;
1159 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1160 ReplaceReg = NewReg;
1161 if (ReplaceReg) {
1162 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1163 UseOp.setReg(ReplaceReg);
1164 }
1165 }
1166}
1167
1168bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1169 if (!Phi.isPHI())
1170 return false;
1171 int DefCycle = Schedule.getCycle(&Phi);
1172 int DefStage = Schedule.getStage(&Phi);
1173
1174 unsigned InitVal = 0;
1175 unsigned LoopVal = 0;
1176 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1177 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1178 if (!Use || Use->isPHI())
1179 return true;
1180 int LoopCycle = Schedule.getCycle(Use);
1181 int LoopStage = Schedule.getStage(Use);
1182 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1183}
1184
1185//===----------------------------------------------------------------------===//
1186// PeelingModuloScheduleExpander implementation
1187//===----------------------------------------------------------------------===//
1188// This is a reimplementation of ModuloScheduleExpander that works by creating
1189// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1190//===----------------------------------------------------------------------===//
1191
1192namespace {
1193// Remove any dead phis in MBB. Dead phis either have only one block as input
1194// (in which case they are the identity) or have no uses.
1195void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1196 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1197 bool Changed = true;
1198 while (Changed) {
1199 Changed = false;
1200 for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
1201 assert(MI.isPHI());
1202 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1203 if (LIS)
1204 LIS->RemoveMachineInstrFromMaps(MI);
1205 MI.eraseFromParent();
1206 Changed = true;
1207 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1208 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1209 MRI.getRegClass(MI.getOperand(0).getReg()));
1210 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1211 MI.getOperand(1).getReg());
1212 if (LIS)
1213 LIS->RemoveMachineInstrFromMaps(MI);
1214 MI.eraseFromParent();
1215 Changed = true;
1216 }
1217 }
1218 }
1219}
1220
1221/// Rewrites the kernel block in-place to adhere to the given schedule.
1222/// KernelRewriter holds all of the state required to perform the rewriting.
1223class KernelRewriter {
1224 ModuloSchedule &S;
1225 MachineBasicBlock *BB;
1226 MachineBasicBlock *PreheaderBB, *ExitBB;
1227 MachineRegisterInfo &MRI;
1228 const TargetInstrInfo *TII;
1229 LiveIntervals *LIS;
1230
1231 // Map from register class to canonical undef register for that class.
1232 DenseMap<const TargetRegisterClass *, Register> Undefs;
1233 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1234 // this map is only used when InitReg is non-undef.
1235 DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
1236 // Map from LoopReg to phi register where the InitReg is undef.
1237 DenseMap<Register, Register> UndefPhis;
1238
1239 // Reg is used by MI. Return the new register MI should use to adhere to the
1240 // schedule. Insert phis as necessary.
1241 Register remapUse(Register Reg, MachineInstr &MI);
1242 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1243 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1244 // or will be chosen so as to share another phi.
1245 Register phi(Register LoopReg, Optional<Register> InitReg = {},
1246 const TargetRegisterClass *RC = nullptr);
1247 // Create an undef register of the given register class.
1248 Register undef(const TargetRegisterClass *RC);
1249
1250public:
1251 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1252 LiveIntervals *LIS = nullptr);
1253 void rewrite();
1254};
1255} // namespace
1256
1257KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1258 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1259 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1260 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1261 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1262 PreheaderBB = *BB->pred_begin();
1263 if (PreheaderBB == BB)
1264 PreheaderBB = *std::next(BB->pred_begin());
1265}
1266
1267void KernelRewriter::rewrite() {
1268 // Rearrange the loop to be in schedule order. Note that the schedule may
1269 // contain instructions that are not owned by the loop block (InstrChanges and
1270 // friends), so we gracefully handle unowned instructions and delete any
1271 // instructions that weren't in the schedule.
1272 auto InsertPt = BB->getFirstTerminator();
1273 MachineInstr *FirstMI = nullptr;
1274 for (MachineInstr *MI : S.getInstructions()) {
1275 if (MI->isPHI())
1276 continue;
1277 if (MI->getParent())
1278 MI->removeFromParent();
1279 BB->insert(InsertPt, MI);
1280 if (!FirstMI)
1281 FirstMI = MI;
1282 }
1283 assert(FirstMI && "Failed to find first MI in schedule");
1284
1285 // At this point all of the scheduled instructions are between FirstMI
1286 // and the end of the block. Kill from the first non-phi to FirstMI.
1287 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1288 if (LIS)
1289 LIS->RemoveMachineInstrFromMaps(*I);
1290 (I++)->eraseFromParent();
1291 }
1292
1293 // Now remap every instruction in the loop.
1294 for (MachineInstr &MI : *BB) {
1295 if (MI.isPHI() || MI.isTerminator())
1296 continue;
1297 for (MachineOperand &MO : MI.uses()) {
1298 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1299 continue;
1300 Register Reg = remapUse(MO.getReg(), MI);
1301 MO.setReg(Reg);
1302 }
1303 }
1304 EliminateDeadPhis(BB, MRI, LIS);
1305
1306 // Ensure a phi exists for all instructions that are either referenced by
1307 // an illegal phi or by an instruction outside the loop. This allows us to
1308 // treat remaps of these values the same as "normal" values that come from
1309 // loop-carried phis.
1310 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1311 if (MI->isPHI()) {
1312 Register R = MI->getOperand(0).getReg();
1313 phi(R);
1314 continue;
1315 }
1316
1317 for (MachineOperand &Def : MI->defs()) {
1318 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1319 if (MI.getParent() != BB) {
1320 phi(Def.getReg());
1321 break;
1322 }
1323 }
1324 }
1325 }
1326}
1327
1328Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1329 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1330 if (!Producer)
1331 return Reg;
1332
1333 int ConsumerStage = S.getStage(&MI);
1334 if (!Producer->isPHI()) {
1335 // Non-phi producers are simple to remap. Insert as many phis as the
1336 // difference between the consumer and producer stages.
1337 if (Producer->getParent() != BB)
1338 // Producer was not inside the loop. Use the register as-is.
1339 return Reg;
1340 int ProducerStage = S.getStage(Producer);
1341 assert(ConsumerStage != -1 &&
1342 "In-loop consumer should always be scheduled!");
1343 assert(ConsumerStage >= ProducerStage);
1344 unsigned StageDiff = ConsumerStage - ProducerStage;
1345
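 // E.g. a value produced in stage 0 and consumed in stage 2 must survive two
 // kernel iterations, so two phis are chained below.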
1346 for (unsigned I = 0; I < StageDiff; ++I)
1347 Reg = phi(Reg);
1348 return Reg;
1349 }
1350
1351 // First, dive through the phi chain to find the defaults for the generated
1352 // phis.
1353 SmallVector<Optional<Register>, 4> Defaults;
1354 Register LoopReg = Reg;
1355 auto LoopProducer = Producer;
1356 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1357 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1358 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1359 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1360 assert(LoopProducer);
1361 }
1362 int LoopProducerStage = S.getStage(LoopProducer);
1363
1364 Optional<Register> IllegalPhiDefault;
1365
1366 if (LoopProducerStage == -1) {
1367 // Do nothing.
1368 } else if (LoopProducerStage > ConsumerStage) {
1369 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1370 // In addition, Consumer's cycle must be scheduled after Producer in the
1371 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1372 // functions.
1373#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1374 int LoopProducerCycle = S.getCycle(LoopProducer);
1375 int ConsumerCycle = S.getCycle(&MI);
1376#endif
1377 assert(LoopProducerCycle <= ConsumerCycle);
1378 assert(LoopProducerStage == ConsumerStage + 1);
1379 // Peel off the first phi from Defaults and insert a phi between producer
1380 // and consumer. This phi will not be at the front of the block so we
1381 // consider it illegal. It will only exist during the rewrite process; it
1382 // needs to exist while we peel off prologs because these could take the
1383 // default value. After that we can replace all uses with the loop producer
1384 // value.
1385 IllegalPhiDefault = Defaults.front();
1386 Defaults.erase(Defaults.begin());
1387 } else {
1388 assert(ConsumerStage >= LoopProducerStage);
1389 int StageDiff = ConsumerStage - LoopProducerStage;
1390 if (StageDiff > 0) {
1391 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1392 << " to " << (Defaults.size() + StageDiff) << "\n");
1393 // If we need more phis than we have defaults for, pad out with undefs for
1394 // the earliest phis, which are at the end of the defaults chain (the
1395 // chain is in reverse order).
1396 Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
1397 ? Optional<Register>()
1398 : Defaults.back());
1399 }
1400 }
1401
1402 // Now we know the number of stages to jump back, insert the phi chain.
1403 auto DefaultI = Defaults.rbegin();
1404 while (DefaultI != Defaults.rend())
1405 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1406
1407 if (IllegalPhiDefault.hasValue()) {
1408 // The consumer optionally consumes LoopProducer in the same iteration
1409 // (because the producer is scheduled at an earlier cycle than the consumer)
1410 // or the initial value. To facilitate this we create an illegal block here
1411 // by embedding a phi in the middle of the block. We will fix this up
1412 // immediately prior to pruning.
1413 auto RC = MRI.getRegClass(Reg);
1414 Register R = MRI.createVirtualRegister(RC);
1415 MachineInstr *IllegalPhi =
1416 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1417 .addReg(IllegalPhiDefault.getValue())
1418 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1419 .addReg(LoopReg)
1420 .addMBB(BB); // Block choice is arbitrary and has no effect.
1421 // Illegal phi should belong to the producer stage so that it can be
1422 // filtered correctly during peeling.
1423 S.setStage(IllegalPhi, LoopProducerStage);
1424 return R;
1425 }
1426
1427 return LoopReg;
1428}
1429
1430Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
1431 const TargetRegisterClass *RC) {
1432 // If the init register is not undef, try and find an existing phi.
1433 if (InitReg.hasValue()) {
1434 auto I = Phis.find({LoopReg, InitReg.getValue()});
1435 if (I != Phis.end())
1436 return I->second;
1437 } else {
1438 for (auto &KV : Phis) {
1439 if (KV.first.first == LoopReg)
1440 return KV.second;
1441 }
1442 }
1443
1444 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1445 // find a phi that takes undef as input.
1446 auto I = UndefPhis.find(LoopReg);
1447 if (I != UndefPhis.end()) {
1448 Register R = I->second;
1449 if (!InitReg.hasValue())
1450 // Found a phi taking undef as input, and this input is undef so return
1451 // without any more changes.
1452 return R;
1453 // Found a phi taking undef as input, so rewrite it to take InitReg.
1454 MachineInstr *MI = MRI.getVRegDef(R);
1455 MI->getOperand(1).setReg(InitReg.getValue());
1456 Phis.insert({{LoopReg, InitReg.getValue()}, R});
1457 MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
1458 UndefPhis.erase(I);
1459 return R;
1460 }
1461
1462 // Failed to find any existing phi to reuse, so create a new one.
1463 if (!RC)
1464 RC = MRI.getRegClass(LoopReg);
1465 Register R = MRI.createVirtualRegister(RC);
1466 if (InitReg.hasValue())
1467 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1468 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1469 .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
1470 .addMBB(PreheaderBB)
1471 .addReg(LoopReg)
1472 .addMBB(BB);
1473 if (!InitReg.hasValue())
1474 UndefPhis[LoopReg] = R;
1475 else
1476 Phis[{LoopReg, *InitReg}] = R;
1477 return R;
1478}
1479
1480Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1481 Register &R = Undefs[RC];
1482 if (R == 0) {
1483 // Create an IMPLICIT_DEF that defines this register if we need it.
1484 // All uses of this should be removed by the time we have finished unrolling
1485 // prologs and epilogs.
1486 R = MRI.createVirtualRegister(RC);
1487 auto *InsertBB = &PreheaderBB->getParent()->front();
1488 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1489 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1490 }
1491 return R;
1492}
1493
1494namespace {
1495/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1496/// the operand are discovered, such as how many in-loop PHIs it has to jump
1497/// through and defaults for these phis.
1498class KernelOperandInfo {
1499 MachineBasicBlock *BB;
1500 MachineRegisterInfo &MRI;
1501 SmallVector<Register, 4> PhiDefaults;
1502 MachineOperand *Source;
1503 MachineOperand *Target;
1504
1505public:
1506 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1507 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1508 : MRI(MRI) {
1509 Source = MO;
1510 BB = MO->getParent()->getParent();
1511 while (isRegInLoop(MO)) {
1512 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1513 if (MI->isFullCopy()) {
1514 MO = &MI->getOperand(1);
1515 continue;
1516 }
1517 if (!MI->isPHI())
1518 break;
1519 // If this is an illegal phi, don't count it in distance.
1520 if (IllegalPhis.count(MI)) {
1521 MO = &MI->getOperand(3);
1522 continue;
1523 }
1524
1525 Register Default = getInitPhiReg(*MI, BB);
1526 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1527 : &MI->getOperand(3);
1528 PhiDefaults.push_back(Default);
1529 }
1530 Target = MO;
1531 }
1532
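 // Operands are considered equivalent if they reach their underlying value
 // through the same number of loop-carried phis (the same "distance").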
1533 bool operator==(const KernelOperandInfo &Other) const {
1534 return PhiDefaults.size() == Other.PhiDefaults.size();
1535 }
1536
1537 void print(raw_ostream &OS) const {
1538 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1539 << *Source->getParent();
1540 }
1541
1542private:
1543 bool isRegInLoop(MachineOperand *MO) {
1544 return MO->isReg() && MO->getReg().isVirtual() &&
1545 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1546 }
1547};
1548} // namespace
1549
1550MachineBasicBlock *
1551PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1552 MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1553 if (LPD == LPD_Front)
1554 PeeledFront.push_back(NewBB);
1555 else
1556 PeeledBack.push_front(NewBB);
1557 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1558 ++I, ++NI) {
1559 CanonicalMIs[&*I] = &*I;
1560 CanonicalMIs[&*NI] = &*I;
1561 BlockMIs[{NewBB, &*I}] = &*NI;
1562 BlockMIs[{BB, &*I}] = &*I;
1563 }
1564 return NewBB;
1565}
1566
1567void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
1568 int MinStage) {
1569 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1570 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1571 MachineInstr *MI = &*I++;
1572 int Stage = getStage(MI);
1573 if (Stage == -1 || Stage >= MinStage)
1574 continue;
1575
1576 for (MachineOperand &DefMO : MI->defs()) {
1577 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1578 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1579 // Only PHIs can use values from this block by construction.
1580 // Match with the equivalent PHI in B.
1581 assert(UseMI.isPHI());
1582 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1583 MI->getParent());
1584 Subs.emplace_back(&UseMI, Reg);
1585 }
1586 for (auto &Sub : Subs)
1587 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1588 *MRI.getTargetRegisterInfo());
1589 }
1590 if (LIS)
1591 LIS->RemoveMachineInstrFromMaps(*MI);
1592 MI->eraseFromParent();
1593 }
1594}
1595
1596void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1597 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1598 auto InsertPt = DestBB->getFirstNonPHI();
1599 DenseMap<Register, Register> Remaps;
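 // Remaps records registers that receive a new definition in DestBB (newly
 // created legal phis and cloned phis) so uses inside DestBB can be rewired below.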
1600 for (MachineInstr &MI : llvm::make_early_inc_range(
1601 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1602 if (MI.isPHI()) {
1603 // This is an illegal PHI. If we move any instructions using an illegal
1604 // PHI, we need to create a legal Phi.
1605 if (getStage(&MI) != Stage) {
1606 // The legal Phi is not necessary if the illegal phi's stage
1607 // is being moved.
1608 Register PhiR = MI.getOperand(0).getReg();
1609 auto RC = MRI.getRegClass(PhiR);
1610 Register NR = MRI.createVirtualRegister(RC);
1611 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1612 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1613 .addReg(PhiR)
1614 .addMBB(SourceBB);
1615 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1616 CanonicalMIs[NI] = CanonicalMIs[&MI];
1617 Remaps[PhiR] = NR;
1618 }
1619 }
1620 if (getStage(&MI) != Stage)
1621 continue;
1622 MI.removeFromParent();
1623 DestBB->insert(InsertPt, &MI);
1624 auto *KernelMI = CanonicalMIs[&MI];
1625 BlockMIs[{DestBB, KernelMI}] = &MI;
1626 BlockMIs.erase({SourceBB, KernelMI});
1627 }
1628 SmallVector<MachineInstr *, 4> PhiToDelete;
1629 for (MachineInstr &MI : DestBB->phis()) {
1630 assert(MI.getNumOperands() == 3);
1631 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1632 // If the instruction referenced by the phi is moved inside the block
1633 // we don't need the phi anymore.
1634 if (getStage(Def) == Stage) {
1635 Register PhiReg = MI.getOperand(0).getReg();
1636 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
1637 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1638 MI.getOperand(0).setReg(PhiReg);
1639 PhiToDelete.push_back(&MI);
1640 }
1641 }
1642 for (auto *P : PhiToDelete)
1643 P->eraseFromParent();
1644 InsertPt = DestBB->getFirstNonPHI();
1645 // Helper to clone Phi instructions into the destination block. We clone Phi
1646 // greedily to avoid combinatorial explosion of Phi instructions.
1647 auto clonePhi = [&](MachineInstr *Phi) {
1648 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1649 DestBB->insert(InsertPt, NewMI);
1650 Register OrigR = Phi->getOperand(0).getReg();
1651 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1652 NewMI->getOperand(0).setReg(R);
1653 NewMI->getOperand(1).setReg(OrigR);
1654 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1655 Remaps[OrigR] = R;
1656 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1657 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1658 PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
1659 return R;
1660 };
1661 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1662 for (MachineOperand &MO : I->uses()) {
1663 if (!MO.isReg())
1664 continue;
1665 if (Remaps.count(MO.getReg()))
1666 MO.setReg(Remaps[MO.getReg()]);
1667 else {
1668 // If we are using a phi from the source block we need to add a new phi
1669 // pointing to the old one.
1670 MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1671 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1672 Register R = clonePhi(Use);
1673 MO.setReg(R);
1674 }
1675 }
1676 }
1677 }
1678}
1679
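// Starting from the canonical kernel phi, follow the loop-carried operand once
// per peeled iteration of Phi to find the canonical register feeding that
// iteration.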
1680Register
1681PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
1682 MachineInstr *Phi) {
1683 unsigned distance = PhiNodeLoopIteration[Phi];
1684 MachineInstr *CanonicalUse = CanonicalPhi;
1685 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1686 for (unsigned I = 0; I < distance; ++I) {
1687 assert(CanonicalUse->isPHI());
1688 assert(CanonicalUse->getNumOperands() == 5);
1689 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1690 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1691 std::swap(LoopRegIdx, InitRegIdx);
1692 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1693 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1694 }
1695 return CanonicalUseReg;
1696}
1697
1698void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1699 BitVector LS(Schedule.getNumStages(), true);
1700 BitVector AS(Schedule.getNumStages(), true);
1701 LiveStages[BB] = LS;
1702 AvailableStages[BB] = AS;
1703
1704 // Peel out the prologs.
1705 LS.reset();
1706 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1707 LS[I] = true;
1708 Prologs.push_back(peelKernel(LPD_Front));
1709 LiveStages[Prologs.back()] = LS;
1710 AvailableStages[Prologs.back()] = LS;
1711 }
1712
1713 // Create a block that will end up as the new loop exiting block (dominated by
1714 // all prologs and epilogs). It will only contain PHIs, in the same order as
1715 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1716 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1717 // property that any value defined in BB but used outside of BB is used by a
1718 // PHI in the exiting block.
1719 MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
1720 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1721 // Push out the epilogs, again in reverse order.
1722 // We can't assume anything about the minimum loop trip count at this point,
1723 // so emit a fairly complex epilog.
1724
1725 // We first peel number of stages minus one epilogue. Then we remove dead
1726 // stages and reorder instructions based on their stage. If we have 3 stages
1727 // we generate first:
1728 // E0[3, 2, 1]
1729 // E1[3', 2']
1730 // E2[3'']
1731 // And then we move instructions based on their stages to have:
1732 // E0[3]
1733 // E1[2, 3']
1734 // E2[1, 2', 3'']
1735 // The transformation is legal because we only move instructions past
1736 // instructions of a previous loop iteration.
1737 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1738 Epilogs.push_back(peelKernel(LPD_Back));
1739 MachineBasicBlock *B = Epilogs.back();
1740 filterInstructions(B, Schedule.getNumStages() - I);
1741 // Keep track of which iteration each phi belongs to. We need it to know
1742 // what version of the variable to use during prologue/epilogue stitching.
1743 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1744 for (MachineInstr &Phi : B->phis())
1745 PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
1746 }
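 // The nested loops below perform the reordering described above, moving one
 // stage between one pair of adjacent epilog blocks at a time so that phi
 // nodes remain consistent.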
1747 for (size_t I = 0; I < Epilogs.size(); I++) {
1748 LS.reset();
1749 for (size_t J = I; J < Epilogs.size(); J++) {
1750 int Iteration = J;
1751 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1752 // Move stage one block at a time so that Phi nodes are updated correctly.
1753 for (size_t K = Iteration; K > I; K--)
1754 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1755 LS[Stage] = true;
1756 }
1757 LiveStages[Epilogs[I]] = LS;
1758 AvailableStages[Epilogs[I]] = AS;
1759 }
1760
1761 // Now that we've defined all the prolog and epilog blocks as a fallthrough
1762 // sequence, add the edges that will be followed if the loop trip count is
1763 // lower than the number of stages (connecting prologs directly with epilogs).
1764 auto PI = Prologs.begin();
1765 auto EI = Epilogs.begin();
1766 assert(Prologs.size() == Epilogs.size());
1767 for (; PI != Prologs.end(); ++PI, ++EI) {
1768 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1769 (*PI)->addSuccessor(*EI);
1770 for (MachineInstr &MI : (*EI)->phis()) {
1771 Register Reg = MI.getOperand(1).getReg();
1772 MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1773 if (Use && Use->getParent() == Pred) {
1774 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1775 if (CanonicalUse->isPHI()) {
1776 // If the use comes from a phi we need to skip as many phi as the
1777 // distance between the epilogue and the kernel. Trace through the phi
1778 // chain to find the right value.
1779 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1780 }
1781 Reg = getEquivalentRegisterIn(Reg, *PI);
1782 }
1783 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1784 MI.addOperand(MachineOperand::CreateMBB(*PI));
1785 }
1786 }
1787
1788 // Create a list of all blocks in order.
1789 SmallVector<MachineBasicBlock *, 8> Blocks;
1790 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1791 Blocks.push_back(BB);
1792 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1793
1794 // Iterate in reverse order over all instructions, remapping as we go.
1795 for (MachineBasicBlock *B : reverse(Blocks)) {
1796 for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
1797 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1798 MachineInstr *MI = &*I++;
1799 rewriteUsesOf(MI);
1800 }
1801 }
1802 for (auto *MI : IllegalPhisToDelete) {
1803 if (LIS)
1804 LIS->RemoveMachineInstrFromMaps(*MI);
1805 MI->eraseFromParent();
1806 }
1807 IllegalPhisToDelete.clear();
1808
1809 // Now that all remapping has been done, we're free to optimize the generated code.
1810 for (MachineBasicBlock *B : reverse(Blocks))
1811 EliminateDeadPhis(B, MRI, LIS);
1812 EliminateDeadPhis(ExitingBB, MRI, LIS);
1813}
1814
1815MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1816 MachineFunction &MF = *BB->getParent();
1817 MachineBasicBlock *Exit = *BB->succ_begin();
1818 if (Exit == BB)
1819 Exit = *std::next(BB->succ_begin());
1820
1821 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1822 MF.insert(std::next(BB->getIterator()), NewBB);
1823
1824 // Clone all phis in BB into NewBB and rewrite.
1825 for (MachineInstr &MI : BB->phis()) {
1826 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1827 Register OldR = MI.getOperand(3).getReg();
1828 Register R = MRI.createVirtualRegister(RC);
1829 SmallVector<MachineInstr *, 4> Uses;
1830 for (MachineInstr &Use : MRI.use_instructions(OldR))
1831 if (Use.getParent() != BB)
1832 Uses.push_back(&Use);
1833 for (MachineInstr *Use : Uses)
1834 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1835 *MRI.getTargetRegisterInfo());
1836 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1837 .addReg(OldR)
1838 .addMBB(BB);
1839 BlockMIs[{NewBB, &MI}] = NI;
1840 CanonicalMIs[NI] = &MI;
1841 }
1842 BB->replaceSuccessor(Exit, NewBB);
1843 Exit->replacePhiUsesWith(BB, NewBB);
1844 NewBB->addSuccessor(Exit);
1845
1846 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1847 SmallVector<MachineOperand, 4> Cond;
1848 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1849 (void)CanAnalyzeBr;
1850 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1851 TII->removeBranch(*BB);
1852 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1853 Cond, DebugLoc());
1854 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1855 return NewBB;
1856}
1857
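// Map a register defined in one (peeled) copy of the kernel to the register
// defined by the corresponding cloned instruction in BB, via CanonicalMIs and
// BlockMIs.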
1858Register
1859PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
1860 MachineBasicBlock *BB) {
1861 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1862 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1863 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1864}
1865
1866void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
1867 if (MI->isPHI()) {
1868 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1869 // and it is produced by this block.
1870 Register PhiR = MI->getOperand(0).getReg();
1871 Register R = MI->getOperand(3).getReg();
1872 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
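 // If the stage producing the loop-carried value is not available in this
 // block, fall back to the incoming value in operand 1.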
1873 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1874 R = MI->getOperand(1).getReg();
1875 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1876 MRI.replaceRegWith(PhiR, R);
1877 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1878 // later to figure out how to remap registers.
1879 MI->getOperand(0).setReg(PhiR);
1880 IllegalPhisToDelete.push_back(MI);
1881 return;
1882 }
1883
1884 int Stage = getStage(MI);
1885 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1886 LiveStages[MI->getParent()].test(Stage))
1887 // Instruction is live, no rewriting to do.
1888 return;
1889
1890 for (MachineOperand &DefMO : MI->defs()) {
1891 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1892 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1893 // Only PHIs can use values from this block by construction.
1894 // Match with the equivalent PHI in B.
1895 assert(UseMI.isPHI());
1896 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1897 MI->getParent());
1898 Subs.emplace_back(&UseMI, Reg);
1899 }
1900 for (auto &Sub : Subs)
1901 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1902 *MRI.getTargetRegisterInfo());
1903 }
1904 if (LIS)
1905 LIS->RemoveMachineInstrFromMaps(*MI);
1906 MI->eraseFromParent();
1907}
1908
1909void PeelingModuloScheduleExpander::fixupBranches() {
1910 // Work outwards from the kernel.
1911 bool KernelDisposed = false;
1912 int TC = Schedule.getNumStages() - 1;
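 // Each prolog is guarded by a "trip count > TC" test; when the test can fail,
 // the prolog must be able to branch straight to its matching epilog instead of
 // falling through towards the kernel.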
1913 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1914 ++PI, ++EI, --TC) {
1915 MachineBasicBlock *Prolog = *PI;
1916 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1917 MachineBasicBlock *Epilog = *EI;
1918 SmallVector<MachineOperand, 4> Cond;
1919 TII->removeBranch(*Prolog);
1920 Optional<bool> StaticallyGreater =
1921 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1922 if (!StaticallyGreater.hasValue()) {
1923 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1924 // Dynamically branch based on Cond.
1925 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1926 } else if (*StaticallyGreater == false) {
1927 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1928 // Prolog never falls through; branch to epilog and orphan interior
1929 // blocks. Leave it to unreachable-block-elim to clean up.
1930 Prolog->removeSuccessor(Fallthrough);
1931 for (MachineInstr &P : Fallthrough->phis()) {
1932 P.RemoveOperand(2);
1933 P.RemoveOperand(1);
1934 }
1935 TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
1936 KernelDisposed = true;
1937 } else {
1938 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1939 // Prolog always falls through; remove incoming values in epilog.
1940 Prolog->removeSuccessor(Epilog);
1941 for (MachineInstr &P : Epilog->phis()) {
1942 P.RemoveOperand(4);
1943 P.RemoveOperand(3);
1944 }
1945 }
1946 }
1947
1948 if (!KernelDisposed) {
1949 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1950 LoopInfo->setPreheader(Prologs.back());
1951 } else {
1952 LoopInfo->disposed();
1953 }
1954}
1955
1956void PeelingModuloScheduleExpander::rewriteKernel() {
1957 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
1958 KR.rewrite();
1959}
1960
1961void PeelingModuloScheduleExpander::expand() {
1962 BB = Schedule.getLoop()->getTopBlock();
1963 Preheader = Schedule.getLoop()->getLoopPreheader();
1964 LLVM_DEBUG(Schedule.dump());
1965 LoopInfo = TII->analyzeLoopForPipelining(BB);
1966 assert(LoopInfo);
1967
1968 rewriteKernel();
1969 peelPrologAndEpilogs();
1970 fixupBranches();
1971}
1972
1973void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
1974 BB = Schedule.getLoop()->getTopBlock();
1975 Preheader = Schedule.getLoop()->getLoopPreheader();
1976
1977 // Dump the schedule before we invalidate and remap all its instructions.
1978 // Stash it in a string so we can print it if we found an error.
1979 std::string ScheduleDump;
1980 raw_string_ostream OS(ScheduleDump);
1981 Schedule.print(OS);
1982 OS.flush();
1983
1984 // First, run the normal ModuloScheduleExpander. We don't support any
1985 // InstrChanges.
1986 assert(LIS && "Requires LiveIntervals!");
1987 ModuloScheduleExpander MSE(MF, Schedule, *LIS,
1988 ModuloScheduleExpander::InstrChangesTy());
1989 MSE.expand();
1990 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
1991 if (!ExpandedKernel) {
1992 // The expander optimized away the kernel. We can't do any useful checking.
1993 MSE.cleanup();
1994 return;
1995 }
1996 // Before running the KernelRewriter, re-add BB into the CFG.
1997 Preheader->addSuccessor(BB);
1998
1999 // Now run the new expansion algorithm.
2000 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2001 KR.rewrite();
2002 peelPrologAndEpilogs();
2003
2004 // Collect all illegal phis that the new algorithm created. We'll give these
2005 // to KernelOperandInfo.
2006 SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2007 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2008 if (NI->isPHI())
2009 IllegalPhis.insert(&*NI);
2010 }
2011
2012 // Co-iterate across both kernels. We expect them to be identical apart from
2013 // phis and full COPYs (we look through both).
2014 SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
2015 auto OI = ExpandedKernel->begin();
2016 auto NI = BB->begin();
2017 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2018 while (OI->isPHI() || OI->isFullCopy())
2019 ++OI;
2020 while (NI->isPHI() || NI->isFullCopy())
2021 ++NI;
2022 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2023 // Analyze every operand separately.
2024 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2025 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2026 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2027 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2028 }
2029
2030 bool Failed = false;
2031 for (auto &OldAndNew : KOIs) {
2032 if (OldAndNew.first == OldAndNew.second)
2033 continue;
2034 Failed = true;
2035 errs() << "Modulo kernel validation error: [\n";
2036 errs() << " [golden] ";
2037 OldAndNew.first.print(errs());
2038 errs() << " ";
2039 OldAndNew.second.print(errs());
2040 errs() << "]\n";
2041 }
2042
2043 if (Failed) {
2044 errs() << "Golden reference kernel:\n";
2045 ExpandedKernel->print(errs());
2046 errs() << "New kernel:\n";
2047 BB->print(errs());
2048 errs() << ScheduleDump;
2049 report_fatal_error(
2050 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2051 }
2052
2053 // Cleanup by removing BB from the CFG again as the original
2054 // ModuloScheduleExpander intended.
2055 Preheader->removeSuccessor(BB);
2056 MSE.cleanup();
2057}
2058
2059//===----------------------------------------------------------------------===//
2060// ModuloScheduleTestPass implementation
2061//===----------------------------------------------------------------------===//
2062// This pass constructs a ModuloSchedule from its module and runs
2063// ModuloScheduleExpander.
2064//
2065// The module is expected to contain a single-block analyzable loop.
2066// The total order of instructions is taken from the loop as-is.
2067// Instructions are expected to be annotated with a PostInstrSymbol.
2068// This PostInstrSymbol must have the following format:
2069// "Stage=%d Cycle=%d".
2070//===----------------------------------------------------------------------===//
2071
2072namespace {
2073class ModuloScheduleTest : public MachineFunctionPass {
2074public:
2075 static char ID;
2076
2077 ModuloScheduleTest() : MachineFunctionPass(ID) {
2078 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
2079 }
2080
2081 bool runOnMachineFunction(MachineFunction &MF) override;
2082 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2083
2084 void getAnalysisUsage(AnalysisUsage &AU) const override {
2085 AU.addRequired<MachineLoopInfo>();
2086 AU.addRequired<LiveIntervals>();
2087 MachineFunctionPass::getAnalysisUsage(AU);
2088 }
2089};
2090} // namespace
2091
2092char ModuloScheduleTest::ID = 0;
2093
2094INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2095 "Modulo Schedule test pass", false, false)
2096INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
2097INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
2098INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2099 "Modulo Schedule test pass", false, false)
2100
2101bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2102 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2103 for (auto *L : MLI) {
2104 if (L->getTopBlock() != L->getBottomBlock())
2105 continue;
2106 runOnLoop(MF, *L);
2107 return false;
2108 }
2109 return false;
2110}
2111
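// Parse a post-instruction symbol of the form "Stage-<n>_Cycle-<m>" (as emitted
// by ModuloScheduleTestAnnotater::annotate), e.g. "Stage-1_Cycle-5" yields
// Stage=1 and Cycle=5.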
2112static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2113 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2114 std::pair<StringRef, StringRef> StageTokenAndValue =
2115 getToken(StageAndCycle.first, "-");
2116 std::pair<StringRef, StringRef> CycleTokenAndValue =
2117 getToken(StageAndCycle.second, "-");
2118 if (StageTokenAndValue.first != "Stage" ||
2119 CycleTokenAndValue.first != "_Cycle") {
2120 llvm_unreachable(
2121 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2122 return;
2123 }
2124
2125 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2126 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2127
2128 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2129}
2130
2131void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2132 LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2133 MachineBasicBlock *BB = L.getTopBlock();
2134 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2135
2136 DenseMap<MachineInstr *, int> Cycle, Stage;
2137 std::vector<MachineInstr *> Instrs;
2138 for (MachineInstr &MI : *BB) {
2139 if (MI.isTerminator())
2140 continue;
2141 Instrs.push_back(&MI);
2142 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2143 dbgs() << "Parsing post-instr symbol for " << MI;
2144 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2145 }
2146 }
2147
2148 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2149 std::move(Stage));
2150 ModuloScheduleExpander MSE(
2151 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2152 MSE.expand();
2153 MSE.cleanup();
2154}
2155
2156//===----------------------------------------------------------------------===//
2157// ModuloScheduleTestAnnotater implementation
2158//===----------------------------------------------------------------------===//
2159
2160void ModuloScheduleTestAnnotater::annotate() {
2161 for (MachineInstr *MI : S.getInstructions()) {
2162 SmallVector<char, 16> SV;
2163 raw_svector_ostream OS(SV);
2164 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2165 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2166 MI->setPostInstrSymbol(MF, Sym);
2167 }
2168}