Bug Summary

File:lib/CodeGen/ModuloSchedule.cpp
Warning:line 519, column 13
Value stored to 'NewReg' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ModuloSchedule.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn374877/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn374877=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-10-15-233810-7101-1 -x c++ /build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp
1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/CodeGen/ModuloSchedule.h"
10#include "llvm/ADT/StringExtras.h"
11#include "llvm/CodeGen/LiveIntervals.h"
12#include "llvm/CodeGen/MachineInstrBuilder.h"
13#include "llvm/CodeGen/MachineLoopUtils.h"
14#include "llvm/CodeGen/MachineRegisterInfo.h"
15#include "llvm/CodeGen/TargetInstrInfo.h"
16#include "llvm/MC/MCContext.h"
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/raw_ostream.h"
20
21#define DEBUG_TYPE"pipeliner" "pipeliner"
22using namespace llvm;
23
24void ModuloSchedule::print(raw_ostream &OS) {
25 for (MachineInstr *MI : ScheduledInstrs)
26 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
27}
28
29//===----------------------------------------------------------------------===//
30// ModuloScheduleExpander implementation
31//===----------------------------------------------------------------------===//
32
33/// Return the register values for the operands of a Phi instruction.
34/// This function assume the instruction is a Phi.
35static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
36 unsigned &InitVal, unsigned &LoopVal) {
37 assert(Phi.isPHI() && "Expecting a Phi.")((Phi.isPHI() && "Expecting a Phi.") ? static_cast<
void> (0) : __assert_fail ("Phi.isPHI() && \"Expecting a Phi.\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 37, __PRETTY_FUNCTION__))
;
38
39 InitVal = 0;
40 LoopVal = 0;
41 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
42 if (Phi.getOperand(i + 1).getMBB() != Loop)
43 InitVal = Phi.getOperand(i).getReg();
44 else
45 LoopVal = Phi.getOperand(i).getReg();
46
47 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.")((InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."
) ? static_cast<void> (0) : __assert_fail ("InitVal != 0 && LoopVal != 0 && \"Unexpected Phi structure.\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 47, __PRETTY_FUNCTION__))
;
48}
49
50/// Return the Phi register value that comes from the incoming block.
51static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
52 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
53 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
54 return Phi.getOperand(i).getReg();
55 return 0;
56}
57
58/// Return the Phi register value that comes the loop block.
59static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
60 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
61 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
62 return Phi.getOperand(i).getReg();
63 return 0;
64}
65
66void ModuloScheduleExpander::expand() {
67 BB = Schedule.getLoop()->getTopBlock();
68 Preheader = *BB->pred_begin();
69 if (Preheader == BB)
70 Preheader = *std::next(BB->pred_begin());
71
72 // Iterate over the definitions in each instruction, and compute the
73 // stage difference for each use. Keep the maximum value.
74 for (MachineInstr *MI : Schedule.getInstructions()) {
75 int DefStage = Schedule.getStage(MI);
76 for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
77 MachineOperand &Op = MI->getOperand(i);
78 if (!Op.isReg() || !Op.isDef())
79 continue;
80
81 Register Reg = Op.getReg();
82 unsigned MaxDiff = 0;
83 bool PhiIsSwapped = false;
84 for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
85 EI = MRI.use_end();
86 UI != EI; ++UI) {
87 MachineOperand &UseOp = *UI;
88 MachineInstr *UseMI = UseOp.getParent();
89 int UseStage = Schedule.getStage(UseMI);
90 unsigned Diff = 0;
91 if (UseStage != -1 && UseStage >= DefStage)
92 Diff = UseStage - DefStage;
93 if (MI->isPHI()) {
94 if (isLoopCarried(*MI))
95 ++Diff;
96 else
97 PhiIsSwapped = true;
98 }
99 MaxDiff = std::max(Diff, MaxDiff);
100 }
101 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
102 }
103 }
104
105 generatePipelinedLoop();
106}
107
108void ModuloScheduleExpander::generatePipelinedLoop() {
109 LoopInfo = TII->analyzeLoopForPipelining(BB);
110 assert(LoopInfo && "Must be able to analyze loop!")((LoopInfo && "Must be able to analyze loop!") ? static_cast
<void> (0) : __assert_fail ("LoopInfo && \"Must be able to analyze loop!\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 110, __PRETTY_FUNCTION__))
;
111
112 // Create a new basic block for the kernel and add it to the CFG.
113 MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
114
115 unsigned MaxStageCount = Schedule.getNumStages() - 1;
116
117 // Remember the registers that are used in different stages. The index is
118 // the iteration, or stage, that the instruction is scheduled in. This is
119 // a map between register names in the original block and the names created
120 // in each stage of the pipelined loop.
121 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
122 InstrMapTy InstrMap;
123
124 SmallVector<MachineBasicBlock *, 4> PrologBBs;
125
126 // Generate the prolog instructions that set up the pipeline.
127 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
128 MF.insert(BB->getIterator(), KernelBB);
129
130 // Rearrange the instructions to generate the new, pipelined loop,
131 // and update register names as needed.
132 for (MachineInstr *CI : Schedule.getInstructions()) {
133 if (CI->isPHI())
134 continue;
135 unsigned StageNum = Schedule.getStage(CI);
136 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
137 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
138 KernelBB->push_back(NewMI);
139 InstrMap[NewMI] = CI;
140 }
141
142 // Copy any terminator instructions to the new kernel, and update
143 // names as needed.
144 for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
145 E = BB->instr_end();
146 I != E; ++I) {
147 MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
148 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
149 KernelBB->push_back(NewMI);
150 InstrMap[NewMI] = &*I;
151 }
152
153 NewKernel = KernelBB;
154 KernelBB->transferSuccessors(BB);
155 KernelBB->replaceSuccessor(BB, KernelBB);
156
157 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
158 InstrMap, MaxStageCount, MaxStageCount, false);
159 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
160 MaxStageCount, MaxStageCount, false);
161
162 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { dbgs() << "New block\n"; KernelBB->
dump();; } } while (false)
;
163
164 SmallVector<MachineBasicBlock *, 4> EpilogBBs;
165 // Generate the epilog instructions to complete the pipeline.
166 generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
167
168 // We need this step because the register allocation doesn't handle some
169 // situations well, so we insert copies to help out.
170 splitLifetimes(KernelBB, EpilogBBs);
171
172 // Remove dead instructions due to loop induction variables.
173 removeDeadInstructions(KernelBB, EpilogBBs);
174
175 // Add branches between prolog and epilog blocks.
176 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
177
178 delete[] VRMap;
179}
180
181void ModuloScheduleExpander::cleanup() {
182 // Remove the original loop since it's no longer referenced.
183 for (auto &I : *BB)
184 LIS.RemoveMachineInstrFromMaps(I);
185 BB->clear();
186 BB->eraseFromParent();
187}
188
189/// Generate the pipeline prolog code.
190void ModuloScheduleExpander::generateProlog(unsigned LastStage,
191 MachineBasicBlock *KernelBB,
192 ValueMapTy *VRMap,
193 MBBVectorTy &PrologBBs) {
194 MachineBasicBlock *PredBB = Preheader;
195 InstrMapTy InstrMap;
196
197 // Generate a basic block for each stage, not including the last stage,
198 // which will be generated in the kernel. Each basic block may contain
199 // instructions from multiple stages/iterations.
200 for (unsigned i = 0; i < LastStage; ++i) {
201 // Create and insert the prolog basic block prior to the original loop
202 // basic block. The original loop is removed later.
203 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
204 PrologBBs.push_back(NewBB);
205 MF.insert(BB->getIterator(), NewBB);
206 NewBB->transferSuccessors(PredBB);
207 PredBB->addSuccessor(NewBB);
208 PredBB = NewBB;
209
210 // Generate instructions for each appropriate stage. Process instructions
211 // in original program order.
212 for (int StageNum = i; StageNum >= 0; --StageNum) {
213 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
214 BBE = BB->getFirstTerminator();
215 BBI != BBE; ++BBI) {
216 if (Schedule.getStage(&*BBI) == StageNum) {
217 if (BBI->isPHI())
218 continue;
219 MachineInstr *NewMI =
220 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
221 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
222 NewBB->push_back(NewMI);
223 InstrMap[NewMI] = &*BBI;
224 }
225 }
226 }
227 rewritePhiValues(NewBB, i, VRMap, InstrMap);
228 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { { dbgs() << "prolog:\n"; NewBB->dump
(); }; } } while (false)
229 dbgs() << "prolog:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { { dbgs() << "prolog:\n"; NewBB->dump
(); }; } } while (false)
230 NewBB->dump();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { { dbgs() << "prolog:\n"; NewBB->dump
(); }; } } while (false)
231 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { { dbgs() << "prolog:\n"; NewBB->dump
(); }; } } while (false)
;
232 }
233
234 PredBB->replaceSuccessor(BB, KernelBB);
235
236 // Check if we need to remove the branch from the preheader to the original
237 // loop, and replace it with a branch to the new loop.
238 unsigned numBranches = TII->removeBranch(*Preheader);
239 if (numBranches) {
240 SmallVector<MachineOperand, 0> Cond;
241 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
242 }
243}
244
245/// Generate the pipeline epilog code. The epilog code finishes the iterations
246/// that were started in either the prolog or the kernel. We create a basic
247/// block for each stage that needs to complete.
248void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
249 MachineBasicBlock *KernelBB,
250 ValueMapTy *VRMap,
251 MBBVectorTy &EpilogBBs,
252 MBBVectorTy &PrologBBs) {
253 // We need to change the branch from the kernel to the first epilog block, so
254 // this call to analyze branch uses the kernel rather than the original BB.
255 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
256 SmallVector<MachineOperand, 4> Cond;
257 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
258 assert(!checkBranch && "generateEpilog must be able to analyze the branch")((!checkBranch && "generateEpilog must be able to analyze the branch"
) ? static_cast<void> (0) : __assert_fail ("!checkBranch && \"generateEpilog must be able to analyze the branch\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 258, __PRETTY_FUNCTION__))
;
259 if (checkBranch)
260 return;
261
262 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
263 if (*LoopExitI == KernelBB)
264 ++LoopExitI;
265 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor")((LoopExitI != KernelBB->succ_end() && "Expecting a successor"
) ? static_cast<void> (0) : __assert_fail ("LoopExitI != KernelBB->succ_end() && \"Expecting a successor\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 265, __PRETTY_FUNCTION__))
;
266 MachineBasicBlock *LoopExitBB = *LoopExitI;
267
268 MachineBasicBlock *PredBB = KernelBB;
269 MachineBasicBlock *EpilogStart = LoopExitBB;
270 InstrMapTy InstrMap;
271
272 // Generate a basic block for each stage, not including the last stage,
273 // which was generated for the kernel. Each basic block may contain
274 // instructions from multiple stages/iterations.
275 int EpilogStage = LastStage + 1;
276 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
277 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
278 EpilogBBs.push_back(NewBB);
279 MF.insert(BB->getIterator(), NewBB);
280
281 PredBB->replaceSuccessor(LoopExitBB, NewBB);
282 NewBB->addSuccessor(LoopExitBB);
283
284 if (EpilogStart == LoopExitBB)
285 EpilogStart = NewBB;
286
287 // Add instructions to the epilog depending on the current block.
288 // Process instructions in original program order.
289 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
290 for (auto &BBI : *BB) {
291 if (BBI.isPHI())
292 continue;
293 MachineInstr *In = &BBI;
294 if ((unsigned)Schedule.getStage(In) == StageNum) {
295 // Instructions with memoperands in the epilog are updated with
296 // conservative values.
297 MachineInstr *NewMI = cloneInstr(In, UINT_MAX(2147483647 *2U +1U), 0);
298 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
299 NewBB->push_back(NewMI);
300 InstrMap[NewMI] = In;
301 }
302 }
303 }
304 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
305 InstrMap, LastStage, EpilogStage, i == 1);
306 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
307 LastStage, EpilogStage, i == 1);
308 PredBB = NewBB;
309
310 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { { dbgs() << "epilog:\n"; NewBB->dump
(); }; } } while (false)
311 dbgs() << "epilog:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { { dbgs() << "epilog:\n"; NewBB->dump
(); }; } } while (false)
312 NewBB->dump();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { { dbgs() << "epilog:\n"; NewBB->dump
(); }; } } while (false)
313 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { { dbgs() << "epilog:\n"; NewBB->dump
(); }; } } while (false)
;
314 }
315
316 // Fix any Phi nodes in the loop exit block.
317 LoopExitBB->replacePhiUsesWith(BB, PredBB);
318
319 // Create a branch to the new epilog from the kernel.
320 // Remove the original branch and add a new branch to the epilog.
321 TII->removeBranch(*KernelBB);
322 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
323 // Add a branch to the loop exit.
324 if (EpilogBBs.size() > 0) {
325 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
326 SmallVector<MachineOperand, 4> Cond1;
327 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
328 }
329}
330
331/// Replace all uses of FromReg that appear outside the specified
332/// basic block with ToReg.
333static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
334 MachineBasicBlock *MBB,
335 MachineRegisterInfo &MRI,
336 LiveIntervals &LIS) {
337 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
338 E = MRI.use_end();
339 I != E;) {
340 MachineOperand &O = *I;
341 ++I;
342 if (O.getParent()->getParent() != MBB)
343 O.setReg(ToReg);
344 }
345 if (!LIS.hasInterval(ToReg))
346 LIS.createEmptyInterval(ToReg);
347}
348
349/// Return true if the register has a use that occurs outside the
350/// specified loop.
351static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
352 MachineRegisterInfo &MRI) {
353 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
354 E = MRI.use_end();
355 I != E; ++I)
356 if (I->getParent()->getParent() != BB)
357 return true;
358 return false;
359}
360
361/// Generate Phis for the specific block in the generated pipelined code.
362/// This function looks at the Phis from the original code to guide the
363/// creation of new Phis.
364void ModuloScheduleExpander::generateExistingPhis(
365 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
366 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
367 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
368 // Compute the stage number for the initial value of the Phi, which
369 // comes from the prolog. The prolog to use depends on to which kernel/
370 // epilog that we're adding the Phi.
371 unsigned PrologStage = 0;
372 unsigned PrevStage = 0;
373 bool InKernel = (LastStageNum == CurStageNum);
374 if (InKernel) {
375 PrologStage = LastStageNum - 1;
376 PrevStage = CurStageNum;
377 } else {
378 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
379 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
380 }
381
382 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
383 BBE = BB->getFirstNonPHI();
384 BBI != BBE; ++BBI) {
385 Register Def = BBI->getOperand(0).getReg();
386
387 unsigned InitVal = 0;
388 unsigned LoopVal = 0;
389 getPhiRegs(*BBI, BB, InitVal, LoopVal);
390
391 unsigned PhiOp1 = 0;
392 // The Phi value from the loop body typically is defined in the loop, but
393 // not always. So, we need to check if the value is defined in the loop.
394 unsigned PhiOp2 = LoopVal;
395 if (VRMap[LastStageNum].count(LoopVal))
396 PhiOp2 = VRMap[LastStageNum][LoopVal];
397
398 int StageScheduled = Schedule.getStage(&*BBI);
399 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
400 unsigned NumStages = getStagesForReg(Def, CurStageNum);
401 if (NumStages == 0) {
402 // We don't need to generate a Phi anymore, but we need to rename any uses
403 // of the Phi value.
404 unsigned NewReg = VRMap[PrevStage][LoopVal];
405 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
406 InitVal, NewReg);
407 if (VRMap[CurStageNum].count(LoopVal))
408 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
409 }
410 // Adjust the number of Phis needed depending on the number of prologs left,
411 // and the distance from where the Phi is first scheduled. The number of
412 // Phis cannot exceed the number of prolog stages. Each stage can
413 // potentially define two values.
414 unsigned MaxPhis = PrologStage + 2;
415 if (!InKernel && (int)PrologStage <= LoopValStage)
416 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
417 unsigned NumPhis = std::min(NumStages, MaxPhis);
418
419 unsigned NewReg = 0;
420 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
421 // In the epilog, we may need to look back one stage to get the correct
422 // Phi name because the epilog and prolog blocks execute the same stage.
423 // The correct name is from the previous block only when the Phi has
424 // been completely scheduled prior to the epilog, and Phi value is not
425 // needed in multiple stages.
426 int StageDiff = 0;
427 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
428 NumPhis == 1)
429 StageDiff = 1;
430 // Adjust the computations below when the phi and the loop definition
431 // are scheduled in different stages.
432 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
433 StageDiff = StageScheduled - LoopValStage;
434 for (unsigned np = 0; np < NumPhis; ++np) {
435 // If the Phi hasn't been scheduled, then use the initial Phi operand
436 // value. Otherwise, use the scheduled version of the instruction. This
437 // is a little complicated when a Phi references another Phi.
438 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
439 PhiOp1 = InitVal;
440 // Check if the Phi has already been scheduled in a prolog stage.
441 else if (PrologStage >= AccessStage + StageDiff + np &&
442 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
443 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
444 // Check if the Phi has already been scheduled, but the loop instruction
445 // is either another Phi, or doesn't occur in the loop.
446 else if (PrologStage >= AccessStage + StageDiff + np) {
447 // If the Phi references another Phi, we need to examine the other
448 // Phi to get the correct value.
449 PhiOp1 = LoopVal;
450 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
451 int Indirects = 1;
452 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
453 int PhiStage = Schedule.getStage(InstOp1);
454 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
455 PhiOp1 = getInitPhiReg(*InstOp1, BB);
456 else
457 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
458 InstOp1 = MRI.getVRegDef(PhiOp1);
459 int PhiOpStage = Schedule.getStage(InstOp1);
460 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
461 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
462 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
463 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
464 break;
465 }
466 ++Indirects;
467 }
468 } else
469 PhiOp1 = InitVal;
470 // If this references a generated Phi in the kernel, get the Phi operand
471 // from the incoming block.
472 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
473 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
474 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
475
476 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
477 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
478 // In the epilog, a map lookup is needed to get the value from the kernel,
479 // or previous epilog block. How is does this depends on if the
480 // instruction is scheduled in the previous block.
481 if (!InKernel) {
482 int StageDiffAdj = 0;
483 if (LoopValStage != -1 && StageScheduled > LoopValStage)
484 StageDiffAdj = StageScheduled - LoopValStage;
485 // Use the loop value defined in the kernel, unless the kernel
486 // contains the last definition of the Phi.
487 if (np == 0 && PrevStage == LastStageNum &&
488 (StageScheduled != 0 || LoopValStage != 0) &&
489 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
490 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
491 // Use the value defined by the Phi. We add one because we switch
492 // from looking at the loop value to the Phi definition.
493 else if (np > 0 && PrevStage == LastStageNum &&
494 VRMap[PrevStage - np + 1].count(Def))
495 PhiOp2 = VRMap[PrevStage - np + 1][Def];
496 // Use the loop value defined in the kernel.
497 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
498 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
499 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
500 // Use the value defined by the Phi, unless we're generating the first
501 // epilog and the Phi refers to a Phi in a different stage.
502 else if (VRMap[PrevStage - np].count(Def) &&
503 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
504 (LoopValStage == StageScheduled)))
505 PhiOp2 = VRMap[PrevStage - np][Def];
506 }
507
508 // Check if we can reuse an existing Phi. This occurs when a Phi
509 // references another Phi, and the other Phi is scheduled in an
510 // earlier stage. We can try to reuse an existing Phi up until the last
511 // stage of the current Phi.
512 if (LoopDefIsPhi) {
513 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
514 int LVNumStages = getStagesForPhi(LoopVal);
515 int StageDiff = (StageScheduled - LoopValStage);
516 LVNumStages -= StageDiff;
517 // Make sure the loop value Phi has been processed already.
518 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
519 NewReg = PhiOp2;
Value stored to 'NewReg' is never read
520 unsigned ReuseStage = CurStageNum;
521 if (isLoopCarried(*PhiInst))
522 ReuseStage -= LVNumStages;
523 // Check if the Phi to reuse has been generated yet. If not, then
524 // there is nothing to reuse.
525 if (VRMap[ReuseStage - np].count(LoopVal)) {
526 NewReg = VRMap[ReuseStage - np][LoopVal];
527
528 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
529 Def, NewReg);
530 // Update the map with the new Phi name.
531 VRMap[CurStageNum - np][Def] = NewReg;
532 PhiOp2 = NewReg;
533 if (VRMap[LastStageNum - np - 1].count(LoopVal))
534 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
535
536 if (IsLast && np == NumPhis - 1)
537 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
538 continue;
539 }
540 }
541 }
542 if (InKernel && StageDiff > 0 &&
543 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
544 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
545 }
546
547 const TargetRegisterClass *RC = MRI.getRegClass(Def);
548 NewReg = MRI.createVirtualRegister(RC);
549
550 MachineInstrBuilder NewPhi =
551 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
552 TII->get(TargetOpcode::PHI), NewReg);
553 NewPhi.addReg(PhiOp1).addMBB(BB1);
554 NewPhi.addReg(PhiOp2).addMBB(BB2);
555 if (np == 0)
556 InstrMap[NewPhi] = &*BBI;
557
558 // We define the Phis after creating the new pipelined code, so
559 // we need to rename the Phi values in scheduled instructions.
560
561 unsigned PrevReg = 0;
562 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
563 PrevReg = VRMap[PrevStage - np][LoopVal];
564 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
565 NewReg, PrevReg);
566 // If the Phi has been scheduled, use the new name for rewriting.
567 if (VRMap[CurStageNum - np].count(Def)) {
568 unsigned R = VRMap[CurStageNum - np][Def];
569 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
570 NewReg);
571 }
572
573 // Check if we need to rename any uses that occurs after the loop. The
574 // register to replace depends on whether the Phi is scheduled in the
575 // epilog.
576 if (IsLast && np == NumPhis - 1)
577 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
578
579 // In the kernel, a dependent Phi uses the value from this Phi.
580 if (InKernel)
581 PhiOp2 = NewReg;
582
583 // Update the map with the new Phi name.
584 VRMap[CurStageNum - np][Def] = NewReg;
585 }
586
587 while (NumPhis++ < NumStages) {
588 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
589 NewReg, 0);
590 }
591
592 // Check if we need to rename a Phi that has been eliminated due to
593 // scheduling.
594 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
595 replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
596 }
597}
598
599/// Generate Phis for the specified block in the generated pipelined code.
600/// These are new Phis needed because the definition is scheduled after the
601/// use in the pipelined sequence.
602void ModuloScheduleExpander::generatePhis(
603 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
604 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
605 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
606 // Compute the stage number that contains the initial Phi value, and
607 // the Phi from the previous stage.
608 unsigned PrologStage = 0;
609 unsigned PrevStage = 0;
610 unsigned StageDiff = CurStageNum - LastStageNum;
611 bool InKernel = (StageDiff == 0);
612 if (InKernel) {
613 PrologStage = LastStageNum - 1;
614 PrevStage = CurStageNum;
615 } else {
616 PrologStage = LastStageNum - StageDiff;
617 PrevStage = LastStageNum + StageDiff - 1;
618 }
619
620 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
621 BBE = BB->instr_end();
622 BBI != BBE; ++BBI) {
623 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
624 MachineOperand &MO = BBI->getOperand(i);
625 if (!MO.isReg() || !MO.isDef() ||
626 !Register::isVirtualRegister(MO.getReg()))
627 continue;
628
629 int StageScheduled = Schedule.getStage(&*BBI);
630 assert(StageScheduled != -1 && "Expecting scheduled instruction.")((StageScheduled != -1 && "Expecting scheduled instruction."
) ? static_cast<void> (0) : __assert_fail ("StageScheduled != -1 && \"Expecting scheduled instruction.\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 630, __PRETTY_FUNCTION__))
;
631 Register Def = MO.getReg();
632 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
633 // An instruction scheduled in stage 0 and is used after the loop
634 // requires a phi in the epilog for the last definition from either
635 // the kernel or prolog.
636 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
637 hasUseAfterLoop(Def, BB, MRI))
638 NumPhis = 1;
639 if (!InKernel && (unsigned)StageScheduled > PrologStage)
640 continue;
641
642 unsigned PhiOp2 = VRMap[PrevStage][Def];
643 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
644 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
645 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
646 // The number of Phis can't exceed the number of prolog stages. The
647 // prolog stage number is zero based.
648 if (NumPhis > PrologStage + 1 - StageScheduled)
649 NumPhis = PrologStage + 1 - StageScheduled;
650 for (unsigned np = 0; np < NumPhis; ++np) {
651 unsigned PhiOp1 = VRMap[PrologStage][Def];
652 if (np <= PrologStage)
653 PhiOp1 = VRMap[PrologStage - np][Def];
654 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
655 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
656 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
657 if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
658 PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
659 }
660 if (!InKernel)
661 PhiOp2 = VRMap[PrevStage - np][Def];
662
663 const TargetRegisterClass *RC = MRI.getRegClass(Def);
664 Register NewReg = MRI.createVirtualRegister(RC);
665
666 MachineInstrBuilder NewPhi =
667 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
668 TII->get(TargetOpcode::PHI), NewReg);
669 NewPhi.addReg(PhiOp1).addMBB(BB1);
670 NewPhi.addReg(PhiOp2).addMBB(BB2);
671 if (np == 0)
672 InstrMap[NewPhi] = &*BBI;
673
674 // Rewrite uses and update the map. The actions depend upon whether
675 // we generating code for the kernel or epilog blocks.
676 if (InKernel) {
677 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
678 NewReg);
679 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
680 NewReg);
681
682 PhiOp2 = NewReg;
683 VRMap[PrevStage - np - 1][Def] = NewReg;
684 } else {
685 VRMap[CurStageNum - np][Def] = NewReg;
686 if (np == NumPhis - 1)
687 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
688 NewReg);
689 }
690 if (IsLast && np == NumPhis - 1)
691 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
692 }
693 }
694 }
695}
696
697/// Remove instructions that generate values with no uses.
698/// Typically, these are induction variable operations that generate values
699/// used in the loop itself. A dead instruction has a definition with
700/// no uses, or uses that occur in the original loop only.
701void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
702 MBBVectorTy &EpilogBBs) {
703 // For each epilog block, check that the value defined by each instruction
704 // is used. If not, delete it.
705 for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
706 MBE = EpilogBBs.rend();
707 MBB != MBE; ++MBB)
708 for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
709 ME = (*MBB)->instr_rend();
710 MI != ME;) {
711 // From DeadMachineInstructionElem. Don't delete inline assembly.
712 if (MI->isInlineAsm()) {
713 ++MI;
714 continue;
715 }
716 bool SawStore = false;
717 // Check if it's safe to remove the instruction due to side effects.
718 // We can, and want to, remove Phis here.
719 if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
720 ++MI;
721 continue;
722 }
723 bool used = true;
724 for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
725 MOE = MI->operands_end();
726 MOI != MOE; ++MOI) {
727 if (!MOI->isReg() || !MOI->isDef())
728 continue;
729 Register reg = MOI->getReg();
730 // Assume physical registers are used, unless they are marked dead.
731 if (Register::isPhysicalRegister(reg)) {
732 used = !MOI->isDead();
733 if (used)
734 break;
735 continue;
736 }
737 unsigned realUses = 0;
738 for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
739 EI = MRI.use_end();
740 UI != EI; ++UI) {
741 // Check if there are any uses that occur only in the original
742 // loop. If so, that's not a real use.
743 if (UI->getParent()->getParent() != BB) {
744 realUses++;
745 used = true;
746 break;
747 }
748 }
749 if (realUses > 0)
750 break;
751 used = false;
752 }
753 if (!used) {
754 LIS.RemoveMachineInstrFromMaps(*MI);
755 MI++->eraseFromParent();
756 continue;
757 }
758 ++MI;
759 }
760 // In the kernel block, check if we can remove a Phi that generates a value
761 // used in an instruction removed in the epilog block.
762 for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
763 BBE = KernelBB->getFirstNonPHI();
764 BBI != BBE;) {
765 MachineInstr *MI = &*BBI;
766 ++BBI;
767 Register reg = MI->getOperand(0).getReg();
768 if (MRI.use_begin(reg) == MRI.use_end()) {
769 LIS.RemoveMachineInstrFromMaps(*MI);
770 MI->eraseFromParent();
771 }
772 }
773}
774
775/// For loop carried definitions, we split the lifetime of a virtual register
776/// that has uses past the definition in the next iteration. A copy with a new
777/// virtual register is inserted before the definition, which helps with
778/// generating a better register assignment.
779///
780/// v1 = phi(a, v2) v1 = phi(a, v2)
781/// v2 = phi(b, v3) v2 = phi(b, v3)
782/// v3 = .. v4 = copy v1
783/// .. = V1 v3 = ..
784/// .. = v4
785void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
786 MBBVectorTy &EpilogBBs) {
787 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
788 for (auto &PHI : KernelBB->phis()) {
789 Register Def = PHI.getOperand(0).getReg();
790 // Check for any Phi definition that used as an operand of another Phi
791 // in the same block.
792 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
793 E = MRI.use_instr_end();
794 I != E; ++I) {
795 if (I->isPHI() && I->getParent() == KernelBB) {
796 // Get the loop carried definition.
797 unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
798 if (!LCDef)
799 continue;
800 MachineInstr *MI = MRI.getVRegDef(LCDef);
801 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
802 continue;
803 // Search through the rest of the block looking for uses of the Phi
804 // definition. If one occurs, then split the lifetime.
805 unsigned SplitReg = 0;
806 for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
807 KernelBB->instr_end()))
808 if (BBJ.readsRegister(Def)) {
809 // We split the lifetime when we find the first use.
810 if (SplitReg == 0) {
811 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
812 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
813 TII->get(TargetOpcode::COPY), SplitReg)
814 .addReg(Def);
815 }
816 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
817 }
818 if (!SplitReg)
819 continue;
820 // Search through each of the epilog blocks for any uses to be renamed.
821 for (auto &Epilog : EpilogBBs)
822 for (auto &I : *Epilog)
823 if (I.readsRegister(Def))
824 I.substituteRegister(Def, SplitReg, 0, *TRI);
825 break;
826 }
827 }
828 }
829}
830
831/// Remove the incoming block from the Phis in a basic block.
832static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
833 for (MachineInstr &MI : *BB) {
834 if (!MI.isPHI())
835 break;
836 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
837 if (MI.getOperand(i + 1).getMBB() == Incoming) {
838 MI.RemoveOperand(i + 1);
839 MI.RemoveOperand(i);
840 break;
841 }
842 }
843}
844
845/// Create branches from each prolog basic block to the appropriate epilog
846/// block. These edges are needed if the loop ends before reaching the
847/// kernel.
848void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
849 MBBVectorTy &PrologBBs,
850 MachineBasicBlock *KernelBB,
851 MBBVectorTy &EpilogBBs,
852 ValueMapTy *VRMap) {
853 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch")((PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"
) ? static_cast<void> (0) : __assert_fail ("PrologBBs.size() == EpilogBBs.size() && \"Prolog/Epilog mismatch\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 853, __PRETTY_FUNCTION__))
;
854 MachineBasicBlock *LastPro = KernelBB;
855 MachineBasicBlock *LastEpi = KernelBB;
856
857 // Start from the blocks connected to the kernel and work "out"
858 // to the first prolog and the last epilog blocks.
859 SmallVector<MachineInstr *, 4> PrevInsts;
860 unsigned MaxIter = PrologBBs.size() - 1;
861 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
862 // Add branches to the prolog that go to the corresponding
863 // epilog, and the fall-thru prolog/kernel block.
864 MachineBasicBlock *Prolog = PrologBBs[j];
865 MachineBasicBlock *Epilog = EpilogBBs[i];
866
867 SmallVector<MachineOperand, 4> Cond;
868 Optional<bool> StaticallyGreater =
869 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
870 unsigned numAdded = 0;
871 if (!StaticallyGreater.hasValue()) {
872 Prolog->addSuccessor(Epilog);
873 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
874 } else if (*StaticallyGreater == false) {
875 Prolog->addSuccessor(Epilog);
876 Prolog->removeSuccessor(LastPro);
877 LastEpi->removeSuccessor(Epilog);
878 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
879 removePhis(Epilog, LastEpi);
880 // Remove the blocks that are no longer referenced.
881 if (LastPro != LastEpi) {
882 LastEpi->clear();
883 LastEpi->eraseFromParent();
884 }
885 if (LastPro == KernelBB) {
886 LoopInfo->disposed();
887 NewKernel = nullptr;
888 }
889 LastPro->clear();
890 LastPro->eraseFromParent();
891 } else {
892 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
893 removePhis(Epilog, Prolog);
894 }
895 LastPro = Prolog;
896 LastEpi = Epilog;
897 for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
898 E = Prolog->instr_rend();
899 I != E && numAdded > 0; ++I, --numAdded)
900 updateInstruction(&*I, false, j, 0, VRMap);
901 }
902
903 if (NewKernel) {
904 LoopInfo->setPreheader(PrologBBs[MaxIter]);
905 LoopInfo->adjustTripCount(-(MaxIter + 1));
906 }
907}
908
909/// Return true if we can compute the amount the instruction changes
910/// during each iteration. Set Delta to the amount of the change.
911bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
912 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
913 const MachineOperand *BaseOp;
914 int64_t Offset;
915 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
916 return false;
917
918 if (!BaseOp->isReg())
919 return false;
920
921 Register BaseReg = BaseOp->getReg();
922
923 MachineRegisterInfo &MRI = MF.getRegInfo();
924 // Check if there is a Phi. If so, get the definition in the loop.
925 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
926 if (BaseDef && BaseDef->isPHI()) {
927 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
928 BaseDef = MRI.getVRegDef(BaseReg);
929 }
930 if (!BaseDef)
931 return false;
932
933 int D = 0;
934 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
935 return false;
936
937 Delta = D;
938 return true;
939}
940
941/// Update the memory operand with a new offset when the pipeliner
942/// generates a new copy of the instruction that refers to a
943/// different memory location.
944void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
945 MachineInstr &OldMI,
946 unsigned Num) {
947 if (Num == 0)
948 return;
949 // If the instruction has memory operands, then adjust the offset
950 // when the instruction appears in different stages.
951 if (NewMI.memoperands_empty())
952 return;
953 SmallVector<MachineMemOperand *, 2> NewMMOs;
954 for (MachineMemOperand *MMO : NewMI.memoperands()) {
955 // TODO: Figure out whether isAtomic is really necessary (see D57601).
956 if (MMO->isVolatile() || MMO->isAtomic() ||
957 (MMO->isInvariant() && MMO->isDereferenceable()) ||
958 (!MMO->getValue())) {
959 NewMMOs.push_back(MMO);
960 continue;
961 }
962 unsigned Delta;
963 if (Num != UINT_MAX(2147483647 *2U +1U) && computeDelta(OldMI, Delta)) {
964 int64_t AdjOffset = Delta * Num;
965 NewMMOs.push_back(
966 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
967 } else {
968 NewMMOs.push_back(
969 MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
970 }
971 }
972 NewMI.setMemRefs(MF, NewMMOs);
973}
974
975/// Clone the instruction for the new pipelined loop and update the
976/// memory operands, if needed.
977MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
978 unsigned CurStageNum,
979 unsigned InstStageNum) {
980 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
981 // Check for tied operands in inline asm instructions. This should be handled
982 // elsewhere, but I'm not sure of the best solution.
983 if (OldMI->isInlineAsm())
984 for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
985 const auto &MO = OldMI->getOperand(i);
986 if (MO.isReg() && MO.isUse())
987 break;
988 unsigned UseIdx;
989 if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
990 NewMI->tieOperands(i, UseIdx);
991 }
992 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
993 return NewMI;
994}
995
996/// Clone the instruction for the new pipelined loop. If needed, this
997/// function updates the instruction using the values saved in the
998/// InstrChanges structure.
999MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1000 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1001 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1002 auto It = InstrChanges.find(OldMI);
1003 if (It != InstrChanges.end()) {
1004 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1005 unsigned BasePos, OffsetPos;
1006 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1007 return nullptr;
1008 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1009 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1010 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1011 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1012 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1013 }
1014 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1015 return NewMI;
1016}
1017
1018/// Update the machine instruction with new virtual registers. This
1019/// function may change the defintions and/or uses.
1020void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1021 bool LastDef,
1022 unsigned CurStageNum,
1023 unsigned InstrStageNum,
1024 ValueMapTy *VRMap) {
1025 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
1026 MachineOperand &MO = NewMI->getOperand(i);
1027 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
1028 continue;
1029 Register reg = MO.getReg();
1030 if (MO.isDef()) {
1031 // Create a new virtual register for the definition.
1032 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1033 Register NewReg = MRI.createVirtualRegister(RC);
1034 MO.setReg(NewReg);
1035 VRMap[CurStageNum][reg] = NewReg;
1036 if (LastDef)
1037 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1038 } else if (MO.isUse()) {
1039 MachineInstr *Def = MRI.getVRegDef(reg);
1040 // Compute the stage that contains the last definition for instruction.
1041 int DefStageNum = Schedule.getStage(Def);
1042 unsigned StageNum = CurStageNum;
1043 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1044 // Compute the difference in stages between the defintion and the use.
1045 unsigned StageDiff = (InstrStageNum - DefStageNum);
1046 // Make an adjustment to get the last definition.
1047 StageNum -= StageDiff;
1048 }
1049 if (VRMap[StageNum].count(reg))
1050 MO.setReg(VRMap[StageNum][reg]);
1051 }
1052 }
1053}
1054
1055/// Return the instruction in the loop that defines the register.
1056/// If the definition is a Phi, then follow the Phi operand to
1057/// the instruction in the loop.
1058MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1059 SmallPtrSet<MachineInstr *, 8> Visited;
1060 MachineInstr *Def = MRI.getVRegDef(Reg);
1061 while (Def->isPHI()) {
1062 if (!Visited.insert(Def).second)
1063 break;
1064 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1065 if (Def->getOperand(i + 1).getMBB() == BB) {
1066 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1067 break;
1068 }
1069 }
1070 return Def;
1071}
1072
1073/// Return the new name for the value from the previous stage.
1074unsigned ModuloScheduleExpander::getPrevMapVal(
1075 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1076 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1077 unsigned PrevVal = 0;
1078 if (StageNum > PhiStage) {
1079 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1080 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1081 // The name is defined in the previous stage.
1082 PrevVal = VRMap[StageNum - 1][LoopVal];
1083 else if (VRMap[StageNum].count(LoopVal))
1084 // The previous name is defined in the current stage when the instruction
1085 // order is swapped.
1086 PrevVal = VRMap[StageNum][LoopVal];
1087 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1088 // The loop value hasn't yet been scheduled.
1089 PrevVal = LoopVal;
1090 else if (StageNum == PhiStage + 1)
1091 // The loop value is another phi, which has not been scheduled.
1092 PrevVal = getInitPhiReg(*LoopInst, BB);
1093 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1094 // The loop value is another phi, which has been scheduled.
1095 PrevVal =
1096 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1097 LoopStage, VRMap, BB);
1098 }
1099 return PrevVal;
1100}
1101
1102/// Rewrite the Phi values in the specified block to use the mappings
1103/// from the initial operand. Once the Phi is scheduled, we switch
1104/// to using the loop value instead of the Phi value, so those names
1105/// do not need to be rewritten.
1106void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1107 unsigned StageNum,
1108 ValueMapTy *VRMap,
1109 InstrMapTy &InstrMap) {
1110 for (auto &PHI : BB->phis()) {
1111 unsigned InitVal = 0;
1112 unsigned LoopVal = 0;
1113 getPhiRegs(PHI, BB, InitVal, LoopVal);
1114 Register PhiDef = PHI.getOperand(0).getReg();
1115
1116 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1117 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1118 unsigned NumPhis = getStagesForPhi(PhiDef);
1119 if (NumPhis > StageNum)
1120 NumPhis = StageNum;
1121 for (unsigned np = 0; np <= NumPhis; ++np) {
1122 unsigned NewVal =
1123 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1124 if (!NewVal)
1125 NewVal = InitVal;
1126 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1127 NewVal);
1128 }
1129 }
1130}
1131
1132/// Rewrite a previously scheduled instruction to use the register value
1133/// from the new instruction. Make sure the instruction occurs in the
1134/// basic block, and we don't change the uses in the new instruction.
1135void ModuloScheduleExpander::rewriteScheduledInstr(
1136 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1137 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1138 unsigned PrevReg) {
1139 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1140 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1141 // Rewrite uses that have been scheduled already to use the new
1142 // Phi register.
1143 for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
1144 EI = MRI.use_end();
1145 UI != EI;) {
1146 MachineOperand &UseOp = *UI;
1147 MachineInstr *UseMI = UseOp.getParent();
1148 ++UI;
1149 if (UseMI->getParent() != BB)
1150 continue;
1151 if (UseMI->isPHI()) {
1152 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1153 continue;
1154 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1155 continue;
1156 }
1157 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1158 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.")((OrigInstr != InstrMap.end() && "Instruction not scheduled."
) ? static_cast<void> (0) : __assert_fail ("OrigInstr != InstrMap.end() && \"Instruction not scheduled.\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1158, __PRETTY_FUNCTION__))
;
1159 MachineInstr *OrigMI = OrigInstr->second;
1160 int StageSched = Schedule.getStage(OrigMI);
1161 int CycleSched = Schedule.getCycle(OrigMI);
1162 unsigned ReplaceReg = 0;
1163 // This is the stage for the scheduled instruction.
1164 if (StagePhi == StageSched && Phi->isPHI()) {
1165 int CyclePhi = Schedule.getCycle(Phi);
1166 if (PrevReg && InProlog)
1167 ReplaceReg = PrevReg;
1168 else if (PrevReg && !isLoopCarried(*Phi) &&
1169 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1170 ReplaceReg = PrevReg;
1171 else
1172 ReplaceReg = NewReg;
1173 }
1174 // The scheduled instruction occurs before the scheduled Phi, and the
1175 // Phi is not loop carried.
1176 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1177 ReplaceReg = NewReg;
1178 if (StagePhi > StageSched && Phi->isPHI())
1179 ReplaceReg = NewReg;
1180 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1181 ReplaceReg = NewReg;
1182 if (ReplaceReg) {
1183 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1184 UseOp.setReg(ReplaceReg);
1185 }
1186 }
1187}
1188
1189bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1190 if (!Phi.isPHI())
1191 return false;
1192 unsigned DefCycle = Schedule.getCycle(&Phi);
1193 int DefStage = Schedule.getStage(&Phi);
1194
1195 unsigned InitVal = 0;
1196 unsigned LoopVal = 0;
1197 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1198 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1199 if (!Use || Use->isPHI())
1200 return true;
1201 unsigned LoopCycle = Schedule.getCycle(Use);
1202 int LoopStage = Schedule.getStage(Use);
1203 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1204}
1205
1206//===----------------------------------------------------------------------===//
1207// PeelingModuloScheduleExpander implementation
1208//===----------------------------------------------------------------------===//
1209// This is a reimplementation of ModuloScheduleExpander that works by creating
1210// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1211//===----------------------------------------------------------------------===//
1212
1213namespace {
1214// Remove any dead phis in MBB. Dead phis either have only one block as input
1215// (in which case they are the identity) or have no uses.
1216void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1217 LiveIntervals *LIS) {
1218 bool Changed = true;
1219 while (Changed) {
1220 Changed = false;
1221 for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
1222 MachineInstr &MI = *I++;
1223 assert(MI.isPHI())((MI.isPHI()) ? static_cast<void> (0) : __assert_fail (
"MI.isPHI()", "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1223, __PRETTY_FUNCTION__))
;
1224 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1225 if (LIS)
1226 LIS->RemoveMachineInstrFromMaps(MI);
1227 MI.eraseFromParent();
1228 Changed = true;
1229 } else if (MI.getNumExplicitOperands() == 3) {
1230 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1231 MRI.getRegClass(MI.getOperand(0).getReg()));
1232 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1233 MI.getOperand(1).getReg());
1234 if (LIS)
1235 LIS->RemoveMachineInstrFromMaps(MI);
1236 MI.eraseFromParent();
1237 Changed = true;
1238 }
1239 }
1240 }
1241}
1242
1243/// Rewrites the kernel block in-place to adhere to the given schedule.
1244/// KernelRewriter holds all of the state required to perform the rewriting.
1245class KernelRewriter {
1246 ModuloSchedule &S;
1247 MachineBasicBlock *BB;
1248 MachineBasicBlock *PreheaderBB, *ExitBB;
1249 MachineRegisterInfo &MRI;
1250 const TargetInstrInfo *TII;
1251 LiveIntervals *LIS;
1252
1253 // Map from register class to canonical undef register for that class.
1254 DenseMap<const TargetRegisterClass *, Register> Undefs;
1255 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1256 // this map is only used when InitReg is non-undef.
1257 DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
1258 // Map from LoopReg to phi register where the InitReg is undef.
1259 DenseMap<Register, Register> UndefPhis;
1260
1261 // Reg is used by MI. Return the new register MI should use to adhere to the
1262 // schedule. Insert phis as necessary.
1263 Register remapUse(Register Reg, MachineInstr &MI);
1264 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1265 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1266 // or will be chosen so as to share another phi.
1267 Register phi(Register LoopReg, Optional<Register> InitReg = {},
1268 const TargetRegisterClass *RC = nullptr);
1269 // Create an undef register of the given register class.
1270 Register undef(const TargetRegisterClass *RC);
1271
1272public:
1273 KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1274 LiveIntervals *LIS = nullptr);
1275 void rewrite();
1276};
1277} // namespace
1278
1279KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1280 LiveIntervals *LIS)
1281 : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()),
1282 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1283 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1284 PreheaderBB = *BB->pred_begin();
1285 if (PreheaderBB == BB)
1286 PreheaderBB = *std::next(BB->pred_begin());
1287}
1288
1289void KernelRewriter::rewrite() {
1290 // Rearrange the loop to be in schedule order. Note that the schedule may
1291 // contain instructions that are not owned by the loop block (InstrChanges and
1292 // friends), so we gracefully handle unowned instructions and delete any
1293 // instructions that weren't in the schedule.
1294 auto InsertPt = BB->getFirstTerminator();
1295 MachineInstr *FirstMI = nullptr;
1296 for (MachineInstr *MI : S.getInstructions()) {
1297 if (MI->isPHI())
1298 continue;
1299 if (MI->getParent())
1300 MI->removeFromParent();
1301 BB->insert(InsertPt, MI);
1302 if (!FirstMI)
1303 FirstMI = MI;
1304 }
1305 assert(FirstMI && "Failed to find first MI in schedule")((FirstMI && "Failed to find first MI in schedule") ?
static_cast<void> (0) : __assert_fail ("FirstMI && \"Failed to find first MI in schedule\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1305, __PRETTY_FUNCTION__))
;
1306
1307 // At this point all of the scheduled instructions are between FirstMI
1308 // and the end of the block. Kill from the first non-phi to FirstMI.
1309 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1310 if (LIS)
1311 LIS->RemoveMachineInstrFromMaps(*I);
1312 (I++)->eraseFromParent();
1313 }
1314
1315 // Now remap every instruction in the loop.
1316 for (MachineInstr &MI : *BB) {
1317 if (MI.isPHI() || MI.isTerminator())
1318 continue;
1319 for (MachineOperand &MO : MI.uses()) {
1320 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1321 continue;
1322 Register Reg = remapUse(MO.getReg(), MI);
1323 MO.setReg(Reg);
1324 }
1325 }
1326 EliminateDeadPhis(BB, MRI, LIS);
1327
1328 // Ensure a phi exists for all instructions that are either referenced by
1329 // an illegal phi or by an instruction outside the loop. This allows us to
1330 // treat remaps of these values the same as "normal" values that come from
1331 // loop-carried phis.
1332 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1333 if (MI->isPHI()) {
1334 Register R = MI->getOperand(0).getReg();
1335 phi(R);
1336 continue;
1337 }
1338
1339 for (MachineOperand &Def : MI->defs()) {
1340 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1341 if (MI.getParent() != BB) {
1342 phi(Def.getReg());
1343 break;
1344 }
1345 }
1346 }
1347 }
1348}
1349
1350Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1351 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1352 if (!Producer)
1353 return Reg;
1354
1355 int ConsumerStage = S.getStage(&MI);
1356 if (!Producer->isPHI()) {
1357 // Non-phi producers are simple to remap. Insert as many phis as the
1358 // difference between the consumer and producer stages.
1359 if (Producer->getParent() != BB)
1360 // Producer was not inside the loop. Use the register as-is.
1361 return Reg;
1362 int ProducerStage = S.getStage(Producer);
1363 assert(ConsumerStage != -1 &&((ConsumerStage != -1 && "In-loop consumer should always be scheduled!"
) ? static_cast<void> (0) : __assert_fail ("ConsumerStage != -1 && \"In-loop consumer should always be scheduled!\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1364, __PRETTY_FUNCTION__))
1364 "In-loop consumer should always be scheduled!")((ConsumerStage != -1 && "In-loop consumer should always be scheduled!"
) ? static_cast<void> (0) : __assert_fail ("ConsumerStage != -1 && \"In-loop consumer should always be scheduled!\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1364, __PRETTY_FUNCTION__))
;
1365 assert(ConsumerStage >= ProducerStage)((ConsumerStage >= ProducerStage) ? static_cast<void>
(0) : __assert_fail ("ConsumerStage >= ProducerStage", "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1365, __PRETTY_FUNCTION__))
;
1366 unsigned StageDiff = ConsumerStage - ProducerStage;
1367
1368 for (unsigned I = 0; I < StageDiff; ++I)
1369 Reg = phi(Reg);
1370 return Reg;
1371 }
1372
1373 // First, dive through the phi chain to find the defaults for the generated
1374 // phis.
1375 SmallVector<Optional<Register>, 4> Defaults;
1376 Register LoopReg = Reg;
1377 auto LoopProducer = Producer;
1378 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1379 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1380 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1381 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1382 assert(LoopProducer)((LoopProducer) ? static_cast<void> (0) : __assert_fail
("LoopProducer", "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1382, __PRETTY_FUNCTION__))
;
1383 }
1384 int LoopProducerStage = S.getStage(LoopProducer);
1385
1386 Optional<Register> IllegalPhiDefault;
1387
1388 if (LoopProducerStage == -1) {
1389 // Do nothing.
1390 } else if (LoopProducerStage > ConsumerStage) {
1391 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1392 // In addition, Consumer's cycle must be scheduled after Producer in the
1393 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1394 // functions.
1395#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1396 int LoopProducerCycle = S.getCycle(LoopProducer);
1397 int ConsumerCycle = S.getCycle(&MI);
1398#endif
1399 assert(LoopProducerCycle <= ConsumerCycle)((LoopProducerCycle <= ConsumerCycle) ? static_cast<void
> (0) : __assert_fail ("LoopProducerCycle <= ConsumerCycle"
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1399, __PRETTY_FUNCTION__))
;
1400 assert(LoopProducerStage == ConsumerStage + 1)((LoopProducerStage == ConsumerStage + 1) ? static_cast<void
> (0) : __assert_fail ("LoopProducerStage == ConsumerStage + 1"
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1400, __PRETTY_FUNCTION__))
;
1401 // Peel off the first phi from Defaults and insert a phi between producer
1402 // and consumer. This phi will not be at the front of the block so we
1403 // consider it illegal. It will only exist during the rewrite process; it
1404 // needs to exist while we peel off prologs because these could take the
1405 // default value. After that we can replace all uses with the loop producer
1406 // value.
1407 IllegalPhiDefault = Defaults.front();
1408 Defaults.erase(Defaults.begin());
1409 } else {
1410 assert(ConsumerStage >= LoopProducerStage)((ConsumerStage >= LoopProducerStage) ? static_cast<void
> (0) : __assert_fail ("ConsumerStage >= LoopProducerStage"
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1410, __PRETTY_FUNCTION__))
;
1411 int StageDiff = ConsumerStage - LoopProducerStage;
1412 if (StageDiff > 0) {
1413 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { dbgs() << " -- padding defaults array from "
<< Defaults.size() << " to " << (Defaults.
size() + StageDiff) << "\n"; } } while (false)
1414 << " to " << (Defaults.size() + StageDiff) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { dbgs() << " -- padding defaults array from "
<< Defaults.size() << " to " << (Defaults.
size() + StageDiff) << "\n"; } } while (false)
;
1415 // If we need more phis than we have defaults for, pad out with undefs for
1416 // the earliest phis, which are at the end of the defaults chain (the
1417 // chain is in reverse order).
1418 Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
1419 ? Optional<Register>()
1420 : Defaults.back());
1421 }
1422 }
1423
1424 // Now we know the number of stages to jump back, insert the phi chain.
1425 auto DefaultI = Defaults.rbegin();
1426 while (DefaultI != Defaults.rend())
1427 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1428
1429 if (IllegalPhiDefault.hasValue()) {
1430 // The consumer optionally consumes LoopProducer in the same iteration
1431 // (because the producer is scheduled at an earlier cycle than the consumer)
1432 // or the initial value. To facilitate this we create an illegal block here
1433 // by embedding a phi in the middle of the block. We will fix this up
1434 // immediately prior to pruning.
1435 auto RC = MRI.getRegClass(Reg);
1436 Register R = MRI.createVirtualRegister(RC);
1437 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1438 .addReg(IllegalPhiDefault.getValue())
1439 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1440 .addReg(LoopReg)
1441 .addMBB(BB); // Block choice is arbitrary and has no effect.
1442 return R;
1443 }
1444
1445 return LoopReg;
1446}
1447
1448Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
1449 const TargetRegisterClass *RC) {
1450 // If the init register is not undef, try and find an existing phi.
1451 if (InitReg.hasValue()) {
1452 auto I = Phis.find({LoopReg, InitReg.getValue()});
1453 if (I != Phis.end())
1454 return I->second;
1455 } else {
1456 for (auto &KV : Phis) {
1457 if (KV.first.first == LoopReg)
1458 return KV.second;
1459 }
1460 }
1461
1462 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1463 // find a phi that takes undef as input.
1464 auto I = UndefPhis.find(LoopReg);
1465 if (I != UndefPhis.end()) {
1466 Register R = I->second;
1467 if (!InitReg.hasValue())
1468 // Found a phi taking undef as input, and this input is undef so return
1469 // without any more changes.
1470 return R;
1471 // Found a phi taking undef as input, so rewrite it to take InitReg.
1472 MachineInstr *MI = MRI.getVRegDef(R);
1473 MI->getOperand(1).setReg(InitReg.getValue());
1474 Phis.insert({{LoopReg, InitReg.getValue()}, R});
1475 MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
1476 UndefPhis.erase(I);
1477 return R;
1478 }
1479
1480 // Failed to find any existing phi to reuse, so create a new one.
1481 if (!RC)
1482 RC = MRI.getRegClass(LoopReg);
1483 Register R = MRI.createVirtualRegister(RC);
1484 if (InitReg.hasValue())
1485 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1486 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1487 .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
1488 .addMBB(PreheaderBB)
1489 .addReg(LoopReg)
1490 .addMBB(BB);
1491 if (!InitReg.hasValue())
1492 UndefPhis[LoopReg] = R;
1493 else
1494 Phis[{LoopReg, *InitReg}] = R;
1495 return R;
1496}
1497
1498Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1499 Register &R = Undefs[RC];
1500 if (R == 0) {
1501 // Create an IMPLICIT_DEF that defines this register if we need it.
1502 // All uses of this should be removed by the time we have finished unrolling
1503 // prologs and epilogs.
1504 R = MRI.createVirtualRegister(RC);
1505 auto *InsertBB = &PreheaderBB->getParent()->front();
1506 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1507 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1508 }
1509 return R;
1510}
1511
1512namespace {
1513/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1514/// the operand are discovered, such as how many in-loop PHIs it has to jump
1515/// through and defaults for these phis.
1516class KernelOperandInfo {
1517 MachineBasicBlock *BB;
1518 MachineRegisterInfo &MRI;
1519 SmallVector<Register, 4> PhiDefaults;
1520 MachineOperand *Source;
1521 MachineOperand *Target;
1522
1523public:
1524 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1525 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1526 : MRI(MRI) {
1527 Source = MO;
1528 BB = MO->getParent()->getParent();
1529 while (isRegInLoop(MO)) {
1530 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1531 if (MI->isFullCopy()) {
1532 MO = &MI->getOperand(1);
1533 continue;
1534 }
1535 if (!MI->isPHI())
1536 break;
1537 // If this is an illegal phi, don't count it in distance.
1538 if (IllegalPhis.count(MI)) {
1539 MO = &MI->getOperand(3);
1540 continue;
1541 }
1542
1543 Register Default = getInitPhiReg(*MI, BB);
1544 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1545 : &MI->getOperand(3);
1546 PhiDefaults.push_back(Default);
1547 }
1548 Target = MO;
1549 }
1550
1551 bool operator==(const KernelOperandInfo &Other) const {
1552 return PhiDefaults.size() == Other.PhiDefaults.size();
1553 }
1554
1555 void print(raw_ostream &OS) const {
1556 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1557 << *Source->getParent();
1558 }
1559
1560private:
1561 bool isRegInLoop(MachineOperand *MO) {
1562 return MO->isReg() && MO->getReg().isVirtual() &&
1563 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1564 }
1565};
1566} // namespace
1567
1568MachineBasicBlock *
1569PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1570 MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1571 if (LPD == LPD_Front)
1572 PeeledFront.push_back(NewBB);
1573 else
1574 PeeledBack.push_front(NewBB);
1575 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1576 ++I, ++NI) {
1577 CanonicalMIs[&*I] = &*I;
1578 CanonicalMIs[&*NI] = &*I;
1579 BlockMIs[{NewBB, &*I}] = &*NI;
1580 BlockMIs[{BB, &*I}] = &*I;
1581 }
1582 return NewBB;
1583}
1584
1585void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1586 BitVector LS(Schedule.getNumStages(), true);
1587 BitVector AS(Schedule.getNumStages(), true);
1588 LiveStages[BB] = LS;
1589 AvailableStages[BB] = AS;
1590
1591 // Peel out the prologs.
1592 LS.reset();
1593 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1594 LS[I] = 1;
1595 Prologs.push_back(peelKernel(LPD_Front));
1596 LiveStages[Prologs.back()] = LS;
1597 AvailableStages[Prologs.back()] = LS;
1598 }
1599
1600 // Create a block that will end up as the new loop exiting block (dominated by
1601 // all prologs and epilogs). It will only contain PHIs, in the same order as
1602 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1603 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1604 // property that any value deffed in BB but used outside of BB is used by a
1605 // PHI in the exiting block.
1606 MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
1607
1608 // Push out the epilogs, again in reverse order.
1609 // We can't assume anything about the minumum loop trip count at this point,
1610 // so emit a fairly complex epilog:
1611 // K[0, 1, 2] // Kernel runs stages 0, 1, 2
1612 // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0].
1613 // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
1614 //
1615 // This creates a single-successor single-predecessor sequence of blocks for
1616 // each epilog, which are kept this way for simplicity at this stage and
1617 // cleaned up by the optimizer later.
1618 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1619 Epilogs.push_back(nullptr);
1620 for (int J = Schedule.getNumStages() - 1; J >= I; --J) {
1621 LS.reset();
1622 LS[J] = 1;
1623 Epilogs.back() = peelKernel(LPD_Back);
1624 LiveStages[Epilogs.back()] = LS;
1625 AvailableStages[Epilogs.back()] = AS;
1626 }
1627 }
1628
1629 // Now we've defined all the prolog and epilog blocks as a fallthrough
1630 // sequence, add the edges that will be followed if the loop trip count is
1631 // lower than the number of stages (connecting prologs directly with epilogs).
1632 auto PI = Prologs.begin();
1633 auto EI = Epilogs.begin();
1634 assert(Prologs.size() == Epilogs.size())((Prologs.size() == Epilogs.size()) ? static_cast<void>
(0) : __assert_fail ("Prologs.size() == Epilogs.size()", "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1634, __PRETTY_FUNCTION__))
;
1635 for (; PI != Prologs.end(); ++PI, ++EI) {
1636 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1637 (*PI)->addSuccessor(*EI);
1638 for (MachineInstr &MI : (*EI)->phis()) {
1639 Register Reg = MI.getOperand(1).getReg();
1640 MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1641 if (Use && Use->getParent() == Pred)
1642 Reg = getEquivalentRegisterIn(Reg, *PI);
1643 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1644 MI.addOperand(MachineOperand::CreateMBB(*PI));
1645 }
1646 }
1647
1648 // Create a list of all blocks in order.
1649 SmallVector<MachineBasicBlock *, 8> Blocks;
1650 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1651 Blocks.push_back(BB);
1652 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1653
1654 // Iterate in reverse order over all instructions, remapping as we go.
1655 for (MachineBasicBlock *B : reverse(Blocks)) {
1656 for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
1657 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1658 MachineInstr *MI = &*I++;
1659 rewriteUsesOf(MI);
1660 }
1661 }
1662 // Now all remapping has been done, we're free to optimize the generated code.
1663 for (MachineBasicBlock *B : reverse(Blocks))
1664 EliminateDeadPhis(B, MRI, LIS);
1665 EliminateDeadPhis(ExitingBB, MRI, LIS);
1666}
1667
1668MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1669 MachineFunction &MF = *BB->getParent();
1670 MachineBasicBlock *Exit = *BB->succ_begin();
1671 if (Exit == BB)
1672 Exit = *std::next(BB->succ_begin());
1673
1674 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1675 MF.insert(std::next(BB->getIterator()), NewBB);
1676
1677 // Clone all phis in BB into NewBB and rewrite.
1678 for (MachineInstr &MI : BB->phis()) {
1679 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1680 Register OldR = MI.getOperand(3).getReg();
1681 Register R = MRI.createVirtualRegister(RC);
1682 SmallVector<MachineInstr *, 4> Uses;
1683 for (MachineInstr &Use : MRI.use_instructions(OldR))
1684 if (Use.getParent() != BB)
1685 Uses.push_back(&Use);
1686 for (MachineInstr *Use : Uses)
1687 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1688 *MRI.getTargetRegisterInfo());
1689 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1690 .addReg(OldR)
1691 .addMBB(BB);
1692 BlockMIs[{NewBB, &MI}] = NI;
1693 CanonicalMIs[NI] = &MI;
1694 }
1695 BB->replaceSuccessor(Exit, NewBB);
1696 Exit->replacePhiUsesWith(BB, NewBB);
1697 NewBB->addSuccessor(Exit);
1698
1699 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1700 SmallVector<MachineOperand, 4> Cond;
1701 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1702 (void)CanAnalyzeBr;
1703 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!")((CanAnalyzeBr && "Must be able to analyze the loop branch!"
) ? static_cast<void> (0) : __assert_fail ("CanAnalyzeBr && \"Must be able to analyze the loop branch!\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1703, __PRETTY_FUNCTION__))
;
1704 TII->removeBranch(*BB);
1705 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1706 Cond, DebugLoc());
1707 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1708 return NewBB;
1709}
1710
1711Register
1712PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
1713 MachineBasicBlock *BB) {
1714 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1715 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1716 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1717}
1718
1719void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
1720 if (MI->isPHI()) {
1721 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1722 // and it is produced by this block.
1723 Register PhiR = MI->getOperand(0).getReg();
1724 Register R = MI->getOperand(3).getReg();
1725 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1726 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1727 R = MI->getOperand(1).getReg();
1728 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1729 MRI.replaceRegWith(PhiR, R);
1730 if (LIS)
1731 LIS->RemoveMachineInstrFromMaps(*MI);
1732 MI->eraseFromParent();
1733 return;
1734 }
1735
1736 int Stage = getStage(MI);
1737 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1738 LiveStages[MI->getParent()].test(Stage))
1739 // Instruction is live, no rewriting to do.
1740 return;
1741
1742 for (MachineOperand &DefMO : MI->defs()) {
1743 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1744 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1745 // Only PHIs can use values from this block by construction.
1746 // Match with the equivalent PHI in B.
1747 assert(UseMI.isPHI())((UseMI.isPHI()) ? static_cast<void> (0) : __assert_fail
("UseMI.isPHI()", "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1747, __PRETTY_FUNCTION__))
;
1748 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1749 MI->getParent());
1750 Subs.emplace_back(&UseMI, Reg);
1751 }
1752 for (auto &Sub : Subs)
1753 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1754 *MRI.getTargetRegisterInfo());
1755 }
1756 if (LIS)
1757 LIS->RemoveMachineInstrFromMaps(*MI);
1758 MI->eraseFromParent();
1759}
1760
1761void PeelingModuloScheduleExpander::fixupBranches() {
1762 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info =
1763 TII->analyzeLoopForPipelining(BB);
1764 assert(Info)((Info) ? static_cast<void> (0) : __assert_fail ("Info"
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1764, __PRETTY_FUNCTION__))
;
1765
1766 // Work outwards from the kernel.
1767 bool KernelDisposed = false;
1768 int TC = Schedule.getNumStages() - 1;
1769 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1770 ++PI, ++EI, --TC) {
1771 MachineBasicBlock *Prolog = *PI;
1772 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1773 MachineBasicBlock *Epilog = *EI;
1774 SmallVector<MachineOperand, 4> Cond;
1775 TII->removeBranch(*Prolog);
1776 Optional<bool> StaticallyGreater =
1777 Info->createTripCountGreaterCondition(TC, *Prolog, Cond);
1778 if (!StaticallyGreater.hasValue()) {
1779 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { dbgs() << "Dynamic: TC > " <<
TC << "\n"; } } while (false)
;
1780 // Dynamically branch based on Cond.
1781 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1782 } else if (*StaticallyGreater == false) {
1783 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { dbgs() << "Static-false: TC > " <<
TC << "\n"; } } while (false)
;
1784 // Prolog never falls through; branch to epilog and orphan interior
1785 // blocks. Leave it to unreachable-block-elim to clean up.
1786 Prolog->removeSuccessor(Fallthrough);
1787 for (MachineInstr &P : Fallthrough->phis()) {
1788 P.RemoveOperand(2);
1789 P.RemoveOperand(1);
1790 }
1791 TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
1792 KernelDisposed = true;
1793 } else {
1794 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { dbgs() << "Static-true: TC > " <<
TC << "\n"; } } while (false)
;
1795 // Prolog always falls through; remove incoming values in epilog.
1796 Prolog->removeSuccessor(Epilog);
1797 for (MachineInstr &P : Epilog->phis()) {
1798 P.RemoveOperand(4);
1799 P.RemoveOperand(3);
1800 }
1801 }
1802 }
1803
1804 if (!KernelDisposed) {
1805 Info->adjustTripCount(-(Schedule.getNumStages() - 1));
1806 Info->setPreheader(Prologs.back());
1807 } else {
1808 Info->disposed();
1809 }
1810}
1811
1812void PeelingModuloScheduleExpander::rewriteKernel() {
1813 KernelRewriter KR(*Schedule.getLoop(), Schedule);
1814 KR.rewrite();
1815}
1816
1817void PeelingModuloScheduleExpander::expand() {
1818 BB = Schedule.getLoop()->getTopBlock();
1819 Preheader = Schedule.getLoop()->getLoopPreheader();
1820 LLVM_DEBUG(Schedule.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("pipeliner")) { Schedule.dump(); } } while (false)
;
1821
1822 rewriteKernel();
1823 peelPrologAndEpilogs();
1824 fixupBranches();
1825}
1826
1827void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
1828 BB = Schedule.getLoop()->getTopBlock();
1829 Preheader = Schedule.getLoop()->getLoopPreheader();
1830
1831 // Dump the schedule before we invalidate and remap all its instructions.
1832 // Stash it in a string so we can print it if we found an error.
1833 std::string ScheduleDump;
1834 raw_string_ostream OS(ScheduleDump);
1835 Schedule.print(OS);
1836 OS.flush();
1837
1838 // First, run the normal ModuleScheduleExpander. We don't support any
1839 // InstrChanges.
1840 assert(LIS && "Requires LiveIntervals!")((LIS && "Requires LiveIntervals!") ? static_cast<
void> (0) : __assert_fail ("LIS && \"Requires LiveIntervals!\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1840, __PRETTY_FUNCTION__))
;
1841 ModuloScheduleExpander MSE(MF, Schedule, *LIS,
1842 ModuloScheduleExpander::InstrChangesTy());
1843 MSE.expand();
1844 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
1845 if (!ExpandedKernel) {
1846 // The expander optimized away the kernel. We can't do any useful checking.
1847 MSE.cleanup();
1848 return;
1849 }
1850 // Before running the KernelRewriter, re-add BB into the CFG.
1851 Preheader->addSuccessor(BB);
1852
1853 // Now run the new expansion algorithm.
1854 KernelRewriter KR(*Schedule.getLoop(), Schedule);
1855 KR.rewrite();
1856 peelPrologAndEpilogs();
1857
1858 // Collect all illegal phis that the new algorithm created. We'll give these
1859 // to KernelOperandInfo.
1860 SmallPtrSet<MachineInstr *, 4> IllegalPhis;
1861 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
1862 if (NI->isPHI())
1863 IllegalPhis.insert(&*NI);
1864 }
1865
1866 // Co-iterate across both kernels. We expect them to be identical apart from
1867 // phis and full COPYs (we look through both).
1868 SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
1869 auto OI = ExpandedKernel->begin();
1870 auto NI = BB->begin();
1871 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
1872 while (OI->isPHI() || OI->isFullCopy())
1873 ++OI;
1874 while (NI->isPHI() || NI->isFullCopy())
1875 ++NI;
1876 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!")((OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!"
) ? static_cast<void> (0) : __assert_fail ("OI->getOpcode() == NI->getOpcode() && \"Opcodes don't match?!\""
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1876, __PRETTY_FUNCTION__))
;
1877 // Analyze every operand separately.
1878 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
1879 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
1880 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
1881 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
1882 }
1883
1884 bool Failed = false;
1885 for (auto &OldAndNew : KOIs) {
1886 if (OldAndNew.first == OldAndNew.second)
1887 continue;
1888 Failed = true;
1889 errs() << "Modulo kernel validation error: [\n";
1890 errs() << " [golden] ";
1891 OldAndNew.first.print(errs());
1892 errs() << " ";
1893 OldAndNew.second.print(errs());
1894 errs() << "]\n";
1895 }
1896
1897 if (Failed) {
1898 errs() << "Golden reference kernel:\n";
1899 ExpandedKernel->print(errs());
1900 errs() << "New kernel:\n";
1901 BB->print(errs());
1902 errs() << ScheduleDump;
1903 report_fatal_error(
1904 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
1905 }
1906
1907 // Cleanup by removing BB from the CFG again as the original
1908 // ModuloScheduleExpander intended.
1909 Preheader->removeSuccessor(BB);
1910 MSE.cleanup();
1911}
1912
1913//===----------------------------------------------------------------------===//
1914// ModuloScheduleTestPass implementation
1915//===----------------------------------------------------------------------===//
1916// This pass constructs a ModuloSchedule from its module and runs
1917// ModuloScheduleExpander.
1918//
1919// The module is expected to contain a single-block analyzable loop.
1920// The total order of instructions is taken from the loop as-is.
1921// Instructions are expected to be annotated with a PostInstrSymbol.
1922// This PostInstrSymbol must have the following format:
1923// "Stage=%d Cycle=%d".
1924//===----------------------------------------------------------------------===//
1925
1926namespace {
1927class ModuloScheduleTest : public MachineFunctionPass {
1928public:
1929 static char ID;
1930
1931 ModuloScheduleTest() : MachineFunctionPass(ID) {
1932 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
1933 }
1934
1935 bool runOnMachineFunction(MachineFunction &MF) override;
1936 void runOnLoop(MachineFunction &MF, MachineLoop &L);
1937
1938 void getAnalysisUsage(AnalysisUsage &AU) const override {
1939 AU.addRequired<MachineLoopInfo>();
1940 AU.addRequired<LiveIntervals>();
1941 MachineFunctionPass::getAnalysisUsage(AU);
1942 }
1943};
1944} // namespace
1945
1946char ModuloScheduleTest::ID = 0;
1947
1948INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",static void *initializeModuloScheduleTestPassOnce(PassRegistry
&Registry) {
1949 "Modulo Schedule test pass", false, false)static void *initializeModuloScheduleTestPassOnce(PassRegistry
&Registry) {
1950INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)initializeMachineLoopInfoPass(Registry);
1951INITIALIZE_PASS_DEPENDENCY(LiveIntervals)initializeLiveIntervalsPass(Registry);
1952INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",PassInfo *PI = new PassInfo( "Modulo Schedule test pass", "modulo-schedule-test"
, &ModuloScheduleTest::ID, PassInfo::NormalCtor_t(callDefaultCtor
<ModuloScheduleTest>), false, false); Registry.registerPass
(*PI, true); return PI; } static llvm::once_flag InitializeModuloScheduleTestPassFlag
; void llvm::initializeModuloScheduleTestPass(PassRegistry &
Registry) { llvm::call_once(InitializeModuloScheduleTestPassFlag
, initializeModuloScheduleTestPassOnce, std::ref(Registry)); }
1953 "Modulo Schedule test pass", false, false)PassInfo *PI = new PassInfo( "Modulo Schedule test pass", "modulo-schedule-test"
, &ModuloScheduleTest::ID, PassInfo::NormalCtor_t(callDefaultCtor
<ModuloScheduleTest>), false, false); Registry.registerPass
(*PI, true); return PI; } static llvm::once_flag InitializeModuloScheduleTestPassFlag
; void llvm::initializeModuloScheduleTestPass(PassRegistry &
Registry) { llvm::call_once(InitializeModuloScheduleTestPassFlag
, initializeModuloScheduleTestPassOnce, std::ref(Registry)); }
1954
1955bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
1956 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
1957 for (auto *L : MLI) {
1958 if (L->getTopBlock() != L->getBottomBlock())
1959 continue;
1960 runOnLoop(MF, *L);
1961 return false;
1962 }
1963 return false;
1964}
1965
1966static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
1967 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
1968 std::pair<StringRef, StringRef> StageTokenAndValue =
1969 getToken(StageAndCycle.first, "-");
1970 std::pair<StringRef, StringRef> CycleTokenAndValue =
1971 getToken(StageAndCycle.second, "-");
1972 if (StageTokenAndValue.first != "Stage" ||
1973 CycleTokenAndValue.first != "_Cycle") {
1974 llvm_unreachable(::llvm::llvm_unreachable_internal("Bad post-instr symbol syntax: see comment in ModuloScheduleTest"
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1975)
1975 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest")::llvm::llvm_unreachable_internal("Bad post-instr symbol syntax: see comment in ModuloScheduleTest"
, "/build/llvm-toolchain-snapshot-10~svn374877/lib/CodeGen/ModuloSchedule.cpp"
, 1975)
;
1976 return;
1977 }
1978
1979 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
1980 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
1981
1982 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
1983}
1984
1985void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
1986 LiveIntervals &LIS = getAnalysis<LiveIntervals>();
1987 MachineBasicBlock *BB = L.getTopBlock();
1988 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
1989
1990 DenseMap<MachineInstr *, int> Cycle, Stage;
1991 std::vector<MachineInstr *> Instrs;
1992 for (MachineInstr &MI : *BB) {
1993 if (MI.isTerminator())
1994 continue;
1995 Instrs.push_back(&MI);
1996 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
1997 dbgs() << "Parsing post-instr symbol for " << MI;
1998 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
1999 }
2000 }
2001
2002 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2003 std::move(Stage));
2004 ModuloScheduleExpander MSE(
2005 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2006 MSE.expand();
2007 MSE.cleanup();
2008}
2009
2010//===----------------------------------------------------------------------===//
2011// ModuloScheduleTestAnnotater implementation
2012//===----------------------------------------------------------------------===//
2013
2014void ModuloScheduleTestAnnotater::annotate() {
2015 for (MachineInstr *MI : S.getInstructions()) {
2016 SmallVector<char, 16> SV;
2017 raw_svector_ostream OS(SV);
2018 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2019 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2020 MI->setPostInstrSymbol(MF, Sym);
2021 }
2022}