File: | lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp |
Warning: | line 538, column 3 1st function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This pass removes the computation of provably redundant expressions that have | |||
10 | // been computed earlier in a previous iteration. It relies on the use of PHIs | |||
11 | // to identify loop carried dependences. This is scalar replacement for vector | |||
12 | // types. | |||
13 | // | |||
14 | //----------------------------------------------------------------------------- | |||
15 | // Motivation: Consider the case where we have the following loop structure. | |||
16 | // | |||
17 | // Loop: | |||
18 | // t0 = a[i]; | |||
19 | // t1 = f(t0); | |||
20 | // t2 = g(t1); | |||
21 | // ... | |||
22 | // t3 = a[i+1]; | |||
23 | // t4 = f(t3); | |||
24 | // t5 = g(t4); | |||
25 | // t6 = op(t2, t5) | |||
26 | // cond_branch <Loop> | |||
27 | // | |||
28 | // This can be converted to | |||
29 | // t00 = a[0]; | |||
30 | // t10 = f(t00); | |||
31 | // t20 = g(t10); | |||
32 | // Loop: | |||
33 | // t2 = t20; | |||
34 | // t3 = a[i+1]; | |||
35 | // t4 = f(t3); | |||
36 | // t5 = g(t4); | |||
37 | // t6 = op(t2, t5) | |||
38 | // t20 = t5 | |||
39 | // cond_branch <Loop> | |||
40 | // | |||
41 | // SROA does a good job of reusing a[i+1] as a[i] in the next iteration. | |||
42 | // Such a loop comes to this pass in the following form. | |||
43 | // | |||
44 | // LoopPreheader: | |||
45 | // X0 = a[0]; | |||
46 | // Loop: | |||
47 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | |||
48 | // t1 = f(X2) <-- I1 | |||
49 | // t2 = g(t1) | |||
50 | // ... | |||
51 | // X1 = a[i+1] | |||
52 | // t4 = f(X1) <-- I2 | |||
53 | // t5 = g(t4) | |||
54 | // t6 = op(t2, t5) | |||
55 | // cond_branch <Loop> | |||
56 | // | |||
57 | // In this pass, we look for PHIs such as X2 whose incoming values come only | |||
58 | // from the Loop Preheader and over the backedge and additionaly, both these | |||
59 | // values are the results of the same operation in terms of opcode. We call such | |||
60 | // a PHI node a dependence chain or DepChain. In this case, the dependence of X2 | |||
61 | // over X1 is carried over only one iteration and so the DepChain is only one | |||
62 | // PHI node long. | |||
63 | // | |||
64 | // Then, we traverse the uses of the PHI (X2) and the uses of the value of the | |||
65 | // PHI coming over the backedge (X1). We stop at the first pair of such users | |||
66 | // I1 (of X2) and I2 (of X1) that meet the following conditions. | |||
67 | // 1. I1 and I2 are the same operation, but with different operands. | |||
68 | // 2. X2 and X1 are used at the same operand number in the two instructions. | |||
69 | // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a | |||
70 | // a DepChain from Op1 to Op2 of the same length as that between X2 and X1. | |||
71 | // | |||
72 | // We then make the following transformation | |||
73 | // LoopPreheader: | |||
74 | // X0 = a[0]; | |||
75 | // Y0 = f(X0); | |||
76 | // Loop: | |||
77 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | |||
78 | // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> | |||
79 | // t1 = f(X2) <-- Will be removed by DCE. | |||
80 | // t2 = g(Y2) | |||
81 | // ... | |||
82 | // X1 = a[i+1] | |||
83 | // t4 = f(X1) | |||
84 | // t5 = g(t4) | |||
85 | // t6 = op(t2, t5) | |||
86 | // cond_branch <Loop> | |||
87 | // | |||
88 | // We proceed until we cannot find any more such instructions I1 and I2. | |||
89 | // | |||
90 | // --- DepChains & Loop carried dependences --- | |||
91 | // Consider a single basic block loop such as | |||
92 | // | |||
93 | // LoopPreheader: | |||
94 | // X0 = ... | |||
95 | // Y0 = ... | |||
96 | // Loop: | |||
97 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | |||
98 | // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> | |||
99 | // ... | |||
100 | // X1 = ... | |||
101 | // ... | |||
102 | // cond_branch <Loop> | |||
103 | // | |||
104 | // Then there is a dependence between X2 and X1 that goes back one iteration, | |||
105 | // i.e. X1 is used as X2 in the very next iteration. We represent this as a | |||
106 | // DepChain from X2 to X1 (X2->X1). | |||
107 | // Similarly, there is a dependence between Y2 and X1 that goes back two | |||
108 | // iterations. X1 is used as Y2 two iterations after it is computed. This is | |||
109 | // represented by a DepChain as (Y2->X2->X1). | |||
110 | // | |||
111 | // A DepChain has the following properties. | |||
112 | // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of | |||
113 | // iterations of carried dependence + 1. | |||
114 | // 2. All instructions in the DepChain except the last are PHIs. | |||
115 | // | |||
116 | //===----------------------------------------------------------------------===// | |||
117 | ||||
118 | #include "llvm/ADT/SetVector.h" | |||
119 | #include "llvm/ADT/SmallVector.h" | |||
120 | #include "llvm/ADT/Statistic.h" | |||
121 | #include "llvm/Analysis/LoopInfo.h" | |||
122 | #include "llvm/Analysis/LoopPass.h" | |||
123 | #include "llvm/IR/BasicBlock.h" | |||
124 | #include "llvm/IR/DerivedTypes.h" | |||
125 | #include "llvm/IR/IRBuilder.h" | |||
126 | #include "llvm/IR/Instruction.h" | |||
127 | #include "llvm/IR/Instructions.h" | |||
128 | #include "llvm/IR/IntrinsicInst.h" | |||
129 | #include "llvm/IR/Intrinsics.h" | |||
130 | #include "llvm/IR/Use.h" | |||
131 | #include "llvm/IR/User.h" | |||
132 | #include "llvm/IR/Value.h" | |||
133 | #include "llvm/Pass.h" | |||
134 | #include "llvm/Support/Casting.h" | |||
135 | #include "llvm/Support/CommandLine.h" | |||
136 | #include "llvm/Support/Compiler.h" | |||
137 | #include "llvm/Support/Debug.h" | |||
138 | #include "llvm/Support/raw_ostream.h" | |||
139 | #include "llvm/Transforms/Scalar.h" | |||
140 | #include "llvm/Transforms/Utils.h" | |||
141 | #include <algorithm> | |||
142 | #include <cassert> | |||
143 | #include <cstddef> | |||
144 | #include <map> | |||
145 | #include <memory> | |||
146 | #include <set> | |||
147 | ||||
148 | using namespace llvm; | |||
149 | ||||
150 | #define DEBUG_TYPE"hexagon-vlcr" "hexagon-vlcr" | |||
151 | ||||
152 | STATISTIC(HexagonNumVectorLoopCarriedReuse,static llvm::Statistic HexagonNumVectorLoopCarriedReuse = {"hexagon-vlcr" , "HexagonNumVectorLoopCarriedReuse", "Number of values that were reused from a previous iteration." , {0}, {false}} | |||
153 | "Number of values that were reused from a previous iteration.")static llvm::Statistic HexagonNumVectorLoopCarriedReuse = {"hexagon-vlcr" , "HexagonNumVectorLoopCarriedReuse", "Number of values that were reused from a previous iteration." , {0}, {false}}; | |||
154 | ||||
155 | static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", | |||
156 | cl::Hidden, | |||
157 | cl::desc("Maximum distance of loop carried dependences that are handled"), | |||
158 | cl::init(2), cl::ZeroOrMore); | |||
159 | ||||
160 | namespace llvm { | |||
161 | ||||
162 | void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); | |||
163 | Pass *createHexagonVectorLoopCarriedReusePass(); | |||
164 | ||||
165 | } // end namespace llvm | |||
166 | ||||
167 | namespace { | |||
168 | ||||
169 | // See info about DepChain in the comments at the top of this file. | |||
170 | using ChainOfDependences = SmallVector<Instruction *, 4>; | |||
171 | ||||
172 | class DepChain { | |||
173 | ChainOfDependences Chain; | |||
174 | ||||
175 | public: | |||
176 | bool isIdentical(DepChain &Other) const { | |||
177 | if (Other.size() != size()) | |||
178 | return false; | |||
179 | ChainOfDependences &OtherChain = Other.getChain(); | |||
180 | for (int i = 0; i < size(); ++i) { | |||
181 | if (Chain[i] != OtherChain[i]) | |||
182 | return false; | |||
183 | } | |||
184 | return true; | |||
185 | } | |||
186 | ||||
187 | ChainOfDependences &getChain() { | |||
188 | return Chain; | |||
189 | } | |||
190 | ||||
191 | int size() const { | |||
192 | return Chain.size(); | |||
193 | } | |||
194 | ||||
195 | void clear() { | |||
196 | Chain.clear(); | |||
197 | } | |||
198 | ||||
199 | void push_back(Instruction *I) { | |||
200 | Chain.push_back(I); | |||
201 | } | |||
202 | ||||
203 | int iterations() const { | |||
204 | return size() - 1; | |||
205 | } | |||
206 | ||||
207 | Instruction *front() const { | |||
208 | return Chain.front(); | |||
209 | } | |||
210 | ||||
211 | Instruction *back() const { | |||
212 | return Chain.back(); | |||
213 | } | |||
214 | ||||
215 | Instruction *&operator[](const int index) { | |||
216 | return Chain[index]; | |||
217 | } | |||
218 | ||||
219 | friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); | |||
220 | }; | |||
221 | ||||
222 | LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) | |||
223 | raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { | |||
224 | const ChainOfDependences &CD = D.Chain; | |||
225 | int ChainSize = CD.size(); | |||
226 | OS << "**DepChain Start::**\n"; | |||
227 | for (int i = 0; i < ChainSize -1; ++i) { | |||
228 | OS << *(CD[i]) << " -->\n"; | |||
229 | } | |||
230 | OS << *CD[ChainSize-1] << "\n"; | |||
231 | return OS; | |||
232 | } | |||
233 | ||||
234 | struct ReuseValue { | |||
235 | Instruction *Inst2Replace = nullptr; | |||
236 | ||||
237 | // In the new PHI node that we'll construct this is the value that'll be | |||
238 | // used over the backedge. This is teh value that gets reused from a | |||
239 | // previous iteration. | |||
240 | Instruction *BackedgeInst = nullptr; | |||
241 | ||||
242 | ReuseValue() = default; | |||
243 | ||||
244 | void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; } | |||
245 | bool isDefined() { return Inst2Replace != nullptr; } | |||
246 | }; | |||
247 | ||||
248 | LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) | |||
249 | raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { | |||
250 | OS << "** ReuseValue ***\n"; | |||
251 | OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; | |||
252 | OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n"; | |||
253 | return OS; | |||
254 | } | |||
255 | ||||
256 | class HexagonVectorLoopCarriedReuse : public LoopPass { | |||
257 | public: | |||
258 | static char ID; | |||
259 | ||||
260 | explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) { | |||
261 | PassRegistry *PR = PassRegistry::getPassRegistry(); | |||
262 | initializeHexagonVectorLoopCarriedReusePass(*PR); | |||
263 | } | |||
264 | ||||
265 | StringRef getPassName() const override { | |||
266 | return "Hexagon-specific loop carried reuse for HVX vectors"; | |||
267 | } | |||
268 | ||||
269 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
270 | AU.addRequired<LoopInfoWrapperPass>(); | |||
271 | AU.addRequiredID(LoopSimplifyID); | |||
272 | AU.addRequiredID(LCSSAID); | |||
273 | AU.addPreservedID(LCSSAID); | |||
274 | AU.setPreservesCFG(); | |||
275 | } | |||
276 | ||||
277 | bool runOnLoop(Loop *L, LPPassManager &LPM) override; | |||
278 | ||||
279 | private: | |||
280 | SetVector<DepChain *> Dependences; | |||
281 | std::set<Instruction *> ReplacedInsts; | |||
282 | Loop *CurLoop; | |||
283 | ReuseValue ReuseCandidate; | |||
284 | ||||
285 | bool doVLCR(); | |||
286 | void findLoopCarriedDeps(); | |||
287 | void findValueToReuse(); | |||
288 | void findDepChainFromPHI(Instruction *I, DepChain &D); | |||
289 | void reuseValue(); | |||
290 | Value *findValueInBlock(Value *Op, BasicBlock *BB); | |||
291 | bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters); | |||
292 | DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2); | |||
293 | bool isEquivalentOperation(Instruction *I1, Instruction *I2); | |||
294 | bool canReplace(Instruction *I); | |||
295 | }; | |||
296 | ||||
297 | } // end anonymous namespace | |||
298 | ||||
299 | char HexagonVectorLoopCarriedReuse::ID = 0; | |||
300 | ||||
301 | INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",static void *initializeHexagonVectorLoopCarriedReusePassOnce( PassRegistry &Registry) { | |||
302 | "Hexagon-specific predictive commoning for HVX vectors", false, false)static void *initializeHexagonVectorLoopCarriedReusePassOnce( PassRegistry &Registry) { | |||
303 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); | |||
304 | INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry); | |||
305 | INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry); | |||
306 | INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",PassInfo *PI = new PassInfo( "Hexagon-specific predictive commoning for HVX vectors" , "hexagon-vlcr", &HexagonVectorLoopCarriedReuse::ID, PassInfo ::NormalCtor_t(callDefaultCtor<HexagonVectorLoopCarriedReuse >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonVectorLoopCarriedReusePassFlag ; void llvm::initializeHexagonVectorLoopCarriedReusePass(PassRegistry &Registry) { llvm::call_once(InitializeHexagonVectorLoopCarriedReusePassFlag , initializeHexagonVectorLoopCarriedReusePassOnce, std::ref(Registry )); } | |||
307 | "Hexagon-specific predictive commoning for HVX vectors", false, false)PassInfo *PI = new PassInfo( "Hexagon-specific predictive commoning for HVX vectors" , "hexagon-vlcr", &HexagonVectorLoopCarriedReuse::ID, PassInfo ::NormalCtor_t(callDefaultCtor<HexagonVectorLoopCarriedReuse >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonVectorLoopCarriedReusePassFlag ; void llvm::initializeHexagonVectorLoopCarriedReusePass(PassRegistry &Registry) { llvm::call_once(InitializeHexagonVectorLoopCarriedReusePassFlag , initializeHexagonVectorLoopCarriedReusePassOnce, std::ref(Registry )); } | |||
308 | ||||
309 | bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) { | |||
310 | if (skipLoop(L)) | |||
| ||||
311 | return false; | |||
312 | ||||
313 | if (!L->getLoopPreheader()) | |||
314 | return false; | |||
315 | ||||
316 | // Work only on innermost loops. | |||
317 | if (!L->getSubLoops().empty()) | |||
318 | return false; | |||
319 | ||||
320 | // Work only on single basic blocks loops. | |||
321 | if (L->getNumBlocks() != 1) | |||
322 | return false; | |||
323 | ||||
324 | CurLoop = L; | |||
325 | ||||
326 | return doVLCR(); | |||
327 | } | |||
328 | ||||
329 | bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1, | |||
330 | Instruction *I2) { | |||
331 | if (!I1->isSameOperationAs(I2)) | |||
332 | return false; | |||
333 | // This check is in place specifically for intrinsics. isSameOperationAs will | |||
334 | // return two for any two hexagon intrinsics because they are essentially the | |||
335 | // same instruciton (CallInst). We need to scratch the surface to see if they | |||
336 | // are calls to the same function. | |||
337 | if (CallInst *C1 = dyn_cast<CallInst>(I1)) { | |||
338 | if (CallInst *C2 = dyn_cast<CallInst>(I2)) { | |||
339 | if (C1->getCalledFunction() != C2->getCalledFunction()) | |||
340 | return false; | |||
341 | } | |||
342 | } | |||
343 | ||||
344 | // If both the Instructions are of Vector Type and any of the element | |||
345 | // is integer constant, check their values too for equivalence. | |||
346 | if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) { | |||
347 | unsigned NumOperands = I1->getNumOperands(); | |||
348 | for (unsigned i = 0; i < NumOperands; ++i) { | |||
349 | ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i)); | |||
350 | ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i)); | |||
351 | if(!C1) continue; | |||
352 | assert(C2)((C2) ? static_cast<void> (0) : __assert_fail ("C2", "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 352, __PRETTY_FUNCTION__)); | |||
353 | if (C1->getSExtValue() != C2->getSExtValue()) | |||
354 | return false; | |||
355 | } | |||
356 | } | |||
357 | ||||
358 | return true; | |||
359 | } | |||
360 | ||||
361 | bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) { | |||
362 | const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); | |||
363 | if (II && | |||
364 | (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi || | |||
365 | II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) { | |||
366 | LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Not considering for reuse: " << *II << "\n"; } } while (false); | |||
367 | return false; | |||
368 | } | |||
369 | return true; | |||
370 | } | |||
371 | void HexagonVectorLoopCarriedReuse::findValueToReuse() { | |||
372 | for (auto *D : Dependences) { | |||
373 | LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Processing dependence " << *(D->front()) << "\n"; } } while (false); | |||
374 | if (D->iterations() > HexagonVLCRIterationLim) { | |||
375 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << ".. Skipping because number of iterations > than the limit\n" ; } } while (false) | |||
376 | dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << ".. Skipping because number of iterations > than the limit\n" ; } } while (false) | |||
377 | << ".. Skipping because number of iterations > than the limit\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << ".. Skipping because number of iterations > than the limit\n" ; } } while (false); | |||
378 | continue; | |||
379 | } | |||
380 | ||||
381 | PHINode *PN = cast<PHINode>(D->front()); | |||
382 | Instruction *BEInst = D->back(); | |||
383 | int Iters = D->iterations(); | |||
384 | BasicBlock *BB = PN->getParent(); | |||
385 | LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PNdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Checking if any uses of " << *PN << " can be reused\n"; } } while (false) | |||
386 | << " can be reused\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Checking if any uses of " << *PN << " can be reused\n"; } } while (false); | |||
387 | ||||
388 | SmallVector<Instruction *, 4> PNUsers; | |||
389 | for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { | |||
390 | Use &U = *UI; | |||
391 | Instruction *User = cast<Instruction>(U.getUser()); | |||
392 | ||||
393 | if (User->getParent() != BB) | |||
394 | continue; | |||
395 | if (ReplacedInsts.count(User)) { | |||
396 | LLVM_DEBUG(dbgs() << *Userdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << *User << " has already been replaced. Skipping...\n" ; } } while (false) | |||
397 | << " has already been replaced. Skipping...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << *User << " has already been replaced. Skipping...\n" ; } } while (false); | |||
398 | continue; | |||
399 | } | |||
400 | if (isa<PHINode>(User)) | |||
401 | continue; | |||
402 | if (User->mayHaveSideEffects()) | |||
403 | continue; | |||
404 | if (!canReplace(User)) | |||
405 | continue; | |||
406 | ||||
407 | PNUsers.push_back(User); | |||
408 | } | |||
409 | LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n" ; } } while (false); | |||
410 | ||||
411 | // For each interesting use I of PN, find an Instruction BEUser that | |||
412 | // performs the same operation as I on BEInst and whose other operands, | |||
413 | // if any, can also be rematerialized in OtherBB. We stop when we find the | |||
414 | // first such Instruction BEUser. This is because once BEUser is | |||
415 | // rematerialized in OtherBB, we may find more such "fixup" opportunities | |||
416 | // in this block. So, we'll start over again. | |||
417 | for (Instruction *I : PNUsers) { | |||
418 | for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E; | |||
419 | ++UI) { | |||
420 | Use &U = *UI; | |||
421 | Instruction *BEUser = cast<Instruction>(U.getUser()); | |||
422 | ||||
423 | if (BEUser->getParent() != BB) | |||
424 | continue; | |||
425 | if (!isEquivalentOperation(I, BEUser)) | |||
426 | continue; | |||
427 | ||||
428 | int NumOperands = I->getNumOperands(); | |||
429 | ||||
430 | for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { | |||
431 | Value *Op = I->getOperand(OpNo); | |||
432 | Value *BEOp = BEUser->getOperand(OpNo); | |||
433 | ||||
434 | Instruction *OpInst = dyn_cast<Instruction>(Op); | |||
435 | if (!OpInst) { | |||
436 | if (Op == BEOp) | |||
437 | continue; | |||
438 | // Do not allow reuse to occur when the operands may be different | |||
439 | // values. | |||
440 | BEUser = nullptr; | |||
441 | break; | |||
442 | } | |||
443 | ||||
444 | Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); | |||
445 | ||||
446 | if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) { | |||
447 | BEUser = nullptr; | |||
448 | break; | |||
449 | } | |||
450 | } | |||
451 | if (BEUser) { | |||
452 | LLVM_DEBUG(dbgs() << "Found Value for reuse.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Found Value for reuse.\n" ; } } while (false); | |||
453 | ReuseCandidate.Inst2Replace = I; | |||
454 | ReuseCandidate.BackedgeInst = BEUser; | |||
455 | return; | |||
456 | } else | |||
457 | ReuseCandidate.reset(); | |||
458 | } | |||
459 | } | |||
460 | } | |||
461 | ReuseCandidate.reset(); | |||
462 | } | |||
463 | ||||
464 | Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, | |||
465 | BasicBlock *BB) { | |||
466 | PHINode *PN = dyn_cast<PHINode>(Op); | |||
467 | assert(PN)((PN) ? static_cast<void> (0) : __assert_fail ("PN", "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 467, __PRETTY_FUNCTION__)); | |||
468 | Value *ValueInBlock = PN->getIncomingValueForBlock(BB); | |||
469 | return ValueInBlock; | |||
470 | } | |||
471 | ||||
472 | void HexagonVectorLoopCarriedReuse::reuseValue() { | |||
473 | LLVM_DEBUG(dbgs() << ReuseCandidate)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << ReuseCandidate; } } while (false); | |||
474 | Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; | |||
475 | Instruction *BEInst = ReuseCandidate.BackedgeInst; | |||
476 | int NumOperands = Inst2Replace->getNumOperands(); | |||
477 | std::map<Instruction *, DepChain *> DepChains; | |||
478 | int Iterations = -1; | |||
479 | BasicBlock *LoopPH = CurLoop->getLoopPreheader(); | |||
480 | ||||
481 | for (int i = 0; i < NumOperands; ++i) { | |||
482 | Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i)); | |||
483 | if(!I) | |||
484 | continue; | |||
485 | else { | |||
486 | Instruction *J = cast<Instruction>(BEInst->getOperand(i)); | |||
487 | DepChain *D = getDepChainBtwn(I, J); | |||
488 | ||||
489 | assert(D &&((D && "No DepChain between corresponding operands in ReuseCandidate\n" ) ? static_cast<void> (0) : __assert_fail ("D && \"No DepChain between corresponding operands in ReuseCandidate\\n\"" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 490, __PRETTY_FUNCTION__)) | |||
490 | "No DepChain between corresponding operands in ReuseCandidate\n")((D && "No DepChain between corresponding operands in ReuseCandidate\n" ) ? static_cast<void> (0) : __assert_fail ("D && \"No DepChain between corresponding operands in ReuseCandidate\\n\"" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 490, __PRETTY_FUNCTION__)); | |||
491 | if (Iterations == -1) | |||
492 | Iterations = D->iterations(); | |||
493 | assert(Iterations == D->iterations() && "Iterations mismatch")((Iterations == D->iterations() && "Iterations mismatch" ) ? static_cast<void> (0) : __assert_fail ("Iterations == D->iterations() && \"Iterations mismatch\"" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 493, __PRETTY_FUNCTION__)); | |||
494 | DepChains[I] = D; | |||
495 | } | |||
496 | } | |||
497 | ||||
498 | LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "reuseValue is making the following changes\n" ; } } while (false); | |||
499 | ||||
500 | SmallVector<Instruction *, 4> InstsInPreheader; | |||
501 | for (int i = 0; i < Iterations; ++i) { | |||
502 | Instruction *InstInPreheader = Inst2Replace->clone(); | |||
503 | SmallVector<Value *, 4> Ops; | |||
504 | for (int j = 0; j < NumOperands; ++j) { | |||
505 | Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j)); | |||
506 | if (!I) | |||
507 | continue; | |||
508 | // Get the DepChain corresponding to this operand. | |||
509 | DepChain &D = *DepChains[I]; | |||
510 | // Get the PHI for the iteration number and find | |||
511 | // the incoming value from the Loop Preheader for | |||
512 | // that PHI. | |||
513 | Value *ValInPreheader = findValueInBlock(D[i], LoopPH); | |||
514 | InstInPreheader->setOperand(j, ValInPreheader); | |||
515 | } | |||
516 | InstsInPreheader.push_back(InstInPreheader); | |||
517 | InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr"); | |||
518 | InstInPreheader->insertBefore(LoopPH->getTerminator()); | |||
519 | LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName() << "\n"; } } while (false) | |||
520 | << LoopPH->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName() << "\n"; } } while (false); | |||
521 | } | |||
522 | BasicBlock *BB = BEInst->getParent(); | |||
523 | IRBuilder<> IRB(BB); | |||
524 | IRB.SetInsertPoint(BB->getFirstNonPHI()); | |||
525 | Value *BEVal = BEInst; | |||
526 | PHINode *NewPhi; | |||
527 | for (int i = Iterations-1; i >=0 ; --i) { | |||
528 | Instruction *InstInPreheader = InstsInPreheader[i]; | |||
529 | NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2); | |||
530 | NewPhi->addIncoming(InstInPreheader, LoopPH); | |||
531 | NewPhi->addIncoming(BEVal, BB); | |||
532 | LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n"; } } while (false) | |||
533 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n"; } } while (false); | |||
534 | BEVal = NewPhi; | |||
535 | } | |||
536 | // We are in LCSSA form. So, a value defined inside the Loop is used only | |||
537 | // inside the loop. So, the following is safe. | |||
538 | Inst2Replace->replaceAllUsesWith(NewPhi); | |||
| ||||
539 | ReplacedInsts.insert(Inst2Replace); | |||
540 | ++HexagonNumVectorLoopCarriedReuse; | |||
541 | } | |||
542 | ||||
543 | bool HexagonVectorLoopCarriedReuse::doVLCR() { | |||
544 | assert(CurLoop->getSubLoops().empty() &&((CurLoop->getSubLoops().empty() && "Can do VLCR on the innermost loop only" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->getSubLoops().empty() && \"Can do VLCR on the innermost loop only\"" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 545, __PRETTY_FUNCTION__)) | |||
545 | "Can do VLCR on the innermost loop only")((CurLoop->getSubLoops().empty() && "Can do VLCR on the innermost loop only" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->getSubLoops().empty() && \"Can do VLCR on the innermost loop only\"" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 545, __PRETTY_FUNCTION__)); | |||
546 | assert((CurLoop->getNumBlocks() == 1) &&(((CurLoop->getNumBlocks() == 1) && "Can do VLCR only on single block loops" ) ? static_cast<void> (0) : __assert_fail ("(CurLoop->getNumBlocks() == 1) && \"Can do VLCR only on single block loops\"" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 547, __PRETTY_FUNCTION__)) | |||
547 | "Can do VLCR only on single block loops")(((CurLoop->getNumBlocks() == 1) && "Can do VLCR only on single block loops" ) ? static_cast<void> (0) : __assert_fail ("(CurLoop->getNumBlocks() == 1) && \"Can do VLCR only on single block loops\"" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 547, __PRETTY_FUNCTION__)); | |||
548 | ||||
549 | bool Changed = false; | |||
550 | bool Continue; | |||
551 | ||||
552 | LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"; } } while (false); | |||
553 | do { | |||
554 | // Reset datastructures. | |||
555 | Dependences.clear(); | |||
556 | Continue = false; | |||
557 | ||||
558 | findLoopCarriedDeps(); | |||
559 | findValueToReuse(); | |||
560 | if (ReuseCandidate.isDefined()) { | |||
561 | reuseValue(); | |||
562 | Changed = true; | |||
563 | Continue = true; | |||
564 | } | |||
565 | llvm::for_each(Dependences, std::default_delete<DepChain>()); | |||
566 | } while (Continue); | |||
567 | return Changed; | |||
568 | } | |||
569 | ||||
570 | void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, | |||
571 | DepChain &D) { | |||
572 | PHINode *PN = dyn_cast<PHINode>(I); | |||
573 | if (!PN) { | |||
574 | D.push_back(I); | |||
575 | return; | |||
576 | } else { | |||
577 | auto NumIncomingValues = PN->getNumIncomingValues(); | |||
578 | if (NumIncomingValues != 2) { | |||
579 | D.clear(); | |||
580 | return; | |||
581 | } | |||
582 | ||||
583 | BasicBlock *BB = PN->getParent(); | |||
584 | if (BB != CurLoop->getHeader()) { | |||
585 | D.clear(); | |||
586 | return; | |||
587 | } | |||
588 | ||||
589 | Value *BEVal = PN->getIncomingValueForBlock(BB); | |||
590 | Instruction *BEInst = dyn_cast<Instruction>(BEVal); | |||
591 | // This is a single block loop with a preheader, so at least | |||
592 | // one value should come over the backedge. | |||
593 | assert(BEInst && "There should be a value over the backedge")((BEInst && "There should be a value over the backedge" ) ? static_cast<void> (0) : __assert_fail ("BEInst && \"There should be a value over the backedge\"" , "/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 593, __PRETTY_FUNCTION__)); | |||
594 | ||||
595 | Value *PreHdrVal = | |||
596 | PN->getIncomingValueForBlock(CurLoop->getLoopPreheader()); | |||
597 | if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) { | |||
598 | D.clear(); | |||
599 | return; | |||
600 | } | |||
601 | D.push_back(PN); | |||
602 | findDepChainFromPHI(BEInst, D); | |||
603 | } | |||
604 | } | |||
605 | ||||
606 | bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1, | |||
607 | Instruction *I2, | |||
608 | int Iters) { | |||
609 | for (auto *D : Dependences) { | |||
610 | if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters) | |||
611 | return true; | |||
612 | } | |||
613 | return false; | |||
614 | } | |||
615 | ||||
616 | DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, | |||
617 | Instruction *I2) { | |||
618 | for (auto *D : Dependences) { | |||
619 | if (D->front() == I1 && D->back() == I2) | |||
620 | return D; | |||
621 | } | |||
622 | return nullptr; | |||
623 | } | |||
624 | ||||
625 | void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { | |||
626 | BasicBlock *BB = CurLoop->getHeader(); | |||
627 | for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { | |||
628 | auto *PN = cast<PHINode>(I); | |||
629 | if (!isa<VectorType>(PN->getType())) | |||
630 | continue; | |||
631 | ||||
632 | DepChain *D = new DepChain(); | |||
633 | findDepChainFromPHI(PN, *D); | |||
634 | if (D->size() != 0) | |||
635 | Dependences.insert(D); | |||
636 | else | |||
637 | delete D; | |||
638 | } | |||
639 | LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Found " << Dependences .size() << " dependences\n"; } } while (false); | |||
640 | LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { for (size_t i = 0; i < Dependences.size (); ++i) { dbgs() << *Dependences[i] << "\n"; }; } } while (false) | |||
641 | ++i) { dbgs() << *Dependences[i] << "\n"; })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { for (size_t i = 0; i < Dependences.size (); ++i) { dbgs() << *Dependences[i] << "\n"; }; } } while (false); | |||
642 | } | |||
643 | ||||
644 | Pass *llvm::createHexagonVectorLoopCarriedReusePass() { | |||
645 | return new HexagonVectorLoopCarriedReuse(); | |||
646 | } |