File: | lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp |
Warning: | line 528, column 3 1st function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This pass removes the computation of provably redundant expressions that have | |||
11 | // been computed earlier in a previous iteration. It relies on the use of PHIs | |||
12 | // to identify loop carried dependences. This is scalar replacement for vector | |||
13 | // types. | |||
14 | // | |||
15 | //----------------------------------------------------------------------------- | |||
16 | // Motivation: Consider the case where we have the following loop structure. | |||
17 | // | |||
18 | // Loop: | |||
19 | // t0 = a[i]; | |||
20 | // t1 = f(t0); | |||
21 | // t2 = g(t1); | |||
22 | // ... | |||
23 | // t3 = a[i+1]; | |||
24 | // t4 = f(t3); | |||
25 | // t5 = g(t4); | |||
26 | // t6 = op(t2, t5) | |||
27 | // cond_branch <Loop> | |||
28 | // | |||
29 | // This can be converted to | |||
30 | // t00 = a[0]; | |||
31 | // t10 = f(t00); | |||
32 | // t20 = g(t10); | |||
33 | // Loop: | |||
34 | // t2 = t20; | |||
35 | // t3 = a[i+1]; | |||
36 | // t4 = f(t3); | |||
37 | // t5 = g(t4); | |||
38 | // t6 = op(t2, t5) | |||
39 | // t20 = t5 | |||
40 | // cond_branch <Loop> | |||
41 | // | |||
42 | // SROA does a good job of reusing a[i+1] as a[i] in the next iteration. | |||
43 | // Such a loop comes to this pass in the following form. | |||
44 | // | |||
45 | // LoopPreheader: | |||
46 | // X0 = a[0]; | |||
47 | // Loop: | |||
48 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | |||
49 | // t1 = f(X2) <-- I1 | |||
50 | // t2 = g(t1) | |||
51 | // ... | |||
52 | // X1 = a[i+1] | |||
53 | // t4 = f(X1) <-- I2 | |||
54 | // t5 = g(t4) | |||
55 | // t6 = op(t2, t5) | |||
56 | // cond_branch <Loop> | |||
57 | // | |||
58 | // In this pass, we look for PHIs such as X2 whose incoming values come only | |||
59 | // from the Loop Preheader and over the backedge and additionaly, both these | |||
60 | // values are the results of the same operation in terms of opcode. We call such | |||
61 | // a PHI node a dependence chain or DepChain. In this case, the dependence of X2 | |||
62 | // over X1 is carried over only one iteration and so the DepChain is only one | |||
63 | // PHI node long. | |||
64 | // | |||
65 | // Then, we traverse the uses of the PHI (X2) and the uses of the value of the | |||
66 | // PHI coming over the backedge (X1). We stop at the first pair of such users | |||
67 | // I1 (of X2) and I2 (of X1) that meet the following conditions. | |||
68 | // 1. I1 and I2 are the same operation, but with different operands. | |||
69 | // 2. X2 and X1 are used at the same operand number in the two instructions. | |||
70 | // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a | |||
71 | // a DepChain from Op1 to Op2 of the same length as that between X2 and X1. | |||
72 | // | |||
73 | // We then make the following transformation | |||
74 | // LoopPreheader: | |||
75 | // X0 = a[0]; | |||
76 | // Y0 = f(X0); | |||
77 | // Loop: | |||
78 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | |||
79 | // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> | |||
80 | // t1 = f(X2) <-- Will be removed by DCE. | |||
81 | // t2 = g(Y2) | |||
82 | // ... | |||
83 | // X1 = a[i+1] | |||
84 | // t4 = f(X1) | |||
85 | // t5 = g(t4) | |||
86 | // t6 = op(t2, t5) | |||
87 | // cond_branch <Loop> | |||
88 | // | |||
89 | // We proceed until we cannot find any more such instructions I1 and I2. | |||
90 | // | |||
91 | // --- DepChains & Loop carried dependences --- | |||
92 | // Consider a single basic block loop such as | |||
93 | // | |||
94 | // LoopPreheader: | |||
95 | // X0 = ... | |||
96 | // Y0 = ... | |||
97 | // Loop: | |||
98 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | |||
99 | // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> | |||
100 | // ... | |||
101 | // X1 = ... | |||
102 | // ... | |||
103 | // cond_branch <Loop> | |||
104 | // | |||
105 | // Then there is a dependence between X2 and X1 that goes back one iteration, | |||
106 | // i.e. X1 is used as X2 in the very next iteration. We represent this as a | |||
107 | // DepChain from X2 to X1 (X2->X1). | |||
108 | // Similarly, there is a dependence between Y2 and X1 that goes back two | |||
109 | // iterations. X1 is used as Y2 two iterations after it is computed. This is | |||
110 | // represented by a DepChain as (Y2->X2->X1). | |||
111 | // | |||
112 | // A DepChain has the following properties. | |||
113 | // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of | |||
114 | // iterations of carried dependence + 1. | |||
115 | // 2. All instructions in the DepChain except the last are PHIs. | |||
116 | // | |||
117 | //===----------------------------------------------------------------------===// | |||
118 | ||||
119 | #include "llvm/ADT/SetVector.h" | |||
120 | #include "llvm/ADT/SmallVector.h" | |||
121 | #include "llvm/ADT/Statistic.h" | |||
122 | #include "llvm/Analysis/LoopInfo.h" | |||
123 | #include "llvm/Analysis/LoopPass.h" | |||
124 | #include "llvm/IR/BasicBlock.h" | |||
125 | #include "llvm/IR/DerivedTypes.h" | |||
126 | #include "llvm/IR/IRBuilder.h" | |||
127 | #include "llvm/IR/Instruction.h" | |||
128 | #include "llvm/IR/Instructions.h" | |||
129 | #include "llvm/IR/IntrinsicInst.h" | |||
130 | #include "llvm/IR/Intrinsics.h" | |||
131 | #include "llvm/IR/Use.h" | |||
132 | #include "llvm/IR/User.h" | |||
133 | #include "llvm/IR/Value.h" | |||
134 | #include "llvm/Pass.h" | |||
135 | #include "llvm/Support/Casting.h" | |||
136 | #include "llvm/Support/CommandLine.h" | |||
137 | #include "llvm/Support/Compiler.h" | |||
138 | #include "llvm/Support/Debug.h" | |||
139 | #include "llvm/Support/raw_ostream.h" | |||
140 | #include "llvm/Transforms/Scalar.h" | |||
141 | #include "llvm/Transforms/Utils.h" | |||
142 | #include <algorithm> | |||
143 | #include <cassert> | |||
144 | #include <cstddef> | |||
145 | #include <map> | |||
146 | #include <memory> | |||
147 | #include <set> | |||
148 | ||||
149 | using namespace llvm; | |||
150 | ||||
151 | #define DEBUG_TYPE"hexagon-vlcr" "hexagon-vlcr" | |||
152 | ||||
153 | STATISTIC(HexagonNumVectorLoopCarriedReuse,static llvm::Statistic HexagonNumVectorLoopCarriedReuse = {"hexagon-vlcr" , "HexagonNumVectorLoopCarriedReuse", "Number of values that were reused from a previous iteration." , {0}, {false}} | |||
154 | "Number of values that were reused from a previous iteration.")static llvm::Statistic HexagonNumVectorLoopCarriedReuse = {"hexagon-vlcr" , "HexagonNumVectorLoopCarriedReuse", "Number of values that were reused from a previous iteration." , {0}, {false}}; | |||
155 | ||||
156 | static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", | |||
157 | cl::Hidden, | |||
158 | cl::desc("Maximum distance of loop carried dependences that are handled"), | |||
159 | cl::init(2), cl::ZeroOrMore); | |||
160 | ||||
161 | namespace llvm { | |||
162 | ||||
163 | void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); | |||
164 | Pass *createHexagonVectorLoopCarriedReusePass(); | |||
165 | ||||
166 | } // end namespace llvm | |||
167 | ||||
168 | namespace { | |||
169 | ||||
170 | // See info about DepChain in the comments at the top of this file. | |||
171 | using ChainOfDependences = SmallVector<Instruction *, 4>; | |||
172 | ||||
173 | class DepChain { | |||
174 | ChainOfDependences Chain; | |||
175 | ||||
176 | public: | |||
177 | bool isIdentical(DepChain &Other) const { | |||
178 | if (Other.size() != size()) | |||
179 | return false; | |||
180 | ChainOfDependences &OtherChain = Other.getChain(); | |||
181 | for (int i = 0; i < size(); ++i) { | |||
182 | if (Chain[i] != OtherChain[i]) | |||
183 | return false; | |||
184 | } | |||
185 | return true; | |||
186 | } | |||
187 | ||||
188 | ChainOfDependences &getChain() { | |||
189 | return Chain; | |||
190 | } | |||
191 | ||||
192 | int size() const { | |||
193 | return Chain.size(); | |||
194 | } | |||
195 | ||||
196 | void clear() { | |||
197 | Chain.clear(); | |||
198 | } | |||
199 | ||||
200 | void push_back(Instruction *I) { | |||
201 | Chain.push_back(I); | |||
202 | } | |||
203 | ||||
204 | int iterations() const { | |||
205 | return size() - 1; | |||
206 | } | |||
207 | ||||
208 | Instruction *front() const { | |||
209 | return Chain.front(); | |||
210 | } | |||
211 | ||||
212 | Instruction *back() const { | |||
213 | return Chain.back(); | |||
214 | } | |||
215 | ||||
216 | Instruction *&operator[](const int index) { | |||
217 | return Chain[index]; | |||
218 | } | |||
219 | ||||
220 | friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); | |||
221 | }; | |||
222 | ||||
223 | LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) | |||
224 | raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { | |||
225 | const ChainOfDependences &CD = D.Chain; | |||
226 | int ChainSize = CD.size(); | |||
227 | OS << "**DepChain Start::**\n"; | |||
228 | for (int i = 0; i < ChainSize -1; ++i) { | |||
229 | OS << *(CD[i]) << " -->\n"; | |||
230 | } | |||
231 | OS << *CD[ChainSize-1] << "\n"; | |||
232 | return OS; | |||
233 | } | |||
234 | ||||
235 | struct ReuseValue { | |||
236 | Instruction *Inst2Replace = nullptr; | |||
237 | ||||
238 | // In the new PHI node that we'll construct this is the value that'll be | |||
239 | // used over the backedge. This is teh value that gets reused from a | |||
240 | // previous iteration. | |||
241 | Instruction *BackedgeInst = nullptr; | |||
242 | ||||
243 | ReuseValue() = default; | |||
244 | ||||
245 | void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; } | |||
246 | bool isDefined() { return Inst2Replace != nullptr; } | |||
247 | }; | |||
248 | ||||
249 | LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) | |||
250 | raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { | |||
251 | OS << "** ReuseValue ***\n"; | |||
252 | OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; | |||
253 | OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n"; | |||
254 | return OS; | |||
255 | } | |||
256 | ||||
257 | class HexagonVectorLoopCarriedReuse : public LoopPass { | |||
258 | public: | |||
259 | static char ID; | |||
260 | ||||
261 | explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) { | |||
262 | PassRegistry *PR = PassRegistry::getPassRegistry(); | |||
263 | initializeHexagonVectorLoopCarriedReusePass(*PR); | |||
264 | } | |||
265 | ||||
266 | StringRef getPassName() const override { | |||
267 | return "Hexagon-specific loop carried reuse for HVX vectors"; | |||
268 | } | |||
269 | ||||
270 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
271 | AU.addRequired<LoopInfoWrapperPass>(); | |||
272 | AU.addRequiredID(LoopSimplifyID); | |||
273 | AU.addRequiredID(LCSSAID); | |||
274 | AU.addPreservedID(LCSSAID); | |||
275 | AU.setPreservesCFG(); | |||
276 | } | |||
277 | ||||
278 | bool runOnLoop(Loop *L, LPPassManager &LPM) override; | |||
279 | ||||
280 | private: | |||
281 | SetVector<DepChain *> Dependences; | |||
282 | std::set<Instruction *> ReplacedInsts; | |||
283 | Loop *CurLoop; | |||
284 | ReuseValue ReuseCandidate; | |||
285 | ||||
286 | bool doVLCR(); | |||
287 | void findLoopCarriedDeps(); | |||
288 | void findValueToReuse(); | |||
289 | void findDepChainFromPHI(Instruction *I, DepChain &D); | |||
290 | void reuseValue(); | |||
291 | Value *findValueInBlock(Value *Op, BasicBlock *BB); | |||
292 | bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters); | |||
293 | DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2); | |||
294 | bool isEquivalentOperation(Instruction *I1, Instruction *I2); | |||
295 | bool canReplace(Instruction *I); | |||
296 | }; | |||
297 | ||||
298 | } // end anonymous namespace | |||
299 | ||||
300 | char HexagonVectorLoopCarriedReuse::ID = 0; | |||
301 | ||||
302 | INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",static void *initializeHexagonVectorLoopCarriedReusePassOnce( PassRegistry &Registry) { | |||
303 | "Hexagon-specific predictive commoning for HVX vectors", false, false)static void *initializeHexagonVectorLoopCarriedReusePassOnce( PassRegistry &Registry) { | |||
304 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); | |||
305 | INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry); | |||
306 | INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry); | |||
307 | INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",PassInfo *PI = new PassInfo( "Hexagon-specific predictive commoning for HVX vectors" , "hexagon-vlcr", &HexagonVectorLoopCarriedReuse::ID, PassInfo ::NormalCtor_t(callDefaultCtor<HexagonVectorLoopCarriedReuse >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonVectorLoopCarriedReusePassFlag ; void llvm::initializeHexagonVectorLoopCarriedReusePass(PassRegistry &Registry) { llvm::call_once(InitializeHexagonVectorLoopCarriedReusePassFlag , initializeHexagonVectorLoopCarriedReusePassOnce, std::ref(Registry )); } | |||
308 | "Hexagon-specific predictive commoning for HVX vectors", false, false)PassInfo *PI = new PassInfo( "Hexagon-specific predictive commoning for HVX vectors" , "hexagon-vlcr", &HexagonVectorLoopCarriedReuse::ID, PassInfo ::NormalCtor_t(callDefaultCtor<HexagonVectorLoopCarriedReuse >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonVectorLoopCarriedReusePassFlag ; void llvm::initializeHexagonVectorLoopCarriedReusePass(PassRegistry &Registry) { llvm::call_once(InitializeHexagonVectorLoopCarriedReusePassFlag , initializeHexagonVectorLoopCarriedReusePassOnce, std::ref(Registry )); } | |||
309 | ||||
310 | bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) { | |||
311 | if (skipLoop(L)) | |||
| ||||
312 | return false; | |||
313 | ||||
314 | if (!L->getLoopPreheader()) | |||
315 | return false; | |||
316 | ||||
317 | // Work only on innermost loops. | |||
318 | if (!L->getSubLoops().empty()) | |||
319 | return false; | |||
320 | ||||
321 | // Work only on single basic blocks loops. | |||
322 | if (L->getNumBlocks() != 1) | |||
323 | return false; | |||
324 | ||||
325 | CurLoop = L; | |||
326 | ||||
327 | return doVLCR(); | |||
328 | } | |||
329 | ||||
330 | bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1, | |||
331 | Instruction *I2) { | |||
332 | if (!I1->isSameOperationAs(I2)) | |||
333 | return false; | |||
334 | // This check is in place specifically for intrinsics. isSameOperationAs will | |||
335 | // return two for any two hexagon intrinsics because they are essentially the | |||
336 | // same instruciton (CallInst). We need to scratch the surface to see if they | |||
337 | // are calls to the same function. | |||
338 | if (CallInst *C1 = dyn_cast<CallInst>(I1)) { | |||
339 | if (CallInst *C2 = dyn_cast<CallInst>(I2)) { | |||
340 | if (C1->getCalledFunction() != C2->getCalledFunction()) | |||
341 | return false; | |||
342 | } | |||
343 | } | |||
344 | ||||
345 | // If both the Instructions are of Vector Type and any of the element | |||
346 | // is integer constant, check their values too for equivalence. | |||
347 | if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) { | |||
348 | unsigned NumOperands = I1->getNumOperands(); | |||
349 | for (unsigned i = 0; i < NumOperands; ++i) { | |||
350 | ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i)); | |||
351 | ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i)); | |||
352 | if(!C1) continue; | |||
353 | assert(C2)(static_cast <bool> (C2) ? void (0) : __assert_fail ("C2" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 353, __extension__ __PRETTY_FUNCTION__)); | |||
354 | if (C1->getSExtValue() != C2->getSExtValue()) | |||
355 | return false; | |||
356 | } | |||
357 | } | |||
358 | ||||
359 | return true; | |||
360 | } | |||
361 | ||||
362 | bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) { | |||
363 | const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); | |||
364 | if (II && | |||
365 | (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi || | |||
366 | II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) { | |||
367 | DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Not considering for reuse: " << *II << "\n"; } } while (false); | |||
368 | return false; | |||
369 | } | |||
370 | return true; | |||
371 | } | |||
372 | void HexagonVectorLoopCarriedReuse::findValueToReuse() { | |||
373 | for (auto *D : Dependences) { | |||
374 | DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Processing dependence " << *(D->front()) << "\n"; } } while (false); | |||
375 | if (D->iterations() > HexagonVLCRIterationLim) { | |||
376 | DEBUG(dbgs() <<do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << ".. Skipping because number of iterations > than the limit\n" ; } } while (false) | |||
377 | ".. Skipping because number of iterations > than the limit\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << ".. Skipping because number of iterations > than the limit\n" ; } } while (false); | |||
378 | continue; | |||
379 | } | |||
380 | ||||
381 | PHINode *PN = cast<PHINode>(D->front()); | |||
382 | Instruction *BEInst = D->back(); | |||
383 | int Iters = D->iterations(); | |||
384 | BasicBlock *BB = PN->getParent(); | |||
385 | DEBUG(dbgs() << "Checking if any uses of " << *PN << " can be reused\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Checking if any uses of " << *PN << " can be reused\n"; } } while (false); | |||
386 | ||||
387 | SmallVector<Instruction *, 4> PNUsers; | |||
388 | for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { | |||
389 | Use &U = *UI; | |||
390 | Instruction *User = cast<Instruction>(U.getUser()); | |||
391 | ||||
392 | if (User->getParent() != BB) | |||
393 | continue; | |||
394 | if (ReplacedInsts.count(User)) { | |||
395 | DEBUG(dbgs() << *User << " has already been replaced. Skipping...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << *User << " has already been replaced. Skipping...\n" ; } } while (false); | |||
396 | continue; | |||
397 | } | |||
398 | if (isa<PHINode>(User)) | |||
399 | continue; | |||
400 | if (User->mayHaveSideEffects()) | |||
401 | continue; | |||
402 | if (!canReplace(User)) | |||
403 | continue; | |||
404 | ||||
405 | PNUsers.push_back(User); | |||
406 | } | |||
407 | DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n" ; } } while (false); | |||
408 | ||||
409 | // For each interesting use I of PN, find an Instruction BEUser that | |||
410 | // performs the same operation as I on BEInst and whose other operands, | |||
411 | // if any, can also be rematerialized in OtherBB. We stop when we find the | |||
412 | // first such Instruction BEUser. This is because once BEUser is | |||
413 | // rematerialized in OtherBB, we may find more such "fixup" opportunities | |||
414 | // in this block. So, we'll start over again. | |||
415 | for (Instruction *I : PNUsers) { | |||
416 | for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E; | |||
417 | ++UI) { | |||
418 | Use &U = *UI; | |||
419 | Instruction *BEUser = cast<Instruction>(U.getUser()); | |||
420 | ||||
421 | if (BEUser->getParent() != BB) | |||
422 | continue; | |||
423 | if (!isEquivalentOperation(I, BEUser)) | |||
424 | continue; | |||
425 | ||||
426 | int NumOperands = I->getNumOperands(); | |||
427 | ||||
428 | for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { | |||
429 | Value *Op = I->getOperand(OpNo); | |||
430 | Instruction *OpInst = dyn_cast<Instruction>(Op); | |||
431 | if (!OpInst) | |||
432 | continue; | |||
433 | ||||
434 | Value *BEOp = BEUser->getOperand(OpNo); | |||
435 | Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); | |||
436 | ||||
437 | if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) { | |||
438 | BEUser = nullptr; | |||
439 | break; | |||
440 | } | |||
441 | } | |||
442 | if (BEUser) { | |||
443 | DEBUG(dbgs() << "Found Value for reuse.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Found Value for reuse.\n" ; } } while (false); | |||
444 | ReuseCandidate.Inst2Replace = I; | |||
445 | ReuseCandidate.BackedgeInst = BEUser; | |||
446 | return; | |||
447 | } else | |||
448 | ReuseCandidate.reset(); | |||
449 | } | |||
450 | } | |||
451 | } | |||
452 | ReuseCandidate.reset(); | |||
453 | } | |||
454 | ||||
455 | Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, | |||
456 | BasicBlock *BB) { | |||
457 | PHINode *PN = dyn_cast<PHINode>(Op); | |||
458 | assert(PN)(static_cast <bool> (PN) ? void (0) : __assert_fail ("PN" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 458, __extension__ __PRETTY_FUNCTION__)); | |||
459 | Value *ValueInBlock = PN->getIncomingValueForBlock(BB); | |||
460 | return ValueInBlock; | |||
461 | } | |||
462 | ||||
463 | void HexagonVectorLoopCarriedReuse::reuseValue() { | |||
464 | DEBUG(dbgs() << ReuseCandidate)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << ReuseCandidate; } } while (false); | |||
465 | Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; | |||
466 | Instruction *BEInst = ReuseCandidate.BackedgeInst; | |||
467 | int NumOperands = Inst2Replace->getNumOperands(); | |||
468 | std::map<Instruction *, DepChain *> DepChains; | |||
469 | int Iterations = -1; | |||
470 | BasicBlock *LoopPH = CurLoop->getLoopPreheader(); | |||
471 | ||||
472 | for (int i = 0; i < NumOperands; ++i) { | |||
473 | Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i)); | |||
474 | if(!I) | |||
475 | continue; | |||
476 | else { | |||
477 | Instruction *J = cast<Instruction>(BEInst->getOperand(i)); | |||
478 | DepChain *D = getDepChainBtwn(I, J); | |||
479 | ||||
480 | assert(D &&(static_cast <bool> (D && "No DepChain between corresponding operands in ReuseCandidate\n" ) ? void (0) : __assert_fail ("D && \"No DepChain between corresponding operands in ReuseCandidate\\n\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 481, __extension__ __PRETTY_FUNCTION__)) | |||
481 | "No DepChain between corresponding operands in ReuseCandidate\n")(static_cast <bool> (D && "No DepChain between corresponding operands in ReuseCandidate\n" ) ? void (0) : __assert_fail ("D && \"No DepChain between corresponding operands in ReuseCandidate\\n\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 481, __extension__ __PRETTY_FUNCTION__)); | |||
482 | if (Iterations == -1) | |||
483 | Iterations = D->iterations(); | |||
484 | assert(Iterations == D->iterations() && "Iterations mismatch")(static_cast <bool> (Iterations == D->iterations() && "Iterations mismatch") ? void (0) : __assert_fail ("Iterations == D->iterations() && \"Iterations mismatch\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 484, __extension__ __PRETTY_FUNCTION__)); | |||
485 | DepChains[I] = D; | |||
486 | } | |||
487 | } | |||
488 | ||||
489 | DEBUG(dbgs() << "reuseValue is making the following changes\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "reuseValue is making the following changes\n" ; } } while (false); | |||
490 | ||||
491 | SmallVector<Instruction *, 4> InstsInPreheader; | |||
492 | for (int i = 0; i < Iterations; ++i) { | |||
493 | Instruction *InstInPreheader = Inst2Replace->clone(); | |||
494 | SmallVector<Value *, 4> Ops; | |||
495 | for (int j = 0; j < NumOperands; ++j) { | |||
496 | Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j)); | |||
497 | if (!I) | |||
498 | continue; | |||
499 | // Get the DepChain corresponding to this operand. | |||
500 | DepChain &D = *DepChains[I]; | |||
501 | // Get the PHI for the iteration number and find | |||
502 | // the incoming value from the Loop Preheader for | |||
503 | // that PHI. | |||
504 | Value *ValInPreheader = findValueInBlock(D[i], LoopPH); | |||
505 | InstInPreheader->setOperand(j, ValInPreheader); | |||
506 | } | |||
507 | InstsInPreheader.push_back(InstInPreheader); | |||
508 | InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr"); | |||
509 | InstInPreheader->insertBefore(LoopPH->getTerminator()); | |||
510 | DEBUG(dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName() << "\n"; } } while (false) | |||
511 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName() << "\n"; } } while (false); | |||
512 | } | |||
513 | BasicBlock *BB = BEInst->getParent(); | |||
514 | IRBuilder<> IRB(BB); | |||
515 | IRB.SetInsertPoint(BB->getFirstNonPHI()); | |||
516 | Value *BEVal = BEInst; | |||
517 | PHINode *NewPhi; | |||
518 | for (int i = Iterations-1; i >=0 ; --i) { | |||
519 | Instruction *InstInPreheader = InstsInPreheader[i]; | |||
520 | NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2); | |||
521 | NewPhi->addIncoming(InstInPreheader, LoopPH); | |||
522 | NewPhi->addIncoming(BEVal, BB); | |||
523 | DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n"; } } while (false); | |||
524 | BEVal = NewPhi; | |||
525 | } | |||
526 | // We are in LCSSA form. So, a value defined inside the Loop is used only | |||
527 | // inside the loop. So, the following is safe. | |||
528 | Inst2Replace->replaceAllUsesWith(NewPhi); | |||
| ||||
529 | ReplacedInsts.insert(Inst2Replace); | |||
530 | ++HexagonNumVectorLoopCarriedReuse; | |||
531 | } | |||
532 | ||||
533 | bool HexagonVectorLoopCarriedReuse::doVLCR() { | |||
534 | assert(CurLoop->getSubLoops().empty() &&(static_cast <bool> (CurLoop->getSubLoops().empty() && "Can do VLCR on the innermost loop only") ? void (0) : __assert_fail ("CurLoop->getSubLoops().empty() && \"Can do VLCR on the innermost loop only\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 535, __extension__ __PRETTY_FUNCTION__)) | |||
535 | "Can do VLCR on the innermost loop only")(static_cast <bool> (CurLoop->getSubLoops().empty() && "Can do VLCR on the innermost loop only") ? void (0) : __assert_fail ("CurLoop->getSubLoops().empty() && \"Can do VLCR on the innermost loop only\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 535, __extension__ __PRETTY_FUNCTION__)); | |||
536 | assert((CurLoop->getNumBlocks() == 1) &&(static_cast <bool> ((CurLoop->getNumBlocks() == 1) && "Can do VLCR only on single block loops") ? void (0) : __assert_fail ("(CurLoop->getNumBlocks() == 1) && \"Can do VLCR only on single block loops\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 537, __extension__ __PRETTY_FUNCTION__)) | |||
537 | "Can do VLCR only on single block loops")(static_cast <bool> ((CurLoop->getNumBlocks() == 1) && "Can do VLCR only on single block loops") ? void (0) : __assert_fail ("(CurLoop->getNumBlocks() == 1) && \"Can do VLCR only on single block loops\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 537, __extension__ __PRETTY_FUNCTION__)); | |||
538 | ||||
539 | bool Changed = false; | |||
540 | bool Continue; | |||
541 | ||||
542 | DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"; } } while (false); | |||
543 | do { | |||
544 | // Reset datastructures. | |||
545 | Dependences.clear(); | |||
546 | Continue = false; | |||
547 | ||||
548 | findLoopCarriedDeps(); | |||
549 | findValueToReuse(); | |||
550 | if (ReuseCandidate.isDefined()) { | |||
551 | reuseValue(); | |||
552 | Changed = true; | |||
553 | Continue = true; | |||
554 | } | |||
555 | llvm::for_each(Dependences, std::default_delete<DepChain>()); | |||
556 | } while (Continue); | |||
557 | return Changed; | |||
558 | } | |||
559 | ||||
560 | void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, | |||
561 | DepChain &D) { | |||
562 | PHINode *PN = dyn_cast<PHINode>(I); | |||
563 | if (!PN) { | |||
564 | D.push_back(I); | |||
565 | return; | |||
566 | } else { | |||
567 | auto NumIncomingValues = PN->getNumIncomingValues(); | |||
568 | if (NumIncomingValues != 2) { | |||
569 | D.clear(); | |||
570 | return; | |||
571 | } | |||
572 | ||||
573 | BasicBlock *BB = PN->getParent(); | |||
574 | if (BB != CurLoop->getHeader()) { | |||
575 | D.clear(); | |||
576 | return; | |||
577 | } | |||
578 | ||||
579 | Value *BEVal = PN->getIncomingValueForBlock(BB); | |||
580 | Instruction *BEInst = dyn_cast<Instruction>(BEVal); | |||
581 | // This is a single block loop with a preheader, so at least | |||
582 | // one value should come over the backedge. | |||
583 | assert(BEInst && "There should be a value over the backedge")(static_cast <bool> (BEInst && "There should be a value over the backedge" ) ? void (0) : __assert_fail ("BEInst && \"There should be a value over the backedge\"" , "/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp" , 583, __extension__ __PRETTY_FUNCTION__)); | |||
584 | ||||
585 | Value *PreHdrVal = | |||
586 | PN->getIncomingValueForBlock(CurLoop->getLoopPreheader()); | |||
587 | if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) { | |||
588 | D.clear(); | |||
589 | return; | |||
590 | } | |||
591 | D.push_back(PN); | |||
592 | findDepChainFromPHI(BEInst, D); | |||
593 | } | |||
594 | } | |||
595 | ||||
596 | bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1, | |||
597 | Instruction *I2, | |||
598 | int Iters) { | |||
599 | for (auto *D : Dependences) { | |||
600 | if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters) | |||
601 | return true; | |||
602 | } | |||
603 | return false; | |||
604 | } | |||
605 | ||||
606 | DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, | |||
607 | Instruction *I2) { | |||
608 | for (auto *D : Dependences) { | |||
609 | if (D->front() == I1 && D->back() == I2) | |||
610 | return D; | |||
611 | } | |||
612 | return nullptr; | |||
613 | } | |||
614 | ||||
615 | void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { | |||
616 | BasicBlock *BB = CurLoop->getHeader(); | |||
617 | for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { | |||
618 | auto *PN = cast<PHINode>(I); | |||
619 | if (!isa<VectorType>(PN->getType())) | |||
620 | continue; | |||
621 | ||||
622 | DepChain *D = new DepChain(); | |||
623 | findDepChainFromPHI(PN, *D); | |||
624 | if (D->size() != 0) | |||
625 | Dependences.insert(D); | |||
626 | else | |||
627 | delete D; | |||
628 | } | |||
629 | DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { dbgs() << "Found " << Dependences .size() << " dependences\n"; } } while (false); | |||
630 | DEBUG(for (size_t i = 0; i < Dependences.size(); ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { for (size_t i = 0; i < Dependences.size (); ++i) { dbgs() << *Dependences[i] << "\n"; }; } } while (false) | |||
631 | dbgs() << *Dependences[i] << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { for (size_t i = 0; i < Dependences.size (); ++i) { dbgs() << *Dependences[i] << "\n"; }; } } while (false) | |||
632 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-vlcr")) { for (size_t i = 0; i < Dependences.size (); ++i) { dbgs() << *Dependences[i] << "\n"; }; } } while (false); | |||
633 | } | |||
634 | ||||
635 | Pass *llvm::createHexagonVectorLoopCarriedReusePass() { | |||
636 | return new HexagonVectorLoopCarriedReuse(); | |||
637 | } |