File: | build/source/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp |
Warning: | line 432, column 20 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// | |||
9 | /// \file | |||
10 | /// This file implements a set of utility VPlan to VPlan transformations. | |||
11 | /// | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "VPlanTransforms.h" | |||
15 | #include "VPlanDominatorTree.h" | |||
16 | #include "VPRecipeBuilder.h" | |||
17 | #include "VPlanCFG.h" | |||
18 | #include "llvm/ADT/PostOrderIterator.h" | |||
19 | #include "llvm/ADT/SetVector.h" | |||
20 | #include "llvm/Analysis/IVDescriptors.h" | |||
21 | #include "llvm/Analysis/VectorUtils.h" | |||
22 | #include "llvm/IR/Intrinsics.h" | |||
23 | ||||
24 | using namespace llvm; | |||
25 | ||||
26 | void VPlanTransforms::VPInstructionsToVPRecipes( | |||
27 | VPlanPtr &Plan, | |||
28 | function_ref<const InductionDescriptor *(PHINode *)> | |||
29 | GetIntOrFpInductionDescriptor, | |||
30 | ScalarEvolution &SE, const TargetLibraryInfo &TLI) { | |||
31 | ||||
32 | ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( | |||
33 | Plan->getEntry()); | |||
34 | for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { | |||
35 | VPRecipeBase *Term = VPBB->getTerminator(); | |||
36 | auto EndIter = Term ? Term->getIterator() : VPBB->end(); | |||
37 | // Introduce each ingredient into VPlan. | |||
38 | for (VPRecipeBase &Ingredient : | |||
39 | make_early_inc_range(make_range(VPBB->begin(), EndIter))) { | |||
40 | ||||
41 | VPValue *VPV = Ingredient.getVPSingleValue(); | |||
42 | Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue()); | |||
43 | ||||
44 | VPRecipeBase *NewRecipe = nullptr; | |||
45 | if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) { | |||
46 | auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); | |||
47 | if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { | |||
48 | VPValue *Start = Plan->getVPValueOrAddLiveIn(II->getStartValue()); | |||
49 | VPValue *Step = | |||
50 | vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); | |||
51 | NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II); | |||
52 | } else { | |||
53 | Plan->addVPValue(Phi, VPPhi); | |||
54 | continue; | |||
55 | } | |||
56 | } else { | |||
57 | assert(isa<VPInstruction>(&Ingredient) &&(static_cast <bool> (isa<VPInstruction>(&Ingredient ) && "only VPInstructions expected here") ? void (0) : __assert_fail ("isa<VPInstruction>(&Ingredient) && \"only VPInstructions expected here\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 58, __extension__ __PRETTY_FUNCTION__)) | |||
58 | "only VPInstructions expected here")(static_cast <bool> (isa<VPInstruction>(&Ingredient ) && "only VPInstructions expected here") ? void (0) : __assert_fail ("isa<VPInstruction>(&Ingredient) && \"only VPInstructions expected here\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 58, __extension__ __PRETTY_FUNCTION__)); | |||
59 | assert(!isa<PHINode>(Inst) && "phis should be handled above")(static_cast <bool> (!isa<PHINode>(Inst) && "phis should be handled above") ? void (0) : __assert_fail ( "!isa<PHINode>(Inst) && \"phis should be handled above\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 59, __extension__ __PRETTY_FUNCTION__)); | |||
60 | // Create VPWidenMemoryInstructionRecipe for loads and stores. | |||
61 | if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { | |||
62 | NewRecipe = new VPWidenMemoryInstructionRecipe( | |||
63 | *Load, Ingredient.getOperand(0), nullptr /*Mask*/, | |||
64 | false /*Consecutive*/, false /*Reverse*/); | |||
65 | } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { | |||
66 | NewRecipe = new VPWidenMemoryInstructionRecipe( | |||
67 | *Store, Ingredient.getOperand(1), Ingredient.getOperand(0), | |||
68 | nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); | |||
69 | } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { | |||
70 | NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); | |||
71 | } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { | |||
72 | NewRecipe = | |||
73 | new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()), | |||
74 | getVectorIntrinsicIDForCall(CI, &TLI)); | |||
75 | } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { | |||
76 | NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands()); | |||
77 | } else if (auto *CI = dyn_cast<CastInst>(Inst)) { | |||
78 | NewRecipe = new VPWidenCastRecipe( | |||
79 | CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI); | |||
80 | } else { | |||
81 | NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands()); | |||
82 | } | |||
83 | } | |||
84 | ||||
85 | NewRecipe->insertBefore(&Ingredient); | |||
86 | if (NewRecipe->getNumDefinedValues() == 1) | |||
87 | VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue()); | |||
88 | else | |||
89 | assert(NewRecipe->getNumDefinedValues() == 0 &&(static_cast <bool> (NewRecipe->getNumDefinedValues( ) == 0 && "Only recpies with zero or one defined values expected" ) ? void (0) : __assert_fail ("NewRecipe->getNumDefinedValues() == 0 && \"Only recpies with zero or one defined values expected\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 90, __extension__ __PRETTY_FUNCTION__)) | |||
90 | "Only recpies with zero or one defined values expected")(static_cast <bool> (NewRecipe->getNumDefinedValues( ) == 0 && "Only recpies with zero or one defined values expected" ) ? void (0) : __assert_fail ("NewRecipe->getNumDefinedValues() == 0 && \"Only recpies with zero or one defined values expected\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 90, __extension__ __PRETTY_FUNCTION__)); | |||
91 | Ingredient.eraseFromParent(); | |||
92 | } | |||
93 | } | |||
94 | } | |||
95 | ||||
96 | static bool sinkScalarOperands(VPlan &Plan) { | |||
97 | auto Iter = vp_depth_first_deep(Plan.getEntry()); | |||
98 | bool Changed = false; | |||
99 | // First, collect the operands of all recipes in replicate blocks as seeds for | |||
100 | // sinking. | |||
101 | SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList; | |||
102 | for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) { | |||
103 | VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock(); | |||
104 | if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2) | |||
105 | continue; | |||
106 | VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]); | |||
107 | if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock()) | |||
108 | continue; | |||
109 | for (auto &Recipe : *VPBB) { | |||
110 | for (VPValue *Op : Recipe.operands()) | |||
111 | if (auto *Def = Op->getDefiningRecipe()) | |||
112 | WorkList.insert(std::make_pair(VPBB, Def)); | |||
113 | } | |||
114 | } | |||
115 | ||||
116 | bool ScalarVFOnly = Plan.hasScalarVFOnly(); | |||
117 | // Try to sink each replicate or scalar IV steps recipe in the worklist. | |||
118 | for (unsigned I = 0; I != WorkList.size(); ++I) { | |||
119 | VPBasicBlock *SinkTo; | |||
120 | VPRecipeBase *SinkCandidate; | |||
121 | std::tie(SinkTo, SinkCandidate) = WorkList[I]; | |||
122 | if (SinkCandidate->getParent() == SinkTo || | |||
123 | SinkCandidate->mayHaveSideEffects() || | |||
124 | SinkCandidate->mayReadOrWriteMemory()) | |||
125 | continue; | |||
126 | if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) { | |||
127 | if (!ScalarVFOnly && RepR->isUniform()) | |||
128 | continue; | |||
129 | } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate)) | |||
130 | continue; | |||
131 | ||||
132 | bool NeedsDuplicating = false; | |||
133 | // All recipe users of the sink candidate must be in the same block SinkTo | |||
134 | // or all users outside of SinkTo must be uniform-after-vectorization ( | |||
135 | // i.e., only first lane is used) . In the latter case, we need to duplicate | |||
136 | // SinkCandidate. | |||
137 | auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, | |||
138 | SinkCandidate](VPUser *U) { | |||
139 | auto *UI = dyn_cast<VPRecipeBase>(U); | |||
140 | if (!UI) | |||
141 | return false; | |||
142 | if (UI->getParent() == SinkTo) | |||
143 | return true; | |||
144 | NeedsDuplicating = | |||
145 | UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue()); | |||
146 | // We only know how to duplicate VPRecipeRecipes for now. | |||
147 | return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate); | |||
148 | }; | |||
149 | if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser)) | |||
150 | continue; | |||
151 | ||||
152 | if (NeedsDuplicating) { | |||
153 | if (ScalarVFOnly) | |||
154 | continue; | |||
155 | Instruction *I = cast<Instruction>( | |||
156 | cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue()); | |||
157 | auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true); | |||
158 | // TODO: add ".cloned" suffix to name of Clone's VPValue. | |||
159 | ||||
160 | Clone->insertBefore(SinkCandidate); | |||
161 | for (auto *U : to_vector(SinkCandidate->getVPSingleValue()->users())) { | |||
162 | auto *UI = cast<VPRecipeBase>(U); | |||
163 | if (UI->getParent() == SinkTo) | |||
164 | continue; | |||
165 | ||||
166 | for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) { | |||
167 | if (UI->getOperand(Idx) != SinkCandidate->getVPSingleValue()) | |||
168 | continue; | |||
169 | UI->setOperand(Idx, Clone); | |||
170 | } | |||
171 | } | |||
172 | } | |||
173 | SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); | |||
174 | for (VPValue *Op : SinkCandidate->operands()) | |||
175 | if (auto *Def = Op->getDefiningRecipe()) | |||
176 | WorkList.insert(std::make_pair(SinkTo, Def)); | |||
177 | Changed = true; | |||
178 | } | |||
179 | return Changed; | |||
180 | } | |||
181 | ||||
182 | /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return | |||
183 | /// the mask. | |||
184 | VPValue *getPredicatedMask(VPRegionBlock *R) { | |||
185 | auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry()); | |||
186 | if (!EntryBB || EntryBB->size() != 1 || | |||
187 | !isa<VPBranchOnMaskRecipe>(EntryBB->begin())) | |||
188 | return nullptr; | |||
189 | ||||
190 | return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0); | |||
191 | } | |||
192 | ||||
193 | /// If \p R is a triangle region, return the 'then' block of the triangle. | |||
194 | static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { | |||
195 | auto *EntryBB = cast<VPBasicBlock>(R->getEntry()); | |||
196 | if (EntryBB->getNumSuccessors() != 2) | |||
197 | return nullptr; | |||
198 | ||||
199 | auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]); | |||
200 | auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]); | |||
201 | if (!Succ0 || !Succ1) | |||
202 | return nullptr; | |||
203 | ||||
204 | if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1) | |||
205 | return nullptr; | |||
206 | if (Succ0->getSingleSuccessor() == Succ1) | |||
207 | return Succ0; | |||
208 | if (Succ1->getSingleSuccessor() == Succ0) | |||
209 | return Succ1; | |||
210 | return nullptr; | |||
211 | } | |||
212 | ||||
213 | // Merge replicate regions in their successor region, if a replicate region | |||
214 | // is connected to a successor replicate region with the same predicate by a | |||
215 | // single, empty VPBasicBlock. | |||
216 | static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { | |||
217 | SetVector<VPRegionBlock *> DeletedRegions; | |||
218 | ||||
219 | // Collect replicate regions followed by an empty block, followed by another | |||
220 | // replicate region with matching masks to process front. This is to avoid | |||
221 | // iterator invalidation issues while merging regions. | |||
222 | SmallVector<VPRegionBlock *, 8> WorkList; | |||
223 | for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>( | |||
224 | vp_depth_first_deep(Plan.getEntry()))) { | |||
225 | if (!Region1->isReplicator()) | |||
226 | continue; | |||
227 | auto *MiddleBasicBlock = | |||
228 | dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor()); | |||
229 | if (!MiddleBasicBlock || !MiddleBasicBlock->empty()) | |||
230 | continue; | |||
231 | ||||
232 | auto *Region2 = | |||
233 | dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); | |||
234 | if (!Region2 || !Region2->isReplicator()) | |||
235 | continue; | |||
236 | ||||
237 | VPValue *Mask1 = getPredicatedMask(Region1); | |||
238 | VPValue *Mask2 = getPredicatedMask(Region2); | |||
239 | if (!Mask1 || Mask1 != Mask2) | |||
240 | continue; | |||
241 | ||||
242 | assert(Mask1 && Mask2 && "both region must have conditions")(static_cast <bool> (Mask1 && Mask2 && "both region must have conditions" ) ? void (0) : __assert_fail ("Mask1 && Mask2 && \"both region must have conditions\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 242, __extension__ __PRETTY_FUNCTION__)); | |||
243 | WorkList.push_back(Region1); | |||
244 | } | |||
245 | ||||
246 | // Move recipes from Region1 to its successor region, if both are triangles. | |||
247 | for (VPRegionBlock *Region1 : WorkList) { | |||
248 | if (DeletedRegions.contains(Region1)) | |||
249 | continue; | |||
250 | auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor()); | |||
251 | auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); | |||
252 | ||||
253 | VPBasicBlock *Then1 = getPredicatedThenBlock(Region1); | |||
254 | VPBasicBlock *Then2 = getPredicatedThenBlock(Region2); | |||
255 | if (!Then1 || !Then2) | |||
256 | continue; | |||
257 | ||||
258 | // Note: No fusion-preventing memory dependencies are expected in either | |||
259 | // region. Such dependencies should be rejected during earlier dependence | |||
260 | // checks, which guarantee accesses can be re-ordered for vectorization. | |||
261 | // | |||
262 | // Move recipes to the successor region. | |||
263 | for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) | |||
264 | ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); | |||
265 | ||||
266 | auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor()); | |||
267 | auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor()); | |||
268 | ||||
269 | // Move VPPredInstPHIRecipes from the merge block to the successor region's | |||
270 | // merge block. Update all users inside the successor region to use the | |||
271 | // original values. | |||
272 | for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) { | |||
273 | VPValue *PredInst1 = | |||
274 | cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0); | |||
275 | VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue(); | |||
276 | for (VPUser *U : to_vector(Phi1ToMoveV->users())) { | |||
277 | auto *UI = dyn_cast<VPRecipeBase>(U); | |||
278 | if (!UI || UI->getParent() != Then2) | |||
279 | continue; | |||
280 | for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { | |||
281 | if (Phi1ToMoveV != U->getOperand(I)) | |||
282 | continue; | |||
283 | U->setOperand(I, PredInst1); | |||
284 | } | |||
285 | } | |||
286 | ||||
287 | Phi1ToMove.moveBefore(*Merge2, Merge2->begin()); | |||
288 | } | |||
289 | ||||
290 | // Finally, remove the first region. | |||
291 | for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) { | |||
292 | VPBlockUtils::disconnectBlocks(Pred, Region1); | |||
293 | VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock); | |||
294 | } | |||
295 | VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock); | |||
296 | DeletedRegions.insert(Region1); | |||
297 | } | |||
298 | ||||
299 | for (VPRegionBlock *ToDelete : DeletedRegions) | |||
300 | delete ToDelete; | |||
301 | return !DeletedRegions.empty(); | |||
302 | } | |||
303 | ||||
304 | static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, | |||
305 | VPlan &Plan) { | |||
306 | Instruction *Instr = PredRecipe->getUnderlyingInstr(); | |||
307 | // Build the triangular if-then region. | |||
308 | std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str(); | |||
309 | assert(Instr->getParent() && "Predicated instruction not in any basic block")(static_cast <bool> (Instr->getParent() && "Predicated instruction not in any basic block" ) ? void (0) : __assert_fail ("Instr->getParent() && \"Predicated instruction not in any basic block\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 309, __extension__ __PRETTY_FUNCTION__)); | |||
310 | auto *BlockInMask = PredRecipe->getMask(); | |||
311 | auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask); | |||
312 | auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe); | |||
313 | ||||
314 | // Replace predicated replicate recipe with a replicate recipe without a | |||
315 | // mask but in the replicate region. | |||
316 | auto *RecipeWithoutMask = new VPReplicateRecipe( | |||
317 | PredRecipe->getUnderlyingInstr(), | |||
318 | make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())), | |||
319 | PredRecipe->isUniform()); | |||
320 | auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); | |||
321 | ||||
322 | VPPredInstPHIRecipe *PHIRecipe = nullptr; | |||
323 | if (PredRecipe->getNumUsers() != 0) { | |||
324 | PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask); | |||
325 | PredRecipe->replaceAllUsesWith(PHIRecipe); | |||
326 | PHIRecipe->setOperand(0, RecipeWithoutMask); | |||
327 | } | |||
328 | PredRecipe->eraseFromParent(); | |||
329 | auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); | |||
330 | VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true); | |||
331 | ||||
332 | // Note: first set Entry as region entry and then connect successors starting | |||
333 | // from it in order, to propagate the "parent" of each VPBasicBlock. | |||
334 | VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry); | |||
335 | VPBlockUtils::connectBlocks(Pred, Exiting); | |||
336 | ||||
337 | return Region; | |||
338 | } | |||
339 | ||||
340 | static void addReplicateRegions(VPlan &Plan) { | |||
341 | SmallVector<VPReplicateRecipe *> WorkList; | |||
342 | for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( | |||
343 | vp_depth_first_deep(Plan.getEntry()))) { | |||
344 | for (VPRecipeBase &R : *VPBB) | |||
345 | if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) { | |||
346 | if (RepR->isPredicated()) | |||
347 | WorkList.push_back(RepR); | |||
348 | } | |||
349 | } | |||
350 | ||||
351 | unsigned BBNum = 0; | |||
352 | for (VPReplicateRecipe *RepR : WorkList) { | |||
353 | VPBasicBlock *CurrentBlock = RepR->getParent(); | |||
354 | VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator()); | |||
355 | ||||
356 | BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent(); | |||
357 | SplitBlock->setName( | |||
358 | OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : ""); | |||
359 | // Record predicated instructions for above packing optimizations. | |||
360 | VPBlockBase *Region = createReplicateRegion(RepR, Plan); | |||
361 | Region->setParent(CurrentBlock->getParent()); | |||
362 | VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock); | |||
363 | VPBlockUtils::connectBlocks(CurrentBlock, Region); | |||
364 | VPBlockUtils::connectBlocks(Region, SplitBlock); | |||
365 | } | |||
366 | } | |||
367 | ||||
368 | void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) { | |||
369 | // Convert masked VPReplicateRecipes to if-then region blocks. | |||
370 | addReplicateRegions(Plan); | |||
371 | ||||
372 | bool ShouldSimplify = true; | |||
373 | while (ShouldSimplify) { | |||
374 | ShouldSimplify = sinkScalarOperands(Plan); | |||
375 | ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan); | |||
376 | ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(Plan); | |||
377 | } | |||
378 | } | |||
379 | bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) { | |||
380 | SmallVector<VPBasicBlock *> WorkList; | |||
381 | for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( | |||
382 | vp_depth_first_deep(Plan.getEntry()))) { | |||
383 | auto *PredVPBB = | |||
384 | dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor()); | |||
385 | if (PredVPBB && PredVPBB->getNumSuccessors() == 1) | |||
386 | WorkList.push_back(VPBB); | |||
387 | } | |||
388 | ||||
389 | for (VPBasicBlock *VPBB : WorkList) { | |||
390 | VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor()); | |||
391 | for (VPRecipeBase &R : make_early_inc_range(*VPBB)) | |||
392 | R.moveBefore(*PredVPBB, PredVPBB->end()); | |||
393 | VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); | |||
394 | auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent()); | |||
395 | if (ParentRegion && ParentRegion->getExiting() == VPBB) | |||
396 | ParentRegion->setExiting(PredVPBB); | |||
397 | for (auto *Succ : to_vector(VPBB->successors())) { | |||
398 | VPBlockUtils::disconnectBlocks(VPBB, Succ); | |||
399 | VPBlockUtils::connectBlocks(PredVPBB, Succ); | |||
400 | } | |||
401 | delete VPBB; | |||
402 | } | |||
403 | return !WorkList.empty(); | |||
404 | } | |||
405 | ||||
406 | void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { | |||
407 | for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { | |||
408 | auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); | |||
| ||||
409 | if (!IV
| |||
410 | continue; | |||
411 | ||||
412 | // A sequence of IR Casts has potentially been recorded for IV, which | |||
413 | // *must be bypassed* when the IV is vectorized, because the vectorized IV | |||
414 | // will produce the desired casted value. This sequence forms a def-use | |||
415 | // chain and is provided in reverse order, ending with the cast that uses | |||
416 | // the IV phi. Search for the recipe of the last cast in the chain and | |||
417 | // replace it with the original IV. Note that only the final cast is | |||
418 | // expected to have users outside the cast-chain and the dead casts left | |||
419 | // over will be cleaned up later. | |||
420 | auto &Casts = IV->getInductionDescriptor().getCastInsts(); | |||
421 | VPValue *FindMyCast = IV; | |||
422 | for (Instruction *IRCast : reverse(Casts)) { | |||
423 | VPRecipeBase *FoundUserCast = nullptr; | |||
424 | for (auto *U : FindMyCast->users()) { | |||
425 | auto *UserCast = cast<VPRecipeBase>(U); | |||
426 | if (UserCast->getNumDefinedValues() == 1 && | |||
427 | UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) { | |||
428 | FoundUserCast = UserCast; | |||
429 | break; | |||
430 | } | |||
431 | } | |||
432 | FindMyCast = FoundUserCast->getVPSingleValue(); | |||
| ||||
433 | } | |||
434 | FindMyCast->replaceAllUsesWith(IV); | |||
435 | } | |||
436 | } | |||
437 | ||||
438 | void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { | |||
439 | VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); | |||
440 | VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; | |||
441 | for (VPUser *U : CanonicalIV->users()) { | |||
442 | WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U); | |||
443 | if (WidenNewIV) | |||
444 | break; | |||
445 | } | |||
446 | ||||
447 | if (!WidenNewIV) | |||
448 | return; | |||
449 | ||||
450 | VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | |||
451 | for (VPRecipeBase &Phi : HeaderVPBB->phis()) { | |||
452 | auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); | |||
453 | ||||
454 | if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() || | |||
455 | WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType()) | |||
456 | continue; | |||
457 | ||||
458 | // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides | |||
459 | // everything WidenNewIV's users need. That is, WidenOriginalIV will | |||
460 | // generate a vector phi or all users of WidenNewIV demand the first lane | |||
461 | // only. | |||
462 | if (any_of(WidenOriginalIV->users(), | |||
463 | [WidenOriginalIV](VPUser *U) { | |||
464 | return !U->usesScalars(WidenOriginalIV); | |||
465 | }) || | |||
466 | vputils::onlyFirstLaneUsed(WidenNewIV)) { | |||
467 | WidenNewIV->replaceAllUsesWith(WidenOriginalIV); | |||
468 | WidenNewIV->eraseFromParent(); | |||
469 | return; | |||
470 | } | |||
471 | } | |||
472 | } | |||
473 | ||||
474 | void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { | |||
475 | ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( | |||
476 | Plan.getEntry()); | |||
477 | ||||
478 | for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) { | |||
479 | // The recipes in the block are processed in reverse order, to catch chains | |||
480 | // of dead recipes. | |||
481 | for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { | |||
482 | if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { | |||
483 | return V->getNumUsers() > 0; | |||
484 | })) | |||
485 | continue; | |||
486 | R.eraseFromParent(); | |||
487 | } | |||
488 | } | |||
489 | } | |||
490 | ||||
491 | void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | |||
492 | SmallVector<VPRecipeBase *> ToRemove; | |||
493 | VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | |||
494 | bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); | |||
495 | for (VPRecipeBase &Phi : HeaderVPBB->phis()) { | |||
496 | auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); | |||
497 | if (!WideIV) | |||
498 | continue; | |||
499 | if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) { | |||
500 | return U->usesScalars(WideIV); | |||
501 | })) | |||
502 | continue; | |||
503 | ||||
504 | auto IP = HeaderVPBB->getFirstNonPhi(); | |||
505 | VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); | |||
506 | Type *ResultTy = WideIV->getPHINode()->getType(); | |||
507 | if (Instruction *TruncI = WideIV->getTruncInst()) | |||
508 | ResultTy = TruncI->getType(); | |||
509 | const InductionDescriptor &ID = WideIV->getInductionDescriptor(); | |||
510 | VPValue *Step = | |||
511 | vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE); | |||
512 | VPValue *BaseIV = CanonicalIV; | |||
513 | if (!CanonicalIV->isCanonical(ID.getKind(), WideIV->getStartValue(), Step, | |||
514 | ResultTy)) { | |||
515 | BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV, | |||
516 | Step, ResultTy); | |||
517 | HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); | |||
518 | } | |||
519 | ||||
520 | VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); | |||
521 | HeaderVPBB->insert(Steps, IP); | |||
522 | ||||
523 | // Update scalar users of IV to use Step instead. Use SetVector to ensure | |||
524 | // the list of users doesn't contain duplicates. | |||
525 | SetVector<VPUser *> Users(WideIV->user_begin(), WideIV->user_end()); | |||
526 | for (VPUser *U : Users) { | |||
527 | if (HasOnlyVectorVFs && !U->usesScalars(WideIV)) | |||
528 | continue; | |||
529 | for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) { | |||
530 | if (U->getOperand(I) != WideIV) | |||
531 | continue; | |||
532 | U->setOperand(I, Steps); | |||
533 | } | |||
534 | } | |||
535 | } | |||
536 | } | |||
537 | ||||
538 | void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) { | |||
539 | DenseMap<const SCEV *, VPValue *> SCEV2VPV; | |||
540 | ||||
541 | for (VPRecipeBase &R : | |||
542 | make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) { | |||
543 | auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R); | |||
544 | if (!ExpR) | |||
545 | continue; | |||
546 | ||||
547 | auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR}); | |||
548 | if (I.second) | |||
549 | continue; | |||
550 | ExpR->replaceAllUsesWith(I.first->second); | |||
551 | ExpR->eraseFromParent(); | |||
552 | } | |||
553 | } | |||
554 | ||||
555 | static bool canSimplifyBranchOnCond(VPInstruction *Term) { | |||
556 | VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0)); | |||
557 | if (!Not || Not->getOpcode() != VPInstruction::Not) | |||
558 | return false; | |||
559 | ||||
560 | VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0)); | |||
561 | return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask; | |||
562 | } | |||
563 | ||||
564 | void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, | |||
565 | unsigned BestUF, | |||
566 | PredicatedScalarEvolution &PSE) { | |||
567 | assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan")(static_cast <bool> (Plan.hasVF(BestVF) && "BestVF is not available in Plan" ) ? void (0) : __assert_fail ("Plan.hasVF(BestVF) && \"BestVF is not available in Plan\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 567, __extension__ __PRETTY_FUNCTION__)); | |||
568 | assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan")(static_cast <bool> (Plan.hasUF(BestUF) && "BestUF is not available in Plan" ) ? void (0) : __assert_fail ("Plan.hasUF(BestUF) && \"BestUF is not available in Plan\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 568, __extension__ __PRETTY_FUNCTION__)); | |||
569 | VPBasicBlock *ExitingVPBB = | |||
570 | Plan.getVectorLoopRegion()->getExitingBasicBlock(); | |||
571 | auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back()); | |||
572 | // Try to simplify the branch condition if TC <= VF * UF when preparing to | |||
573 | // execute the plan for the main vector loop. We only do this if the | |||
574 | // terminator is: | |||
575 | // 1. BranchOnCount, or | |||
576 | // 2. BranchOnCond where the input is Not(ActiveLaneMask). | |||
577 | if (!Term || (Term->getOpcode() != VPInstruction::BranchOnCount && | |||
578 | (Term->getOpcode() != VPInstruction::BranchOnCond || | |||
579 | !canSimplifyBranchOnCond(Term)))) | |||
580 | return; | |||
581 | ||||
582 | Type *IdxTy = | |||
583 | Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType(); | |||
584 | const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE); | |||
585 | ScalarEvolution &SE = *PSE.getSE(); | |||
586 | const SCEV *C = | |||
587 | SE.getConstant(TripCount->getType(), BestVF.getKnownMinValue() * BestUF); | |||
588 | if (TripCount->isZero() || | |||
589 | !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) | |||
590 | return; | |||
591 | ||||
592 | LLVMContext &Ctx = SE.getContext(); | |||
593 | auto *BOC = new VPInstruction( | |||
594 | VPInstruction::BranchOnCond, | |||
595 | {Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(Ctx))}); | |||
596 | Term->eraseFromParent(); | |||
597 | ExitingVPBB->appendRecipe(BOC); | |||
598 | Plan.setVF(BestVF); | |||
599 | Plan.setUF(BestUF); | |||
600 | // TODO: Further simplifications are possible | |||
601 | // 1. Replace inductions with constants. | |||
602 | // 2. Replace vector loop region with VPBasicBlock. | |||
603 | } | |||
604 | ||||
605 | #ifndef NDEBUG | |||
606 | static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) { | |||
607 | auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); | |||
608 | if (Region && Region->isReplicator()) { | |||
609 | assert(Region->getNumSuccessors() == 1 &&(static_cast <bool> (Region->getNumSuccessors() == 1 && Region->getNumPredecessors() == 1 && "Expected SESE region!" ) ? void (0) : __assert_fail ("Region->getNumSuccessors() == 1 && Region->getNumPredecessors() == 1 && \"Expected SESE region!\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 610, __extension__ __PRETTY_FUNCTION__)) | |||
610 | Region->getNumPredecessors() == 1 && "Expected SESE region!")(static_cast <bool> (Region->getNumSuccessors() == 1 && Region->getNumPredecessors() == 1 && "Expected SESE region!" ) ? void (0) : __assert_fail ("Region->getNumSuccessors() == 1 && Region->getNumPredecessors() == 1 && \"Expected SESE region!\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 610, __extension__ __PRETTY_FUNCTION__)); | |||
611 | assert(R->getParent()->size() == 1 &&(static_cast <bool> (R->getParent()->size() == 1 && "A recipe in an original replicator region must be the only " "recipe in its block") ? void (0) : __assert_fail ("R->getParent()->size() == 1 && \"A recipe in an original replicator region must be the only \" \"recipe in its block\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 613, __extension__ __PRETTY_FUNCTION__)) | |||
612 | "A recipe in an original replicator region must be the only "(static_cast <bool> (R->getParent()->size() == 1 && "A recipe in an original replicator region must be the only " "recipe in its block") ? void (0) : __assert_fail ("R->getParent()->size() == 1 && \"A recipe in an original replicator region must be the only \" \"recipe in its block\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 613, __extension__ __PRETTY_FUNCTION__)) | |||
613 | "recipe in its block")(static_cast <bool> (R->getParent()->size() == 1 && "A recipe in an original replicator region must be the only " "recipe in its block") ? void (0) : __assert_fail ("R->getParent()->size() == 1 && \"A recipe in an original replicator region must be the only \" \"recipe in its block\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 613, __extension__ __PRETTY_FUNCTION__)); | |||
614 | return Region; | |||
615 | } | |||
616 | return nullptr; | |||
617 | } | |||
618 | #endif | |||
619 | ||||
620 | static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B, | |||
621 | VPDominatorTree &VPDT) { | |||
622 | if (A == B) | |||
623 | return false; | |||
624 | ||||
625 | auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) { | |||
626 | for (auto &R : *A->getParent()) { | |||
627 | if (&R == A) | |||
628 | return true; | |||
629 | if (&R == B) | |||
630 | return false; | |||
631 | } | |||
632 | llvm_unreachable("recipe not found")::llvm::llvm_unreachable_internal("recipe not found", "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp" , 632); | |||
633 | }; | |||
634 | const VPBlockBase *ParentA = A->getParent(); | |||
635 | const VPBlockBase *ParentB = B->getParent(); | |||
636 | if (ParentA == ParentB) | |||
637 | return LocalComesBefore(A, B); | |||
638 | ||||
639 | assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&(static_cast <bool> (!GetReplicateRegion(const_cast< VPRecipeBase *>(A)) && "No replicate regions expected at this point" ) ? void (0) : __assert_fail ("!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) && \"No replicate regions expected at this point\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 640, __extension__ __PRETTY_FUNCTION__)) | |||
640 | "No replicate regions expected at this point")(static_cast <bool> (!GetReplicateRegion(const_cast< VPRecipeBase *>(A)) && "No replicate regions expected at this point" ) ? void (0) : __assert_fail ("!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) && \"No replicate regions expected at this point\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 640, __extension__ __PRETTY_FUNCTION__)); | |||
641 | assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&(static_cast <bool> (!GetReplicateRegion(const_cast< VPRecipeBase *>(B)) && "No replicate regions expected at this point" ) ? void (0) : __assert_fail ("!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) && \"No replicate regions expected at this point\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 642, __extension__ __PRETTY_FUNCTION__)) | |||
642 | "No replicate regions expected at this point")(static_cast <bool> (!GetReplicateRegion(const_cast< VPRecipeBase *>(B)) && "No replicate regions expected at this point" ) ? void (0) : __assert_fail ("!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) && \"No replicate regions expected at this point\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 642, __extension__ __PRETTY_FUNCTION__)); | |||
643 | return VPDT.properlyDominates(ParentA, ParentB); | |||
644 | } | |||
645 | ||||
646 | /// Sink users of \p FOR after the recipe defining the previous value \p | |||
647 | /// Previous of the recurrence. \returns true if all users of \p FOR could be | |||
648 | /// re-arranged as needed or false if it is not possible. | |||
649 | static bool | |||
650 | sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, | |||
651 | VPRecipeBase *Previous, | |||
652 | VPDominatorTree &VPDT) { | |||
653 | // Collect recipes that need sinking. | |||
654 | SmallVector<VPRecipeBase *> WorkList; | |||
655 | SmallPtrSet<VPRecipeBase *, 8> Seen; | |||
656 | Seen.insert(Previous); | |||
657 | auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) { | |||
658 | // The previous value must not depend on the users of the recurrence phi. In | |||
659 | // that case, FOR is not a fixed order recurrence. | |||
660 | if (SinkCandidate == Previous) | |||
661 | return false; | |||
662 | ||||
663 | if (isa<VPHeaderPHIRecipe>(SinkCandidate) || | |||
664 | !Seen.insert(SinkCandidate).second || | |||
665 | properlyDominates(Previous, SinkCandidate, VPDT)) | |||
666 | return true; | |||
667 | ||||
668 | WorkList.push_back(SinkCandidate); | |||
669 | return true; | |||
670 | }; | |||
671 | ||||
672 | // Recursively sink users of FOR after Previous. | |||
673 | WorkList.push_back(FOR); | |||
674 | for (unsigned I = 0; I != WorkList.size(); ++I) { | |||
675 | VPRecipeBase *Current = WorkList[I]; | |||
676 | assert(Current->getNumDefinedValues() == 1 &&(static_cast <bool> (Current->getNumDefinedValues() == 1 && "only recipes with a single defined value expected" ) ? void (0) : __assert_fail ("Current->getNumDefinedValues() == 1 && \"only recipes with a single defined value expected\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 677, __extension__ __PRETTY_FUNCTION__)) | |||
677 | "only recipes with a single defined value expected")(static_cast <bool> (Current->getNumDefinedValues() == 1 && "only recipes with a single defined value expected" ) ? void (0) : __assert_fail ("Current->getNumDefinedValues() == 1 && \"only recipes with a single defined value expected\"" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 677, __extension__ __PRETTY_FUNCTION__)); | |||
678 | for (VPUser *User : Current->getVPSingleValue()->users()) { | |||
679 | if (auto *R = dyn_cast<VPRecipeBase>(User)) | |||
680 | if (!TryToPushSinkCandidate(R)) | |||
681 | return false; | |||
682 | } | |||
683 | } | |||
684 | ||||
685 | // Keep recipes to sink ordered by dominance so earlier instructions are | |||
686 | // processed first. | |||
687 | sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) { | |||
688 | return properlyDominates(A, B, VPDT); | |||
689 | }); | |||
690 | ||||
691 | for (VPRecipeBase *SinkCandidate : WorkList) { | |||
692 | if (SinkCandidate == FOR) | |||
693 | continue; | |||
694 | ||||
695 | SinkCandidate->moveAfter(Previous); | |||
696 | Previous = SinkCandidate; | |||
697 | } | |||
698 | return true; | |||
699 | } | |||
700 | ||||
701 | bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, | |||
702 | VPBuilder &Builder) { | |||
703 | VPDominatorTree VPDT; | |||
704 | VPDT.recalculate(Plan); | |||
705 | ||||
706 | SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis; | |||
707 | for (VPRecipeBase &R : | |||
708 | Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis()) | |||
709 | if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) | |||
710 | RecurrencePhis.push_back(FOR); | |||
711 | ||||
712 | for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) { | |||
713 | SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis; | |||
714 | VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe(); | |||
715 | // Fixed-order recurrences do not contain cycles, so this loop is guaranteed | |||
716 | // to terminate. | |||
717 | while (auto *PrevPhi = | |||
718 | dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) { | |||
719 | assert(PrevPhi->getParent() == FOR->getParent())(static_cast <bool> (PrevPhi->getParent() == FOR-> getParent()) ? void (0) : __assert_fail ("PrevPhi->getParent() == FOR->getParent()" , "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 719, __extension__ __PRETTY_FUNCTION__)); | |||
720 | assert(SeenPhis.insert(PrevPhi).second)(static_cast <bool> (SeenPhis.insert(PrevPhi).second) ? void (0) : __assert_fail ("SeenPhis.insert(PrevPhi).second", "llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp", 720, __extension__ __PRETTY_FUNCTION__)); | |||
721 | Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe(); | |||
722 | } | |||
723 | ||||
724 | if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT)) | |||
725 | return false; | |||
726 | ||||
727 | // Introduce a recipe to combine the incoming and previous values of a | |||
728 | // fixed-order recurrence. | |||
729 | VPBasicBlock *InsertBlock = Previous->getParent(); | |||
730 | if (isa<VPHeaderPHIRecipe>(Previous)) | |||
731 | Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi()); | |||
732 | else | |||
733 | Builder.setInsertPoint(InsertBlock, std::next(Previous->getIterator())); | |||
734 | ||||
735 | auto *RecurSplice = cast<VPInstruction>( | |||
736 | Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice, | |||
737 | {FOR, FOR->getBackedgeValue()})); | |||
738 | ||||
739 | FOR->replaceAllUsesWith(RecurSplice); | |||
740 | // Set the first operand of RecurSplice to FOR again, after replacing | |||
741 | // all users. | |||
742 | RecurSplice->setOperand(0, FOR); | |||
743 | } | |||
744 | return true; | |||
745 | } |