File: | build/source/llvm/lib/Transforms/Vectorize/VPlan.cpp |
Warning: | line 326, column 7 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// | |||
9 | /// \file | |||
10 | /// This is the LLVM vectorization plan. It represents a candidate for | |||
11 | /// vectorization, allowing to plan and optimize how to vectorize a given loop | |||
12 | /// before generating LLVM-IR. | |||
13 | /// The vectorizer uses vectorization plans to estimate the costs of potential | |||
14 | /// candidates and if profitable to execute the desired plan, generating vector | |||
15 | /// LLVM-IR code. | |||
16 | /// | |||
17 | //===----------------------------------------------------------------------===// | |||
18 | ||||
19 | #include "VPlan.h" | |||
20 | #include "VPlanCFG.h" | |||
21 | #include "VPlanDominatorTree.h" | |||
22 | #include "llvm/ADT/DepthFirstIterator.h" | |||
23 | #include "llvm/ADT/PostOrderIterator.h" | |||
24 | #include "llvm/ADT/STLExtras.h" | |||
25 | #include "llvm/ADT/SmallVector.h" | |||
26 | #include "llvm/ADT/Twine.h" | |||
27 | #include "llvm/Analysis/LoopInfo.h" | |||
28 | #include "llvm/IR/BasicBlock.h" | |||
29 | #include "llvm/IR/CFG.h" | |||
30 | #include "llvm/IR/IRBuilder.h" | |||
31 | #include "llvm/IR/Instruction.h" | |||
32 | #include "llvm/IR/Instructions.h" | |||
33 | #include "llvm/IR/Type.h" | |||
34 | #include "llvm/IR/Value.h" | |||
35 | #include "llvm/Support/Casting.h" | |||
36 | #include "llvm/Support/CommandLine.h" | |||
37 | #include "llvm/Support/Debug.h" | |||
38 | #include "llvm/Support/GenericDomTreeConstruction.h" | |||
39 | #include "llvm/Support/GraphWriter.h" | |||
40 | #include "llvm/Support/raw_ostream.h" | |||
41 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
42 | #include "llvm/Transforms/Utils/LoopVersioning.h" | |||
43 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" | |||
44 | #include <cassert> | |||
45 | #include <string> | |||
46 | #include <vector> | |||
47 | ||||
48 | using namespace llvm; | |||
49 | ||||
50 | namespace llvm { | |||
51 | extern cl::opt<bool> EnableVPlanNativePath; | |||
52 | } | |||
53 | ||||
54 | #define DEBUG_TYPE"vplan" "vplan" | |||
55 | ||||
56 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
57 | raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { | |||
58 | const VPInstruction *Instr = dyn_cast<VPInstruction>(&V); | |||
59 | VPSlotTracker SlotTracker( | |||
60 | (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); | |||
61 | V.print(OS, SlotTracker); | |||
62 | return OS; | |||
63 | } | |||
64 | #endif | |||
65 | ||||
66 | Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder, | |||
67 | const ElementCount &VF) const { | |||
68 | switch (LaneKind) { | |||
69 | case VPLane::Kind::ScalableLast: | |||
70 | // Lane = RuntimeVF - VF.getKnownMinValue() + Lane | |||
71 | return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF), | |||
72 | Builder.getInt32(VF.getKnownMinValue() - Lane)); | |||
73 | case VPLane::Kind::First: | |||
74 | return Builder.getInt32(Lane); | |||
75 | } | |||
76 | llvm_unreachable("Unknown lane kind")::llvm::llvm_unreachable_internal("Unknown lane kind", "llvm/lib/Transforms/Vectorize/VPlan.cpp" , 76); | |||
77 | } | |||
78 | ||||
79 | VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def) | |||
80 | : SubclassID(SC), UnderlyingVal(UV), Def(Def) { | |||
81 | if (Def) | |||
82 | Def->addDefinedValue(this); | |||
83 | } | |||
84 | ||||
85 | VPValue::~VPValue() { | |||
86 | assert(Users.empty() && "trying to delete a VPValue with remaining users")(static_cast <bool> (Users.empty() && "trying to delete a VPValue with remaining users" ) ? void (0) : __assert_fail ("Users.empty() && \"trying to delete a VPValue with remaining users\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 86, __extension__ __PRETTY_FUNCTION__)); | |||
87 | if (Def) | |||
88 | Def->removeDefinedValue(this); | |||
89 | } | |||
90 | ||||
91 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
92 | void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { | |||
93 | if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def)) | |||
94 | R->print(OS, "", SlotTracker); | |||
95 | else | |||
96 | printAsOperand(OS, SlotTracker); | |||
97 | } | |||
98 | ||||
99 | void VPValue::dump() const { | |||
100 | const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def); | |||
101 | VPSlotTracker SlotTracker( | |||
102 | (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); | |||
103 | print(dbgs(), SlotTracker); | |||
104 | dbgs() << "\n"; | |||
105 | } | |||
106 | ||||
107 | void VPDef::dump() const { | |||
108 | const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this); | |||
109 | VPSlotTracker SlotTracker( | |||
110 | (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); | |||
111 | print(dbgs(), "", SlotTracker); | |||
112 | dbgs() << "\n"; | |||
113 | } | |||
114 | #endif | |||
115 | ||||
116 | VPRecipeBase *VPValue::getDefiningRecipe() { | |||
117 | return cast_or_null<VPRecipeBase>(Def); | |||
118 | } | |||
119 | ||||
120 | const VPRecipeBase *VPValue::getDefiningRecipe() const { | |||
121 | return cast_or_null<VPRecipeBase>(Def); | |||
122 | } | |||
123 | ||||
124 | // Get the top-most entry block of \p Start. This is the entry block of the | |||
125 | // containing VPlan. This function is templated to support both const and non-const blocks | |||
126 | template <typename T> static T *getPlanEntry(T *Start) { | |||
127 | T *Next = Start; | |||
128 | T *Current = Start; | |||
129 | while ((Next = Next->getParent())) | |||
130 | Current = Next; | |||
131 | ||||
132 | SmallSetVector<T *, 8> WorkList; | |||
133 | WorkList.insert(Current); | |||
134 | ||||
135 | for (unsigned i = 0; i < WorkList.size(); i++) { | |||
136 | T *Current = WorkList[i]; | |||
137 | if (Current->getNumPredecessors() == 0) | |||
138 | return Current; | |||
139 | auto &Predecessors = Current->getPredecessors(); | |||
140 | WorkList.insert(Predecessors.begin(), Predecessors.end()); | |||
141 | } | |||
142 | ||||
143 | llvm_unreachable("VPlan without any entry node without predecessors")::llvm::llvm_unreachable_internal("VPlan without any entry node without predecessors" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 143); | |||
144 | } | |||
145 | ||||
146 | VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; } | |||
147 | ||||
148 | const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; } | |||
149 | ||||
150 | /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. | |||
151 | const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { | |||
152 | const VPBlockBase *Block = this; | |||
153 | while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) | |||
154 | Block = Region->getEntry(); | |||
155 | return cast<VPBasicBlock>(Block); | |||
156 | } | |||
157 | ||||
158 | VPBasicBlock *VPBlockBase::getEntryBasicBlock() { | |||
159 | VPBlockBase *Block = this; | |||
160 | while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) | |||
161 | Block = Region->getEntry(); | |||
162 | return cast<VPBasicBlock>(Block); | |||
163 | } | |||
164 | ||||
165 | void VPBlockBase::setPlan(VPlan *ParentPlan) { | |||
166 | assert((static_cast <bool> ((ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && "Can only set plan on its entry or preheader block." ) ? void (0) : __assert_fail ("(ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && \"Can only set plan on its entry or preheader block.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 168, __extension__ __PRETTY_FUNCTION__)) | |||
167 | (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&(static_cast <bool> ((ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && "Can only set plan on its entry or preheader block." ) ? void (0) : __assert_fail ("(ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && \"Can only set plan on its entry or preheader block.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 168, __extension__ __PRETTY_FUNCTION__)) | |||
168 | "Can only set plan on its entry or preheader block.")(static_cast <bool> ((ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && "Can only set plan on its entry or preheader block." ) ? void (0) : __assert_fail ("(ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && \"Can only set plan on its entry or preheader block.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 168, __extension__ __PRETTY_FUNCTION__)); | |||
169 | Plan = ParentPlan; | |||
170 | } | |||
171 | ||||
172 | /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. | |||
173 | const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const { | |||
174 | const VPBlockBase *Block = this; | |||
175 | while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) | |||
176 | Block = Region->getExiting(); | |||
177 | return cast<VPBasicBlock>(Block); | |||
178 | } | |||
179 | ||||
180 | VPBasicBlock *VPBlockBase::getExitingBasicBlock() { | |||
181 | VPBlockBase *Block = this; | |||
182 | while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) | |||
183 | Block = Region->getExiting(); | |||
184 | return cast<VPBasicBlock>(Block); | |||
185 | } | |||
186 | ||||
187 | VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { | |||
188 | if (!Successors.empty() || !Parent) | |||
189 | return this; | |||
190 | assert(Parent->getExiting() == this &&(static_cast <bool> (Parent->getExiting() == this && "Block w/o successors not the exiting block of its parent.") ? void (0) : __assert_fail ("Parent->getExiting() == this && \"Block w/o successors not the exiting block of its parent.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 191, __extension__ __PRETTY_FUNCTION__)) | |||
191 | "Block w/o successors not the exiting block of its parent.")(static_cast <bool> (Parent->getExiting() == this && "Block w/o successors not the exiting block of its parent.") ? void (0) : __assert_fail ("Parent->getExiting() == this && \"Block w/o successors not the exiting block of its parent.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 191, __extension__ __PRETTY_FUNCTION__)); | |||
192 | return Parent->getEnclosingBlockWithSuccessors(); | |||
193 | } | |||
194 | ||||
195 | VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { | |||
196 | if (!Predecessors.empty() || !Parent) | |||
197 | return this; | |||
198 | assert(Parent->getEntry() == this &&(static_cast <bool> (Parent->getEntry() == this && "Block w/o predecessors not the entry of its parent.") ? void (0) : __assert_fail ("Parent->getEntry() == this && \"Block w/o predecessors not the entry of its parent.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 199, __extension__ __PRETTY_FUNCTION__)) | |||
199 | "Block w/o predecessors not the entry of its parent.")(static_cast <bool> (Parent->getEntry() == this && "Block w/o predecessors not the entry of its parent.") ? void (0) : __assert_fail ("Parent->getEntry() == this && \"Block w/o predecessors not the entry of its parent.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 199, __extension__ __PRETTY_FUNCTION__)); | |||
200 | return Parent->getEnclosingBlockWithPredecessors(); | |||
201 | } | |||
202 | ||||
203 | void VPBlockBase::deleteCFG(VPBlockBase *Entry) { | |||
204 | for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry))) | |||
205 | delete Block; | |||
206 | } | |||
207 | ||||
208 | VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { | |||
209 | iterator It = begin(); | |||
210 | while (It != end() && It->isPhi()) | |||
211 | It++; | |||
212 | return It; | |||
213 | } | |||
214 | ||||
215 | Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) { | |||
216 | if (Def->isLiveIn()) | |||
217 | return Def->getLiveInIRValue(); | |||
218 | ||||
219 | if (hasScalarValue(Def, Instance)) { | |||
220 | return Data | |||
221 | .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)]; | |||
222 | } | |||
223 | ||||
224 | assert(hasVectorValue(Def, Instance.Part))(static_cast <bool> (hasVectorValue(Def, Instance.Part) ) ? void (0) : __assert_fail ("hasVectorValue(Def, Instance.Part)" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 224, __extension__ __PRETTY_FUNCTION__)); | |||
225 | auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; | |||
226 | if (!VecPart->getType()->isVectorTy()) { | |||
227 | assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar")(static_cast <bool> (Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar") ? void (0) : __assert_fail ("Instance.Lane.isFirstLane() && \"cannot get lane > 0 for scalar\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 227, __extension__ __PRETTY_FUNCTION__)); | |||
228 | return VecPart; | |||
229 | } | |||
230 | // TODO: Cache created scalar values. | |||
231 | Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF); | |||
232 | auto *Extract = Builder.CreateExtractElement(VecPart, Lane); | |||
233 | // set(Def, Extract, Instance); | |||
234 | return Extract; | |||
235 | } | |||
236 | BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) { | |||
237 | VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion(); | |||
238 | return VPBB2IRBB[LoopRegion->getPreheaderVPBB()]; | |||
239 | } | |||
240 | ||||
241 | void VPTransformState::addNewMetadata(Instruction *To, | |||
242 | const Instruction *Orig) { | |||
243 | // If the loop was versioned with memchecks, add the corresponding no-alias | |||
244 | // metadata. | |||
245 | if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig))) | |||
246 | LVer->annotateInstWithNoAlias(To, Orig); | |||
247 | } | |||
248 | ||||
249 | void VPTransformState::addMetadata(Instruction *To, Instruction *From) { | |||
250 | // No source instruction to transfer metadata from? | |||
251 | if (!From) | |||
252 | return; | |||
253 | ||||
254 | propagateMetadata(To, From); | |||
255 | addNewMetadata(To, From); | |||
256 | } | |||
257 | ||||
258 | void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) { | |||
259 | // No source instruction to transfer metadata from? | |||
260 | if (!From) | |||
261 | return; | |||
262 | ||||
263 | for (Value *V : To) { | |||
264 | if (Instruction *I = dyn_cast<Instruction>(V)) | |||
265 | addMetadata(I, From); | |||
266 | } | |||
267 | } | |||
268 | ||||
269 | void VPTransformState::setDebugLocFromInst(const Value *V) { | |||
270 | const Instruction *Inst = dyn_cast<Instruction>(V); | |||
271 | if (!Inst) { | |||
272 | Builder.SetCurrentDebugLocation(DebugLoc()); | |||
273 | return; | |||
274 | } | |||
275 | ||||
276 | const DILocation *DIL = Inst->getDebugLoc(); | |||
277 | // When a FSDiscriminator is enabled, we don't need to add the multiply | |||
278 | // factors to the discriminators. | |||
279 | if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() && | |||
280 | !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) { | |||
281 | // FIXME: For scalable vectors, assume vscale=1. | |||
282 | auto NewDIL = | |||
283 | DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); | |||
284 | if (NewDIL) | |||
285 | Builder.SetCurrentDebugLocation(*NewDIL); | |||
286 | else | |||
287 | LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "Failed to create new discriminator: " << DIL->getFilename() << " Line: " << DIL ->getLine(); } } while (false) | |||
288 | << DIL->getFilename() << " Line: " << DIL->getLine())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "Failed to create new discriminator: " << DIL->getFilename() << " Line: " << DIL ->getLine(); } } while (false); | |||
289 | } else | |||
290 | Builder.SetCurrentDebugLocation(DIL); | |||
291 | } | |||
292 | ||||
293 | BasicBlock * | |||
294 | VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { | |||
295 | // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. | |||
296 | // Pred stands for Predessor. Prev stands for Previous - last visited/created. | |||
297 | BasicBlock *PrevBB = CFG.PrevBB; | |||
298 | BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), | |||
299 | PrevBB->getParent(), CFG.ExitBB); | |||
300 | LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "LV: created " << NewBB-> getName() << '\n'; } } while (false); | |||
301 | ||||
302 | // Hook up the new basic block to its predecessors. | |||
303 | for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { | |||
304 | VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); | |||
305 | auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); | |||
306 | BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; | |||
307 | ||||
308 | assert(PredBB && "Predecessor basic-block not found building successor.")(static_cast <bool> (PredBB && "Predecessor basic-block not found building successor." ) ? void (0) : __assert_fail ("PredBB && \"Predecessor basic-block not found building successor.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 308, __extension__ __PRETTY_FUNCTION__)); | |||
309 | auto *PredBBTerminator = PredBB->getTerminator(); | |||
310 | LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "LV: draw edge from" << PredBB ->getName() << '\n'; } } while (false); | |||
311 | ||||
312 | auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator); | |||
313 | if (isa<UnreachableInst>(PredBBTerminator)) { | |||
314 | assert(PredVPSuccessors.size() == 1 &&(static_cast <bool> (PredVPSuccessors.size() == 1 && "Predecessor ending w/o branch must have single successor.") ? void (0) : __assert_fail ("PredVPSuccessors.size() == 1 && \"Predecessor ending w/o branch must have single successor.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 315, __extension__ __PRETTY_FUNCTION__)) | |||
315 | "Predecessor ending w/o branch must have single successor.")(static_cast <bool> (PredVPSuccessors.size() == 1 && "Predecessor ending w/o branch must have single successor.") ? void (0) : __assert_fail ("PredVPSuccessors.size() == 1 && \"Predecessor ending w/o branch must have single successor.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 315, __extension__ __PRETTY_FUNCTION__)); | |||
316 | DebugLoc DL = PredBBTerminator->getDebugLoc(); | |||
317 | PredBBTerminator->eraseFromParent(); | |||
318 | auto *Br = BranchInst::Create(NewBB, PredBB); | |||
319 | Br->setDebugLoc(DL); | |||
320 | } else if (TermBr
| |||
321 | TermBr->setSuccessor(0, NewBB); | |||
322 | } else { | |||
323 | // Set each forward successor here when it is created, excluding | |||
324 | // backedges. A backward successor is set when the branch is created. | |||
325 | unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; | |||
326 | assert(!TermBr->getSuccessor(idx) &&(static_cast <bool> (!TermBr->getSuccessor(idx) && "Trying to reset an existing successor block.") ? void (0) : __assert_fail ("!TermBr->getSuccessor(idx) && \"Trying to reset an existing successor block.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 327, __extension__ __PRETTY_FUNCTION__)) | |||
| ||||
327 | "Trying to reset an existing successor block.")(static_cast <bool> (!TermBr->getSuccessor(idx) && "Trying to reset an existing successor block.") ? void (0) : __assert_fail ("!TermBr->getSuccessor(idx) && \"Trying to reset an existing successor block.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 327, __extension__ __PRETTY_FUNCTION__)); | |||
328 | TermBr->setSuccessor(idx, NewBB); | |||
329 | } | |||
330 | } | |||
331 | return NewBB; | |||
332 | } | |||
333 | ||||
334 | void VPBasicBlock::execute(VPTransformState *State) { | |||
335 | bool Replica = State->Instance && !State->Instance->isFirstIteration(); | |||
| ||||
336 | VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; | |||
337 | VPBlockBase *SingleHPred = nullptr; | |||
338 | BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. | |||
339 | ||||
340 | auto IsLoopRegion = [](VPBlockBase *BB) { | |||
341 | auto *R = dyn_cast<VPRegionBlock>(BB); | |||
342 | return R && !R->isReplicator(); | |||
343 | }; | |||
344 | ||||
345 | // 1. Create an IR basic block, or reuse the last one or ExitBB if possible. | |||
346 | if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) { | |||
347 | // ExitBB can be re-used for the exit block of the Plan. | |||
348 | NewBB = State->CFG.ExitBB; | |||
349 | State->CFG.PrevBB = NewBB; | |||
350 | ||||
351 | // Update the branch instruction in the predecessor to branch to ExitBB. | |||
352 | VPBlockBase *PredVPB = getSingleHierarchicalPredecessor(); | |||
353 | VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock(); | |||
354 | assert(PredVPB->getSingleSuccessor() == this &&(static_cast <bool> (PredVPB->getSingleSuccessor() == this && "predecessor must have the current block as only successor" ) ? void (0) : __assert_fail ("PredVPB->getSingleSuccessor() == this && \"predecessor must have the current block as only successor\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 355, __extension__ __PRETTY_FUNCTION__)) | |||
355 | "predecessor must have the current block as only successor")(static_cast <bool> (PredVPB->getSingleSuccessor() == this && "predecessor must have the current block as only successor" ) ? void (0) : __assert_fail ("PredVPB->getSingleSuccessor() == this && \"predecessor must have the current block as only successor\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 355, __extension__ __PRETTY_FUNCTION__)); | |||
356 | BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB]; | |||
357 | // The Exit block of a loop is always set to be successor 0 of the Exiting | |||
358 | // block. | |||
359 | cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB); | |||
360 | } else if (PrevVPBB && /* A */ | |||
361 | !((SingleHPred = getSingleHierarchicalPredecessor()) && | |||
362 | SingleHPred->getExitingBasicBlock() == PrevVPBB && | |||
363 | PrevVPBB->getSingleHierarchicalSuccessor() && | |||
364 | (SingleHPred->getParent() == getEnclosingLoopRegion() && | |||
365 | !IsLoopRegion(SingleHPred))) && /* B */ | |||
366 | !(Replica
| |||
367 | // The last IR basic block is reused, as an optimization, in three cases: | |||
368 | // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; | |||
369 | // B. when the current VPBB has a single (hierarchical) predecessor which | |||
370 | // is PrevVPBB and the latter has a single (hierarchical) successor which | |||
371 | // both are in the same non-replicator region; and | |||
372 | // C. when the current VPBB is an entry of a region replica - where PrevVPBB | |||
373 | // is the exiting VPBB of this region from a previous instance, or the | |||
374 | // predecessor of this region. | |||
375 | ||||
376 | NewBB = createEmptyBasicBlock(State->CFG); | |||
377 | State->Builder.SetInsertPoint(NewBB); | |||
378 | // Temporarily terminate with unreachable until CFG is rewired. | |||
379 | UnreachableInst *Terminator = State->Builder.CreateUnreachable(); | |||
380 | // Register NewBB in its loop. In innermost loops its the same for all | |||
381 | // BB's. | |||
382 | if (State->CurrentVectorLoop) | |||
383 | State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); | |||
384 | State->Builder.SetInsertPoint(Terminator); | |||
385 | State->CFG.PrevBB = NewBB; | |||
386 | } | |||
387 | ||||
388 | // 2. Fill the IR basic block with IR instructions. | |||
389 | LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "LV: vectorizing VPBB:" << getName() << " in BB:" << NewBB->getName() << '\n'; } } while (false) | |||
390 | << " in BB:" << NewBB->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "LV: vectorizing VPBB:" << getName() << " in BB:" << NewBB->getName() << '\n'; } } while (false); | |||
391 | ||||
392 | State->CFG.VPBB2IRBB[this] = NewBB; | |||
393 | State->CFG.PrevVPBB = this; | |||
394 | ||||
395 | for (VPRecipeBase &Recipe : Recipes) | |||
396 | Recipe.execute(*State); | |||
397 | ||||
398 | LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "LV: filled BB:" << *NewBB ; } } while (false); | |||
399 | } | |||
400 | ||||
401 | void VPBasicBlock::dropAllReferences(VPValue *NewValue) { | |||
402 | for (VPRecipeBase &R : Recipes) { | |||
403 | for (auto *Def : R.definedValues()) | |||
404 | Def->replaceAllUsesWith(NewValue); | |||
405 | ||||
406 | for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) | |||
407 | R.setOperand(I, NewValue); | |||
408 | } | |||
409 | } | |||
410 | ||||
411 | VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { | |||
412 | assert((SplitAt == end() || SplitAt->getParent() == this) &&(static_cast <bool> ((SplitAt == end() || SplitAt->getParent () == this) && "can only split at a position in the same block" ) ? void (0) : __assert_fail ("(SplitAt == end() || SplitAt->getParent() == this) && \"can only split at a position in the same block\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 413, __extension__ __PRETTY_FUNCTION__)) | |||
413 | "can only split at a position in the same block")(static_cast <bool> ((SplitAt == end() || SplitAt->getParent () == this) && "can only split at a position in the same block" ) ? void (0) : __assert_fail ("(SplitAt == end() || SplitAt->getParent() == this) && \"can only split at a position in the same block\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 413, __extension__ __PRETTY_FUNCTION__)); | |||
414 | ||||
415 | SmallVector<VPBlockBase *, 2> Succs(successors()); | |||
416 | // First, disconnect the current block from its successors. | |||
417 | for (VPBlockBase *Succ : Succs) | |||
418 | VPBlockUtils::disconnectBlocks(this, Succ); | |||
419 | ||||
420 | // Create new empty block after the block to split. | |||
421 | auto *SplitBlock = new VPBasicBlock(getName() + ".split"); | |||
422 | VPBlockUtils::insertBlockAfter(SplitBlock, this); | |||
423 | ||||
424 | // Add successors for block to split to new block. | |||
425 | for (VPBlockBase *Succ : Succs) | |||
426 | VPBlockUtils::connectBlocks(SplitBlock, Succ); | |||
427 | ||||
428 | // Finally, move the recipes starting at SplitAt to new block. | |||
429 | for (VPRecipeBase &ToMove : | |||
430 | make_early_inc_range(make_range(SplitAt, this->end()))) | |||
431 | ToMove.moveBefore(*SplitBlock, SplitBlock->end()); | |||
432 | ||||
433 | return SplitBlock; | |||
434 | } | |||
435 | ||||
436 | VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() { | |||
437 | VPRegionBlock *P = getParent(); | |||
438 | if (P && P->isReplicator()) { | |||
439 | P = P->getParent(); | |||
440 | assert(!cast<VPRegionBlock>(P)->isReplicator() &&(static_cast <bool> (!cast<VPRegionBlock>(P)-> isReplicator() && "unexpected nested replicate regions" ) ? void (0) : __assert_fail ("!cast<VPRegionBlock>(P)->isReplicator() && \"unexpected nested replicate regions\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 441, __extension__ __PRETTY_FUNCTION__)) | |||
441 | "unexpected nested replicate regions")(static_cast <bool> (!cast<VPRegionBlock>(P)-> isReplicator() && "unexpected nested replicate regions" ) ? void (0) : __assert_fail ("!cast<VPRegionBlock>(P)->isReplicator() && \"unexpected nested replicate regions\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 441, __extension__ __PRETTY_FUNCTION__)); | |||
442 | } | |||
443 | return P; | |||
444 | } | |||
445 | ||||
446 | static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { | |||
447 | if (VPBB->empty()) { | |||
448 | assert((static_cast <bool> (VPBB->getNumSuccessors() < 2 && "block with multiple successors doesn't have a recipe as terminator" ) ? void (0) : __assert_fail ("VPBB->getNumSuccessors() < 2 && \"block with multiple successors doesn't have a recipe as terminator\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 450, __extension__ __PRETTY_FUNCTION__)) | |||
449 | VPBB->getNumSuccessors() < 2 &&(static_cast <bool> (VPBB->getNumSuccessors() < 2 && "block with multiple successors doesn't have a recipe as terminator" ) ? void (0) : __assert_fail ("VPBB->getNumSuccessors() < 2 && \"block with multiple successors doesn't have a recipe as terminator\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 450, __extension__ __PRETTY_FUNCTION__)) | |||
450 | "block with multiple successors doesn't have a recipe as terminator")(static_cast <bool> (VPBB->getNumSuccessors() < 2 && "block with multiple successors doesn't have a recipe as terminator" ) ? void (0) : __assert_fail ("VPBB->getNumSuccessors() < 2 && \"block with multiple successors doesn't have a recipe as terminator\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 450, __extension__ __PRETTY_FUNCTION__)); | |||
451 | return false; | |||
452 | } | |||
453 | ||||
454 | const VPRecipeBase *R = &VPBB->back(); | |||
455 | auto *VPI = dyn_cast<VPInstruction>(R); | |||
456 | bool IsCondBranch = | |||
457 | isa<VPBranchOnMaskRecipe>(R) || | |||
458 | (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond || | |||
459 | VPI->getOpcode() == VPInstruction::BranchOnCount)); | |||
460 | (void)IsCondBranch; | |||
461 | ||||
462 | if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) { | |||
463 | assert(IsCondBranch && "block with multiple successors not terminated by "(static_cast <bool> (IsCondBranch && "block with multiple successors not terminated by " "conditional branch recipe") ? void (0) : __assert_fail ("IsCondBranch && \"block with multiple successors not terminated by \" \"conditional branch recipe\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 464, __extension__ __PRETTY_FUNCTION__)) | |||
464 | "conditional branch recipe")(static_cast <bool> (IsCondBranch && "block with multiple successors not terminated by " "conditional branch recipe") ? void (0) : __assert_fail ("IsCondBranch && \"block with multiple successors not terminated by \" \"conditional branch recipe\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 464, __extension__ __PRETTY_FUNCTION__)); | |||
465 | ||||
466 | return true; | |||
467 | } | |||
468 | ||||
469 | assert((static_cast <bool> (!IsCondBranch && "block with 0 or 1 successors terminated by conditional branch recipe" ) ? void (0) : __assert_fail ("!IsCondBranch && \"block with 0 or 1 successors terminated by conditional branch recipe\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 471, __extension__ __PRETTY_FUNCTION__)) | |||
470 | !IsCondBranch &&(static_cast <bool> (!IsCondBranch && "block with 0 or 1 successors terminated by conditional branch recipe" ) ? void (0) : __assert_fail ("!IsCondBranch && \"block with 0 or 1 successors terminated by conditional branch recipe\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 471, __extension__ __PRETTY_FUNCTION__)) | |||
471 | "block with 0 or 1 successors terminated by conditional branch recipe")(static_cast <bool> (!IsCondBranch && "block with 0 or 1 successors terminated by conditional branch recipe" ) ? void (0) : __assert_fail ("!IsCondBranch && \"block with 0 or 1 successors terminated by conditional branch recipe\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 471, __extension__ __PRETTY_FUNCTION__)); | |||
472 | return false; | |||
473 | } | |||
474 | ||||
475 | VPRecipeBase *VPBasicBlock::getTerminator() { | |||
476 | if (hasConditionalTerminator(this)) | |||
477 | return &back(); | |||
478 | return nullptr; | |||
479 | } | |||
480 | ||||
481 | const VPRecipeBase *VPBasicBlock::getTerminator() const { | |||
482 | if (hasConditionalTerminator(this)) | |||
483 | return &back(); | |||
484 | return nullptr; | |||
485 | } | |||
486 | ||||
487 | bool VPBasicBlock::isExiting() const { | |||
488 | return getParent()->getExitingBasicBlock() == this; | |||
489 | } | |||
490 | ||||
491 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
492 | void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const { | |||
493 | if (getSuccessors().empty()) { | |||
494 | O << Indent << "No successors\n"; | |||
495 | } else { | |||
496 | O << Indent << "Successor(s): "; | |||
497 | ListSeparator LS; | |||
498 | for (auto *Succ : getSuccessors()) | |||
499 | O << LS << Succ->getName(); | |||
500 | O << '\n'; | |||
501 | } | |||
502 | } | |||
503 | ||||
504 | void VPBasicBlock::print(raw_ostream &O, const Twine &Indent, | |||
505 | VPSlotTracker &SlotTracker) const { | |||
506 | O << Indent << getName() << ":\n"; | |||
507 | ||||
508 | auto RecipeIndent = Indent + " "; | |||
509 | for (const VPRecipeBase &Recipe : *this) { | |||
510 | Recipe.print(O, RecipeIndent, SlotTracker); | |||
511 | O << '\n'; | |||
512 | } | |||
513 | ||||
514 | printSuccessors(O, Indent); | |||
515 | } | |||
516 | #endif | |||
517 | ||||
518 | void VPRegionBlock::dropAllReferences(VPValue *NewValue) { | |||
519 | for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) | |||
520 | // Drop all references in VPBasicBlocks and replace all uses with | |||
521 | // DummyValue. | |||
522 | Block->dropAllReferences(NewValue); | |||
523 | } | |||
524 | ||||
525 | void VPRegionBlock::execute(VPTransformState *State) { | |||
526 | ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> | |||
527 | RPOT(Entry); | |||
528 | ||||
529 | if (!isReplicator()) { | |||
530 | // Create and register the new vector loop. | |||
531 | Loop *PrevLoop = State->CurrentVectorLoop; | |||
532 | State->CurrentVectorLoop = State->LI->AllocateLoop(); | |||
533 | BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()]; | |||
534 | Loop *ParentLoop = State->LI->getLoopFor(VectorPH); | |||
535 | ||||
536 | // Insert the new loop into the loop nest and register the new basic blocks | |||
537 | // before calling any utilities such as SCEV that require valid LoopInfo. | |||
538 | if (ParentLoop) | |||
539 | ParentLoop->addChildLoop(State->CurrentVectorLoop); | |||
540 | else | |||
541 | State->LI->addTopLevelLoop(State->CurrentVectorLoop); | |||
542 | ||||
543 | // Visit the VPBlocks connected to "this", starting from it. | |||
544 | for (VPBlockBase *Block : RPOT) { | |||
545 | LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "LV: VPBlock in RPO " << Block ->getName() << '\n'; } } while (false); | |||
546 | Block->execute(State); | |||
547 | } | |||
548 | ||||
549 | State->CurrentVectorLoop = PrevLoop; | |||
550 | return; | |||
551 | } | |||
552 | ||||
553 | assert(!State->Instance && "Replicating a Region with non-null instance.")(static_cast <bool> (!State->Instance && "Replicating a Region with non-null instance." ) ? void (0) : __assert_fail ("!State->Instance && \"Replicating a Region with non-null instance.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 553, __extension__ __PRETTY_FUNCTION__)); | |||
554 | ||||
555 | // Enter replicating mode. | |||
556 | State->Instance = VPIteration(0, 0); | |||
557 | ||||
558 | for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { | |||
559 | State->Instance->Part = Part; | |||
560 | assert(!State->VF.isScalable() && "VF is assumed to be non scalable.")(static_cast <bool> (!State->VF.isScalable() && "VF is assumed to be non scalable.") ? void (0) : __assert_fail ("!State->VF.isScalable() && \"VF is assumed to be non scalable.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 560, __extension__ __PRETTY_FUNCTION__)); | |||
561 | for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; | |||
562 | ++Lane) { | |||
563 | State->Instance->Lane = VPLane(Lane, VPLane::Kind::First); | |||
564 | // Visit the VPBlocks connected to \p this, starting from it. | |||
565 | for (VPBlockBase *Block : RPOT) { | |||
566 | LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("vplan")) { dbgs() << "LV: VPBlock in RPO " << Block ->getName() << '\n'; } } while (false); | |||
567 | Block->execute(State); | |||
568 | } | |||
569 | } | |||
570 | } | |||
571 | ||||
572 | // Exit replicating mode. | |||
573 | State->Instance.reset(); | |||
574 | } | |||
575 | ||||
576 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
577 | void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, | |||
578 | VPSlotTracker &SlotTracker) const { | |||
579 | O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {"; | |||
580 | auto NewIndent = Indent + " "; | |||
581 | for (auto *BlockBase : vp_depth_first_shallow(Entry)) { | |||
582 | O << '\n'; | |||
583 | BlockBase->print(O, NewIndent, SlotTracker); | |||
584 | } | |||
585 | O << Indent << "}\n"; | |||
586 | ||||
587 | printSuccessors(O, Indent); | |||
588 | } | |||
589 | #endif | |||
590 | ||||
591 | VPlan::~VPlan() { | |||
592 | for (auto &KV : LiveOuts) | |||
593 | delete KV.second; | |||
594 | LiveOuts.clear(); | |||
595 | ||||
596 | if (Entry) { | |||
597 | VPValue DummyValue; | |||
598 | for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) | |||
599 | Block->dropAllReferences(&DummyValue); | |||
600 | ||||
601 | VPBlockBase::deleteCFG(Entry); | |||
602 | ||||
603 | Preheader->dropAllReferences(&DummyValue); | |||
604 | delete Preheader; | |||
605 | } | |||
606 | for (VPValue *VPV : VPLiveInsToFree) | |||
607 | delete VPV; | |||
608 | if (BackedgeTakenCount) | |||
609 | delete BackedgeTakenCount; | |||
610 | } | |||
611 | ||||
612 | VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) { | |||
613 | VPBasicBlock *Preheader = new VPBasicBlock("ph"); | |||
614 | VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); | |||
615 | auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader); | |||
616 | Plan->TripCount = | |||
617 | vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); | |||
618 | return Plan; | |||
619 | } | |||
620 | ||||
621 | VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() { | |||
622 | VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); | |||
623 | for (VPRecipeBase &R : Header->phis()) { | |||
624 | if (isa<VPActiveLaneMaskPHIRecipe>(&R)) | |||
625 | return cast<VPActiveLaneMaskPHIRecipe>(&R); | |||
626 | } | |||
627 | return nullptr; | |||
628 | } | |||
629 | ||||
630 | void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, | |||
631 | Value *CanonicalIVStartValue, | |||
632 | VPTransformState &State, | |||
633 | bool IsEpilogueVectorization) { | |||
634 | // Check if the backedge taken count is needed, and if so build it. | |||
635 | if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { | |||
636 | IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); | |||
637 | auto *TCMO = Builder.CreateSub(TripCountV, | |||
638 | ConstantInt::get(TripCountV->getType(), 1), | |||
639 | "trip.count.minus.1"); | |||
640 | auto VF = State.VF; | |||
641 | Value *VTCMO = | |||
642 | VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast"); | |||
643 | for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) | |||
644 | State.set(BackedgeTakenCount, VTCMO, Part); | |||
645 | } | |||
646 | ||||
647 | for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) | |||
648 | State.set(&VectorTripCount, VectorTripCountV, Part); | |||
649 | ||||
650 | // When vectorizing the epilogue loop, the canonical induction start value | |||
651 | // needs to be changed from zero to the value after the main vector loop. | |||
652 | // FIXME: Improve modeling for canonical IV start values in the epilogue loop. | |||
653 | if (CanonicalIVStartValue) { | |||
654 | VPValue *VPV = getVPValueOrAddLiveIn(CanonicalIVStartValue); | |||
655 | auto *IV = getCanonicalIV(); | |||
656 | assert(all_of(IV->users(),(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
657 | [](const VPUser *U) {(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
658 | if (isa<VPScalarIVStepsRecipe>(U) ||(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
659 | isa<VPDerivedIVRecipe>(U))(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
660 | return true;(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
661 | auto *VPI = cast<VPInstruction>(U);(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
662 | return VPI->getOpcode() ==(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
663 | VPInstruction::CanonicalIVIncrement ||(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
664 | VPI->getOpcode() ==(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
665 | VPInstruction::CanonicalIVIncrementNUW;(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
666 | }) &&(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
667 | "the canonical IV should only be used by its increments or "(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)) | |||
668 | "ScalarIVSteps when resetting the start value")(static_cast <bool> (all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe >(U)) return true; auto *VPI = cast<VPInstruction>(U ); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW ; }) && "the canonical IV should only be used by its increments or " "ScalarIVSteps when resetting the start value") ? void (0) : __assert_fail ("all_of(IV->users(), [](const VPUser *U) { if (isa<VPScalarIVStepsRecipe>(U) || isa<VPDerivedIVRecipe>(U)) return true; auto *VPI = cast<VPInstruction>(U); return VPI->getOpcode() == VPInstruction::CanonicalIVIncrement || VPI->getOpcode() == VPInstruction::CanonicalIVIncrementNUW; }) && \"the canonical IV should only be used by its increments or \" \"ScalarIVSteps when resetting the start value\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 668, __extension__ __PRETTY_FUNCTION__)); | |||
669 | IV->setOperand(0, VPV); | |||
670 | } | |||
671 | } | |||
672 | ||||
673 | /// Generate the code inside the preheader and body of the vectorized loop. | |||
674 | /// Assumes a single pre-header basic-block was created for this. Introduce | |||
675 | /// additional basic-blocks as needed, and fill them all. | |||
676 | void VPlan::execute(VPTransformState *State) { | |||
677 | // Set the reverse mapping from VPValues to Values for code generation. | |||
678 | for (auto &Entry : Value2VPValue) | |||
679 | State->VPValue2Value[Entry.second] = Entry.first; | |||
680 | ||||
681 | // Initialize CFG state. | |||
682 | State->CFG.PrevVPBB = nullptr; | |||
683 | State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); | |||
684 | BasicBlock *VectorPreHeader = State->CFG.PrevBB; | |||
685 | State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); | |||
686 | ||||
687 | // Generate code in the loop pre-header and body. | |||
688 | for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) | |||
689 | Block->execute(State); | |||
690 | ||||
691 | VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); | |||
692 | BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; | |||
693 | ||||
694 | // Fix the latch value of canonical, reduction and first-order recurrences | |||
695 | // phis in the vector loop. | |||
696 | VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); | |||
697 | for (VPRecipeBase &R : Header->phis()) { | |||
698 | // Skip phi-like recipes that generate their backedege values themselves. | |||
699 | if (isa<VPWidenPHIRecipe>(&R)) | |||
700 | continue; | |||
701 | ||||
702 | if (isa<VPWidenPointerInductionRecipe>(&R) || | |||
703 | isa<VPWidenIntOrFpInductionRecipe>(&R)) { | |||
704 | PHINode *Phi = nullptr; | |||
705 | if (isa<VPWidenIntOrFpInductionRecipe>(&R)) { | |||
706 | Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0)); | |||
707 | } else { | |||
708 | auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R); | |||
709 | // TODO: Split off the case that all users of a pointer phi are scalar | |||
710 | // from the VPWidenPointerInductionRecipe. | |||
711 | if (WidenPhi->onlyScalarsGenerated(State->VF)) | |||
712 | continue; | |||
713 | ||||
714 | auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0)); | |||
715 | Phi = cast<PHINode>(GEP->getPointerOperand()); | |||
716 | } | |||
717 | ||||
718 | Phi->setIncomingBlock(1, VectorLatchBB); | |||
719 | ||||
720 | // Move the last step to the end of the latch block. This ensures | |||
721 | // consistent placement of all induction updates. | |||
722 | Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1)); | |||
723 | Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); | |||
724 | continue; | |||
725 | } | |||
726 | ||||
727 | auto *PhiR = cast<VPHeaderPHIRecipe>(&R); | |||
728 | // For canonical IV, first-order recurrences and in-order reduction phis, | |||
729 | // only a single part is generated, which provides the last part from the | |||
730 | // previous iteration. For non-ordered reductions all UF parts are | |||
731 | // generated. | |||
732 | bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) || | |||
733 | isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) || | |||
734 | (isa<VPReductionPHIRecipe>(PhiR) && | |||
735 | cast<VPReductionPHIRecipe>(PhiR)->isOrdered()); | |||
736 | unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF; | |||
737 | ||||
738 | for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { | |||
739 | Value *Phi = State->get(PhiR, Part); | |||
740 | Value *Val = State->get(PhiR->getBackedgeValue(), | |||
741 | SinglePartNeeded ? State->UF - 1 : Part); | |||
742 | cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB); | |||
743 | } | |||
744 | } | |||
745 | ||||
746 | // We do not attempt to preserve DT for outer loop vectorization currently. | |||
747 | if (!EnableVPlanNativePath) { | |||
748 | BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header]; | |||
749 | State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader); | |||
750 | updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB, | |||
751 | State->CFG.ExitBB); | |||
752 | } | |||
753 | } | |||
754 | ||||
755 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
756 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) | |||
757 | void VPlan::print(raw_ostream &O) const { | |||
758 | VPSlotTracker SlotTracker(this); | |||
759 | ||||
760 | O << "VPlan '" << getName() << "' {"; | |||
761 | ||||
762 | if (VectorTripCount.getNumUsers() > 0) { | |||
763 | O << "\nLive-in "; | |||
764 | VectorTripCount.printAsOperand(O, SlotTracker); | |||
765 | O << " = vector-trip-count"; | |||
766 | } | |||
767 | ||||
768 | if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { | |||
769 | O << "\nLive-in "; | |||
770 | BackedgeTakenCount->printAsOperand(O, SlotTracker); | |||
771 | O << " = backedge-taken count"; | |||
772 | } | |||
773 | ||||
774 | O << "\n"; | |||
775 | if (TripCount->isLiveIn()) | |||
776 | O << "Live-in "; | |||
777 | TripCount->printAsOperand(O, SlotTracker); | |||
778 | O << " = original trip-count"; | |||
779 | O << "\n"; | |||
780 | ||||
781 | if (!getPreheader()->empty()) { | |||
782 | O << "\n"; | |||
783 | getPreheader()->print(O, "", SlotTracker); | |||
784 | } | |||
785 | ||||
786 | for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) { | |||
787 | O << '\n'; | |||
788 | Block->print(O, "", SlotTracker); | |||
789 | } | |||
790 | ||||
791 | if (!LiveOuts.empty()) | |||
792 | O << "\n"; | |||
793 | for (const auto &KV : LiveOuts) { | |||
794 | O << "Live-out "; | |||
795 | KV.second->getPhi()->printAsOperand(O); | |||
796 | O << " = "; | |||
797 | KV.second->getOperand(0)->printAsOperand(O, SlotTracker); | |||
798 | O << "\n"; | |||
799 | } | |||
800 | ||||
801 | O << "}\n"; | |||
802 | } | |||
803 | ||||
804 | std::string VPlan::getName() const { | |||
805 | std::string Out; | |||
806 | raw_string_ostream RSO(Out); | |||
807 | RSO << Name << " for "; | |||
808 | if (!VFs.empty()) { | |||
809 | RSO << "VF={" << VFs[0]; | |||
810 | for (ElementCount VF : drop_begin(VFs)) | |||
811 | RSO << "," << VF; | |||
812 | RSO << "},"; | |||
813 | } | |||
814 | ||||
815 | if (UFs.empty()) { | |||
816 | RSO << "UF>=1"; | |||
817 | } else { | |||
818 | RSO << "UF={" << UFs[0]; | |||
819 | for (unsigned UF : drop_begin(UFs)) | |||
820 | RSO << "," << UF; | |||
821 | RSO << "}"; | |||
822 | } | |||
823 | ||||
824 | return Out; | |||
825 | } | |||
826 | ||||
827 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) | |||
828 | void VPlan::printDOT(raw_ostream &O) const { | |||
829 | VPlanPrinter Printer(O, *this); | |||
830 | Printer.dump(); | |||
831 | } | |||
832 | ||||
833 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) | |||
834 | void VPlan::dump() const { print(dbgs()); } | |||
835 | #endif | |||
836 | ||||
837 | void VPlan::addLiveOut(PHINode *PN, VPValue *V) { | |||
838 | assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists")(static_cast <bool> (LiveOuts.count(PN) == 0 && "an exit value for PN already exists") ? void (0) : __assert_fail ("LiveOuts.count(PN) == 0 && \"an exit value for PN already exists\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 838, __extension__ __PRETTY_FUNCTION__)); | |||
839 | LiveOuts.insert({PN, new VPLiveOut(PN, V)}); | |||
840 | } | |||
841 | ||||
842 | void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB, | |||
843 | BasicBlock *LoopLatchBB, | |||
844 | BasicBlock *LoopExitBB) { | |||
845 | // The vector body may be more than a single basic-block by this point. | |||
846 | // Update the dominator tree information inside the vector body by propagating | |||
847 | // it from header to latch, expecting only triangular control-flow, if any. | |||
848 | BasicBlock *PostDomSucc = nullptr; | |||
849 | for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { | |||
850 | // Get the list of successors of this block. | |||
851 | std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); | |||
852 | assert(Succs.size() <= 2 &&(static_cast <bool> (Succs.size() <= 2 && "Basic block in vector loop has more than 2 successors." ) ? void (0) : __assert_fail ("Succs.size() <= 2 && \"Basic block in vector loop has more than 2 successors.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 853, __extension__ __PRETTY_FUNCTION__)) | |||
853 | "Basic block in vector loop has more than 2 successors.")(static_cast <bool> (Succs.size() <= 2 && "Basic block in vector loop has more than 2 successors." ) ? void (0) : __assert_fail ("Succs.size() <= 2 && \"Basic block in vector loop has more than 2 successors.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 853, __extension__ __PRETTY_FUNCTION__)); | |||
854 | PostDomSucc = Succs[0]; | |||
855 | if (Succs.size() == 1) { | |||
856 | assert(PostDomSucc->getSinglePredecessor() &&(static_cast <bool> (PostDomSucc->getSinglePredecessor () && "PostDom successor has more than one predecessor." ) ? void (0) : __assert_fail ("PostDomSucc->getSinglePredecessor() && \"PostDom successor has more than one predecessor.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 857, __extension__ __PRETTY_FUNCTION__)) | |||
857 | "PostDom successor has more than one predecessor.")(static_cast <bool> (PostDomSucc->getSinglePredecessor () && "PostDom successor has more than one predecessor." ) ? void (0) : __assert_fail ("PostDomSucc->getSinglePredecessor() && \"PostDom successor has more than one predecessor.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 857, __extension__ __PRETTY_FUNCTION__)); | |||
858 | DT->addNewBlock(PostDomSucc, BB); | |||
859 | continue; | |||
860 | } | |||
861 | BasicBlock *InterimSucc = Succs[1]; | |||
862 | if (PostDomSucc->getSingleSuccessor() == InterimSucc) { | |||
863 | PostDomSucc = Succs[1]; | |||
864 | InterimSucc = Succs[0]; | |||
865 | } | |||
866 | assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&(static_cast <bool> (InterimSucc->getSingleSuccessor () == PostDomSucc && "One successor of a basic block does not lead to the other." ) ? void (0) : __assert_fail ("InterimSucc->getSingleSuccessor() == PostDomSucc && \"One successor of a basic block does not lead to the other.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 867, __extension__ __PRETTY_FUNCTION__)) | |||
867 | "One successor of a basic block does not lead to the other.")(static_cast <bool> (InterimSucc->getSingleSuccessor () == PostDomSucc && "One successor of a basic block does not lead to the other." ) ? void (0) : __assert_fail ("InterimSucc->getSingleSuccessor() == PostDomSucc && \"One successor of a basic block does not lead to the other.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 867, __extension__ __PRETTY_FUNCTION__)); | |||
868 | assert(InterimSucc->getSinglePredecessor() &&(static_cast <bool> (InterimSucc->getSinglePredecessor () && "Interim successor has more than one predecessor." ) ? void (0) : __assert_fail ("InterimSucc->getSinglePredecessor() && \"Interim successor has more than one predecessor.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 869, __extension__ __PRETTY_FUNCTION__)) | |||
869 | "Interim successor has more than one predecessor.")(static_cast <bool> (InterimSucc->getSinglePredecessor () && "Interim successor has more than one predecessor." ) ? void (0) : __assert_fail ("InterimSucc->getSinglePredecessor() && \"Interim successor has more than one predecessor.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 869, __extension__ __PRETTY_FUNCTION__)); | |||
870 | assert(PostDomSucc->hasNPredecessors(2) &&(static_cast <bool> (PostDomSucc->hasNPredecessors(2 ) && "PostDom successor has more than two predecessors." ) ? void (0) : __assert_fail ("PostDomSucc->hasNPredecessors(2) && \"PostDom successor has more than two predecessors.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 871, __extension__ __PRETTY_FUNCTION__)) | |||
871 | "PostDom successor has more than two predecessors.")(static_cast <bool> (PostDomSucc->hasNPredecessors(2 ) && "PostDom successor has more than two predecessors." ) ? void (0) : __assert_fail ("PostDomSucc->hasNPredecessors(2) && \"PostDom successor has more than two predecessors.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 871, __extension__ __PRETTY_FUNCTION__)); | |||
872 | DT->addNewBlock(InterimSucc, BB); | |||
873 | DT->addNewBlock(PostDomSucc, BB); | |||
874 | } | |||
875 | // Latch block is a new dominator for the loop exit. | |||
876 | DT->changeImmediateDominator(LoopExitBB, LoopLatchBB); | |||
877 | assert(DT->verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT->verify(DominatorTree::VerificationLevel ::Fast)) ? void (0) : __assert_fail ("DT->verify(DominatorTree::VerificationLevel::Fast)" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 877, __extension__ __PRETTY_FUNCTION__)); | |||
878 | } | |||
879 | ||||
880 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
881 | ||||
882 | Twine VPlanPrinter::getUID(const VPBlockBase *Block) { | |||
883 | return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + | |||
884 | Twine(getOrCreateBID(Block)); | |||
885 | } | |||
886 | ||||
887 | Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { | |||
888 | const std::string &Name = Block->getName(); | |||
889 | if (!Name.empty()) | |||
890 | return Name; | |||
891 | return "VPB" + Twine(getOrCreateBID(Block)); | |||
892 | } | |||
893 | ||||
894 | void VPlanPrinter::dump() { | |||
895 | Depth = 1; | |||
896 | bumpIndent(0); | |||
897 | OS << "digraph VPlan {\n"; | |||
898 | OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; | |||
899 | if (!Plan.getName().empty()) | |||
900 | OS << "\\n" << DOT::EscapeString(Plan.getName()); | |||
901 | if (Plan.BackedgeTakenCount) { | |||
902 | OS << ", where:\\n"; | |||
903 | Plan.BackedgeTakenCount->print(OS, SlotTracker); | |||
904 | OS << " := BackedgeTakenCount"; | |||
905 | } | |||
906 | OS << "\"]\n"; | |||
907 | OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; | |||
908 | OS << "edge [fontname=Courier, fontsize=30]\n"; | |||
909 | OS << "compound=true\n"; | |||
910 | ||||
911 | dumpBlock(Plan.getPreheader()); | |||
912 | ||||
913 | for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry())) | |||
914 | dumpBlock(Block); | |||
915 | ||||
916 | OS << "}\n"; | |||
917 | } | |||
918 | ||||
919 | void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { | |||
920 | if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) | |||
921 | dumpBasicBlock(BasicBlock); | |||
922 | else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) | |||
923 | dumpRegion(Region); | |||
924 | else | |||
925 | llvm_unreachable("Unsupported kind of VPBlock.")::llvm::llvm_unreachable_internal("Unsupported kind of VPBlock." , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 925); | |||
926 | } | |||
927 | ||||
928 | void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, | |||
929 | bool Hidden, const Twine &Label) { | |||
930 | // Due to "dot" we print an edge between two regions as an edge between the | |||
931 | // exiting basic block and the entry basic of the respective regions. | |||
932 | const VPBlockBase *Tail = From->getExitingBasicBlock(); | |||
933 | const VPBlockBase *Head = To->getEntryBasicBlock(); | |||
934 | OS << Indent << getUID(Tail) << " -> " << getUID(Head); | |||
935 | OS << " [ label=\"" << Label << '\"'; | |||
936 | if (Tail != From) | |||
937 | OS << " ltail=" << getUID(From); | |||
938 | if (Head != To) | |||
939 | OS << " lhead=" << getUID(To); | |||
940 | if (Hidden) | |||
941 | OS << "; splines=none"; | |||
942 | OS << "]\n"; | |||
943 | } | |||
944 | ||||
945 | void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { | |||
946 | auto &Successors = Block->getSuccessors(); | |||
947 | if (Successors.size() == 1) | |||
948 | drawEdge(Block, Successors.front(), false, ""); | |||
949 | else if (Successors.size() == 2) { | |||
950 | drawEdge(Block, Successors.front(), false, "T"); | |||
951 | drawEdge(Block, Successors.back(), false, "F"); | |||
952 | } else { | |||
953 | unsigned SuccessorNumber = 0; | |||
954 | for (auto *Successor : Successors) | |||
955 | drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); | |||
956 | } | |||
957 | } | |||
958 | ||||
959 | void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { | |||
960 | // Implement dot-formatted dump by performing plain-text dump into the | |||
961 | // temporary storage followed by some post-processing. | |||
962 | OS << Indent << getUID(BasicBlock) << " [label =\n"; | |||
963 | bumpIndent(1); | |||
964 | std::string Str; | |||
965 | raw_string_ostream SS(Str); | |||
966 | // Use no indentation as we need to wrap the lines into quotes ourselves. | |||
967 | BasicBlock->print(SS, "", SlotTracker); | |||
968 | ||||
969 | // We need to process each line of the output separately, so split | |||
970 | // single-string plain-text dump. | |||
971 | SmallVector<StringRef, 0> Lines; | |||
972 | StringRef(Str).rtrim('\n').split(Lines, "\n"); | |||
973 | ||||
974 | auto EmitLine = [&](StringRef Line, StringRef Suffix) { | |||
975 | OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix; | |||
976 | }; | |||
977 | ||||
978 | // Don't need the "+" after the last line. | |||
979 | for (auto Line : make_range(Lines.begin(), Lines.end() - 1)) | |||
980 | EmitLine(Line, " +\n"); | |||
981 | EmitLine(Lines.back(), "\n"); | |||
982 | ||||
983 | bumpIndent(-1); | |||
984 | OS << Indent << "]\n"; | |||
985 | ||||
986 | dumpEdges(BasicBlock); | |||
987 | } | |||
988 | ||||
989 | void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { | |||
990 | OS << Indent << "subgraph " << getUID(Region) << " {\n"; | |||
991 | bumpIndent(1); | |||
992 | OS << Indent << "fontname=Courier\n" | |||
993 | << Indent << "label=\"" | |||
994 | << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") | |||
995 | << DOT::EscapeString(Region->getName()) << "\"\n"; | |||
996 | // Dump the blocks of the region. | |||
997 | assert(Region->getEntry() && "Region contains no inner blocks.")(static_cast <bool> (Region->getEntry() && "Region contains no inner blocks." ) ? void (0) : __assert_fail ("Region->getEntry() && \"Region contains no inner blocks.\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 997, __extension__ __PRETTY_FUNCTION__)); | |||
998 | for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry())) | |||
999 | dumpBlock(Block); | |||
1000 | bumpIndent(-1); | |||
1001 | OS << Indent << "}\n"; | |||
1002 | dumpEdges(Region); | |||
1003 | } | |||
1004 | ||||
1005 | void VPlanIngredient::print(raw_ostream &O) const { | |||
1006 | if (auto *Inst = dyn_cast<Instruction>(V)) { | |||
1007 | if (!Inst->getType()->isVoidTy()) { | |||
1008 | Inst->printAsOperand(O, false); | |||
1009 | O << " = "; | |||
1010 | } | |||
1011 | O << Inst->getOpcodeName() << " "; | |||
1012 | unsigned E = Inst->getNumOperands(); | |||
1013 | if (E > 0) { | |||
1014 | Inst->getOperand(0)->printAsOperand(O, false); | |||
1015 | for (unsigned I = 1; I < E; ++I) | |||
1016 | Inst->getOperand(I)->printAsOperand(O << ", ", false); | |||
1017 | } | |||
1018 | } else // !Inst | |||
1019 | V->printAsOperand(O, false); | |||
1020 | } | |||
1021 | ||||
1022 | #endif | |||
1023 | ||||
1024 | template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); | |||
1025 | ||||
1026 | void VPValue::replaceAllUsesWith(VPValue *New) { | |||
1027 | for (unsigned J = 0; J < getNumUsers();) { | |||
1028 | VPUser *User = Users[J]; | |||
1029 | unsigned NumUsers = getNumUsers(); | |||
1030 | for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) | |||
1031 | if (User->getOperand(I) == this) | |||
1032 | User->setOperand(I, New); | |||
1033 | // If a user got removed after updating the current user, the next user to | |||
1034 | // update will be moved to the current position, so we only need to | |||
1035 | // increment the index if the number of users did not change. | |||
1036 | if (NumUsers == getNumUsers()) | |||
1037 | J++; | |||
1038 | } | |||
1039 | } | |||
1040 | ||||
1041 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
1042 | void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { | |||
1043 | if (const Value *UV = getUnderlyingValue()) { | |||
1044 | OS << "ir<"; | |||
1045 | UV->printAsOperand(OS, false); | |||
1046 | OS << ">"; | |||
1047 | return; | |||
1048 | } | |||
1049 | ||||
1050 | unsigned Slot = Tracker.getSlot(this); | |||
1051 | if (Slot == unsigned(-1)) | |||
1052 | OS << "<badref>"; | |||
1053 | else | |||
1054 | OS << "vp<%" << Tracker.getSlot(this) << ">"; | |||
1055 | } | |||
1056 | ||||
1057 | void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const { | |||
1058 | interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) { | |||
1059 | Op->printAsOperand(O, SlotTracker); | |||
1060 | }); | |||
1061 | } | |||
1062 | #endif | |||
1063 | ||||
1064 | void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, | |||
1065 | Old2NewTy &Old2New, | |||
1066 | InterleavedAccessInfo &IAI) { | |||
1067 | ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> | |||
1068 | RPOT(Region->getEntry()); | |||
1069 | for (VPBlockBase *Base : RPOT) { | |||
1070 | visitBlock(Base, Old2New, IAI); | |||
1071 | } | |||
1072 | } | |||
1073 | ||||
1074 | void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, | |||
1075 | InterleavedAccessInfo &IAI) { | |||
1076 | if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) { | |||
1077 | for (VPRecipeBase &VPI : *VPBB) { | |||
1078 | if (isa<VPHeaderPHIRecipe>(&VPI)) | |||
1079 | continue; | |||
1080 | assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions")(static_cast <bool> (isa<VPInstruction>(&VPI) && "Can only handle VPInstructions") ? void (0) : __assert_fail ("isa<VPInstruction>(&VPI) && \"Can only handle VPInstructions\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 1080, __extension__ __PRETTY_FUNCTION__)); | |||
1081 | auto *VPInst = cast<VPInstruction>(&VPI); | |||
1082 | ||||
1083 | auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue()); | |||
1084 | if (!Inst) | |||
1085 | continue; | |||
1086 | auto *IG = IAI.getInterleaveGroup(Inst); | |||
1087 | if (!IG) | |||
1088 | continue; | |||
1089 | ||||
1090 | auto NewIGIter = Old2New.find(IG); | |||
1091 | if (NewIGIter == Old2New.end()) | |||
1092 | Old2New[IG] = new InterleaveGroup<VPInstruction>( | |||
1093 | IG->getFactor(), IG->isReverse(), IG->getAlign()); | |||
1094 | ||||
1095 | if (Inst == IG->getInsertPos()) | |||
1096 | Old2New[IG]->setInsertPos(VPInst); | |||
1097 | ||||
1098 | InterleaveGroupMap[VPInst] = Old2New[IG]; | |||
1099 | InterleaveGroupMap[VPInst]->insertMember( | |||
1100 | VPInst, IG->getIndex(Inst), | |||
1101 | Align(IG->isReverse() ? (-1) * int(IG->getFactor()) | |||
1102 | : IG->getFactor())); | |||
1103 | } | |||
1104 | } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) | |||
1105 | visitRegion(Region, Old2New, IAI); | |||
1106 | else | |||
1107 | llvm_unreachable("Unsupported kind of VPBlock.")::llvm::llvm_unreachable_internal("Unsupported kind of VPBlock." , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 1107); | |||
1108 | } | |||
1109 | ||||
1110 | VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, | |||
1111 | InterleavedAccessInfo &IAI) { | |||
1112 | Old2NewTy Old2New; | |||
1113 | visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI); | |||
1114 | } | |||
1115 | ||||
1116 | void VPSlotTracker::assignSlot(const VPValue *V) { | |||
1117 | assert(!Slots.contains(V) && "VPValue already has a slot!")(static_cast <bool> (!Slots.contains(V) && "VPValue already has a slot!" ) ? void (0) : __assert_fail ("!Slots.contains(V) && \"VPValue already has a slot!\"" , "llvm/lib/Transforms/Vectorize/VPlan.cpp", 1117, __extension__ __PRETTY_FUNCTION__)); | |||
1118 | Slots[V] = NextSlot++; | |||
1119 | } | |||
1120 | ||||
1121 | void VPSlotTracker::assignSlots(const VPlan &Plan) { | |||
1122 | assignSlot(&Plan.VectorTripCount); | |||
1123 | if (Plan.BackedgeTakenCount) | |||
1124 | assignSlot(Plan.BackedgeTakenCount); | |||
1125 | assignSlots(Plan.getPreheader()); | |||
1126 | ||||
1127 | ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>> | |||
1128 | RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); | |||
1129 | for (const VPBasicBlock *VPBB : | |||
1130 | VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT)) | |||
1131 | assignSlots(VPBB); | |||
1132 | } | |||
1133 | ||||
1134 | void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) { | |||
1135 | for (const VPRecipeBase &Recipe : *VPBB) | |||
1136 | for (VPValue *Def : Recipe.definedValues()) | |||
1137 | assignSlot(Def); | |||
1138 | } | |||
1139 | ||||
1140 | bool vputils::onlyFirstLaneUsed(VPValue *Def) { | |||
1141 | return all_of(Def->users(), | |||
1142 | [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); }); | |||
1143 | } | |||
1144 | ||||
1145 | VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, | |||
1146 | ScalarEvolution &SE) { | |||
1147 | if (auto *Expanded = Plan.getSCEVExpansion(Expr)) | |||
1148 | return Expanded; | |||
1149 | VPValue *Expanded = nullptr; | |||
1150 | if (auto *E = dyn_cast<SCEVConstant>(Expr)) | |||
1151 | Expanded = Plan.getVPValueOrAddLiveIn(E->getValue()); | |||
1152 | else if (auto *E = dyn_cast<SCEVUnknown>(Expr)) | |||
1153 | Expanded = Plan.getVPValueOrAddLiveIn(E->getValue()); | |||
1154 | else { | |||
1155 | Expanded = new VPExpandSCEVRecipe(Expr, SE); | |||
1156 | Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe()); | |||
1157 | } | |||
1158 | Plan.addSCEVExpansion(Expr, Expanded); | |||
1159 | return Expanded; | |||
1160 | } |