1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
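//
// Rough construction pipeline implemented in this file (an illustrative
// summary, not a normative contract): buildVPlan0 first builds a plain CFG
// mirroring the input loop nest (PlainCFGBuilder), then addInitialSkeleton
// adds the vector.ph, middle.block and scalar.ph blocks together with the
// canonical IV and trip-count logic. The remaining entry points in this file
// (handleEarlyExits, addMiddleCheck, createLoopRegions, attachCheckBlock and
// the minimum-iteration checks) refine that skeleton step by step.
//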
13
15#include "VPlan.h"
16#include "VPlanCFG.h"
17#include "VPlanDominatorTree.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanTransforms.h"
20#include "VPlanUtils.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopIterator.h"
23#include "llvm/IR/InstrTypes.h"
24#include "llvm/IR/MDBuilder.h"
25
26#define DEBUG_TYPE "vplan"
27
28using namespace llvm;
29using namespace VPlanPatternMatch;
30
31namespace {
32// Class that is used to build the plain CFG for the incoming IR.
33class PlainCFGBuilder {
34 // The outermost loop of the input loop nest considered for vectorization.
35 Loop *TheLoop;
36
37 // Loop Info analysis.
38 LoopInfo *LI;
39
40 // Vectorization plan that we are working on.
41 std::unique_ptr<VPlan> Plan;
42
43 // Builder of the VPlan instruction-level representation.
44 VPBuilder VPIRBuilder;
45
46 // NOTE: The following maps are intentionally destroyed after the plain CFG
47 // construction because subsequent VPlan-to-VPlan transformation may
48 // invalidate them.
49 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
50 DenseMap<BasicBlock *, VPBasicBlock *> BB2VPBB;
51 // Map incoming Value definitions to their newly-created VPValues.
52 DenseMap<Value *, VPValue *> IRDef2VPValue;
53
54 // Hold phi nodes that need to be fixed once the plain CFG has been built.
55 SmallVector<PHINode *, 8> PhisToFix;
56
57 // Utility functions.
58 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
59 void fixHeaderPhis();
60 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
61#ifndef NDEBUG
62 bool isExternalDef(Value *Val);
63#endif
64 VPValue *getOrCreateVPOperand(Value *IRVal);
65 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
66
67public:
68 PlainCFGBuilder(Loop *Lp, LoopInfo *LI)
69 : TheLoop(Lp), LI(LI), Plan(std::make_unique<VPlan>(Lp)) {}
70
71 /// Build plain CFG for TheLoop and connect it to Plan's entry.
72 std::unique_ptr<VPlan> buildPlainCFG();
73};
74} // anonymous namespace
75
76// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
77// must have no predecessors.
78void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
79 // Collect VPBB predecessors.
80 SmallVector<VPBlockBase *, 2> VPBBPreds;
81 for (BasicBlock *Pred : predecessors(BB))
82 VPBBPreds.push_back(getOrCreateVPBB(Pred));
83 VPBB->setPredecessors(VPBBPreds);
84}
85
86static bool isHeaderBB(BasicBlock *BB, Loop *L) {
87 return L && BB == L->getHeader();
88}
89
90// Add operands to VPInstructions representing phi nodes from the input IR.
91void PlainCFGBuilder::fixHeaderPhis() {
92 for (auto *Phi : PhisToFix) {
93 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
94 VPValue *VPVal = IRDef2VPValue[Phi];
95 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
96 auto *PhiR = cast<VPPhi>(VPVal);
97 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
98 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
99 "Expected Phi in header block.");
100 assert(Phi->getNumOperands() == 2 &&
101 "header phi must have exactly 2 operands");
102 for (BasicBlock *Pred : predecessors(Phi->getParent()))
103 PhiR->addOperand(
104 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
105 }
106}
107
108// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
109// existing one if it was already created.
110VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
111 if (auto *VPBB = BB2VPBB.lookup(BB)) {
112 // Retrieve existing VPBB.
113 return VPBB;
114 }
115
116 // Create new VPBB.
117 StringRef Name = BB->getName();
118 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
119 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
120 BB2VPBB[BB] = VPBB;
121 return VPBB;
122}
123
124#ifndef NDEBUG
125// Return true if \p Val is considered an external definition. An external
126// definition is either:
127// 1. A Value that is not an Instruction. This will be refined in the future.
128// 2. An Instruction that is outside of the IR region represented in VPlan,
129// i.e., is not part of the loop nest.
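// For illustration (examples, not an exhaustive list): constants, function
// arguments and global values fall under (1), while an instruction computed
// outside TheLoop, e.g. in its preheader, falls under (2).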
130bool PlainCFGBuilder::isExternalDef(Value *Val) {
131 // All the Values that are not Instructions are considered external
132 // definitions for now.
133 Instruction *Inst = dyn_cast<Instruction>(Val);
134 if (!Inst)
135 return true;
136
137 // Check whether Instruction definition is in loop body.
138 return !TheLoop->contains(Inst);
139}
140#endif
141
142// Create a new VPValue or retrieve an existing one for the Instruction's
143// operand \p IRVal. This function must only be used to create/retrieve VPValues
144// for *Instruction's operands* and not to create regular VPInstructions. For
145// the latter, please look at 'createVPInstructionsForVPBB'.
146VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
147 auto VPValIt = IRDef2VPValue.find(IRVal);
148 if (VPValIt != IRDef2VPValue.end())
149 // Operand has an associated VPInstruction or VPValue that was previously
150 // created.
151 return VPValIt->second;
152
153 // Operand doesn't have a previously created VPInstruction/VPValue. This
154 // means that operand is:
155 // A) a definition external to VPlan,
156 // B) any other Value without specific representation in VPlan.
157 // For now, we use VPValue to represent A and B and classify both as external
158 // definitions. We may introduce specific VPValue subclasses for them in the
159 // future.
160 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
161
162 // A and B: Create VPValue and add it to the pool of external definitions and
163 // to the Value->VPValue map.
164 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
165 IRDef2VPValue[IRVal] = NewVPVal;
166 return NewVPVal;
167}
168
169// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
170// counterpart. This function must be invoked in RPO so that the operands of a
171// VPInstruction in \p BB have been visited before (except for Phi nodes).
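// For example (illustrative): when visiting a block containing
//   %sum = add i32 %a, %b
// the RPO guarantee means %a and %b already have associated VPValues (or are
// external definitions handled by getOrCreateVPOperand). Header phis are the
// exception, since their incoming value from the latch is only visited later;
// they are completed in fixHeaderPhis.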
172void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
173 BasicBlock *BB) {
174 VPIRBuilder.setInsertPoint(VPBB);
175 // TODO: Model and preserve debug intrinsics in VPlan.
176 for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
177 Instruction *Inst = &InstRef;
178
179 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
180 // visited Inst when we shouldn't, breaking the RPO traversal order.
181 assert(!IRDef2VPValue.count(Inst) &&
182 "Instruction shouldn't have been visited.");
183
184 if (auto *Br = dyn_cast<BranchInst>(Inst)) {
185 // Conditional branch instructions are represented using BranchOnCond
186 // recipes.
187 if (Br->isConditional()) {
188 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
189 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst);
190 }
191
192 // Skip the rest of the Instruction processing for Branch instructions.
193 continue;
194 }
195
196 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
197 // Don't emit recipes for unconditional switch instructions.
198 if (SI->getNumCases() == 0)
199 continue;
200 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
201 for (auto Case : SI->cases())
202 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
203 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst);
204 continue;
205 }
206
207 VPSingleDefRecipe *NewR;
208 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
209 // Phi node's operands may not have been visited at this point. We create
210 // an empty VPInstruction that we will fix once the whole plain CFG has
211 // been built.
212 NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi");
213 NewR->setUnderlyingValue(Phi);
214 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
215 // Header phis need to be fixed after the VPBB for the latch has been
216 // created.
217 PhisToFix.push_back(Phi);
218 } else {
219 // Add operands for VPPhi in the order matching its predecessors in
220 // VPlan.
221 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
222 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
223 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
224 getOrCreateVPOperand(Phi->getIncomingValue(I));
225 }
226 for (VPBlockBase *Pred : VPBB->getPredecessors())
227 NewR->addOperand(
228 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
229 }
230 } else {
231 // Translate LLVM-IR operands into VPValue operands and set them in the
232 // new VPInstruction.
233 SmallVector<VPValue *, 4> VPOperands;
234 for (Value *Op : Inst->operands())
235 VPOperands.push_back(getOrCreateVPOperand(Op));
236
237 if (auto *CI = dyn_cast<CastInst>(Inst)) {
238 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
239 CI->getType(), CI->getDebugLoc());
240 NewR->setUnderlyingValue(CI);
241 } else {
242 // Build VPInstruction for any arbitrary Instruction without specific
243 // representation in VPlan.
244 NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst);
245 }
246 }
247
248 IRDef2VPValue[Inst] = NewR;
249 }
250}
251
252// Main interface to build the plain CFG.
253std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
254 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
255 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
256 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
257 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
258
259 // 1. Scan the body of the loop in a topological order to visit each basic
260 // block after having visited its predecessor basic blocks. Create a VPBB for
261 // each BB and link it to its successor and predecessor VPBBs. Note that
262 // predecessors must be set in the same order as they are in the incoming IR.
263 // Otherwise, there might be problems with existing phi nodes and algorithms
264 // based on predecessor traversal.
265
266 // Loop PH needs to be explicitly visited since it's not taken into account by
267 // LoopBlocksDFS.
268 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
269 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
270 "Unexpected loop preheader");
271 for (auto &I : *ThePreheaderBB) {
272 if (I.getType()->isVoidTy())
273 continue;
274 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
275 }
276
277 LoopBlocksRPO RPO(TheLoop);
278 RPO.perform(LI);
279
280 for (BasicBlock *BB : RPO) {
281 // Create or retrieve the VPBasicBlock for this BB.
282 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
283 // Set VPBB predecessors in the same order as they are in the incoming BB.
284 setVPBBPredsFromBB(VPBB, BB);
285
286 // Create VPInstructions for BB.
287 createVPInstructionsForVPBB(VPBB, BB);
288
289 // Set VPBB successors. We create empty VPBBs for successors if they don't
290 // exist already. Recipes will be created when the successor is visited
291 // during the RPO traversal.
292 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
293 SmallVector<VPBlockBase *> Succs = {
294 getOrCreateVPBB(SI->getDefaultDest())};
295 for (auto Case : SI->cases())
296 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
297 VPBB->setSuccessors(Succs);
298 continue;
299 }
300 auto *BI = cast<BranchInst>(BB->getTerminator());
301 unsigned NumSuccs = succ_size(BB);
302 if (NumSuccs == 1) {
303 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
304 continue;
305 }
306 assert(BI->isConditional() && NumSuccs == 2 &&
307 "block must have conditional branch with 2 successors");
308
309 BasicBlock *IRSucc0 = BI->getSuccessor(0);
310 BasicBlock *IRSucc1 = BI->getSuccessor(1);
311 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
312 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
313 VPBB->setTwoSuccessors(Successor0, Successor1);
314 }
315
316 for (auto *EB : Plan->getExitBlocks())
317 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
318
319 // 2. The whole CFG has been built at this point so all the input Values must
320 // have a VPlan counterpart. Fix VPlan header phis by adding their
321 // corresponding VPlan operands.
322 fixHeaderPhis();
323
324 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
325 Plan->getEntry()->setPlan(&*Plan);
326
327 // Fix VPlan loop-closed-SSA exit phis by adding incoming operands to the
328 // VPIRInstructions wrapping them.
329 // Note that the operand order corresponds to IR predecessor order, and may
330 // need adjusting when VPlan predecessors are added, if an exit block has
331 // multiple predecessors.
332 for (auto *EB : Plan->getExitBlocks()) {
333 for (VPRecipeBase &R : EB->phis()) {
334 auto *PhiR = cast<VPIRPhi>(&R);
335 PHINode &Phi = PhiR->getIRPhi();
336 assert(PhiR->getNumOperands() == 0 &&
337 "no phi operands should be added yet");
338 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
339 PhiR->addOperand(
340 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
341 }
342 }
343
344 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
345 return std::move(Plan);
346}
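
// Illustrative example (not normative): for a single-block loop, the plain CFG
// built above consists of the entry VPIRBasicBlock (wrapping the IR preheader)
// branching to one VPBasicBlock holding the loop body's VPInstructions, which
// in turn has two successors, the wrapped IR exit block and the body itself
// (the back edge). Header phis get their operands filled in by fixHeaderPhis
// once the whole CFG exists.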
347
348/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
349/// has exactly 2 predecessors (preheader and latch), where the block
350/// dominates the latch and the preheader dominates the block. If it is a
351/// header block return true and canonicalize the predecessors of the header
352/// (making sure the preheader appears first and the latch second) and the
353/// successors of the latch (making sure the loop exit comes first). Otherwise
354/// return false.
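///
/// Illustrative sketch (a typical canonicalized loop):
///
///     preheader          Preds[0]
///         |
///       header  <----+   HeaderVPB
///         |          |
///       latch -------+   Preds[1]; successors ordered (exit, header)
///         |
///       exit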
355static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
356 const VPDominatorTree &VPDT) {
357 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
358 if (Preds.size() != 2)
359 return false;
360
361 auto *PreheaderVPBB = Preds[0];
362 auto *LatchVPBB = Preds[1];
363 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
364 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
365 std::swap(PreheaderVPBB, LatchVPBB);
366
367 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
368 !VPDT.dominates(HeaderVPB, LatchVPBB))
369 return false;
370
371 // Canonicalize predecessors of header so that preheader is first and
372 // latch second.
373 HeaderVPB->swapPredecessors();
374 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
375 R.swapOperands();
376 }
377
378 // The two successors of the conditional branch match the condition, with the
379 // first successor corresponding to true and the second to false. We
380 // canonicalize the successors of the latch when introducing the region, such
381 // that the latch exits the region when its condition is true; invert the
382 // original condition if the original CFG branches to the header on true.
383 // Note that the exit edge is not yet connected for top-level loops.
384 if (LatchVPBB->getSingleSuccessor() ||
385 LatchVPBB->getSuccessors()[0] != HeaderVPB)
386 return true;
387
388 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
389 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
391 assert(Term && match(Term, m_BranchOnCond()) &&
392 "terminator must be a BranchOnCond");
393 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
394 Not->insertBefore(Term);
395 Term->setOperand(0, Not);
396 LatchVPBB->swapSuccessors();
397
398 return true;
399}
400
401/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
402static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
403 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
404 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
405
406 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
407 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
408 VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor();
409 assert(LatchExitVPB && "Latch expected to be left with a single successor");
410
411 // Create an empty region first and insert it between PreheaderVPBB and
412 // LatchExitVPB, taking care to preserve the original predecessor & successor
413 // order of blocks. Set region entry and exiting after both HeaderVPB and
414 // LatchVPBB have been disconnected from their predecessors/successors.
415 auto *R = Plan.createLoopRegion();
416 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
417 VPBlockUtils::disconnectBlocks(LatchVPBB, R);
418 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
419 R->setEntry(HeaderVPB);
420 R->setExiting(LatchVPBB);
421
422 // All VPBBs reachable shallowly from HeaderVPB belong to the current region.
423 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
424 VPBB->setParent(R);
425}
426
427// Add the necessary canonical IV and branch recipes required to control the
428// loop.
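// Roughly, the recipes added below look like this (illustrative, loosely
// following VPlan's debug-print syntax, which may differ in detail):
//
//   header:
//     EMIT %iv = CANONICAL-INDUCTION ir<0>, %index.next
//   ...
//   latch:
//     EMIT %index.next = add nuw %iv, %VFxUF
//     EMIT branch-on-count %index.next, %vector.trip.count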
429static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
430 VPBasicBlock *LatchVPBB, Type *IdxTy,
431 DebugLoc DL) {
432 Value *StartIdx = ConstantInt::get(IdxTy, 0);
433 auto *StartV = Plan.getOrAddLiveIn(StartIdx);
434
435 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
436 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
437 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
438
439 // We are about to replace the branch to exit the region. Remove the original
440 // BranchOnCond, if there is any.
441 DebugLoc LatchDL = DL;
442 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
443 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
444 LatchVPBB->getTerminator()->eraseFromParent();
445 }
446
447 VPBuilder Builder(LatchVPBB);
448 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
449 // Initially the induction increment is guaranteed to not wrap, but that may
450 // change later, e.g. when tail-folding, when the flags need to be dropped.
451 auto *CanonicalIVIncrement = Builder.createOverflowingOp(
452 Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
453 "index.next");
454 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
455
456 // Add the BranchOnCount VPInstruction to the latch.
457 Builder.createNaryOp(VPInstruction::BranchOnCount,
458 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
459 LatchDL);
460}
461
462/// Creates extracts for values in \p Plan defined in a loop region and used
463/// outside a loop region.
464static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
465 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
466 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
467 if (EB->getSinglePredecessor() != MiddleVPBB)
468 continue;
469
470 for (VPRecipeBase &R : EB->phis()) {
471 auto *ExitIRI = cast<VPIRPhi>(&R);
472 for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
473 VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
474 if (!Inc)
475 continue;
476 assert(ExitIRI->getNumOperands() == 1 &&
477 ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
478 "exit values from early exits must be fixed when branch to "
479 "early-exit is added");
480 ExitIRI->extractLastLaneOfFirstOperand(B);
481 }
482 }
483 }
484}
485
486static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
487 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
488 VPDominatorTree VPDT(Plan);
489
490 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
491 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
492 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
493
494 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
495 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
496
497 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
498 // The canonical LatchVPBB has the header block as last successor. If it has
499 // another successor, this successor is an exit block - insert middle block on
500 // its edge. Otherwise, add middle block as another successor retaining header
501 // as last.
502 if (LatchVPBB->getNumSuccessors() == 2) {
503 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
504 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
505 } else {
506 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
507 LatchVPBB->swapSuccessors();
508 }
509
510 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
511
512 // Create SCEV and VPValue for the trip count.
513 // We use the symbolic max backedge-taken-count, which works also when
514 // vectorizing loops with uncountable early exits.
515 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
516 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
517 "Invalid backedge-taken count");
518 ScalarEvolution &SE = *PSE.getSE();
519 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
520 InductionTy, TheLoop);
521 Plan.setTripCount(vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount));
522
523 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
524 VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
525
526 // The connection order corresponds to the operands of the conditional branch,
527 // with the middle block already connected to the exit block.
528 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
529 // Also connect the entry block to the scalar preheader.
530 // TODO: Also introduce a branch recipe together with the minimum trip count
531 // check.
532 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
533 Plan.getEntry()->swapSuccessors();
534
535 createExtractsForLiveOuts(Plan, MiddleVPBB);
536}
537
538std::unique_ptr<VPlan>
539VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
540 DebugLoc IVDL, PredicatedScalarEvolution &PSE) {
541 PlainCFGBuilder Builder(TheLoop, &LI);
542 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
543 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
544 return VPlan0;
545}
546
547void VPlanTransforms::handleEarlyExits(VPlan &Plan,
548 bool HasUncountableEarlyExit) {
549 auto *MiddleVPBB = cast<VPBasicBlock>(
550 Plan.getScalarPreheader()->getPredecessors()[0]);
551 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
552 VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
553
554 // Disconnect all early exits from the loop leaving it with a single exit from
555 // the latch. Early exits that are countable are left for a scalar epilog. The
556 // condition of uncountable early exits (currently at most one is supported)
557 // is fused into the latch exit, and used to branch from middle block to the
558 // early exit destination.
559 [[maybe_unused]] bool HandledUncountableEarlyExit = false;
560 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
561 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
562 if (Pred == MiddleVPBB)
563 continue;
564 if (HasUncountableEarlyExit) {
565 assert(!HandledUncountableEarlyExit &&
566 "can handle exactly one uncountable early exit");
567 handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
568 cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
569 HandledUncountableEarlyExit = true;
570 } else {
571 for (VPRecipeBase &R : EB->phis())
572 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
573 }
574 cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
575 VPBlockUtils::disconnectBlocks(Pred, EB);
576 }
577 }
578
579 assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
580 "missed an uncountable exit that must be handled");
581}
582
583void VPlanTransforms::addMiddleCheck(VPlan &Plan,
584 bool RequiresScalarEpilogueCheck,
585 bool TailFolded) {
586 auto *MiddleVPBB = cast<VPBasicBlock>(
587 Plan.getScalarPreheader()->getPredecessors()[0]);
588 // If MiddleVPBB has a single successor then the original loop does not exit
589 // via the latch and the single successor must be the scalar preheader.
590 // There's no need to add a runtime check to MiddleVPBB.
591 if (MiddleVPBB->getNumSuccessors() == 1) {
592 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
593 "must have ScalarPH as single successor");
594 return;
595 }
596
597 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
598
599 // Add a check in the middle block to see if we have completed all of the
600 // iterations in the first vector loop.
601 //
602 // Three cases:
603 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
604 // condition to false.
605 // 2) If (N - N%VF) == N, then we *don't* need to run the
606 // remainder. Thus if tail is to be folded, we know we don't need to run
607 // the remainder and we can set the condition to true.
608 // 3) Otherwise, construct a runtime check.
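  //
  // Worked example (illustrative): with a trip count of 17, VF * UF = 8, no
  // required scalar epilogue and no tail folding, the vector trip count is 16,
  // so cmp.n = (17 == 16) is false and the remaining iteration runs in the
  // scalar loop.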
609
610 // We use the same DebugLoc as the scalar loop latch terminator instead of
611 // the corresponding compare because they may have ended up with different
612 // line numbers and we want to avoid awkward line stepping while debugging.
613 // E.g., if the compare has got a line number inside the loop.
614 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
615 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
616 VPBuilder Builder(MiddleVPBB);
617 VPValue *Cmp;
618 if (!RequiresScalarEpilogueCheck)
619 Cmp = Plan.getFalse();
620 else if (TailFolded)
621 Cmp = Plan.getTrue();
622 else
623 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
624 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
625 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
626}
627
628void VPlanTransforms::createLoopRegions(VPlan &Plan) {
629 VPDominatorTree VPDT(Plan);
630 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
631 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
632 createLoopRegion(Plan, HeaderVPB);
633
634 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
635 TopRegion->setName("vector loop");
636 TopRegion->getEntryBasicBlock()->setName("vector.body");
637}
638
639// Likelihood of bypassing the vectorized loop due to a runtime check block,
640// including the memory-overlap checks block and the wrapping/unit-stride checks block.
641static constexpr uint32_t CheckBypassWeights[] = {1, 127};
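// Note (illustrative arithmetic): weights of {1, 127} correspond to an
// estimated bypass probability of roughly 1/128, i.e. the runtime checks are
// expected to pass and the vector loop to be entered almost always.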
642
643void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
644 BasicBlock *CheckBlock,
645 bool AddBranchWeights) {
646 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
647 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
648 VPBlockBase *VectorPH = Plan.getVectorPreheader();
649 VPBlockBase *ScalarPH = Plan.getScalarPreheader();
650 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
651 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
652 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
653 CheckBlockVPBB->swapSuccessors();
654
655 // We just connected a new block to the scalar preheader. Update all
656 // VPPhis by adding an incoming value for it, replicating the last value.
657 unsigned NumPredecessors = ScalarPH->getNumPredecessors();
658 for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
659 assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
660 assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
661 "must have incoming values for all operands");
662 R.addOperand(R.getOperand(NumPredecessors - 2));
663 }
664
665 VPIRMetadata VPBranchWeights;
666 auto *Term =
667 VPBuilder(CheckBlockVPBB)
668 .createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
670 Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc());
671 if (AddBranchWeights) {
672 MDBuilder MDB(Plan.getContext());
673 MDNode *BranchWeights =
674 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
675 Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
676 }
677}
678
679void VPlanTransforms::addMinimumIterationCheck(
680 VPlan &Plan, ElementCount VF, unsigned UF,
681 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
682 bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
683 const uint32_t *MinItersBypassWeights, DebugLoc DL, ScalarEvolution &SE) {
684 // Generate code to check if the loop's trip count is less than VF * UF, or
685 // equal to it in case a scalar epilogue is required; this implies that the
686 // vector trip count is zero. This check also covers the case where adding one
687 // to the backedge-taken count overflowed leading to an incorrect trip count
688 // of zero. In this case we will also jump to the scalar loop.
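  // Illustrative example: with an i32 induction and a backedge-taken count of
  // 0xFFFFFFFF, the trip count (backedge-taken count + 1) wraps to 0; the
  // unsigned compare below then branches to the scalar loop instead of
  // entering the vector loop with a bogus trip count.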
689 CmpInst::Predicate CmpPred =
690 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
691 // If tail is to be folded, vector loop takes care of all iterations.
692 VPValue *TripCountVPV = Plan.getTripCount();
693 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, SE);
694 Type *TripCountTy = TripCount->getType();
695 auto GetMinTripCount = [&]() -> const SCEV * {
696 // Compute max(MinProfitableTripCount, UF * VF) and return it.
697 const SCEV *VFxUF =
698 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
699 if (UF * VF.getKnownMinValue() >=
700 MinProfitableTripCount.getKnownMinValue()) {
701 // TODO: SCEV should be able to simplify test.
702 return VFxUF;
703 }
704 const SCEV *MinProfitableTripCountSCEV =
705 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
706 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
707 };
708
709 VPBasicBlock *EntryVPBB = Plan.getEntry();
710 VPBuilder Builder(EntryVPBB);
711 VPValue *TripCountCheck = Plan.getFalse();
712 const SCEV *Step = GetMinTripCount();
713 if (TailFolded) {
714 if (CheckNeededWithTailFolding) {
715 // vscale is not necessarily a power-of-2, which means we cannot guarantee
716 // an overflow to zero when updating induction variables and so an
717 // additional overflow check is required before entering the vector loop.
718
719 // Get the maximum unsigned value for the type.
720 VPValue *MaxUIntTripCount =
721 Plan.getConstantInt(cast<IntegerType>(TripCountTy)->getMask());
722 VPValue *DistanceToMax = Builder.createNaryOp(
723 Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
725
726 // Don't execute the vector loop if (UMax - n) < (VF * UF).
727 // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
728 // minProfitableTripCount).
729 TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
730 Builder.createExpandSCEV(Step), DL);
731 } else {
732 // TripCountCheck = false, folding tail implies positive vector trip
733 // count.
734 }
735 } else {
736 // TODO: Emit unconditional branch to vector preheader instead of
737 // conditional branch with known condition.
738 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
739 // Check if the trip count is < the step.
740 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
741 // TODO: Ensure step is at most the trip count when determining max VF and
742 // UF, w/o tail folding.
743 TripCountCheck = Plan.getTrue();
744 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
745 TripCount, Step)) {
746 // Generate the minimum iteration check only if we cannot prove the
747 // check is known to be true, or known to be false.
748 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
749 TripCountCheck = Builder.createICmp(
750 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
751 } // else step known to be < trip count, use TripCountCheck preset to false.
752 }
753 VPInstruction *Term =
754 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
755 if (MinItersBypassWeights) {
756 MDBuilder MDB(Plan.getContext());
757 MDNode *BranchWeights = MDB.createBranchWeights(
758 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
759 Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
760 }
761}
762
763void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
764 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
765 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
766 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE) {
767 // Add the minimum iteration check for the epilogue vector loop.
768 VPValue *TC = Plan.getOrAddLiveIn(TripCount);
769 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
770 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
771 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
772 VPValue *Count = Builder.createNaryOp(
773 Instruction::Sub, {TC, Plan.getOrAddLiveIn(VectorTripCount)},
774 DebugLoc::getUnknown(), "n.vec.remaining");
775
776 // Generate code to check if the loop's trip count is less than VF * UF of
777 // the vector epilogue loop.
778 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
779 auto *CheckMinIters = Builder.createICmp(
780 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
781 VPInstruction *Branch =
782 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
783
784 // We assume the remaining `Count` is equally distributed in
785 // [0, MainLoopStep)
786 // So the probability for `Count < EpilogueLoopStep` should be
787 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
788 // TODO: Improve the estimate by taking the estimated trip count into
789 // consideration.
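  // Worked example (illustrative): MainLoopStep = 8 and EpilogueLoopStep = 4
  // give EstimatedSkipCount = min(8, 4) = 4 and weights {4, 4}, i.e. an
  // estimated 50% probability that the remaining count is too small for the
  // epilogue vector loop.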
790 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
791 const uint32_t Weights[] = {EstimatedSkipCount,
792 MainLoopStep - EstimatedSkipCount};
793 MDBuilder MDB(Plan.getContext());
794 MDNode *BranchWeights =
795 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
796 Branch->addMetadata(LLVMContext::MD_prof, BranchWeights);
797}
798
799bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
800 auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
801 auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
802 RedPhiR->getBackedgeValue()->getDefiningRecipe());
803 if (!MinMaxR)
804 return nullptr;
805
806 auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR);
807 if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
808 !(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr())))
809 return nullptr;
810
811#ifndef NDEBUG
812 Intrinsic::ID RdxIntrinsicId =
813 RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
814 : Intrinsic::minnum;
815 assert(((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
816 cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
817 RdxIntrinsicId) ||
818 (RepR && cast<IntrinsicInst>(RepR->getUnderlyingInstr())
819 ->getIntrinsicID() == RdxIntrinsicId)) &&
820 "Intrinsic did not match recurrence kind");
821#endif
822
823 if (MinMaxR->getOperand(0) == RedPhiR)
824 return MinMaxR->getOperand(1);
825
826 assert(MinMaxR->getOperand(1) == RedPhiR &&
827 "Reduction phi operand expected");
828 return MinMaxR->getOperand(0);
829 };
830
831 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
832 SmallVector<std::pair<VPReductionPHIRecipe *, VPValue *>, 2>
833 MinMaxNumReductionsToHandle;
834 bool HasUnsupportedPhi = false;
835 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
836 if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
837 continue;
838 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
839 if (!Cur) {
840 // TODO: Also support fixed-order recurrence phis.
841 HasUnsupportedPhi = true;
842 continue;
843 }
844 if (!RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
845 Cur->getRecurrenceKind())) {
846 HasUnsupportedPhi = true;
847 continue;
848 }
849
850 VPValue *MinMaxOp = GetMinMaxCompareValue(Cur);
851 if (!MinMaxOp)
852 return false;
853
854 MinMaxNumReductionsToHandle.emplace_back(Cur, MinMaxOp);
855 }
856
857 if (MinMaxNumReductionsToHandle.empty())
858 return true;
859
860 // We won't be able to resume execution in the scalar tail if there are
861 // unsupported header phis or there is no scalar tail at all, due to
862 // tail-folding.
863 if (HasUnsupportedPhi || !Plan.hasScalarTail())
864 return false;
865
866 /// Check if the vector loop of \p Plan can early exit and restart
867 /// execution of last vector iteration in the scalar loop. This requires all
868 /// recipes up to the early exit point to be side-effect free as they are
869 /// re-executed. Currently we check that the loop is free of any recipe that
870 /// may write to memory. Expected to operate on an early VPlan w/o nested
871 /// regions.
872 for (VPBlockBase *VPB :
873 vp_depth_first_shallow(LoopRegion->getEntry())) {
874 auto *VPBB = cast<VPBasicBlock>(VPB);
875 for (auto &R : *VPBB) {
876 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
877 return false;
878 }
879 }
880
881 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
882 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
883 VPValue *AllNaNLanes = nullptr;
884 SmallPtrSet<VPValue *, 2> RdxResults;
885 for (const auto &[_, MinMaxOp] : MinMaxNumReductionsToHandle) {
886 VPValue *RedNaNLanes =
887 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
888 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
889 : RedNaNLanes;
890 }
891
892 VPValue *AnyNaNLane =
893 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
894 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
895 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
896 for (const auto &[RedPhiR, _] : MinMaxNumReductionsToHandle) {
897 assert(RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
898 RedPhiR->getRecurrenceKind()) &&
899 "unsupported reduction");
900
901 // If we exit early due to NaNs, compute the final reduction result based on
902 // the reduction phi at the beginning of the last vector iteration.
903 auto *RdxResult = find_singleton<VPSingleDefRecipe>(
904 RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
905 auto *VPI = dyn_cast<VPInstruction>(U);
906 if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
907 return VPI;
908 return nullptr;
909 });
910
911 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
912 RdxResult->getOperand(1));
913 RdxResult->setOperand(1, NewSel);
914 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
915 RdxResults.insert(RdxResult);
916 }
917
918 auto *LatchExitingBranch = LatchVPBB->getTerminator();
919 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
920 "Unexpected terminator");
921 auto *IsLatchExitTaken = LatchBuilder.createICmp(
922 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
923 LatchExitingBranch->getOperand(1));
924 auto *AnyExitTaken = LatchBuilder.createNaryOp(
925 Instruction::Or, {AnyNaNLane, IsLatchExitTaken});
926 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
927 LatchExitingBranch->eraseFromParent();
928
929 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
930 // true, resume from the start of the last vector iteration via the
931 // canonical IV, otherwise from the original value.
932 for (auto &R : Plan.getScalarPreheader()->phis()) {
933 auto *ResumeR = cast<VPPhi>(&R);
934 VPValue *VecV = ResumeR->getOperand(0);
935 if (RdxResults.contains(VecV))
936 continue;
937 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
938 if (DerivedIV->getNumUsers() == 1 &&
939 DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
940 auto *NewSel =
941 MiddleBuilder.createSelect(AnyNaNLane, LoopRegion->getCanonicalIV(),
942 &Plan.getVectorTripCount());
943 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
944 DerivedIV->setOperand(1, NewSel);
945 continue;
946 }
947 }
948 // Bail out and abandon the current, partially modified, VPlan if we
949 // encounter a resume phi that cannot be updated yet.
950 if (VecV != &Plan.getVectorTripCount()) {
951 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
952 "FMaxNum/FMinNum reduction.\n");
953 return false;
954 }
955 auto *NewSel = MiddleBuilder.createSelect(
956 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
957 ResumeR->setOperand(0, NewSel);
958 }
959
960 auto *MiddleTerm = MiddleVPBB->getTerminator();
961 MiddleBuilder.setInsertPoint(MiddleTerm);
962 VPValue *MiddleCond = MiddleTerm->getOperand(0);
963 VPValue *NewCond =
964 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
965 MiddleTerm->setOperand(0, NewCond);
966 return true;
967}