LLVM 22.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanCFG.h"
17#include "VPlanDominatorTree.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanTransforms.h"
24#include "llvm/IR/InstrTypes.h"
25#include "llvm/IR/MDBuilder.h"
28
29#define DEBUG_TYPE "vplan"
30
31using namespace llvm;
32using namespace VPlanPatternMatch;
33
34namespace {
35// Class that is used to build the plain CFG for the incoming IR.
36class PlainCFGBuilder {
37 // The outermost loop of the input loop nest considered for vectorization.
38 Loop *TheLoop;
39
40 // Loop Info analysis.
41 LoopInfo *LI;
42
43 // Loop versioning for alias metadata.
44 LoopVersioning *LVer;
45
46 // Vectorization plan that we are working on.
47 std::unique_ptr<VPlan> Plan;
48
49 // Builder of the VPlan instruction-level representation.
50 VPBuilder VPIRBuilder;
51
52 // NOTE: The following maps are intentionally destroyed after the plain CFG
53 // construction because subsequent VPlan-to-VPlan transformation may
54 // invalidate them.
55 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
57 // Map incoming Value definitions to their newly-created VPValues.
58 DenseMap<Value *, VPValue *> IRDef2VPValue;
59
60 // Hold phi node's that need to be fixed once the plain CFG has been built.
62
63 // Utility functions.
64 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
65 void fixHeaderPhis();
66 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
67#ifndef NDEBUG
68 bool isExternalDef(Value *Val);
69#endif
70 VPValue *getOrCreateVPOperand(Value *IRVal);
71 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
72
73public:
74 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer)
75 : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique<VPlan>(Lp)) {}
76
77 /// Build plain CFG for TheLoop and connect it to Plan's entry.
78 std::unique_ptr<VPlan> buildPlainCFG();
79};
80} // anonymous namespace
81
82// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
83// must have no predecessors.
84void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
85 // Collect VPBB predecessors.
87 for (BasicBlock *Pred : predecessors(BB))
88 VPBBPreds.push_back(getOrCreateVPBB(Pred));
89 VPBB->setPredecessors(VPBBPreds);
90}
91
92static bool isHeaderBB(BasicBlock *BB, Loop *L) {
93 return L && BB == L->getHeader();
94}
95
96// Add operands to VPInstructions representing phi nodes from the input IR.
97void PlainCFGBuilder::fixHeaderPhis() {
98 for (auto *Phi : PhisToFix) {
99 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
100 VPValue *VPVal = IRDef2VPValue[Phi];
101 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
102 auto *PhiR = cast<VPPhi>(VPVal);
103 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
104 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
105 "Expected Phi in header block.");
106 assert(Phi->getNumOperands() == 2 &&
107 "header phi must have exactly 2 operands");
108 for (BasicBlock *Pred : predecessors(Phi->getParent()))
109 PhiR->addOperand(
110 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
111 }
112}
113
114// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
115// existing one if it was already created.
116VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
117 if (auto *VPBB = BB2VPBB.lookup(BB)) {
118 // Retrieve existing VPBB.
119 return VPBB;
120 }
121
122 // Create new VPBB.
123 StringRef Name = BB->getName();
124 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
125 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
126 BB2VPBB[BB] = VPBB;
127 return VPBB;
128}
129
130#ifndef NDEBUG
131// Return true if \p Val is considered an external definition. An external
132// definition is either:
133// 1. A Value that is not an Instruction. This will be refined in the future.
134// 2. An Instruction that is outside of the IR region represented in VPlan,
135// i.e., is not part of the loop nest.
136bool PlainCFGBuilder::isExternalDef(Value *Val) {
137 // All the Values that are not Instructions are considered external
138 // definitions for now.
140 if (!Inst)
141 return true;
142
143 // Check whether Instruction definition is in loop body.
144 return !TheLoop->contains(Inst);
145}
146#endif
147
148// Create a new VPValue or retrieve an existing one for the Instruction's
149// operand \p IRVal. This function must only be used to create/retrieve VPValues
150// for *Instruction's operands* and not to create regular VPInstruction's. For
151// the latter, please, look at 'createVPInstructionsForVPBB'.
152VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
153 auto VPValIt = IRDef2VPValue.find(IRVal);
154 if (VPValIt != IRDef2VPValue.end())
155 // Operand has an associated VPInstruction or VPValue that was previously
156 // created.
157 return VPValIt->second;
158
159 // Operand doesn't have a previously created VPInstruction/VPValue. This
160 // means that operand is:
161 // A) a definition external to VPlan,
162 // B) any other Value without specific representation in VPlan.
163 // For now, we use VPValue to represent A and B and classify both as external
164 // definitions. We may introduce specific VPValue subclasses for them in the
165 // future.
166 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
167
168 // A and B: Create VPValue and add it to the pool of external definitions and
169 // to the Value->VPValue map.
170 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
171 IRDef2VPValue[IRVal] = NewVPVal;
172 return NewVPVal;
173}
174
175// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
176// counterpart. This function must be invoked in RPO so that the operands of a
177// VPInstruction in \p BB have been visited before (except for Phi nodes).
178void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
179 BasicBlock *BB) {
180 VPIRBuilder.setInsertPoint(VPBB);
181 // TODO: Model and preserve debug intrinsics in VPlan.
182 for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
183 Instruction *Inst = &InstRef;
184
185 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
186 // visited Inst when we shouldn't, breaking the RPO traversal order.
187 assert(!IRDef2VPValue.count(Inst) &&
188 "Instruction shouldn't have been visited.");
189
190 if (auto *Br = dyn_cast<BranchInst>(Inst)) {
191 // Conditional branch instruction are represented using BranchOnCond
192 // recipes.
193 if (Br->isConditional()) {
194 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
195 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
196 VPIRMetadata(*Inst), Inst->getDebugLoc());
197 }
198
199 // Skip the rest of the Instruction processing for Branch instructions.
200 continue;
201 }
202
203 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
204 // Don't emit recipes for unconditional switch instructions.
205 if (SI->getNumCases() == 0)
206 continue;
207 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
208 for (auto Case : SI->cases())
209 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
210 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
211 VPIRMetadata(*Inst), Inst->getDebugLoc());
212 continue;
213 }
214
215 VPSingleDefRecipe *NewR;
216 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
217 // Phi node's operands may not have been visited at this point. We create
218 // an empty VPInstruction that we will fix once the whole plain CFG has
219 // been built.
220 NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi");
221 NewR->setUnderlyingValue(Phi);
222 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
223 // Header phis need to be fixed after the VPBB for the latch has been
224 // created.
225 PhisToFix.push_back(Phi);
226 } else {
227 // Add operands for VPPhi in the order matching its predecessors in
228 // VPlan.
229 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
230 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
231 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
232 getOrCreateVPOperand(Phi->getIncomingValue(I));
233 }
234 for (VPBlockBase *Pred : VPBB->getPredecessors())
235 NewR->addOperand(
236 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
237 }
238 } else {
239 // Build VPIRMetadata from the instruction and add loop versioning
240 // metadata for loads and stores.
241 VPIRMetadata MD(*Inst);
242 if (isa<LoadInst, StoreInst>(Inst) && LVer) {
243 const auto &[AliasScopeMD, NoAliasMD] =
244 LVer->getNoAliasMetadataFor(Inst);
245 if (AliasScopeMD)
246 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
247 if (NoAliasMD)
248 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
249 }
250
251 // Translate LLVM-IR operands into VPValue operands and set them in the
252 // new VPInstruction.
253 SmallVector<VPValue *, 4> VPOperands;
254 for (Value *Op : Inst->operands())
255 VPOperands.push_back(getOrCreateVPOperand(Op));
256
257 if (auto *CI = dyn_cast<CastInst>(Inst)) {
258 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
259 CI->getType(), CI->getDebugLoc(),
260 VPIRFlags(*CI), MD);
261 NewR->setUnderlyingValue(CI);
262 } else {
263 // Build VPInstruction for any arbitrary Instruction without specific
264 // representation in VPlan.
265 NewR =
266 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
267 VPIRFlags(*Inst), MD, Inst->getDebugLoc());
268 }
269 }
270
271 IRDef2VPValue[Inst] = NewR;
272 }
273}
274
275// Main interface to build the plain CFG.
276std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
277 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
278 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
279 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
280 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
281
282 // 1. Scan the body of the loop in a topological order to visit each basic
283 // block after having visited its predecessor basic blocks. Create a VPBB for
284 // each BB and link it to its successor and predecessor VPBBs. Note that
285 // predecessors must be set in the same order as they are in the incomming IR.
286 // Otherwise, there might be problems with existing phi nodes and algorithm
287 // based on predecessors traversal.
288
289 // Loop PH needs to be explicitly visited since it's not taken into account by
290 // LoopBlocksDFS.
291 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
292 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
293 "Unexpected loop preheader");
294 for (auto &I : *ThePreheaderBB) {
295 if (I.getType()->isVoidTy())
296 continue;
297 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
298 }
299
300 LoopBlocksRPO RPO(TheLoop);
301 RPO.perform(LI);
302
303 for (BasicBlock *BB : RPO) {
304 // Create or retrieve the VPBasicBlock for this BB.
305 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
306 // Set VPBB predecessors in the same order as they are in the incoming BB.
307 setVPBBPredsFromBB(VPBB, BB);
308
309 // Create VPInstructions for BB.
310 createVPInstructionsForVPBB(VPBB, BB);
311
312 // Set VPBB successors. We create empty VPBBs for successors if they don't
313 // exist already. Recipes will be created when the successor is visited
314 // during the RPO traversal.
315 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
317 getOrCreateVPBB(SI->getDefaultDest())};
318 for (auto Case : SI->cases())
319 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
320 VPBB->setSuccessors(Succs);
321 continue;
322 }
323 auto *BI = cast<BranchInst>(BB->getTerminator());
324 unsigned NumSuccs = succ_size(BB);
325 if (NumSuccs == 1) {
326 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
327 continue;
328 }
329 assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
330 "block must have conditional branch with 2 successors");
331
332 BasicBlock *IRSucc0 = BI->getSuccessor(0);
333 BasicBlock *IRSucc1 = BI->getSuccessor(1);
334 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
335 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
336 VPBB->setTwoSuccessors(Successor0, Successor1);
337 }
338
339 for (auto *EB : Plan->getExitBlocks())
340 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
341
342 // 2. The whole CFG has been built at this point so all the input Values must
343 // have a VPlan counterpart. Fix VPlan header phi by adding their
344 // corresponding VPlan operands.
345 fixHeaderPhis();
346
347 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
348 Plan->getEntry()->setPlan(&*Plan);
349
350 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
351 // VPIRInstructions wrapping them.
352 // // Note that the operand order corresponds to IR predecessor order, and may
353 // need adjusting when VPlan predecessors are added, if an exit block has
354 // multiple predecessor.
355 for (auto *EB : Plan->getExitBlocks()) {
356 for (VPRecipeBase &R : EB->phis()) {
357 auto *PhiR = cast<VPIRPhi>(&R);
358 PHINode &Phi = PhiR->getIRPhi();
359 assert(PhiR->getNumOperands() == 0 &&
360 "no phi operands should be added yet");
361 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
362 PhiR->addOperand(
363 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
364 }
365 }
366
367 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
368 return std::move(Plan);
369}
370
371/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
372/// has exactly 2 predecessors (preheader and latch), where the block
373/// dominates the latch and the preheader dominates the block. If it is a
374/// header block return true and canonicalize the predecessors of the header
375/// (making sure the preheader appears first and the latch second) and the
376/// successors of the latch (making sure the loop exit comes first). Otherwise
377/// return false.
379 const VPDominatorTree &VPDT) {
380 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
381 if (Preds.size() != 2)
382 return false;
383
384 auto *PreheaderVPBB = Preds[0];
385 auto *LatchVPBB = Preds[1];
386 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
387 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
388 std::swap(PreheaderVPBB, LatchVPBB);
389
390 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
391 !VPDT.dominates(HeaderVPB, LatchVPBB))
392 return false;
393
394 // Canonicalize predecessors of header so that preheader is first and
395 // latch second.
396 HeaderVPB->swapPredecessors();
397 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
398 R.swapOperands();
399 }
400
401 // The two successors of conditional branch match the condition, with the
402 // first successor corresponding to true and the second to false. We
403 // canonicalize the successors of the latch when introducing the region, such
404 // that the latch exits the region when its condition is true; invert the
405 // original condition if the original CFG branches to the header on true.
406 // Note that the exit edge is not yet connected for top-level loops.
407 if (LatchVPBB->getSingleSuccessor() ||
408 LatchVPBB->getSuccessors()[0] != HeaderVPB)
409 return true;
410
411 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
412 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
415 "terminator must be a BranchOnCond");
416 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
417 Not->insertBefore(Term);
418 Term->setOperand(0, Not);
419 LatchVPBB->swapSuccessors();
420
421 return true;
422}
423
424/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
425static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
426 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
427 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
428
429 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
430 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
431 VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor();
432 assert(LatchExitVPB && "Latch expected to be left with a single successor");
433
434 // Create an empty region first and insert it between PreheaderVPBB and
435 // LatchExitVPB, taking care to preserve the original predecessor & successor
436 // order of blocks. Set region entry and exiting after both HeaderVPB and
437 // LatchVPBB have been disconnected from their predecessors/successors.
438 auto *R = Plan.createLoopRegion();
439 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
440 VPBlockUtils::disconnectBlocks(LatchVPBB, R);
441 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
442 R->setEntry(HeaderVPB);
443 R->setExiting(LatchVPBB);
444
445 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
446 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
447 VPBB->setParent(R);
448}
449
450// Add the necessary canonical IV and branch recipes required to control the
451// loop.
452static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
453 VPBasicBlock *LatchVPBB, Type *IdxTy,
454 DebugLoc DL) {
455 Value *StartIdx = ConstantInt::get(IdxTy, 0);
456 auto *StartV = Plan.getOrAddLiveIn(StartIdx);
457
458 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
459 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
460 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
461
462 // We are about to replace the branch to exit the region. Remove the original
463 // BranchOnCond, if there is any.
464 DebugLoc LatchDL = DL;
465 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
466 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
467 LatchVPBB->getTerminator()->eraseFromParent();
468 }
469
470 VPBuilder Builder(LatchVPBB);
471 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
472 // Initially the induction increment is guaranteed to not wrap, but that may
473 // change later, e.g. when tail-folding, when the flags need to be dropped.
474 auto *CanonicalIVIncrement = Builder.createOverflowingOp(
475 Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
476 "index.next");
477 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
478
479 // Add the BranchOnCount VPInstruction to the latch.
480 Builder.createNaryOp(VPInstruction::BranchOnCount,
481 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
482 LatchDL);
483}
484
485/// Creates extracts for values in \p Plan defined in a loop region and used
486/// outside a loop region.
487static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
488 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
489 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
490 if (EB->getSinglePredecessor() != MiddleVPBB)
491 continue;
492
493 for (VPRecipeBase &R : EB->phis()) {
494 auto *ExitIRI = cast<VPIRPhi>(&R);
495 for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
496 VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
497 if (!Inc)
498 continue;
499 assert(ExitIRI->getNumOperands() == 1 &&
500 ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
501 "exit values from early exits must be fixed when branch to "
502 "early-exit is added");
503 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(B);
504 }
505 }
506 }
507}
508
509static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
510 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
511 VPDominatorTree VPDT(Plan);
512
513 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
514 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
515 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
516
517 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
518 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
519
520 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
521 // The canonical LatchVPBB has the header block as last successor. If it has
522 // another successor, this successor is an exit block - insert middle block on
523 // its edge. Otherwise, add middle block as another successor retaining header
524 // as last.
525 if (LatchVPBB->getNumSuccessors() == 2) {
526 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
527 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
528 } else {
529 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
530 LatchVPBB->swapSuccessors();
531 }
532
533 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
534
535 // Create SCEV and VPValue for the trip count.
536 // We use the symbolic max backedge-taken-count, which works also when
537 // vectorizing loops with uncountable early exits.
538 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
539 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
540 "Invalid backedge-taken count");
541 ScalarEvolution &SE = *PSE.getSE();
542 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
543 InductionTy, TheLoop);
545
546 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
548
549 // The connection order corresponds to the operands of the conditional branch,
550 // with the middle block already connected to the exit block.
551 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
552 // Also connect the entry block to the scalar preheader.
553 // TODO: Also introduce a branch recipe together with the minimum trip count
554 // check.
555 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
556 Plan.getEntry()->swapSuccessors();
557
558 createExtractsForLiveOuts(Plan, MiddleVPBB);
559
560 VPBuilder ScalarPHBuilder(ScalarPH);
561 for (const auto &[PhiR, ScalarPhiR] : zip_equal(
562 drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
563 auto *VectorPhiR = cast<VPPhi>(&PhiR);
564 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
565 {VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
566 cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
567 }
568}
569
570/// Check \p Plan's live-in and replace them with constants, if they can be
571/// simplified via SCEV.
574 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
575 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
576 if (auto *C = dyn_cast<SCEVConstant>(Expr))
577 return Plan.getOrAddLiveIn(C->getValue());
578 return nullptr;
579 };
580
581 for (VPValue *LiveIn : Plan.getLiveIns()) {
582 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
583 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
584 }
585}
586
587std::unique_ptr<VPlan>
588VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
590 LoopVersioning *LVer) {
591 PlainCFGBuilder Builder(TheLoop, &LI, LVer);
592 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
593 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
594 simplifyLiveInsWithSCEV(*VPlan0, PSE);
595 return VPlan0;
596}
597
598/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
599/// for \p Phi based on \p IndDesc.
600static VPHeaderPHIRecipe *
602 const InductionDescriptor &IndDesc, VPlan &Plan,
603 PredicatedScalarEvolution &PSE, Loop &OrigLoop,
604 DebugLoc DL) {
605 [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
606 assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
607 "step must be loop invariant");
608 assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
609 (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
610 SE.getSCEV(IndDesc.getStartValue()) ==
611 vputils::getSCEVExprForVPValue(Start, PSE))) &&
612 "Start VPValue must match IndDesc's start value");
613
614 VPValue *Step =
616
618 return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
619 IndDesc, DL);
620
623 "must have an integer or float induction at this point");
624
625 // Update wide induction increments to use the same step as the corresponding
626 // wide induction. This enables detecting induction increments directly in
627 // VPlan and removes redundant splats.
628 using namespace llvm::VPlanPatternMatch;
629 if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
630 PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
631
632 // It is always safe to copy over the NoWrap and FastMath flags. In
633 // particular, when folding tail by masking, the masked-off lanes are never
634 // used, so it is safe.
636
637 return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
638 IndDesc, Flags, DL);
639}
640
642 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
645 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
646 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
647 // Retrieve the header manually from the intial plain-CFG VPlan.
648 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
649 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
650 assert(VPDominatorTree(Plan).dominates(HeaderVPBB,
651 HeaderVPBB->getPredecessors()[1]) &&
652 "header must dominate its latch");
653
654 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
655 // TODO: Gradually replace uses of underlying instruction by analyses on
656 // VPlan.
657 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
658 assert(PhiR->getNumOperands() == 2 &&
659 "Must have 2 operands for header phis");
660
661 // Extract common values once.
662 VPValue *Start = PhiR->getOperand(0);
663 VPValue *BackedgeValue = PhiR->getOperand(1);
664
665 if (FixedOrderRecurrences.contains(Phi)) {
666 // TODO: Currently fixed-order recurrences are modeled as chains of
667 // first-order recurrences. If there are no users of the intermediate
668 // recurrences in the chain, the fixed order recurrence should be
669 // modeled directly, enabling more efficient codegen.
670 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
671 }
672
673 auto InductionIt = Inductions.find(Phi);
674 if (InductionIt != Inductions.end())
675 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
676 Plan, PSE, OrigLoop,
677 PhiR->getDebugLoc());
678
679 assert(Reductions.contains(Phi) && "only reductions are expected now");
680 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
682 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
683 "incoming value must match start value");
684 // Will be updated later to >1 if reduction is partial.
685 unsigned ScaleFactor = 1;
686 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
687 return new VPReductionPHIRecipe(
688 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
689 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
690 ScaleFactor),
692 };
693
694 for (VPRecipeBase &R : make_early_inc_range(HeaderVPBB->phis())) {
696 continue;
697 auto *PhiR = cast<VPPhi>(&R);
698 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
699 HeaderPhiR->insertBefore(PhiR);
700 PhiR->replaceAllUsesWith(HeaderPhiR);
701 PhiR->eraseFromParent();
702 }
703}
704
706 bool HasUncountableEarlyExit) {
707 auto *MiddleVPBB = cast<VPBasicBlock>(
709 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
710 VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
711
712 // Disconnect all early exits from the loop leaving it with a single exit from
713 // the latch. Early exits that are countable are left for a scalar epilog. The
714 // condition of uncountable early exits (currently at most one is supported)
715 // is fused into the latch exit, and used to branch from middle block to the
716 // early exit destination.
717 [[maybe_unused]] bool HandledUncountableEarlyExit = false;
718 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
719 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
720 if (Pred == MiddleVPBB)
721 continue;
722 if (HasUncountableEarlyExit) {
723 assert(!HandledUncountableEarlyExit &&
724 "can handle exactly one uncountable early exit");
726 cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
727 HandledUncountableEarlyExit = true;
728 } else {
729 for (VPRecipeBase &R : EB->phis())
730 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
731 }
732 cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
734 }
735 }
736
737 assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
738 "missed an uncountable exit that must be handled");
739}
740
742 bool RequiresScalarEpilogueCheck,
743 bool TailFolded) {
744 auto *MiddleVPBB = cast<VPBasicBlock>(
746 // If MiddleVPBB has a single successor then the original loop does not exit
747 // via the latch and the single successor must be the scalar preheader.
748 // There's no need to add a runtime check to MiddleVPBB.
749 if (MiddleVPBB->getNumSuccessors() == 1) {
750 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
751 "must have ScalarPH as single successor");
752 return;
753 }
754
755 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
756
757 // Add a check in the middle block to see if we have completed all of the
758 // iterations in the first vector loop.
759 //
760 // Three cases:
761 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
762 // condition to false.
763 // 2) If (N - N%VF) == N, then we *don't* need to run the
764 // remainder. Thus if tail is to be folded, we know we don't need to run
765 // the remainder and we can set the condition to true.
766 // 3) Otherwise, construct a runtime check.
767
768 // We use the same DebugLoc as the scalar loop latch terminator instead of
769 // the corresponding compare because they may have ended up with different
770 // line numbers and we want to avoid awkward line stepping while debugging.
771 // E.g., if the compare has got a line number inside the loop.
772 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
773 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
774 VPBuilder Builder(MiddleVPBB);
775 VPValue *Cmp;
776 if (!RequiresScalarEpilogueCheck)
777 Cmp = Plan.getFalse();
778 else if (TailFolded)
779 Cmp = Plan.getTrue();
780 else
781 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
782 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
783 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
784}
785
787 VPDominatorTree VPDT(Plan);
788 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
789 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
790 createLoopRegion(Plan, HeaderVPB);
791
792 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
793 TopRegion->setName("vector loop");
794 TopRegion->getEntryBasicBlock()->setName("vector.body");
795}
796
797// Likelyhood of bypassing the vectorized loop due to a runtime check block,
798// including memory overlap checks block and wrapping/unit-stride checks block.
799static constexpr uint32_t CheckBypassWeights[] = {1, 127};
800
802 BasicBlock *CheckBlock,
803 bool AddBranchWeights) {
804 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
805 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
806 VPBlockBase *VectorPH = Plan.getVectorPreheader();
807 VPBlockBase *ScalarPH = Plan.getScalarPreheader();
808 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
809 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
810 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
811 CheckBlockVPBB->swapSuccessors();
812
813 // We just connected a new block to the scalar preheader. Update all
814 // VPPhis by adding an incoming value for it, replicating the last value.
815 unsigned NumPredecessors = ScalarPH->getNumPredecessors();
816 for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
817 assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
818 assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
819 "must have incoming values for all operands");
820 R.addOperand(R.getOperand(NumPredecessors - 2));
821 }
822
823 VPIRMetadata VPBranchWeights;
824 auto *Term =
825 VPBuilder(CheckBlockVPBB)
828 Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc());
829 if (AddBranchWeights) {
830 MDBuilder MDB(Plan.getContext());
831 MDNode *BranchWeights =
832 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
833 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
834 }
835}
836
838 VPlan &Plan, ElementCount VF, unsigned UF,
839 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
840 bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
843 // Generate code to check if the loop's trip count is less than VF * UF, or
844 // equal to it in case a scalar epilogue is required; this implies that the
845 // vector trip count is zero. This check also covers the case where adding one
846 // to the backedge-taken count overflowed leading to an incorrect trip count
847 // of zero. In this case we will also jump to the scalar loop.
848 CmpInst::Predicate CmpPred =
849 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
850 // If tail is to be folded, vector loop takes care of all iterations.
851 VPValue *TripCountVPV = Plan.getTripCount();
852 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
853 Type *TripCountTy = TripCount->getType();
854 ScalarEvolution &SE = *PSE.getSE();
855 auto GetMinTripCount = [&]() -> const SCEV * {
856 // Compute max(MinProfitableTripCount, UF * VF) and return it.
857 const SCEV *VFxUF =
858 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
859 if (UF * VF.getKnownMinValue() >=
860 MinProfitableTripCount.getKnownMinValue()) {
861 // TODO: SCEV should be able to simplify test.
862 return VFxUF;
863 }
864 const SCEV *MinProfitableTripCountSCEV =
865 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
866 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
867 };
868
869 VPBasicBlock *EntryVPBB = Plan.getEntry();
870 VPBuilder Builder(EntryVPBB);
871 VPValue *TripCountCheck = Plan.getFalse();
872 const SCEV *Step = GetMinTripCount();
873 if (TailFolded) {
874 if (CheckNeededWithTailFolding) {
875 // vscale is not necessarily a power-of-2, which means we cannot guarantee
876 // an overflow to zero when updating induction variables and so an
877 // additional overflow check is required before entering the vector loop.
878
879 // Get the maximum unsigned value for the type.
880 VPValue *MaxUIntTripCount =
881 Plan.getConstantInt(cast<IntegerType>(TripCountTy)->getMask());
882 VPValue *DistanceToMax = Builder.createNaryOp(
883 Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
885
886 // Don't execute the vector loop if (UMax - n) < (VF * UF).
887 // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
888 // minProfitableTripCount).
889 TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
890 Builder.createExpandSCEV(Step), DL);
891 } else {
892 // TripCountCheck = false, folding tail implies positive vector trip
893 // count.
894 }
895 } else {
896 // TODO: Emit unconditional branch to vector preheader instead of
897 // conditional branch with known condition.
898 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
899 // Check if the trip count is < the step.
900 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
901 // TODO: Ensure step is at most the trip count when determining max VF and
902 // UF, w/o tail folding.
903 TripCountCheck = Plan.getTrue();
904 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
905 TripCount, Step)) {
906 // Generate the minimum iteration check only if we cannot prove the
907 // check is known to be true, or known to be false.
908 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
909 TripCountCheck = Builder.createICmp(
910 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
911 } // else step known to be < trip count, use TripCountCheck preset to false.
912 }
913 VPInstruction *Term =
914 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
916 MDBuilder MDB(Plan.getContext());
917 MDNode *BranchWeights = MDB.createBranchWeights(
918 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
919 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
920 }
921}
922
924 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
925 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
926 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE) {
927 // Add the minimum iteration check for the epilogue vector loop.
928 VPValue *TC = Plan.getOrAddLiveIn(TripCount);
929 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
930 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
931 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
932 VPValue *Count = Builder.createNaryOp(
933 Instruction::Sub, {TC, Plan.getOrAddLiveIn(VectorTripCount)},
934 DebugLoc::getUnknown(), "n.vec.remaining");
935
936 // Generate code to check if the loop's trip count is less than VF * UF of
937 // the vector epilogue loop.
938 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
939 auto *CheckMinIters = Builder.createICmp(
940 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
941 VPInstruction *Branch =
942 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
943
944 // We assume the remaining `Count` is equally distributed in
945 // [0, MainLoopStep)
946 // So the probability for `Count < EpilogueLoopStep` should be
947 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
948 // TODO: Improve the estimate by taking the estimated trip count into
949 // consideration.
950 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
951 const uint32_t Weights[] = {EstimatedSkipCount,
952 MainLoopStep - EstimatedSkipCount};
953 MDBuilder MDB(Plan.getContext());
954 MDNode *BranchWeights =
955 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
956 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
957}
958
959/// If \p V is used by a recipe matching pattern \p P, return it. Otherwise
960/// return nullptr;
961template <typename MatchT>
962static VPRecipeBase *findUserOf(VPValue *V, const MatchT &P) {
963 auto It = find_if(V->users(), match_fn(P));
964 return It == V->user_end() ? nullptr : cast<VPRecipeBase>(*It);
965}
966
967/// If \p V is used by a VPInstruction with \p Opcode, return it. Otherwise
968/// return nullptr.
969template <unsigned Opcode> static VPInstruction *findUserOf(VPValue *V) {
971}
972
974 auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
975 auto *MinMaxR =
976 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
977 if (!MinMaxR)
978 return nullptr;
979
980 // Check that MinMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
981 // with an intrinsic that matches the reduction kind.
982 Intrinsic::ID ExpectedIntrinsicID =
983 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
984 if (!match(MinMaxR, m_Intrinsic(ExpectedIntrinsicID)))
985 return nullptr;
986
987 if (MinMaxR->getOperand(0) == RedPhiR)
988 return MinMaxR->getOperand(1);
989
990 assert(MinMaxR->getOperand(1) == RedPhiR &&
991 "Reduction phi operand expected");
992 return MinMaxR->getOperand(0);
993 };
994
995 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
997 MinMaxNumReductionsToHandle;
998 bool HasUnsupportedPhi = false;
999 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
1001 continue;
1002 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1003 if (!Cur) {
1004 // TODO: Also support fixed-order recurrence phis.
1005 HasUnsupportedPhi = true;
1006 continue;
1007 }
1009 Cur->getRecurrenceKind())) {
1010 HasUnsupportedPhi = true;
1011 continue;
1012 }
1013
1014 VPValue *MinMaxOp = GetMinMaxCompareValue(Cur);
1015 if (!MinMaxOp)
1016 return false;
1017
1018 MinMaxNumReductionsToHandle.emplace_back(Cur, MinMaxOp);
1019 }
1020
1021 if (MinMaxNumReductionsToHandle.empty())
1022 return true;
1023
1024 // We won't be able to resume execution in the scalar tail, if there are
1025 // unsupported header phis or there is no scalar tail at all, due to
1026 // tail-folding.
1027 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1028 return false;
1029
1030 /// Check if the vector loop of \p Plan can early exit and restart
1031 /// execution of last vector iteration in the scalar loop. This requires all
1032 /// recipes up to early exit point be side-effect free as they are
1033 /// re-executed. Currently we check that the loop is free of any recipe that
1034 /// may write to memory. Expected to operate on an early VPlan w/o nested
1035 /// regions.
1038 auto *VPBB = cast<VPBasicBlock>(VPB);
1039 for (auto &R : *VPBB) {
1040 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1041 return false;
1042 }
1043 }
1044
1045 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1046 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1047 VPValue *AllNaNLanes = nullptr;
1048 SmallPtrSet<VPValue *, 2> RdxResults;
1049 for (const auto &[_, MinMaxOp] : MinMaxNumReductionsToHandle) {
1050 VPValue *RedNaNLanes =
1051 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
1052 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1053 : RedNaNLanes;
1054 }
1055
1056 VPValue *AnyNaNLane =
1057 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1058 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1059 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
1060 for (const auto &[RedPhiR, _] : MinMaxNumReductionsToHandle) {
1062 RedPhiR->getRecurrenceKind()) &&
1063 "unsupported reduction");
1064
1065 // If we exit early due to NaNs, compute the final reduction result based on
1066 // the reduction phi at the beginning of the last vector iteration.
1067 auto *RdxResult =
1069
1070 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1071 RdxResult->getOperand(1));
1072 RdxResult->setOperand(1, NewSel);
1073 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1074 RdxResults.insert(RdxResult);
1075 }
1076
1077 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1078 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1079 "Unexpected terminator");
1080 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1081 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1082 LatchExitingBranch->getOperand(1));
1083 auto *AnyExitTaken = LatchBuilder.createNaryOp(
1084 Instruction::Or, {AnyNaNLane, IsLatchExitTaken});
1085 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1086 LatchExitingBranch->eraseFromParent();
1087
1088 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
1089 // true, the resume from the start of the last vector iteration via the
1090 // canonical IV, otherwise from the original value.
1091 for (auto &R : Plan.getScalarPreheader()->phis()) {
1092 auto *ResumeR = cast<VPPhi>(&R);
1093 VPValue *VecV = ResumeR->getOperand(0);
1094 if (RdxResults.contains(VecV))
1095 continue;
1096 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1097 if (DerivedIV->getNumUsers() == 1 &&
1098 DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
1099 auto *NewSel =
1100 MiddleBuilder.createSelect(AnyNaNLane, LoopRegion->getCanonicalIV(),
1101 &Plan.getVectorTripCount());
1102 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1103 DerivedIV->setOperand(1, NewSel);
1104 continue;
1105 }
1106 }
1107 // Bail out and abandon the current, partially modified, VPlan if we
1108 // encounter resume phi that cannot be updated yet.
1109 if (VecV != &Plan.getVectorTripCount()) {
1110 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1111 "FMaxNum/FMinNum reduction.\n");
1112 return false;
1113 }
1114 auto *NewSel = MiddleBuilder.createSelect(
1115 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1116 ResumeR->setOperand(0, NewSel);
1117 }
1118
1119 auto *MiddleTerm = MiddleVPBB->getTerminator();
1120 MiddleBuilder.setInsertPoint(MiddleTerm);
1121 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1122 VPValue *NewCond =
1123 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1124 MiddleTerm->setOperand(0, NewCond);
1125 return true;
1126}
1127
1129 for (auto &PhiR : make_early_inc_range(
1131 auto *MinMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
1132 // TODO: check for multi-uses in VPlan directly.
1133 if (!MinMaxPhiR || !MinMaxPhiR->hasUsesOutsideReductionChain())
1134 continue;
1135
1136 // MinMaxPhiR has users outside the reduction cycle in the loop. Check if
1137 // the only other user is a FindLastIV reduction. MinMaxPhiR must have
1138 // exactly 3 users: 1) the min/max operation, the compare of a FindLastIV
1139 // reduction and ComputeReductionResult. The comparisom must compare
1140 // MinMaxPhiR against the min/max operand used for the min/max reduction
1141 // and only be used by the select of the FindLastIV reduction.
1142 RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind();
1143 assert(
1145 "only min/max recurrences support users outside the reduction chain");
1146
1147 auto *MinMaxOp =
1148 dyn_cast<VPRecipeWithIRFlags>(MinMaxPhiR->getBackedgeValue());
1149 if (!MinMaxOp)
1150 return false;
1151
1152 // Check that MinMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1153 // with an intrinsic that matches the reduction kind.
1154 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
1155 if (!match(MinMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
1156 return false;
1157
1158 // MinMaxOp must have 2 users: 1) MinMaxPhiR and 2) ComputeReductionResult
1159 // (asserted below).
1160 assert(MinMaxOp->getNumUsers() == 2 &&
1161 "MinMaxOp must have exactly 2 users");
1162 VPValue *MinMaxOpValue = MinMaxOp->getOperand(0);
1163 if (MinMaxOpValue == MinMaxPhiR)
1164 MinMaxOpValue = MinMaxOp->getOperand(1);
1165
1166 VPValue *CmpOpA;
1167 VPValue *CmpOpB;
1168 CmpPredicate Pred;
1170 MinMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1171 if (!Cmp || Cmp->getNumUsers() != 1 ||
1172 (CmpOpA != MinMaxOpValue && CmpOpB != MinMaxOpValue))
1173 return false;
1174
1175 if (MinMaxOpValue != CmpOpB)
1176 Pred = CmpInst::getSwappedPredicate(Pred);
1177
1178 // MinMaxPhiR must have exactly 3 users:
1179 // * MinMaxOp,
1180 // * Cmp (that's part of a FindLastIV chain),
1181 // * ComputeReductionResult.
1182 if (MinMaxPhiR->getNumUsers() != 3)
1183 return false;
1184
1185 VPInstruction *MinMaxResult =
1187 assert(is_contained(MinMaxPhiR->users(), MinMaxOp) &&
1188 "one user must be MinMaxOp");
1189 assert(MinMaxResult && "MinMaxResult must be a user of MinMaxPhiR");
1190 assert(is_contained(MinMaxOp->users(), MinMaxResult) &&
1191 "MinMaxResult must be a user of MinMaxOp (and of MinMaxPhiR");
1192
1193 // Cmp must be used by the select of a FindLastIV chain.
1194 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
1195 VPValue *IVOp, *FindIV;
1196 if (!Sel || Sel->getNumUsers() != 2 ||
1197 !match(Sel,
1198 m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))))
1199 return false;
1200
1201 if (!isa<VPReductionPHIRecipe>(FindIV)) {
1202 std::swap(FindIV, IVOp);
1203 Pred = CmpInst::getInversePredicate(Pred);
1204 }
1205
1206 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
1208 FindIVPhiR->getRecurrenceKind()))
1209 return false;
1210
1211 // TODO: Support cases where IVOp is the IV increment.
1212 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
1214 return false;
1215
1216 CmpInst::Predicate RdxPredicate = [RdxKind]() {
1217 switch (RdxKind) {
1218 case RecurKind::UMin:
1219 return CmpInst::ICMP_UGE;
1220 case RecurKind::UMax:
1221 return CmpInst::ICMP_ULE;
1222 case RecurKind::SMax:
1223 return CmpInst::ICMP_SLE;
1224 case RecurKind::SMin:
1225 return CmpInst::ICMP_SGE;
1226 default:
1227 llvm_unreachable("unhandled recurrence kind");
1228 }
1229 }();
1230
1231 // TODO: Strict predicates need to find the first IV value for which the
1232 // predicate holds, not the last.
1233 if (Pred != RdxPredicate)
1234 return false;
1235
1236 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
1237 "cannot handle inloop/ordered reductions yet");
1238
1239 // The reduction using MinMaxPhiR needs adjusting to compute the correct
1240 // result:
1241 // 1. We need to find the last IV for which the condition based on the
1242 // min/max recurrence is true,
1243 // 2. Compare the partial min/max reduction result to its final value and,
1244 // 3. Select the lanes of the partial FindLastIV reductions which
1245 // correspond to the lanes matching the min/max reduction result.
1246 //
1247 // For example, this transforms
1248 // vp<%min.result> = compute-reduction-result ir<%min.val>,
1249 // ir<%min.val.next>
1250 // vp<%find.iv.result = compute-find-iv-result ir<%min.idx>, ir<0>,
1251 // SENTINEL, vp<%min.idx.next>
1252 //
1253 // into:
1254 //
1255 // vp<min.result> = compute-reduction-result ir<%min.val>, ir<%min.val.next>
1256 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
1257 // vp<%final.iv> = select vp<%final.min.cmp>, ir<%min.idx.next>, SENTINEL
1258 // vp<%find.iv.result> = compute-find-iv-result ir<%min.idx>, ir<0>,
1259 // SENTINEL, vp<%final.iv>
1260 VPInstruction *FindIVResult =
1262 assert(FindIVResult->getParent() == MinMaxResult->getParent() &&
1263 "both results must be computed in the same block");
1264 MinMaxResult->moveBefore(*FindIVResult->getParent(),
1265 FindIVResult->getIterator());
1266
1267 VPBuilder B(FindIVResult);
1268 VPValue *MinMaxExiting = MinMaxResult->getOperand(1);
1269 auto *FinalMinMaxCmp =
1270 B.createICmp(CmpInst::ICMP_EQ, MinMaxExiting, MinMaxResult);
1271 VPValue *Sentinel = FindIVResult->getOperand(2);
1272 VPValue *LastIVExiting = FindIVResult->getOperand(3);
1273 auto *FinalIVSelect =
1274 B.createSelect(FinalMinMaxCmp, LastIVExiting, Sentinel);
1275 FindIVResult->setOperand(3, FinalIVSelect);
1276 }
1277 return true;
1278}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define _
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static void simplifyLiveInsWithSCEV(VPlan &Plan, PredicatedScalarEvolution &PSE)
Check Plan's live-in and replace them with constants, if they can be simplified via SCEV.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static VPHeaderPHIRecipe * createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, DebugLoc DL)
Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe for Phi based on IndDesc.
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, Type *IdxTy, DebugLoc DL)
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static constexpr uint32_t CheckBypassWeights[]
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
A struct for saving information about induction variables.
InductionKind getKind() const
const SCEV * getStep() const
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Value * getStartValue() const
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
iterator find(const KeyT &Key)
Definition MapVector.h:154
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFPMinMaxNumRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point minnum/maxnum kind.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
op_range operands()
Definition User.h:292
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3982
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4017
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4070
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:623
const VPRecipeBase & back() const
Definition VPlan.h:4031
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4048
bool empty() const
Definition VPlan.h:4028
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:300
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:186
void setName(const Twine &newName)
Definition VPlan.h:166
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:122
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition VPlanUtils.h:243
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:173
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:192
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3565
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2056
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4135
Class to record and manage LLVM IR flags.
Definition VPlan.h:609
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1036
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition VPlan.h:2437
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4170
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4268
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:246
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:241
void addOperand(VPValue *Operand)
Definition VPlanValue.h:235
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:191
unsigned getNumUsers() const
Definition VPlanValue.h:111
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2199
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4300
LLVMContext & getContext() const
Definition VPlan.h:4489
VPBasicBlock * getEntry()
Definition VPlan.h:4389
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4480
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4487
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4483
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4451
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4557
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4582
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4441
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4563
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1010
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4458
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4414
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4605
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1225
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4560
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4543
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4615
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4432
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4437
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4579
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4394
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4659
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
Definition VPlanUtils.h:93
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2423
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
Definition VPlanCFG.h:229
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
A recipe for handling first-order recurrence phis.
Definition VPlan.h:2375
static bool handleMultiUseReductions(VPlan &Plan)
Try to legalize reductions with multiple in-loop uses.
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static LLVM_ABI_FOR_TEST void handleEarlyExits(VPlan &Plan, bool HasUncountableExit)
Update Plan to account for all early exits.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
static void createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB)
Update Plan to account for the uncountable early exit from EarlyExitingVPBB to EarlyExitVPBB by.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *TripCount, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...