VPlanConstruction.cpp
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanDominatorTree.h"
19#include "VPlanHelpers.h"
20#include "VPlanPatternMatch.h"
21#include "VPlanTransforms.h"
22#include "VPlanUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/LoopIterator.h"
26#include "llvm/Analysis/ScalarEvolution.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/MDBuilder.h"
30#include "llvm/Transforms/Utils/LoopVersioning.h"
32
33#define DEBUG_TYPE "vplan"
34
35using namespace llvm;
36using namespace VPlanPatternMatch;
37
38namespace {
39// Class that is used to build the plain CFG for the incoming IR.
40class PlainCFGBuilder {
41 // The outermost loop of the input loop nest considered for vectorization.
42 Loop *TheLoop;
43
44 // Loop Info analysis.
45 LoopInfo *LI;
46
47 // Loop versioning for alias metadata.
48 LoopVersioning *LVer;
49
50 // Vectorization plan that we are working on.
51 std::unique_ptr<VPlan> Plan;
52
53 // Builder of the VPlan instruction-level representation.
54 VPBuilder VPIRBuilder;
55
56 // NOTE: The following maps are intentionally destroyed after the plain CFG
57 // construction because subsequent VPlan-to-VPlan transformations may
58 // invalidate them.
59 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
60 DenseMap<BasicBlock *, VPBasicBlock *> BB2VPBB;
61 // Map incoming Value definitions to their newly-created VPValues.
62 DenseMap<Value *, VPValue *> IRDef2VPValue;
63
64 // Hold phi nodes that need to be fixed once the plain CFG has been built.
65 SmallVector<PHINode *, 8> PhisToFix;
66
67 // Utility functions.
68 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
69 void fixHeaderPhis();
70 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
71#ifndef NDEBUG
72 bool isExternalDef(Value *Val);
73#endif
74 VPValue *getOrCreateVPOperand(Value *IRVal);
75 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
76
77public:
78 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer)
79 : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique<VPlan>(Lp)) {}
80
81 /// Build plain CFG for TheLoop and connect it to Plan's entry.
82 std::unique_ptr<VPlan> buildPlainCFG();
83};
84} // anonymous namespace
85
86// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
87// must have no predecessors.
88void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
89 // Collect VPBB predecessors.
90 SmallVector<VPBlockBase *, 2> VPBBPreds;
91 for (BasicBlock *Pred : predecessors(BB))
92 VPBBPreds.push_back(getOrCreateVPBB(Pred));
93 VPBB->setPredecessors(VPBBPreds);
94}
95
96static bool isHeaderBB(BasicBlock *BB, Loop *L) {
97 return L && BB == L->getHeader();
98}
99
100// Add operands to VPInstructions representing phi nodes from the input IR.
101void PlainCFGBuilder::fixHeaderPhis() {
102 for (auto *Phi : PhisToFix) {
103 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
104 VPValue *VPVal = IRDef2VPValue[Phi];
105 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
106 auto *PhiR = cast<VPPhi>(VPVal);
107 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
108 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
109 "Expected Phi in header block.");
110 assert(Phi->getNumOperands() == 2 &&
111 "header phi must have exactly 2 operands");
112 for (BasicBlock *Pred : predecessors(Phi->getParent()))
113 PhiR->addOperand(
114 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
115 }
116}
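// Illustrative example (not part of the original source): header phis must be
// fixed late because their backedge operand is defined in the latch, which RPO
// visits after the header. E.g., for
//   loop:
//     %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
//     %iv.next = add i64 %iv, 1
// no VPValue exists for %iv.next yet when %iv is visited, so the VPPhi is
// created without operands and completed here.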
117
118// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
119// existing one if it was already created.
120VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
121 if (auto *VPBB = BB2VPBB.lookup(BB)) {
122 // Retrieve existing VPBB.
123 return VPBB;
124 }
125
126 // Create new VPBB.
127 StringRef Name = BB->getName();
128 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
129 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
130 BB2VPBB[BB] = VPBB;
131 return VPBB;
132}
133
134#ifndef NDEBUG
135// Return true if \p Val is considered an external definition. An external
136// definition is either:
137// 1. A Value that is not an Instruction. This will be refined in the future.
138// 2. An Instruction that is outside of the IR region represented in VPlan,
139// i.e., is not part of the loop nest.
140bool PlainCFGBuilder::isExternalDef(Value *Val) {
141 // All the Values that are not Instructions are considered external
142 // definitions for now.
143 Instruction *Inst = dyn_cast<Instruction>(Val);
144 if (!Inst)
145 return true;
146
147 // Check whether Instruction definition is in loop body.
148 return !TheLoop->contains(Inst);
149}
150#endif
151
152// Create a new VPValue or retrieve an existing one for the Instruction's
153// operand \p IRVal. This function must only be used to create/retrieve VPValues
154// for *Instruction operands* and not to create regular VPInstructions. For
155// the latter, see 'createVPInstructionsForVPBB'.
156VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
157 auto VPValIt = IRDef2VPValue.find(IRVal);
158 if (VPValIt != IRDef2VPValue.end())
159 // Operand has an associated VPInstruction or VPValue that was previously
160 // created.
161 return VPValIt->second;
162
163 // Operand doesn't have a previously created VPInstruction/VPValue. This
164 // means that operand is:
165 // A) a definition external to VPlan,
166 // B) any other Value without specific representation in VPlan.
167 // For now, we use VPValue to represent A and B and classify both as external
168 // definitions. We may introduce specific VPValue subclasses for them in the
169 // future.
170 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
171
172 // A and B: Create VPValue and add it to the pool of external definitions and
173 // to the Value->VPValue map.
174 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
175 IRDef2VPValue[IRVal] = NewVPVal;
176 return NewVPVal;
177}
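// Illustrative example (not part of the original source): for
//   %a = add i32 %x, 42
// where %x is defined outside the loop, both %x and the constant 42 are
// classified as external definitions and mapped to live-in VPValues via
// getOrAddLiveIn; operands defined inside the loop are expected to have been
// mapped already by the RPO traversal.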
178
179// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
180// counterpart. This function must be invoked in RPO so that the operands of a
181// VPInstruction in \p BB have been visited before (except for Phi nodes).
182void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
183 BasicBlock *BB) {
184 VPIRBuilder.setInsertPoint(VPBB);
185 // TODO: Model and preserve debug intrinsics in VPlan.
186 for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
187 Instruction *Inst = &InstRef;
188
189 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
190 // visited Inst when we shouldn't, breaking the RPO traversal order.
191 assert(!IRDef2VPValue.count(Inst) &&
192 "Instruction shouldn't have been visited.");
193
194 if (auto *Br = dyn_cast<BranchInst>(Inst)) {
195 // Conditional branch instructions are represented using BranchOnCond
196 // recipes.
197 if (Br->isConditional()) {
198 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
199 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
200 VPIRMetadata(*Inst), Inst->getDebugLoc());
201 }
202
203 // Skip the rest of the Instruction processing for Branch instructions.
204 continue;
205 }
206
207 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
208 // Don't emit recipes for unconditional switch instructions.
209 if (SI->getNumCases() == 0)
210 continue;
211 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
212 for (auto Case : SI->cases())
213 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
214 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
215 VPIRMetadata(*Inst), Inst->getDebugLoc());
216 continue;
217 }
218
219 VPSingleDefRecipe *NewR;
220 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
221 // Phi node's operands may not have been visited at this point. We create
222 // an empty VPInstruction that we will fix once the whole plain CFG has
223 // been built.
224 NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi");
225 NewR->setUnderlyingValue(Phi);
226 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
227 // Header phis need to be fixed after the VPBB for the latch has been
228 // created.
229 PhisToFix.push_back(Phi);
230 } else {
231 // Add operands for VPPhi in the order matching its predecessors in
232 // VPlan.
233 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
234 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
235 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
236 getOrCreateVPOperand(Phi->getIncomingValue(I));
237 }
238 for (VPBlockBase *Pred : VPBB->getPredecessors())
239 NewR->addOperand(
240 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
241 }
242 } else {
243 // Build VPIRMetadata from the instruction and add loop versioning
244 // metadata for loads and stores.
245 VPIRMetadata MD(*Inst);
246 if (isa<LoadInst, StoreInst>(Inst) && LVer) {
247 const auto &[AliasScopeMD, NoAliasMD] =
248 LVer->getNoAliasMetadataFor(Inst);
249 if (AliasScopeMD)
250 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
251 if (NoAliasMD)
252 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
253 }
254
255 // Translate LLVM-IR operands into VPValue operands and set them in the
256 // new VPInstruction.
257 SmallVector<VPValue *, 4> VPOperands;
258 for (Value *Op : Inst->operands())
259 VPOperands.push_back(getOrCreateVPOperand(Op));
260
261 if (auto *CI = dyn_cast<CastInst>(Inst)) {
262 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
263 CI->getType(), CI->getDebugLoc(),
264 VPIRFlags(*CI), MD);
265 NewR->setUnderlyingValue(CI);
266 } else {
267 // Build VPInstruction for any arbitrary Instruction without specific
268 // representation in VPlan.
269 NewR =
270 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
271 VPIRFlags(*Inst), MD, Inst->getDebugLoc());
272 }
273 }
274
275 IRDef2VPValue[Inst] = NewR;
276 }
277}
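// Illustrative sketch of the mapping above (assumed, simplified): a body block
//   %l = load i32, ptr %p
//   %a = add i32 %l, 1
//   br i1 %c, label %latch, label %if.then
// becomes a VPBasicBlock with a VPInstruction for the load, one for the add
// (operands resolved via getOrCreateVPOperand), and a BranchOnCond on the
// VPValue of %c; unconditional branches produce no recipe at all.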
278
279// Main interface to build the plain CFG.
280std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
281 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
282 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
283 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
284 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
285
286 // 1. Scan the body of the loop in a topological order to visit each basic
287 // block after having visited its predecessor basic blocks. Create a VPBB for
288 // each BB and link it to its successor and predecessor VPBBs. Note that
289 // predecessors must be set in the same order as they are in the incoming IR.
290 // Otherwise, there might be problems with existing phi nodes and algorithms
291 // based on predecessor traversal.
292
293 // Loop PH needs to be explicitly visited since it's not taken into account by
294 // LoopBlocksDFS.
295 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
296 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
297 "Unexpected loop preheader");
298 for (auto &I : *ThePreheaderBB) {
299 if (I.getType()->isVoidTy())
300 continue;
301 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
302 }
303
304 LoopBlocksRPO RPO(TheLoop);
305 RPO.perform(LI);
306
307 for (BasicBlock *BB : RPO) {
308 // Create or retrieve the VPBasicBlock for this BB.
309 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
310 // Set VPBB predecessors in the same order as they are in the incoming BB.
311 setVPBBPredsFromBB(VPBB, BB);
312
313 // Create VPInstructions for BB.
314 createVPInstructionsForVPBB(VPBB, BB);
315
316 // Set VPBB successors. We create empty VPBBs for successors if they don't
317 // exist already. Recipes will be created when the successor is visited
318 // during the RPO traversal.
319 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
320 SmallVector<VPBlockBase *> Succs = {
321 getOrCreateVPBB(SI->getDefaultDest())};
322 for (auto Case : SI->cases())
323 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
324 VPBB->setSuccessors(Succs);
325 continue;
326 }
327 auto *BI = cast<BranchInst>(BB->getTerminator());
328 unsigned NumSuccs = succ_size(BB);
329 if (NumSuccs == 1) {
330 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
331 continue;
332 }
333 assert(BI->isConditional() && NumSuccs == 2 &&
334 "block must have conditional branch with 2 successors");
335
336 BasicBlock *IRSucc0 = BI->getSuccessor(0);
337 BasicBlock *IRSucc1 = BI->getSuccessor(1);
338 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
339 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
340 VPBB->setTwoSuccessors(Successor0, Successor1);
341 }
342
343 for (auto *EB : Plan->getExitBlocks())
344 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
345
346 // 2. The whole CFG has been built at this point so all the input Values must
347 // have a VPlan counterpart. Fix VPlan header phi by adding their
348 // corresponding VPlan operands.
349 fixHeaderPhis();
350
351 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
352 Plan->getEntry()->setPlan(&*Plan);
353
354 // Fix VPlan loop-closed-SSA exit phis by adding incoming operands to the
355 // VPIRInstructions wrapping them.
356 // Note that the operand order corresponds to IR predecessor order, and may
357 // need adjusting when VPlan predecessors are added, if an exit block has
358 // multiple predecessors.
359 for (auto *EB : Plan->getExitBlocks()) {
360 for (VPRecipeBase &R : EB->phis()) {
361 auto *PhiR = cast<VPIRPhi>(&R);
362 PHINode &Phi = PhiR->getIRPhi();
363 assert(PhiR->getNumOperands() == 0 &&
364 "no phi operands should be added yet");
365 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
366 PhiR->addOperand(
367 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
368 }
369 }
370
371 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
372 return std::move(Plan);
373}
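// Minimal usage sketch (hypothetical driver; mirrors buildVPlan0 below, LVer
// may be null when no loop-versioning alias metadata is available):
//   PlainCFGBuilder Builder(TheLoop, &LI, /*LVer=*/nullptr);
//   std::unique_ptr<VPlan> Plan = Builder.buildPlainCFG();
// At this point the plan models the input CFG 1:1: no regions, no canonical
// IV, and all out-of-loop values represented as live-ins.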
374
375/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
376/// has exactly 2 predecessors (preheader and latch), where the block
377/// dominates the latch and the preheader dominates the block. If it is a
378/// header block return true and canonicalize the predecessors of the header
379/// (making sure the preheader appears first and the latch second) and the
380/// successors of the latch (making sure the loop exit comes first). Otherwise
381/// return false.
382static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
383 const VPDominatorTree &VPDT) {
384 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
385 if (Preds.size() != 2)
386 return false;
387
388 auto *PreheaderVPBB = Preds[0];
389 auto *LatchVPBB = Preds[1];
390 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
391 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
392 std::swap(PreheaderVPBB, LatchVPBB);
393
394 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
395 !VPDT.dominates(HeaderVPB, LatchVPBB))
396 return false;
397
398 // Canonicalize predecessors of header so that preheader is first and
399 // latch second.
400 HeaderVPB->swapPredecessors();
401 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
402 R.swapOperands();
403 }
404
405 // The two successors of conditional branch match the condition, with the
406 // first successor corresponding to true and the second to false. We
407 // canonicalize the successors of the latch when introducing the region, such
408 // that the latch exits the region when its condition is true; invert the
409 // original condition if the original CFG branches to the header on true.
410 // Note that the exit edge is not yet connected for top-level loops.
411 if (LatchVPBB->getSingleSuccessor() ||
412 LatchVPBB->getSuccessors()[0] != HeaderVPB)
413 return true;
414
415 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
416 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
419 "terminator must be a BranchOnCond");
420 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
421 Not->insertBefore(Term);
422 Term->setOperand(0, Not);
423 LatchVPBB->swapSuccessors();
424
425 return true;
426}
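// Illustrative example (not part of the original source): if header %H has
// predecessors [latch, ph], the first dominance check fails (%H dominates the
// latch, and ph dominates %H), the candidates are swapped, the predecessors
// are canonicalized to [ph, latch], and each phi in %H swaps its operands so
// that operand 0 always flows from the preheader and operand 1 from the latch.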
427
428/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
429static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
430 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
431 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
432
433 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
434 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
435
436 // Create an empty region first and insert it between PreheaderVPBB and
437 // the exit blocks, taking care to preserve the original predecessor &
438 // successor order of blocks. Set region entry and exiting after both
439 // HeaderVPB and LatchVPBB have been disconnected from their
440 // predecessors/successors.
441 auto *R = Plan.createLoopRegion();
442
443 // Transfer latch's successors to the region.
445
446 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
447 R->setEntry(HeaderVPB);
448 R->setExiting(LatchVPBB);
449
450 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
451 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
452 VPBB->setParent(R);
453}
454
455// Add the necessary canonical IV and branch recipes required to control the
456// loop.
457static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
458 VPBasicBlock *LatchVPBB, Type *IdxTy,
459 DebugLoc DL) {
460 Value *StartIdx = ConstantInt::get(IdxTy, 0);
461 auto *StartV = Plan.getOrAddLiveIn(StartIdx);
462
463 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
464 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
465 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
466
467 // We are about to replace the branch to exit the region. Remove the original
468 // BranchOnCond, if there is any.
469 DebugLoc LatchDL = DL;
470 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
471 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
472 LatchVPBB->getTerminator()->eraseFromParent();
473 }
474
475 VPBuilder Builder(LatchVPBB);
476 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
477 // Initially the induction increment is guaranteed not to wrap, but that
478 // may change later (e.g. when tail-folding), in which case the flags are dropped.
479 auto *CanonicalIVIncrement = Builder.createOverflowingOp(
480 Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
481 "index.next");
482 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
483
484 // Add the BranchOnCount VPInstruction to the latch.
485 Builder.createNaryOp(VPInstruction::BranchOnCount,
486 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
487 LatchDL);
488}
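// Schematic result (simplified VPlan excerpt, assuming default names; for
// illustration only):
//   vector.body:
//     EMIT vp<%iv> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
//     ...
//     EMIT vp<%index.next> = add nuw vp<%iv>, vp<%VFxUF>
//     EMIT branch-on-count vp<%index.next>, vp<%vector.trip.count>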
489
490/// Creates extracts for values in \p Plan defined in a loop region and used
491/// outside a loop region.
492static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
493 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
494 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
495 if (EB->getSinglePredecessor() != MiddleVPBB)
496 continue;
497
498 for (VPRecipeBase &R : EB->phis()) {
499 auto *ExitIRI = cast<VPIRPhi>(&R);
500 for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
501 VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
502 if (!Inc)
503 continue;
504 assert(ExitIRI->getNumOperands() == 1 &&
505 ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
506 "exit values from early exits must be fixed when branch to "
507 "early-exit is added");
508 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(B);
509 }
510 }
511 }
512}
513
514static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
515 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
516 VPDominatorTree VPDT(Plan);
517
518 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
519 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
520 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
521
522 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
523 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
524
525 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
526 // The canonical LatchVPBB has the header block as last successor. If it has
527 // another successor, this successor is an exit block - insert middle block on
528 // its edge. Otherwise, add middle block as another successor retaining header
529 // as last.
530 if (LatchVPBB->getNumSuccessors() == 2) {
531 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
532 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
533 } else {
534 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
535 LatchVPBB->swapSuccessors();
536 }
537
538 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
539
540 // Create SCEV and VPValue for the trip count.
541 // We use the symbolic max backedge-taken-count, which works also when
542 // vectorizing loops with uncountable early exits.
543 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
544 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
545 "Invalid backedge-taken count");
546 ScalarEvolution &SE = *PSE.getSE();
547 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
548 InductionTy, TheLoop);
549 Plan.setTripCount(
550 vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount));
551 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
552 VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
553
554 // The connection order corresponds to the operands of the conditional branch,
555 // with the middle block already connected to the exit block.
556 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
557 // Also connect the entry block to the scalar preheader.
558 // TODO: Also introduce a branch recipe together with the minimum trip count
559 // check.
560 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
561 Plan.getEntry()->swapSuccessors();
562
563 createExtractsForLiveOuts(Plan, MiddleVPBB);
564
565 VPBuilder ScalarPHBuilder(ScalarPH);
566 for (const auto &[PhiR, ScalarPhiR] : zip_equal(
567 drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
568 auto *VectorPhiR = cast<VPPhi>(&PhiR);
569 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
570 {VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
571 cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
572 }
573}
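// Schematic of the skeleton built above (illustrative, single countable exit):
//
//   entry -> vector.ph -> header ... latch -> middle.block -> exit
//     \                                           /
//      +---------------> scalar.ph <-------------+
//
// The entry and middle-block edges into scalar.ph correspond to the bypass
// and remainder paths, respectively; the resume phis in scalar.ph are created
// by the loop directly above.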
574
575/// Check \p Plan's live-in and replace them with constants, if they can be
576/// simplified via SCEV.
577static void simplifyLiveInsWithSCEV(VPlan &Plan,
578 PredicatedScalarEvolution &PSE) {
579 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
580 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
581 if (auto *C = dyn_cast<SCEVConstant>(Expr))
582 return Plan.getOrAddLiveIn(C->getValue());
583 return nullptr;
584 };
585
586 for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
587 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
588 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
589 }
590}
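// Illustrative example (not part of the original source): if a live-in %n can
// be proven by SCEV to equal the constant 16 (e.g. due to a loop guard), all
// of its uses in the plan are replaced with ir<16>, enabling later constant
// folding and trip-count simplifications.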
591
592/// To make RUN_VPLAN_PASS print the initial VPlan.
594
595std::unique_ptr<VPlan>
596VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
597 DebugLoc IVDL, PredicatedScalarEvolution &PSE,
598 LoopVersioning *LVer) {
599 PlainCFGBuilder Builder(TheLoop, &LI, LVer);
600 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
601 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
602 simplifyLiveInsWithSCEV(*VPlan0, PSE);
603
605 return VPlan0;
606}
607
608/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
609/// for \p Phi based on \p IndDesc.
610static VPHeaderPHIRecipe *
611createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPValue *Start,
612 const InductionDescriptor &IndDesc, VPlan &Plan,
613 PredicatedScalarEvolution &PSE, Loop &OrigLoop,
614 DebugLoc DL) {
615 [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
616 assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
617 "step must be loop invariant");
618 assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
619 (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
620 SE.getSCEV(IndDesc.getStartValue()) ==
621 vputils::getSCEVExprForVPValue(Start, PSE))) &&
622 "Start VPValue must match IndDesc's start value");
623
624 VPValue *Step =
625 vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
626
627 if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
628 return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
629 IndDesc, DL);
630
633 "must have an integer or float induction at this point");
634
635 // Update wide induction increments to use the same step as the corresponding
636 // wide induction. This enables detecting induction increments directly in
637 // VPlan and removes redundant splats.
638 using namespace llvm::VPlanPatternMatch;
639 if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
640 PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
641
642 // It is always safe to copy over the NoWrap and FastMath flags. In
643 // particular, when folding tail by masking, the masked-off lanes are never
644 // used, so it is safe.
645 VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
646
647 return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
648 IndDesc, Flags, DL);
649}
650
651void VPlanTransforms::createHeaderPhiRecipes(
652 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
653 const MapVector<PHINode *, InductionDescriptor> &Inductions,
654 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
655 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
656 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
657 // Retrieve the header manually from the initial plain-CFG VPlan.
658 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
659 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
660 assert(VPDominatorTree(Plan).dominates(HeaderVPBB,
661 HeaderVPBB->getPredecessors()[1]) &&
662 "header must dominate its latch");
663
664 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
665 // TODO: Gradually replace uses of underlying instruction by analyses on
666 // VPlan.
667 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
668 assert(PhiR->getNumOperands() == 2 &&
669 "Must have 2 operands for header phis");
670
671 // Extract common values once.
672 VPIRValue *Start = cast<VPIRValue>(PhiR->getOperand(0));
673 VPValue *BackedgeValue = PhiR->getOperand(1);
674
675 if (FixedOrderRecurrences.contains(Phi)) {
676 // TODO: Currently fixed-order recurrences are modeled as chains of
677 // first-order recurrences. If there are no users of the intermediate
678 // recurrences in the chain, the fixed order recurrence should be
679 // modeled directly, enabling more efficient codegen.
680 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
681 }
682
683 auto InductionIt = Inductions.find(Phi);
684 if (InductionIt != Inductions.end())
685 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
686 Plan, PSE, OrigLoop,
687 PhiR->getDebugLoc());
688
689 assert(Reductions.contains(Phi) && "only reductions are expected now");
690 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
691 assert(RdxDesc.getRecurrenceStartValue() ==
692 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
693 "incoming value must match start value");
694 // Will be updated later to >1 if reduction is partial.
695 unsigned ScaleFactor = 1;
696 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
697 return new VPReductionPHIRecipe(
698 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
699 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
700 ScaleFactor),
701 PhiR->getDebugLoc());
702 };
703
704 for (VPRecipeBase &R : make_early_inc_range(HeaderVPBB->phis())) {
705 if (isa<VPCanonicalIVPHIRecipe>(&R))
706 continue;
707 auto *PhiR = cast<VPPhi>(&R);
708 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
709 HeaderPhiR->insertBefore(PhiR);
710 PhiR->replaceAllUsesWith(HeaderPhiR);
711 PhiR->eraseFromParent();
712 }
713}
714
716 VPlan &Plan, const DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
717 const DenseSet<BasicBlock *> &BlocksNeedingPredication,
718 ElementCount MinVF) {
719 VPTypeAnalysis TypeInfo(Plan);
720 VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
721 SmallVector<VPRecipeBase *> ToDelete;
722
723 for (VPRecipeBase &R : Header->phis()) {
724 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
725 if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
726 continue;
727
728 RecurKind Kind = PhiR->getRecurrenceKind();
732 "AnyOf and Find reductions are not allowed for in-loop reductions");
733
734 bool IsFPRecurrence =
735 RecurrenceDescriptor::isFloatingPointRecurrenceKind(Kind);
736 FastMathFlags FMFs =
737 IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();
738
739 // Collect the chain of "link" recipes for the reduction starting at PhiR.
740 SetVector<VPSingleDefRecipe *> Worklist;
741 Worklist.insert(PhiR);
742 for (unsigned I = 0; I != Worklist.size(); ++I) {
743 VPSingleDefRecipe *Cur = Worklist[I];
744 for (VPUser *U : Cur->users()) {
745 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
746 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
747 assert((UserRecipe->getParent() == Plan.getMiddleBlock() ||
748 UserRecipe->getParent() == Plan.getScalarPreheader()) &&
749 "U must be either in the loop region, the middle block or the "
750 "scalar preheader.");
751 continue;
752 }
753
754 // Stores using instructions will be sunk later.
756 continue;
757 Worklist.insert(UserRecipe);
758 }
759 }
760
761 // Visit operation "Links" along the reduction chain top-down starting from
762 // the phi until LoopExitValue. We keep track of the previous item
763 // (PreviousLink) to tell which of the two operands of a Link will remain
764 // scalar and which will be reduced. For minmax by select(cmp), Link will be
765 // the select instructions. Blend recipes of in-loop reduction phis will
766 // get folded to their non-phi operand, as the reduction recipe handles the
767 // condition directly.
768 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
769 for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
770 if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
771 assert(Blend->getNumIncomingValues() == 2 &&
772 "Blend must have 2 incoming values");
773 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
774 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
775 "PhiR must be an operand of the blend");
776 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
777 continue;
778 }
779
780 if (IsFPRecurrence) {
781 FastMathFlags CurFMF =
782 cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlags();
783 if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
784 CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
785 ->getFastMathFlags();
786 FMFs &= CurFMF;
787 }
788
789 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
790
791 // Recognize a call to the llvm.fmuladd intrinsic.
792 bool IsFMulAdd = Kind == RecurKind::FMulAdd;
793 VPValue *VecOp;
794 VPBasicBlock *LinkVPBB = CurrentLink->getParent();
795 if (IsFMulAdd) {
797 "Expected current VPInstruction to be a call to the "
798 "llvm.fmuladd intrinsic");
799 assert(CurrentLink->getOperand(2) == PreviousLink &&
800 "expected a call where the previous link is the added operand");
801
802 // If the instruction is a call to the llvm.fmuladd intrinsic then we
803 // need to create an fmul recipe (multiplying the first two operands of
804 // the fmuladd together) to use as the vector operand for the fadd
805 // reduction.
806 auto *FMulRecipe = new VPInstruction(
807 Instruction::FMul,
808 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
809 CurrentLinkI->getFastMathFlags());
810 LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
811 VecOp = FMulRecipe;
812 } else if (Kind == RecurKind::AddChainWithSubs &&
813 match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
814 Type *PhiTy = TypeInfo.inferScalarType(PhiR);
815 auto *Zero = Plan.getConstantInt(PhiTy, 0);
816 auto *Sub = new VPInstruction(Instruction::Sub,
817 {Zero, CurrentLink->getOperand(1)}, {},
818 {}, CurrentLinkI->getDebugLoc());
819 Sub->setUnderlyingValue(CurrentLinkI);
820 LinkVPBB->insert(Sub, CurrentLink->getIterator());
821 VecOp = Sub;
822 } else {
823 // Index of the first operand which holds a non-mask vector operand.
824 unsigned IndexOfFirstOperand = 0;
825 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
826 if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
827 continue;
828 assert(match(CurrentLink,
830 "must be a select recipe");
831 IndexOfFirstOperand = 1;
832 }
833 // Note that for non-commutable operands (cmp-selects), the semantics of
834 // the cmp-select are captured in the recurrence kind.
835 unsigned VecOpId =
836 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
837 ? IndexOfFirstOperand + 1
838 : IndexOfFirstOperand;
839 VecOp = CurrentLink->getOperand(VecOpId);
840 assert(VecOp != PreviousLink &&
841 CurrentLink->getOperand(CurrentLink->getNumOperands() - 1 -
842 (VecOpId - IndexOfFirstOperand)) ==
843 PreviousLink &&
844 "PreviousLink must be the operand other than VecOp");
845 }
846
847 // Get block mask from BlockMaskCache if the block needs predication.
848 VPValue *CondOp = nullptr;
849 if (BlocksNeedingPredication.contains(CurrentLinkI->getParent()))
850 CondOp = BlockMaskCache.lookup(LinkVPBB);
851
852 assert(PhiR->getVFScaleFactor() == 1 &&
853 "inloop reductions must be unscaled");
854 auto *RedRecipe = new VPReductionRecipe(
855 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
856 getReductionStyle(/*IsInLoop=*/true, PhiR->isOrdered(), 1),
857 CurrentLinkI->getDebugLoc());
858 // Append the recipe to the end of the VPBasicBlock because we need to
859 // ensure that it comes after all of its inputs, including CondOp.
860 // Delete CurrentLink as it will be invalid if its operand is replaced
861 // with a reduction defined at the bottom of the block in the next link.
862 if (LinkVPBB->getNumSuccessors() == 0)
863 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
864 else
865 LinkVPBB->appendRecipe(RedRecipe);
866
867 CurrentLink->replaceAllUsesWith(RedRecipe);
868 ToDelete.push_back(CurrentLink);
869 PreviousLink = RedRecipe;
870 }
871 }
872
873 for (VPRecipeBase *R : ToDelete)
874 R->eraseFromParent();
875}
876
877void VPlanTransforms::handleEarlyExits(VPlan &Plan,
878 bool HasUncountableEarlyExit) {
879 auto *MiddleVPBB = cast<VPBasicBlock>(
880 Plan.getScalarPreheader()->getPredecessors()[0]);
881 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
882 VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
883
884 // Disconnect all early exits from the loop leaving it with a single exit from
885 // the latch. Early exits that are countable are left for a scalar epilog. The
886 // condition of uncountable early exits (currently at most one is supported)
887 // is fused into the latch exit, and used to branch from middle block to the
888 // early exit destination.
889 [[maybe_unused]] bool HandledUncountableEarlyExit = false;
890 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
891 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
892 if (Pred == MiddleVPBB)
893 continue;
894 if (HasUncountableEarlyExit) {
895 assert(!HandledUncountableEarlyExit &&
896 "can handle exactly one uncountable early exit");
897 handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
898 cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
899 HandledUncountableEarlyExit = true;
900 } else {
901 for (VPRecipeBase &R : EB->phis())
902 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
903 }
904 cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
905 VPBlockUtils::disconnectBlocks(Pred, EB);
906 }
907 }
908
909 assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
910 "missed an uncountable exit that must be handled");
911}
912
913void VPlanTransforms::addMiddleCheck(VPlan &Plan,
914 bool RequiresScalarEpilogueCheck,
915 bool TailFolded) {
916 auto *MiddleVPBB = cast<VPBasicBlock>(
917 Plan.getScalarPreheader()->getPredecessors()[0]);
918 // If MiddleVPBB has a single successor then the original loop does not exit
919 // via the latch and the single successor must be the scalar preheader.
920 // There's no need to add a runtime check to MiddleVPBB.
921 if (MiddleVPBB->getNumSuccessors() == 1) {
922 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
923 "must have ScalarPH as single successor");
924 return;
925 }
926
927 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
928
929 // Add a check in the middle block to see if we have completed all of the
930 // iterations in the first vector loop.
931 //
932 // Three cases:
933 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
934 // condition to false.
935 // 2) If (N - N%VF) == N, then we *don't* need to run the
936 // remainder. Thus if tail is to be folded, we know we don't need to run
937 // the remainder and we can set the condition to true.
938 // 3) Otherwise, construct a runtime check.
939
940 // We use the same DebugLoc as the scalar loop latch terminator instead of
941 // the corresponding compare because they may have ended up with different
942 // line numbers and we want to avoid awkward line stepping while debugging.
943 // E.g., if the compare has got a line number inside the loop.
944 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
945 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
946 VPBuilder Builder(MiddleVPBB);
947 VPValue *Cmp;
948 if (!RequiresScalarEpilogueCheck)
949 Cmp = Plan.getFalse();
950 else if (TailFolded)
951 Cmp = Plan.getTrue();
952 else
953 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
954 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
955 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
956}
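// Worked example (illustrative): with trip count N = 20 and VF * UF = 8, the
// vector loop executes 16 iterations, so cmp.n = (20 == 16) is false and the
// middle block branches to the scalar loop for the 4 remaining iterations;
// with N = 16, cmp.n is true and the scalar epilogue is bypassed.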
957
958void VPlanTransforms::createLoopRegions(VPlan &Plan) {
959 VPDominatorTree VPDT(Plan);
960 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
961 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
962 createLoopRegion(Plan, HeaderVPB);
963
964 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
965 TopRegion->setName("vector loop");
966 TopRegion->getEntryBasicBlock()->setName("vector.body");
967}
968
969/// Insert \p CheckBlockVPBB on the edge leading to the vector preheader,
970/// connecting it to both vector and scalar preheaders. Updates scalar
971/// preheader phis to account for the new predecessor.
972static void insertCheckBlockBeforeVectorLoop(VPlan &Plan,
973 VPBasicBlock *CheckBlockVPBB) {
974 VPBlockBase *VectorPH = Plan.getVectorPreheader();
975 auto *ScalarPH = cast<VPBasicBlock>(Plan.getScalarPreheader());
976 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
977 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
978 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
979 CheckBlockVPBB->swapSuccessors();
980 unsigned NumPreds = ScalarPH->getNumPredecessors();
981 for (VPRecipeBase &R : ScalarPH->phis()) {
982 auto *Phi = cast<VPPhi>(&R);
983 assert(Phi->getNumIncoming() == NumPreds - 1 &&
984 "must have incoming values for all predecessors");
985 Phi->addOperand(Phi->getOperand(NumPreds - 2));
986 }
987}
988
989// Likelihood of bypassing the vectorized loop due to a runtime check block,
990// including memory overlap checks block and wrapping/unit-stride checks block.
991static constexpr uint32_t CheckBypassWeights[] = {1, 127};
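// (Illustrative reading: weights {1, 127} encode an assumed probability of
// 1/128 that a runtime check fails and execution bypasses the vector loop.)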
992
993/// Create a BranchOnCond terminator in \p CheckBlockVPBB. Optionally adds
994/// branch weights.
995static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB,
996 VPValue *Cond, bool AddBranchWeights) {
998 auto *Term = VPBuilder(CheckBlockVPBB)
999 .createNaryOp(VPInstruction::BranchOnCond, {Cond});
1000 if (AddBranchWeights) {
1001 MDBuilder MDB(Plan.getContext());
1002 MDNode *BranchWeights =
1003 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
1004 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1005 }
1006}
1007
1008void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
1009 BasicBlock *CheckBlock,
1010 bool AddBranchWeights) {
1011 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
1012 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
1013 insertCheckBlockBeforeVectorLoop(Plan, CheckBlockVPBB);
1014 addBypassBranch(Plan, CheckBlockVPBB, CondVPV, AddBranchWeights);
1015}
1016
1017void VPlanTransforms::addMinimumIterationCheck(
1018 VPlan &Plan, ElementCount VF, unsigned UF,
1019 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
1020 bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
1021 PredicatedScalarEvolution &PSE, bool AddBranchWeights,
1022 DebugLoc DL) {
1023 // Generate code to check if the loop's trip count is less than VF * UF, or
1024 // equal to it in case a scalar epilogue is required; this implies that the
1025 // vector trip count is zero. This check also covers the case where adding one
1026 // to the backedge-taken count overflowed leading to an incorrect trip count
1027 // of zero. In this case we will also jump to the scalar loop.
1028 CmpInst::Predicate CmpPred =
1029 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1030 // If tail is to be folded, vector loop takes care of all iterations.
1031 VPValue *TripCountVPV = Plan.getTripCount();
1032 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
1033 Type *TripCountTy = TripCount->getType();
1034 ScalarEvolution &SE = *PSE.getSE();
1035 auto GetMinTripCount = [&]() -> const SCEV * {
1036 // Compute max(MinProfitableTripCount, UF * VF) and return it.
1037 const SCEV *VFxUF =
1038 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
1039 if (UF * VF.getKnownMinValue() >=
1040 MinProfitableTripCount.getKnownMinValue()) {
1041 // TODO: SCEV should be able to simplify test.
1042 return VFxUF;
1043 }
1044 const SCEV *MinProfitableTripCountSCEV =
1045 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
1046 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1047 };
1048
1049 VPBasicBlock *EntryVPBB = Plan.getEntry();
1050 VPBuilder Builder(EntryVPBB);
1051 VPValue *TripCountCheck = Plan.getFalse();
1052 const SCEV *Step = GetMinTripCount();
1053 if (TailFolded) {
1054 if (CheckNeededWithTailFolding) {
1055 // vscale is not necessarily a power-of-2, which means we cannot guarantee
1056 // an overflow to zero when updating induction variables and so an
1057 // additional overflow check is required before entering the vector loop.
1058
1059 VPValue *StepVPV = Builder.createExpandSCEV(Step);
1060
1061 // Get the maximum unsigned value for the type.
1062 VPValue *MaxUIntTripCount =
1063 Plan.getConstantInt(cast<IntegerType>(TripCountTy)->getMask());
1064 VPValue *DistanceToMax = Builder.createNaryOp(
1065 Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
1066 DL);
1067
1068 // Don't execute the vector loop if (UMax - n) < (VF * UF).
1069 // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
1070 // minProfitableTripCount).
1071 TripCountCheck =
1072 Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax, StepVPV, DL);
1073 } else {
1074 // TripCountCheck = false, folding tail implies positive vector trip
1075 // count.
1076 }
1077 } else {
1078 // TODO: Emit unconditional branch to vector preheader instead of
1079 // conditional branch with known condition.
1080 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
1081 // Check if the trip count is < the step.
1082 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
1083 // TODO: Ensure step is at most the trip count when determining max VF and
1084 // UF, w/o tail folding.
1085 TripCountCheck = Plan.getTrue();
1086 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
1087 TripCount, Step)) {
1088 // Generate the minimum iteration check only if we cannot prove the
1089 // check is known to be true, or known to be false.
1090 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
1091 TripCountCheck = Builder.createICmp(
1092 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
1093 } // else step known to be < trip count, use TripCountCheck preset to false.
1094 }
1095 VPInstruction *Term =
1096 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
1097 if (AddBranchWeights) {
1098 MDBuilder MDB(Plan.getContext());
1099 MDNode *BranchWeights = MDB.createBranchWeights(
1100 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
1101 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1102 }
1103}
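// Worked example (illustrative): for an i8 trip count n, VF * UF = 4 and tail
// folding with CheckNeededWithTailFolding, the emitted check is
// (255 - n) < 4, i.e. the vector loop is skipped for n > 251, where rounding
// n up to the next multiple of 4 would overflow the induction variable.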
1104
1105void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
1106 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
1107 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
1108 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE) {
1109 // Add the minimum iteration check for the epilogue vector loop.
1110 VPValue *TC = Plan.getOrAddLiveIn(TripCount);
1111 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
1112 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
1113 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
1114 VPValue *Count = Builder.createNaryOp(
1115 Instruction::Sub, {TC, Plan.getOrAddLiveIn(VectorTripCount)},
1116 DebugLoc::getUnknown(), "n.vec.remaining");
1117
1118 // Generate code to check if the loop's trip count is less than VF * UF of
1119 // the vector epilogue loop.
1120 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1121 auto *CheckMinIters = Builder.createICmp(
1122 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
1123 VPInstruction *Branch =
1124 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
1125
1126 // We assume the remaining `Count` is equally distributed in
1127 // [0, MainLoopStep)
1128 // So the probability for `Count < EpilogueLoopStep` should be
1129 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
1130 // TODO: Improve the estimate by taking the estimated trip count into
1131 // consideration.
1132 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1133 const uint32_t Weights[] = {EstimatedSkipCount,
1134 MainLoopStep - EstimatedSkipCount};
1135 MDBuilder MDB(Plan.getContext());
1136 MDNode *BranchWeights =
1137 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
1138 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1139}
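// Worked example (illustrative): with MainLoopStep = 16 and EpilogueLoopStep
// = 4, EstimatedSkipCount = 4 and the weights are {4, 12}, i.e. an assumed
// 4/16 = 25% probability that the remaining iterations are too few for the
// epilogue vector loop.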
1140
1141/// Find and return the final select instruction of the FindIV result pattern
1142/// for the given \p BackedgeVal:
1143/// select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),
1144/// ComputeReductionResult(ReducedIV), Start.
1145static VPInstruction *findFindIVSelect(VPValue *BackedgeVal) {
1146 return cast<VPInstruction>(
1147 vputils::findRecipe(BackedgeVal, [BackedgeVal](VPRecipeBase *R) {
1148 auto *VPI = dyn_cast<VPInstruction>(R);
1149 return VPI &&
1150 matchFindIVResult(VPI, m_Specific(BackedgeVal), m_VPValue());
1151 }));
1152}
1153
1154bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
1155 auto GetMinOrMaxCompareValue =
1156 [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
1157 auto *MinOrMaxR =
1158 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
1159 if (!MinOrMaxR)
1160 return nullptr;
1161
1162 // Check that MinOrMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1163 // with an intrinsic that matches the reduction kind.
1164 Intrinsic::ID ExpectedIntrinsicID =
1165 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
1166 if (!match(MinOrMaxR, m_Intrinsic(ExpectedIntrinsicID)))
1167 return nullptr;
1168
1169 if (MinOrMaxR->getOperand(0) == RedPhiR)
1170 return MinOrMaxR->getOperand(1);
1171
1172 assert(MinOrMaxR->getOperand(1) == RedPhiR &&
1173 "Reduction phi operand expected");
1174 return MinOrMaxR->getOperand(0);
1175 };
1176
1177 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1178 SmallVector<std::pair<VPReductionPHIRecipe *, VPValue *>, 2>
1179 MinOrMaxNumReductionsToHandle;
1180 bool HasUnsupportedPhi = false;
1181 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
1182 if (isa<VPCanonicalIVPHIRecipe>(&R))
1183 continue;
1184 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1185 if (!Cur) {
1186 // TODO: Also support fixed-order recurrence phis.
1187 HasUnsupportedPhi = true;
1188 continue;
1189 }
1191 Cur->getRecurrenceKind())) {
1192 HasUnsupportedPhi = true;
1193 continue;
1194 }
1195
1196 VPValue *MinOrMaxOp = GetMinOrMaxCompareValue(Cur);
1197 if (!MinOrMaxOp)
1198 return false;
1199
1200 MinOrMaxNumReductionsToHandle.emplace_back(Cur, MinOrMaxOp);
1201 }
1202
1203 if (MinOrMaxNumReductionsToHandle.empty())
1204 return true;
1205
1206 // We won't be able to resume execution in the scalar tail, if there are
1207 // unsupported header phis or there is no scalar tail at all, due to
1208 // tail-folding.
1209 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1210 return false;
1211
1212 /// Check if the vector loop of \p Plan can early exit and restart
1213 /// execution of the last vector iteration in the scalar loop. This requires
1214 /// all recipes up to the early exit point to be side-effect free, as they
1215 /// are re-executed. Currently we check that the loop is free of any recipe
1216 /// that may write to memory. Expected to operate on an early VPlan w/o
1217 /// nested regions.
1218 for (VPBlockBase *VPB :
1219 vp_depth_first_shallow(LoopRegion->getEntry())) {
1220 auto *VPBB = cast<VPBasicBlock>(VPB);
1221 for (auto &R : *VPBB) {
1222 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1223 return false;
1224 }
1225 }
1226
1227 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1228 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1229 VPValue *AllNaNLanes = nullptr;
1230 SmallPtrSet<VPValue *, 2> RdxResults;
1231 for (const auto &[_, MinOrMaxOp] : MinOrMaxNumReductionsToHandle) {
1232 VPValue *RedNaNLanes =
1233 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinOrMaxOp, MinOrMaxOp);
1234 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1235 : RedNaNLanes;
1236 }
1237
1238 VPValue *AnyNaNLane =
1239 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1240 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1241 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
1242 for (const auto &[RedPhiR, _] : MinOrMaxNumReductionsToHandle) {
1244 RedPhiR->getRecurrenceKind()) &&
1245 "unsupported reduction");
1246
1247 // If we exit early due to NaNs, compute the final reduction result based on
1248 // the reduction phi at the beginning of the last vector iteration.
1249 auto *RdxResult = vputils::findComputeReductionResult(RedPhiR);
1250 assert(RdxResult && "must find a ComputeReductionResult");
1251
1252 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1253 RdxResult->getOperand(0));
1254 RdxResult->setOperand(0, NewSel);
1255 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1256 RdxResults.insert(RdxResult);
1257 }
1258
1259 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1260 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1261 "Unexpected terminator");
1262 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1263 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1264 LatchExitingBranch->getOperand(1));
1265 auto *AnyExitTaken = LatchBuilder.createNaryOp(
1266 Instruction::Or, {AnyNaNLane, IsLatchExitTaken});
1267 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1268 LatchExitingBranch->eraseFromParent();
1269
1270 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane
1271 // is true, resume from the start of the last vector iteration via the
1272 // canonical IV; otherwise resume from the original value.
1273 for (auto &R : Plan.getScalarPreheader()->phis()) {
1274 auto *ResumeR = cast<VPPhi>(&R);
1275 VPValue *VecV = ResumeR->getOperand(0);
1276 if (RdxResults.contains(VecV))
1277 continue;
1278 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1279 if (DerivedIV->getNumUsers() == 1 &&
1280 DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
1281 auto *NewSel =
1282 MiddleBuilder.createSelect(AnyNaNLane, LoopRegion->getCanonicalIV(),
1283 &Plan.getVectorTripCount());
1284 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1285 DerivedIV->setOperand(1, NewSel);
1286 continue;
1287 }
1288 }
1289 // Bail out and abandon the current, partially modified, VPlan if we
1290 // encounter a resume phi that cannot be updated yet.
1291 if (VecV != &Plan.getVectorTripCount()) {
1292 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1293 "FMaxNum/FMinNum reduction.\n");
1294 return false;
1295 }
1296 auto *NewSel = MiddleBuilder.createSelect(
1297 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1298 ResumeR->setOperand(0, NewSel);
1299 }
1300
1301 auto *MiddleTerm = MiddleVPBB->getTerminator();
1302 MiddleBuilder.setInsertPoint(MiddleTerm);
1303 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1304 VPValue *NewCond =
1305 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1306 MiddleTerm->setOperand(0, NewCond);
1307 return true;
1308}
1309
1311 if (Plan.hasScalarVFOnly())
1312 return false;
1313
1314 // We want to create the following nodes:
1315 // vector.body:
1316 // ...new WidenPHI recipe introduced to keep the mask value for the latest
1317 // iteration where any lane was active.
1318 // mask.phi = phi [ ir<false>, vector.ph ], [ vp<new.mask>, vector.body ]
1319 // ...data.phi (a VPReductionPHIRecipe for a FindLast reduction) already
1320 // exists, but needs updating to use 'new.data' for the backedge value.
1321 // data.phi = phi ir<default.val>, vp<new.data>
1322 //
1323 // ...'data' and 'compare' created by existing nodes...
1324 //
1325 // ...new recipes introduced to determine whether to update the reduction
1326 // values or keep the current one.
1327 // any.active = i1 any-of ir<compare>
1328 // new.mask = select vp<any.active>, ir<compare>, vp<mask.phi>
1329 // new.data = select vp<any.active>, ir<data>, ir<data.phi>
1330 //
1331 // middle.block:
1332 // ...extract-last-active replaces compute-reduction-result.
1333 // result = extract-last-active vp<new.data>, vp<new.mask>, ir<default.val>
1334
1335 for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
1336 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&Phi);
1338 PhiR->getRecurrenceKind()))
1339 continue;
1340
1341 // Find the condition for the select.
1342 auto *SelectR = cast<VPSingleDefRecipe>(&PhiR->getBackedgeRecipe());
1343 VPValue *Cond = nullptr, *Op1 = nullptr, *Op2 = nullptr;
1344 if (!match(SelectR,
1345 m_Select(m_VPValue(Cond), m_VPValue(Op1), m_VPValue(Op2))))
1346 return false;
1347
1348 // Add mask phi.
1349 VPBuilder Builder = VPBuilder::getToInsertAfter(PhiR);
1350 auto *MaskPHI = new VPWidenPHIRecipe(nullptr, /*Start=*/Plan.getFalse());
1351 Builder.insert(MaskPHI);
1352
1353 // Add select for mask.
1354 Builder.setInsertPoint(SelectR);
1355
1356 if (Op1 == PhiR) {
1357 // Normalize to selecting the data operand when the condition is true by
1358 // swapping operands and negating the condition.
1359 std::swap(Op1, Op2);
1360 Cond = Builder.createNot(Cond);
1361 }
1362 assert(Op2 == PhiR && "data value must be selected if Cond is true");
1363
1364 VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, {Cond});
1365 VPValue *MaskSelect = Builder.createSelect(AnyOf, Cond, MaskPHI);
1366 MaskPHI->addOperand(MaskSelect);
1367
1368 // Replace select for data.
1369 VPValue *DataSelect =
1370 Builder.createSelect(AnyOf, Op1, Op2, SelectR->getDebugLoc());
1371 SelectR->replaceAllUsesWith(DataSelect);
1372 SelectR->eraseFromParent();
1373
1374 // Find final reduction computation and replace it with an
1375 // extract.last.active intrinsic.
1376 auto *RdxResult =
1377 vputils::findComputeReductionResult(PhiR);
1378 // TODO: Handle tail-folding.
1379 if (!RdxResult)
1380 return false;
1381 Builder.setInsertPoint(RdxResult);
1382 auto *ExtractLastActive =
1383 Builder.createNaryOp(VPInstruction::ExtractLastActive,
1384 {DataSelect, MaskSelect, PhiR->getStartValue()},
1385 RdxResult->getDebugLoc());
1386 RdxResult->replaceAllUsesWith(ExtractLastActive);
1387 RdxResult->eraseFromParent();
1388 }
1389
1390 return true;
1391}
1392
1394 for (auto &PhiR : make_early_inc_range(
1395 Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
1396 auto *MinOrMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
1397 // TODO: check for multi-uses in VPlan directly.
1398 if (!MinOrMaxPhiR || !MinOrMaxPhiR->hasUsesOutsideReductionChain())
1399 continue;
1400
1401 // MinOrMaxPhiR has users outside the reduction cycle in the loop. Check if
1402 // the only other user is a FindLastIV reduction. MinOrMaxPhiR must have
1403 // exactly 2 users:
1404 // 1) the min/max operation of the reduction cycle, and
1405 // 2) the compare of a FindLastIV reduction cycle. This compare must match
1406 // the min/max operation - comparing MinOrMaxPhiR with the operand of the
1407 // min/max operation, and be used only by the select of the FindLastIV
1408 // reduction cycle.
1409 RecurKind RdxKind = MinOrMaxPhiR->getRecurrenceKind();
1410 assert(
1412 "only min/max recurrences support users outside the reduction chain");
1413
1414 auto *MinOrMaxOp =
1415 dyn_cast<VPRecipeWithIRFlags>(MinOrMaxPhiR->getBackedgeValue());
1416 if (!MinOrMaxOp)
1417 return false;
1418
1419 // Check that MinOrMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1420 // with an intrinsic that matches the reduction kind.
1421 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
1422 if (!match(MinOrMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
1423 return false;
1424
1425 // MinOrMaxOp must have 2 users: 1) MinOrMaxPhiR and 2)
1426 // ComputeReductionResult.
1427 assert(MinOrMaxOp->getNumUsers() == 2 &&
1428 "MinOrMaxOp must have exactly 2 users");
1429 VPValue *MinOrMaxOpValue = MinOrMaxOp->getOperand(0);
1430 if (MinOrMaxOpValue == MinOrMaxPhiR)
1431 MinOrMaxOpValue = MinOrMaxOp->getOperand(1);
1432
1433 VPValue *CmpOpA;
1434 VPValue *CmpOpB;
1435 CmpPredicate Pred;
1437 MinOrMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1438 if (!Cmp || Cmp->getNumUsers() != 1 ||
1439 (CmpOpA != MinOrMaxOpValue && CmpOpB != MinOrMaxOpValue))
1440 return false;
1441
1442 if (MinOrMaxOpValue != CmpOpB)
1443 Pred = CmpInst::getSwappedPredicate(Pred);
1444
1445 // MinOrMaxPhiR must have exactly 2 users:
1446 // * MinOrMaxOp,
1447 // * Cmp (that's part of a FindLastIV chain).
1448 if (MinOrMaxPhiR->getNumUsers() != 2)
1449 return false;
1450
1451 VPInstruction *MinOrMaxResult =
1452 vputils::findComputeReductionResult(MinOrMaxPhiR);
1453 assert(is_contained(MinOrMaxPhiR->users(), MinOrMaxOp) &&
1454 "one user must be MinOrMaxOp");
1455 assert(MinOrMaxResult &&
1456 "MinOrMaxOp must have a ComputeReductionResult user");
1457
1458 // Cmp must be used by the select of a FindLastIV chain.
1459 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
1460 VPValue *IVOp, *FindIV;
1461 if (!Sel || Sel->getNumUsers() != 2 ||
1462 !match(Sel,
1463 m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))))
1464 return false;
1465
1466 if (!isa<VPReductionPHIRecipe>(FindIV)) {
1467 std::swap(FindIV, IVOp);
1468 Pred = CmpInst::getInversePredicate(Pred);
1469 }
1470
1471 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
1472 if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind(
1473 FindIVPhiR->getRecurrenceKind()))
1474 return false;
1475
1476 // TODO: Support cases where IVOp is the IV increment.
1477 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
1478 !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
1479 return false;
1480
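 // Map the recurrence kind to the (non-strict) predicate the FindLastIV
 // compare must use: the IV is recorded whenever the candidate value ties
 // or improves the running min/max, so the last qualifying index wins.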
1481 CmpInst::Predicate RdxPredicate = [RdxKind]() {
1482 switch (RdxKind) {
1483 case RecurKind::UMin:
1484 return CmpInst::ICMP_UGE;
1485 case RecurKind::UMax:
1486 return CmpInst::ICMP_ULE;
1487 case RecurKind::SMax:
1488 return CmpInst::ICMP_SLE;
1489 case RecurKind::SMin:
1490 return CmpInst::ICMP_SGE;
1491 default:
1492 llvm_unreachable("unhandled recurrence kind");
1493 }
1494 }();
1495
1496 // TODO: Strict predicates need to find the first IV value for which the
1497 // predicate holds, not the last.
1498 if (Pred != RdxPredicate)
1499 return false;
1500
1501 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
1502 "cannot handle inloop/ordered reductions yet");
1503
1504 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1505 // result:
1506 // 1. We need to find the last IV for which the condition based on the
1507 // min/max recurrence is true,
1508 // 2. Compare the partial min/max reduction result to its final value and,
1509 // 3. Select the lanes of the partial FindLastIV reductions which
1510 // correspond to the lanes matching the min/max reduction result.
1511 //
1512 // For example, this transforms
1513 // vp<%min.result> = compute-reduction-result ir<%min.val.next>
1514 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1515 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
1516 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
1517 //
1518 // into:
1519 //
1520 // vp<%min.result> = compute-reduction-result ir<%min.val.next>
1521 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<%min.result>
1522 // vp<%final.iv> = select vp<%final.min.cmp>, vp<%min.idx.next>, SENTINEL
1523 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%final.iv>
1524 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
1525 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
1526 //
1527 // Find the FindIV result pattern.
1528 auto *FindIVSelect = findFindIVSelect(FindIVPhiR->getBackedgeValue());
1529 auto *FindIVCmp = FindIVSelect->getOperand(0)->getDefiningRecipe();
1530 auto *FindIVRdxResult = cast<VPInstruction>(FindIVCmp->getOperand(0));
1531 assert(FindIVSelect->getParent() == MinOrMaxResult->getParent() &&
1532 "both results must be computed in the same block");
1533 MinOrMaxResult->moveBefore(*FindIVRdxResult->getParent(),
1534 FindIVRdxResult->getIterator());
1535
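 // Keep the IV candidates only for lanes whose partial min/max value equals
 // the final reduction result; all other lanes receive the sentinel, so the
 // FindLastIV reduction picks the last index that produced the overall
 // min/max.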
1536 VPBuilder B(FindIVRdxResult);
1537 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1538 auto *FinalMinOrMaxCmp =
1539 B.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1540 VPValue *Sentinel = FindIVCmp->getOperand(1);
1541 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1542 auto *FinalIVSelect =
1543 B.createSelect(FinalMinOrMaxCmp, LastIVExiting, Sentinel);
1544 FindIVRdxResult->setOperand(0, FinalIVSelect);
1545 }
1546 return true;
1547}