LLVM 23.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanDominatorTree.h"
19#include "VPlanHelpers.h"
20#include "VPlanPatternMatch.h"
21#include "VPlanTransforms.h"
22#include "VPlanUtils.h"
29#include "llvm/IR/InstrTypes.h"
30#include "llvm/IR/MDBuilder.h"
33
34#define DEBUG_TYPE "vplan"
35
36using namespace llvm;
37using namespace VPlanPatternMatch;
38
39namespace {
40// Class that is used to build the plain CFG for the incoming IR.
// Builds a VPlan whose CFG mirrors the input loop nest 1:1 ("plain CFG");
// subsequent VPlan-to-VPlan transforms introduce regions, skeleton blocks etc.
41class PlainCFGBuilder {
42 // The outermost loop of the input loop nest considered for vectorization.
43 Loop *TheLoop;
44
45 // Loop Info analysis.
46 LoopInfo *LI;
47
48 // Loop versioning for alias metadata.
49 LoopVersioning *LVer;
50
51 // Vectorization plan that we are working on.
52 std::unique_ptr<VPlan> Plan;
53
54 // Builder of the VPlan instruction-level representation.
55 VPBuilder VPIRBuilder;
56
57 // NOTE: The following maps are intentionally destroyed after the plain CFG
58 // construction because subsequent VPlan-to-VPlan transformation may
59 // invalidate them.
60 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
// NOTE(review): the BB2VPBB map declaration (used throughout the builder) is
// missing from this view (scrape gap at original line 61) — confirm against
// upstream before compiling.
62 // Map incoming Value definitions to their newly-created VPValues.
63 DenseMap<Value *, VPValue *> IRDef2VPValue;
64
65 // Hold phi node's that need to be fixed once the plain CFG has been built.
// NOTE(review): the PhisToFix container declaration is missing from this view
// (scrape gap at original line 66) — confirm against upstream.
67
68 // Utility functions.
69 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
70 void fixHeaderPhis();
71 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
72#ifndef NDEBUG
73 bool isExternalDef(Value *Val);
74#endif
75 VPValue *getOrCreateVPOperand(Value *IRVal);
76 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
77
78public:
79 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer)
80 : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique<VPlan>(Lp)) {}
81
82 /// Build plain CFG for TheLoop and connect it to Plan's entry.
83 std::unique_ptr<VPlan> buildPlainCFG();
84};
85} // anonymous namespace
86
87// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
88// must have no predecessors.
89void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
90 // Collect VPBB predecessors.
// NOTE(review): the local declaration of the VPBBPreds vector is missing from
// this view (scrape gap at original line 91) — confirm against upstream.
92 for (BasicBlock *Pred : predecessors(BB))
93 VPBBPreds.push_back(getOrCreateVPBB(Pred));
// Predecessor order must match IR order so phi incoming-value order lines up.
94 VPBB->setPredecessors(VPBBPreds);
95}
96
97static bool isHeaderBB(BasicBlock *BB, Loop *L) {
98 return L && BB == L->getHeader();
99}
100
101// Add operands to VPInstructions representing phi nodes from the input IR.
102void PlainCFGBuilder::fixHeaderPhis() {
103 for (auto *Phi : PhisToFix) {
104 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
105 VPValue *VPVal = IRDef2VPValue[Phi];
106 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
107 auto *PhiR = cast<VPPhi>(VPVal);
108 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
109 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
110 "Expected Phi in header block.");
111 assert(Phi->getNumOperands() == 2 &&
112 "header phi must have exactly 2 operands");
113 for (BasicBlock *Pred : predecessors(Phi->getParent()))
114 PhiR->addOperand(
115 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
116 }
117}
118
119// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
120// existing one if it was already created.
121VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
122 if (auto *VPBB = BB2VPBB.lookup(BB)) {
123 // Retrieve existing VPBB.
124 return VPBB;
125 }
126
127 // Create new VPBB.
128 StringRef Name = BB->getName();
129 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
130 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
131 BB2VPBB[BB] = VPBB;
132 return VPBB;
133}
134
135#ifndef NDEBUG
136// Return true if \p Val is considered an external definition. An external
137// definition is either:
138// 1. A Value that is not an Instruction. This will be refined in the future.
139// 2. An Instruction that is outside of the IR region represented in VPlan,
140//    i.e., is not part of the loop nest.
141bool PlainCFGBuilder::isExternalDef(Value *Val) {
142 // All the Values that are not Instructions are considered external
143 // definitions for now.
// NOTE(review): the line defining Inst (presumably
// `Instruction *Inst = dyn_cast<Instruction>(Val);`) is missing from this
// view (scrape gap at original line 144) — confirm against upstream.
145 if (!Inst)
146 return true;
147
148 // Check whether Instruction definition is in loop body.
149 return !TheLoop->contains(Inst);
150}
151#endif
152
153// Create a new VPValue or retrieve an existing one for the Instruction's
154// operand \p IRVal. This function must only be used to create/retrieve VPValues
155// for *Instruction's operands* and not to create regular VPInstruction's. For
156// the latter, please, look at 'createVPInstructionsForVPBB'.
157VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
158 auto VPValIt = IRDef2VPValue.find(IRVal);
159 if (VPValIt != IRDef2VPValue.end())
160 // Operand has an associated VPInstruction or VPValue that was previously
161 // created.
162 return VPValIt->second;
163
164 // Operand doesn't have a previously created VPInstruction/VPValue. This
165 // means that operand is:
166 // A) a definition external to VPlan,
167 // B) any other Value without specific representation in VPlan.
168 // For now, we use VPValue to represent A and B and classify both as external
169 // definitions. We may introduce specific VPValue subclasses for them in the
170 // future.
171 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
172
173 // A and B: Create VPValue and add it to the pool of external definitions and
174 // to the Value->VPValue map.
175 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
176 IRDef2VPValue[IRVal] = NewVPVal;
177 return NewVPVal;
178}
179
180// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
181// counterpart. This function must be invoked in RPO so that the operands of a
182// VPInstruction in \p BB have been visited before (except for Phi nodes).
183void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
184 BasicBlock *BB) {
185 VPIRBuilder.setInsertPoint(VPBB);
186 // TODO: Model and preserve debug intrinsics in VPlan.
187 for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
188 Instruction *Inst = &InstRef;
189
190 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
191 // visited Inst when we shouldn't, breaking the RPO traversal order.
192 assert(!IRDef2VPValue.count(Inst) &&
193 "Instruction shouldn't have been visited.");
194
195 if (auto *Br = dyn_cast<BranchInst>(Inst)) {
196 // Conditional branch instruction are represented using BranchOnCond
197 // recipes.
// Unconditional branches are implicit in the VPBB successor edges, so no
// recipe is emitted for them.
198 if (Br->isConditional()) {
199 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
200 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
201 VPIRMetadata(*Inst), Inst->getDebugLoc());
202 }
203
204 // Skip the rest of the Instruction processing for Branch instructions.
205 continue;
206 }
207
208 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
209 // Don't emit recipes for unconditional switch instructions.
210 if (SI->getNumCases() == 0)
211 continue;
// Operands are the switch condition followed by one case value per case.
212 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
213 for (auto Case : SI->cases())
214 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
215 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
216 VPIRMetadata(*Inst), Inst->getDebugLoc());
217 continue;
218 }
219
220 VPSingleDefRecipe *NewR;
221 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
222 // Phi node's operands may not have been visited at this point. We create
223 // an empty VPInstruction that we will fix once the whole plain CFG has
224 // been built.
225 NewR =
226 VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi", *Phi);
227 NewR->setUnderlyingValue(Phi);
228 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
229 // Header phis need to be fixed after the VPBB for the latch has been
230 // created.
231 PhisToFix.push_back(Phi);
232 } else {
233 // Add operands for VPPhi in the order matching its predecessors in
234 // VPlan.
235 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
236 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
237 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
238 getOrCreateVPOperand(Phi->getIncomingValue(I));
239 }
240 for (VPBlockBase *Pred : VPBB->getPredecessors())
241 NewR->addOperand(
242 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
243 }
244 } else {
245 // Build VPIRMetadata from the instruction and add loop versioning
246 // metadata for loads and stores.
247 VPIRMetadata MD(*Inst);
248 if (isa<LoadInst, StoreInst>(Inst) && LVer) {
249 const auto &[AliasScopeMD, NoAliasMD] =
250 LVer->getNoAliasMetadataFor(Inst);
251 if (AliasScopeMD)
252 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
253 if (NoAliasMD)
254 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
255 }
256
257 // Translate LLVM-IR operands into VPValue operands and set them in the
258 // new VPInstruction.
259 SmallVector<VPValue *, 4> VPOperands;
260 for (Value *Op : Inst->operands())
261 VPOperands.push_back(getOrCreateVPOperand(Op));
262
263 if (auto *CI = dyn_cast<CastInst>(Inst)) {
264 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
265 CI->getType(), CI->getDebugLoc(),
266 VPIRFlags(*CI), MD);
267 NewR->setUnderlyingValue(CI);
268 } else {
269 // Build VPInstruction for any arbitrary Instruction without specific
270 // representation in VPlan.
271 NewR =
272 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
273 VPIRFlags(*Inst), MD, Inst->getDebugLoc());
274 }
275 }
276
// Record the mapping so later operand lookups resolve to this recipe.
277 IRDef2VPValue[Inst] = NewR;
278 }
279}
280
281// Main interface to build the plain CFG.
282std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
// The entry and exit VPIRBasicBlocks already exist on the VPlan; seed the
// BB->VPBB map with them so getOrCreateVPBB reuses them.
283 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
284 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
285 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
286 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
287
288 // 1. Scan the body of the loop in a topological order to visit each basic
289 // block after having visited its predecessor basic blocks. Create a VPBB for
290 // each BB and link it to its successor and predecessor VPBBs. Note that
291 // predecessors must be set in the same order as they are in the incomming IR.
292 // Otherwise, there might be problems with existing phi nodes and algorithm
293 // based on predecessors traversal.
294
295 // Loop PH needs to be explicitly visited since it's not taken into account by
296 // LoopBlocksDFS.
297 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
298 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
299 "Unexpected loop preheader");
// Non-void preheader instructions are external to the vector loop body and
// become live-ins.
300 for (auto &I : *ThePreheaderBB) {
301 if (I.getType()->isVoidTy())
302 continue;
303 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
304 }
305
306 LoopBlocksRPO RPO(TheLoop);
307 RPO.perform(LI);
308
309 for (BasicBlock *BB : RPO) {
310 // Create or retrieve the VPBasicBlock for this BB.
311 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
312 // Set VPBB predecessors in the same order as they are in the incoming BB.
313 setVPBBPredsFromBB(VPBB, BB);
314
315 // Create VPInstructions for BB.
316 createVPInstructionsForVPBB(VPBB, BB);
317
318 // Set VPBB successors. We create empty VPBBs for successors if they don't
319 // exist already. Recipes will be created when the successor is visited
320 // during the RPO traversal.
321 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
// NOTE(review): the declaration line opening the Succs vector (presumably
// `SmallVector<VPBlockBase *> Succs = {`) is missing from this view (scrape
// gap at original line 322) — confirm against upstream.
323 getOrCreateVPBB(SI->getDefaultDest())};
324 for (auto Case : SI->cases())
325 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
326 VPBB->setSuccessors(Succs);
327 continue;
328 }
329 auto *BI = cast<BranchInst>(BB->getTerminator());
330 unsigned NumSuccs = succ_size(BB);
331 if (NumSuccs == 1) {
332 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
333 continue;
334 }
// NOTE(review): BI->isConditional() appears twice in this assert; one of the
// two conjuncts is redundant.
335 assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
336 "block must have conditional branch with 2 successors");
337
338 BasicBlock *IRSucc0 = BI->getSuccessor(0);
339 BasicBlock *IRSucc1 = BI->getSuccessor(1);
340 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
341 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
342 VPBB->setTwoSuccessors(Successor0, Successor1);
343 }
344
// Exit blocks were pre-registered; connect their predecessors now that all
// loop blocks exist.
345 for (auto *EB : Plan->getExitBlocks())
346 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
347
348 // 2. The whole CFG has been built at this point so all the input Values must
349 // have a VPlan counterpart. Fix VPlan header phi by adding their
350 // corresponding VPlan operands.
351 fixHeaderPhis();
352
353 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
354 Plan->getEntry()->setPlan(&*Plan);
355
356 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
357 // VPIRInstructions wrapping them.
358 // // Note that the operand order corresponds to IR predecessor order, and may
359 // need adjusting when VPlan predecessors are added, if an exit block has
360 // multiple predecessor.
361 for (auto *EB : Plan->getExitBlocks()) {
362 for (VPRecipeBase &R : EB->phis()) {
363 auto *PhiR = cast<VPIRPhi>(&R);
364 PHINode &Phi = PhiR->getIRPhi();
365 assert(PhiR->getNumOperands() == 0 &&
366 "no phi operands should be added yet");
367 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
368 PhiR->addOperand(
369 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
370 }
371 }
372
373 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
374 return std::move(Plan);
375}
376
377/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
378/// has exactly 2 predecessors (preheader and latch), where the block
379/// dominates the latch and the preheader dominates the block. If it is a
380/// header block return true and canonicalize the predecessors of the header
381/// (making sure the preheader appears first and the latch second) and the
382/// successors of the latch (making sure the loop exit comes first). Otherwise
383/// return false.
// NOTE(review): the opening signature line (presumably
// `static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,`) is missing
// from this view (scrape gap at original line 384) — confirm against upstream.
385 const VPDominatorTree &VPDT) {
386 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
387 if (Preds.size() != 2)
388 return false;
389
// Guess which predecessor is the preheader and which the latch; if the
// dominance relations don't hold, try the swapped assignment.
390 auto *PreheaderVPBB = Preds[0];
391 auto *LatchVPBB = Preds[1];
392 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
393 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
394 std::swap(PreheaderVPBB, LatchVPBB);
395
396 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
397 !VPDT.dominates(HeaderVPB, LatchVPBB))
398 return false;
399
400 // Canonicalize predecessors of header so that preheader is first and
401 // latch second.
402 HeaderVPB->swapPredecessors();
// Keep phi incoming operands in sync with the new predecessor order.
403 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
404 R.swapOperands();
405 }
406
407 // The two successors of conditional branch match the condition, with the
408 // first successor corresponding to true and the second to false. We
409 // canonicalize the successors of the latch when introducing the region, such
410 // that the latch exits the region when its condition is true; invert the
411 // original condition if the original CFG branches to the header on true.
412 // Note that the exit edge is not yet connected for top-level loops.
413 if (LatchVPBB->getSingleSuccessor() ||
414 LatchVPBB->getSuccessors()[0] != HeaderVPB)
415 return true;
416
417 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
418 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
// NOTE(review): the first line(s) of this assert (matching Term against a
// BranchOnCond pattern) are missing from this view (scrape gap at original
// lines 419-420) — confirm against upstream.
421 "terminator must be a BranchOnCond");
422 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
423 Not->insertBefore(Term);
424 Term->setOperand(0, Not);
425 LatchVPBB->swapSuccessors();
426
427 return true;
428}
429
430/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
431static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
// Predecessor order is canonical at this point: [0] = preheader, [1] = latch
// (established by canonicalHeaderAndLatch).
432 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
433 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
434
435 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
436 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
437
438 // Create an empty region first and insert it between PreheaderVPBB and
439 // the exit blocks, taking care to preserve the original predecessor &
440 // successor order of blocks. Set region entry and exiting after both
441 // HeaderVPB and LatchVPBB have been disconnected from their
442 // predecessors/successors.
443 auto *R = Plan.createLoopRegion();
444
445 // Transfer latch's successors to the region.
// NOTE(review): the statement implementing the transfer (presumably a
// VPBlockUtils call moving LatchVPBB's successors onto R) is missing from
// this view (scrape gap at original line 446) — confirm against upstream.
447
448 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
449 R->setEntry(HeaderVPB);
450 R->setExiting(LatchVPBB);
451
452 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
453 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
454 VPBB->setParent(R);
455}
456
457// Add the necessary canonical IV and branch recipes required to control the
458// loop.
459static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
460 VPBasicBlock *LatchVPBB, Type *IdxTy,
461 DebugLoc DL) {
462 auto *StartV = Plan.getConstantInt(IdxTy, 0);
463
464 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
465 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
466 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
467
468 // We are about to replace the branch to exit the region. Remove the original
469 // BranchOnCond, if there is any.
470 DebugLoc LatchDL = DL;
471 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
472 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
473 LatchVPBB->getTerminator()->eraseFromParent();
474 }
475
476 VPBuilder Builder(LatchVPBB);
477 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
478 // Initially the induction increment is guaranteed to not wrap, but that may
479 // change later, e.g. when tail-folding, when the flags need to be dropped.
480 auto *CanonicalIVIncrement = Builder.createAdd(
481 CanonicalIVPHI, &Plan.getVFxUF(), DL, "index.next", {true, false});
482 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
483
484 // Add the BranchOnCount VPInstruction to the latch.
485 Builder.createNaryOp(VPInstruction::BranchOnCount,
486 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
487 LatchDL);
488}
489
490/// Creates extracts for values in \p Plan defined in a loop region and used
491/// outside a loop region.
492static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
493 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
494 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
495 if (!is_contained(EB->predecessors(), MiddleVPBB))
496 continue;
497
498 for (VPRecipeBase &R : EB->phis()) {
499 auto *ExitIRI = cast<VPIRPhi>(&R);
500 VPValue *Exiting = ExitIRI->getIncomingValueForBlock(MiddleVPBB);
501 if (isa<VPIRValue>(Exiting))
502 continue;
503 Exiting = B.createNaryOp(VPInstruction::ExtractLastPart, Exiting);
504 Exiting = B.createNaryOp(VPInstruction::ExtractLastLane, Exiting);
505 ExitIRI->setIncomingValueForBlock(MiddleVPBB, Exiting);
506 }
507 }
508}
509
// Introduce the initial skeleton around the plain-CFG loop: vector preheader,
// middle block, scalar preheader, canonical IV, trip count and resume phis.
510static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
511 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
512 VPDominatorTree VPDT(Plan);
513
514 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
515 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
// After canonicalization, predecessor [1] of the header is the latch.
516 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
517
518 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
519 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
520
521 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
522 // The canonical LatchVPBB has the header block as last successor. If it has
523 // another successor, this successor is an exit block - insert middle block on
524 // its edge. Otherwise, add middle block as another successor retaining header
525 // as last.
526 if (LatchVPBB->getNumSuccessors() == 2) {
527 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
528 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
529 } else {
530 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
531 LatchVPBB->swapSuccessors();
532 }
533
534 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
535
536 // Create SCEV and VPValue for the trip count.
537 // We use the symbolic max backedge-taken-count, which works also when
538 // vectorizing loops with uncountable early exits.
539 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
540 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
541 "Invalid backedge-taken count");
542 ScalarEvolution &SE = *PSE.getSE();
543 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
544 InductionTy, TheLoop);
// NOTE(review): the statement consuming TripCount (presumably registering it
// on the Plan) is missing from this view (scrape gap at original line 545) —
// confirm against upstream.
546
547 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
// NOTE(review): a statement connecting ScalarPH (likely to the scalar header)
// is missing from this view (scrape gap at original line 548) — confirm
// against upstream.
549
550 // The connection order corresponds to the operands of the conditional branch,
551 // with the middle block already connected to the exit block.
552 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
553 // Also connect the entry block to the scalar preheader.
554 // TODO: Also introduce a branch recipe together with the minimum trip count
555 // check.
556 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
557 Plan.getEntry()->swapSuccessors();
558
559 createExtractsForLiveOuts(Plan, MiddleVPBB);
560
// Create resume phis in the scalar preheader: one per header phi (skipping
// the canonical IV via drop_begin), feeding the scalar-header IR phis.
561 VPBuilder ScalarPHBuilder(ScalarPH);
562 for (const auto &[PhiR, ScalarPhiR] : zip_equal(
563 drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
564 auto *VectorPhiR = cast<VPPhi>(&PhiR);
565 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
566 {VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
567 cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
568 }
569}
570
571/// Check \p Plan's live-in and replace them with constants, if they can be
572/// simplified via SCEV.
// NOTE(review): the function signature (presumably
// `static void simplifyLiveInsWithSCEV(VPlan &Plan,
//  PredicatedScalarEvolution &PSE) {`) is missing from this view (scrape gap
// at original lines 573-574) — confirm against upstream.
// Returns the constant live-in replacement for \p VPV if SCEV can fold it,
// nullptr otherwise.
575 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
576 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
577 if (auto *C = dyn_cast<SCEVConstant>(Expr))
578 return Plan.getOrAddLiveIn(C->getValue());
579 return nullptr;
580 };
581
// Iterate over a copy (to_vector) because replacing uses may modify the
// live-in list.
582 for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
583 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
584 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
585 }
586}
587
588/// To make RUN_VPLAN_PASS print initial VPlan.
590
// Build the initial VPlan ("VPlan0"): plain CFG, initial skeleton, and
// SCEV-based live-in simplification.
591std::unique_ptr<VPlan>
592VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
// NOTE(review): one parameter line (presumably `DebugLoc IVDL,
// PredicatedScalarEvolution &PSE,`) is missing from this view (scrape gap at
// original line 593) — confirm against upstream.
594 LoopVersioning *LVer) {
595 PlainCFGBuilder Builder(TheLoop, &LI, LVer);
596 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
597 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
598 simplifyLiveInsWithSCEV(*VPlan0, PSE);
599
// NOTE(review): a statement is missing from this view (scrape gap at original
// line 600; likely the RUN_VPLAN_PASS-style print hook referenced above) —
// confirm against upstream.
601 return VPlan0;
602}
603
604/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
605/// for \p Phi based on \p IndDesc.
606static VPHeaderPHIRecipe *
// NOTE(review): the first parameter line (presumably
// `createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPValue *Start,`) is
// missing from this view (scrape gap at original line 607) — confirm against
// upstream.
608 const InductionDescriptor &IndDesc, VPlan &Plan,
609 PredicatedScalarEvolution &PSE, Loop &OrigLoop,
610 DebugLoc DL) {
611 [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
612 assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
613 "step must be loop invariant");
614 assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
615 (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
616 SE.getSCEV(IndDesc.getStartValue()) ==
617 vputils::getSCEVExprForVPValue(Start, PSE))) &&
618 "Start VPValue must match IndDesc's start value");
619
620 VPValue *Step =
// NOTE(review): the initializer of Step is missing from this view (scrape gap
// at original line 621) — confirm against upstream.
622
623 VPValue *BackedgeVal = PhiR->getOperand(1);
624 // Replace live-out extracts of WideIV's backedge value by ExitingIVValue
625 // recipes. optimizeInductionExitUsers will later compute the proper
626 // DerivedIV.
627 auto ReplaceExtractsWithExitingIVValue = [&](VPHeaderPHIRecipe *WideIV) {
628 for (VPUser *U : to_vector(BackedgeVal->users())) {
// NOTE(review): the guard preceding this `continue` (presumably a pattern
// match for ExtractLastPart) is missing from this view (scrape gap at
// original line 629) — confirm against upstream.
630 continue;
631 auto *ExtractLastPart = cast<VPInstruction>(U);
632 VPUser *ExtractLastPartUser = ExtractLastPart->getSingleUser();
633 assert(ExtractLastPartUser && "must have a single user");
634 if (!match(ExtractLastPartUser, m_ExtractLastLane(m_VPValue())))
635 continue;
636 auto *ExtractLastLane = cast<VPInstruction>(ExtractLastPartUser);
637 assert(is_contained(ExtractLastLane->getParent()->successors(),
638 Plan.getScalarPreheader()) &&
639 "last lane must be extracted in the middle block");
640 VPBuilder Builder(ExtractLastLane);
641 ExtractLastLane->replaceAllUsesWith(
642 Builder.createNaryOp(VPInstruction::ExitingIVValue, {WideIV}));
643 ExtractLastLane->eraseFromParent();
644 ExtractLastPart->eraseFromParent();
645 }
646 };
647
// NOTE(review): the condition opening this block (presumably a check that the
// induction is a pointer induction) is missing from this view (scrape gap at
// original line 648) — confirm against upstream.
649 auto *WideIV = new VPWidenPointerInductionRecipe(
650 Phi, Start, Step, &Plan.getVFxUF(), IndDesc, DL);
651 ReplaceExtractsWithExitingIVValue(WideIV);
652 return WideIV;
653 }
654
// NOTE(review): the assert condition lines preceding this message (scrape gap
// at original lines 655-656) are missing from this view — confirm against
// upstream.
657 "must have an integer or float induction at this point");
658
659 // Update wide induction increments to use the same step as the corresponding
660 // wide induction. This enables detecting induction increments directly in
661 // VPlan and removes redundant splats.
662 if (match(BackedgeVal, m_Add(m_Specific(PhiR), m_VPValue())))
663 BackedgeVal->getDefiningRecipe()->setOperand(1, Step);
664
665 // It is always safe to copy over the NoWrap and FastMath flags. In
666 // particular, when folding tail by masking, the masked-off lanes are never
667 // used, so it is safe.
// NOTE(review): the definition of Flags (copied from the underlying IR
// instruction per the comment above) is missing from this view (scrape gap at
// original line 668) — confirm against upstream.
669
670 auto *WideIV = new VPWidenIntOrFpInductionRecipe(
671 Phi, Start, Step, &Plan.getVF(), IndDesc, Flags, DL);
672
673 ReplaceExtractsWithExitingIVValue(WideIV);
674 return WideIV;
675}
676
// NOTE(review): the function name line (presumably
// `void VPlanTransforms::createHeaderPhiRecipes(`) is missing from this view
// (scrape gap at original line 677) — confirm against upstream.
678 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
// NOTE(review): two parameter lines (presumably the Inductions and Reductions
// descriptor maps) are missing from this view (scrape gap at original lines
// 679-680) — confirm against upstream.
681 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
682 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
683 // Retrieve the header manually from the intial plain-CFG VPlan.
684 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
685 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
686 assert(VPDominatorTree(Plan).dominates(HeaderVPBB,
687 HeaderVPBB->getPredecessors()[1]) &&
688 "header must dominate its latch");
689
// Dispatch a plain VPPhi to the matching specialized header-phi recipe:
// fixed-order recurrence, widened induction, or reduction.
690 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
691 // TODO: Gradually replace uses of underlying instruction by analyses on
692 // VPlan.
693 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
694 assert(PhiR->getNumOperands() == 2 &&
695 "Must have 2 operands for header phis");
696
697 // Extract common values once.
698 VPIRValue *Start = cast<VPIRValue>(PhiR->getOperand(0));
699 VPValue *BackedgeValue = PhiR->getOperand(1);
700
701 if (FixedOrderRecurrences.contains(Phi)) {
702 // TODO: Currently fixed-order recurrences are modeled as chains of
703 // first-order recurrences. If there are no users of the intermediate
704 // recurrences in the chain, the fixed order recurrence should be
705 // modeled directly, enabling more efficient codegen.
706 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
707 }
708
709 auto InductionIt = Inductions.find(Phi);
710 if (InductionIt != Inductions.end())
711 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
712 Plan, PSE, OrigLoop,
713 PhiR->getDebugLoc());
714
715 assert(Reductions.contains(Phi) && "only reductions are expected now");
716 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
// NOTE(review): the first line of this assert (comparing the recurrence start
// value) is missing from this view (scrape gap at original line 717) —
// confirm against upstream.
718 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
719 "incoming value must match start value");
720 // Will be updated later to >1 if reduction is partial.
721 unsigned ScaleFactor = 1;
722 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
723 return new VPReductionPHIRecipe(
724 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
725 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
726 ScaleFactor),
727 Phi->getType()->isFloatingPointTy() ? RdxDesc.getFastMathFlags()
728 : VPIRFlags(),
// NOTE(review): the closing line of this constructor call (presumably a
// debug-location argument followed by `);`) is missing from this view
// (scrape gap at original line 729) — confirm against upstream.
730 };
731
// Replace each plain VPPhi in the header with its specialized recipe;
// make_early_inc_range allows erasing while iterating.
732 for (VPRecipeBase &R : make_early_inc_range(HeaderVPBB->phis())) {
// NOTE(review): the guard preceding this `continue` (presumably skipping
// non-VPPhi recipes such as the canonical IV) is missing from this view
// (scrape gap at original line 733) — confirm against upstream.
734 continue;
735 auto *PhiR = cast<VPPhi>(&R);
736 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
737 HeaderPhiR->insertBefore(PhiR);
738 PhiR->replaceAllUsesWith(HeaderPhiR);
739 PhiR->eraseFromParent();
740 }
741}
742
744 VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
745 ElementCount MinVF) {
746 VPTypeAnalysis TypeInfo(Plan);
749
750 for (VPRecipeBase &R : Header->phis()) {
751 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
752 if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
753 continue;
754
755 RecurKind Kind = PhiR->getRecurrenceKind();
759 "AnyOf and Find reductions are not allowed for in-loop reductions");
760
761 bool IsFPRecurrence =
763 FastMathFlags FMFs =
764 IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();
765
766 // Collect the chain of "link" recipes for the reduction starting at PhiR.
768 Worklist.insert(PhiR);
769 for (unsigned I = 0; I != Worklist.size(); ++I) {
770 VPSingleDefRecipe *Cur = Worklist[I];
771 for (VPUser *U : Cur->users()) {
772 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
773 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
774 assert((UserRecipe->getParent() == Plan.getMiddleBlock() ||
775 UserRecipe->getParent() == Plan.getScalarPreheader()) &&
776 "U must be either in the loop region, the middle block or the "
777 "scalar preheader.");
778 continue;
779 }
780
781 // Stores using instructions will be sunk later.
783 continue;
784 Worklist.insert(UserRecipe);
785 }
786 }
787
788 // Visit operation "Links" along the reduction chain top-down starting from
789 // the phi until LoopExitValue. We keep track of the previous item
790 // (PreviousLink) to tell which of the two operands of a Link will remain
791 // scalar and which will be reduced. For minmax by select(cmp), Link will be
792 // the select instructions. Blend recipes of in-loop reduction phi's will
793 // get folded to their non-phi operand, as the reduction recipe handles the
794 // condition directly.
795 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
796 for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
797 if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
798 assert(Blend->getNumIncomingValues() == 2 &&
799 "Blend must have 2 incoming values");
800 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
801 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
802 "PhiR must be an operand of the blend");
803 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
804 continue;
805 }
806
807 if (IsFPRecurrence) {
808 FastMathFlags CurFMF =
809 cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlags();
810 if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
811 CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
812 ->getFastMathFlags();
813 FMFs &= CurFMF;
814 }
815
816 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
817
818 // Recognize a call to the llvm.fmuladd intrinsic.
819 bool IsFMulAdd = Kind == RecurKind::FMulAdd;
820 VPValue *VecOp;
821 VPBasicBlock *LinkVPBB = CurrentLink->getParent();
822 if (IsFMulAdd) {
824 "Expected current VPInstruction to be a call to the "
825 "llvm.fmuladd intrinsic");
826 assert(CurrentLink->getOperand(2) == PreviousLink &&
827 "expected a call where the previous link is the added operand");
828
829 // If the instruction is a call to the llvm.fmuladd intrinsic then we
830 // need to create an fmul recipe (multiplying the first two operands of
831 // the fmuladd together) to use as the vector operand for the fadd
832 // reduction.
833 auto *FMulRecipe = new VPInstruction(
834 Instruction::FMul,
835 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
836 CurrentLinkI->getFastMathFlags());
837 LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
838 VecOp = FMulRecipe;
839 } else if (Kind == RecurKind::AddChainWithSubs &&
840 match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
841 Type *PhiTy = TypeInfo.inferScalarType(PhiR);
842 auto *Zero = Plan.getConstantInt(PhiTy, 0);
843 VPBuilder Builder(LinkVPBB, CurrentLink->getIterator());
844 auto *Sub = Builder.createSub(Zero, CurrentLink->getOperand(1),
845 CurrentLinkI->getDebugLoc());
846 Sub->setUnderlyingValue(CurrentLinkI);
847 VecOp = Sub;
848 } else {
849 // Index of the first operand which holds a non-mask vector operand.
850 unsigned IndexOfFirstOperand = 0;
852 if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
853 continue;
854 assert(match(CurrentLink,
856 "must be a select recipe");
857 IndexOfFirstOperand = 1;
858 }
859 // Note that for non-commutable operands (cmp-selects), the semantics of
860 // the cmp-select are captured in the recurrence kind.
861 unsigned VecOpId =
862 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
863 ? IndexOfFirstOperand + 1
864 : IndexOfFirstOperand;
865 VecOp = CurrentLink->getOperand(VecOpId);
866 assert(
867 VecOp != PreviousLink &&
868 CurrentLink->getOperand(
869 cast<VPInstruction>(CurrentLink)->getNumOperandsWithoutMask() -
870 1 - (VecOpId - IndexOfFirstOperand)) == PreviousLink &&
871 "PreviousLink must be the operand other than VecOp");
872 }
873
874 // Get block mask from CurrentLink, if it needs predication.
875 VPValue *CondOp = nullptr;
876 if (BlocksNeedingPredication.contains(CurrentLinkI->getParent()))
877 CondOp = cast<VPInstruction>(CurrentLink)->getMask();
878
879 assert(PhiR->getVFScaleFactor() == 1 &&
880 "inloop reductions must be unscaled");
881 auto *RedRecipe = new VPReductionRecipe(
882 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
883 getReductionStyle(/*IsInLoop=*/true, PhiR->isOrdered(), 1),
884 CurrentLinkI->getDebugLoc());
885 // Append the recipe to the end of the VPBasicBlock because we need to
886 // ensure that it comes after all of it's inputs, including CondOp.
887 // Delete CurrentLink as it will be invalid if its operand is replaced
888 // with a reduction defined at the bottom of the block in the next link.
889 if (LinkVPBB->getNumSuccessors() == 0)
890 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
891 else
892 LinkVPBB->appendRecipe(RedRecipe);
893
894 CurrentLink->replaceAllUsesWith(RedRecipe);
895 ToDelete.push_back(CurrentLink);
896 PreviousLink = RedRecipe;
897 }
898 }
899
900 for (VPRecipeBase *R : ToDelete)
901 R->eraseFromParent();
902}
903
// NOTE(review): the opening line of this definition (function name and leading
// parameter list, original line 904) is elided in this view; from the body it
// appears to be the VPlanTransforms entry point that handles loop exits —
// presumably handleEarlyExits — confirm against the full source. Embedded
// numbers are original file line numbers; gaps (907, 929) mark elided lines.
905 bool HasUncountableEarlyExit) {
// Locate the middle block, the latch and the header. The expression feeding
// this cast is on elided line 907.
906 auto *MiddleVPBB = cast<VPBasicBlock>(
908 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
909 VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
910
911 if (HasUncountableEarlyExit) {
// Uncountable early exits require dedicated handling; delegate and return.
912 handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
913 MiddleVPBB);
914 return;
915 }
916
917 // Disconnect countable early exits from the loop, leaving it with a single
918 // exit from the latch. Countable early exits are left for a scalar epilog.
919 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
920 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
// Only the latch exit through the middle block is kept.
921 if (Pred == MiddleVPBB)
922 continue;
923
924 // Remove phi operands for the early exiting block.
925 for (VPRecipeBase &R : EB->phis())
926 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
// Erase the now-unneeded conditional terminator; the disconnect of the
// edge itself is on elided line 929.
927 auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
928 EarlyExitingVPBB->getTerminator()->eraseFromParent();
930 }
931 }
932 }
933
// NOTE(review): the first line of this definition (function name, original
// line 934) is elided; from the body it emits the middle-block BranchOnCond
// deciding between exiting to the scalar preheader and skipping the scalar
// loop — presumably VPlanTransforms::addMiddleCheck; confirm in full source.
935 bool RequiresScalarEpilogueCheck,
936 bool TailFolded) {
// The operand of this cast is on elided line 938 (presumably the successor
// of the vector loop region — TODO confirm).
937 auto *MiddleVPBB = cast<VPBasicBlock>(
939 // If MiddleVPBB has a single successor then the original loop does not exit
940 // via the latch and the single successor must be the scalar preheader.
941 // There's no need to add a runtime check to MiddleVPBB.
942 if (MiddleVPBB->getNumSuccessors() == 1) {
943 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
944 "must have ScalarPH as single successor");
945 return;
946 }
947
948 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
949
950 // Add a check in the middle block to see if we have completed all of the
951 // iterations in the first vector loop.
952 //
953 // Three cases:
954 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
955 // condition to false.
956 // 2) If (N - N%VF) == N, then we *don't* need to run the
957 // remainder. Thus if tail is to be folded, we know we don't need to run
958 // the remainder and we can set the condition to true.
959 // 3) Otherwise, construct a runtime check.
960
961 // We use the same DebugLoc as the scalar loop latch terminator instead of
962 // the corresponding compare because they may have ended up with different
963 // line numbers and we want to avoid awkward line stepping while debugging.
964 // E.g., if the compare has got a line number inside the loop.
965 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
966 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
967 VPBuilder Builder(MiddleVPBB);
968 VPValue *Cmp;
969 if (!RequiresScalarEpilogueCheck)
// Case 1: scalar epilogue must run — branch condition is constant false.
970 Cmp = Plan.getFalse();
971 else if (TailFolded)
// Case 2: folded tail — vector loop covers all iterations, condition true.
972 Cmp = Plan.getTrue();
973 else
// Case 3: runtime check "trip count == vector trip count".
974 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
975 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
976 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
977 }
978
// NOTE(review): signature line (original 979) is elided; from the body this
// walks all canonical header/latch pairs and wraps each loop into a
// VPRegionBlock — presumably VPlanTransforms::createLoopRegions.
980 VPDominatorTree VPDT(Plan);
// Visit headers in shallow post-order so inner loops are converted to
// regions before their enclosing loops.
981 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
982 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
983 createLoopRegion(Plan, HeaderVPB);
984
// Name the outermost region and its entry block for readable VPlan dumps.
985 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
986 TopRegion->setName("vector loop");
987 TopRegion->getEntryBasicBlock()->setName("vector.body");
988 }
989
990 /// Insert \p CheckBlockVPBB on the edge leading to the vector preheader,
991 /// connecting it to both vector and scalar preheaders. Updates scalar
992 /// preheader phis to account for the new predecessor.
// NOTE(review): first signature line (original 993, the function name and
// VPlan parameter) is elided in this view.
994 VPBasicBlock *CheckBlockVPBB) {
995 VPBlockBase *VectorPH = Plan.getVectorPreheader();
996 auto *ScalarPH = cast<VPBasicBlock>(Plan.getScalarPreheader());
997 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
// Splice the check block into the pre-vector edge, then also give it the
// scalar preheader as successor (the bypass destination).
998 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
999 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
// Swap so the successor order matches the branch's true/false semantics —
// presumably scalar-PH on true; confirm against addBypassBranch's condition.
1000 CheckBlockVPBB->swapSuccessors();
1001 unsigned NumPreds = ScalarPH->getNumPredecessors();
// The scalar preheader gained a predecessor; replicate each phi's last
// incoming value for the new edge.
1002 for (VPRecipeBase &R : ScalarPH->phis()) {
1003 auto *Phi = cast<VPPhi>(&R);
1004 assert(Phi->getNumIncoming() == NumPreds - 1 &&
1005 "must have incoming values for all predecessors");
1006 Phi->addOperand(Phi->getOperand(NumPreds - 2));
1007 }
1008 }
1009
1010 // Likelihood of bypassing the vectorized loop due to a runtime check block,
1011 // including memory overlap checks block and wrapping/unit-stride checks block.
1012 static constexpr uint32_t CheckBypassWeights[] = {1, 127};
1013
1014 /// Create a BranchOnCond terminator in \p CheckBlockVPBB. Optionally adds
1015 /// branch weights.
1016 static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB,
1017 VPValue *Cond, bool AddBranchWeights) {
// NOTE(review): the chained builder call that actually creates the
// BranchOnCond terminator (original line 1020) is elided in this view;
// Term is presumably the resulting VPInstruction.
1019 auto *Term = VPBuilder(CheckBlockVPBB)
1021 if (AddBranchWeights) {
// Attach !prof metadata using the shared bypass weights (strongly biased
// towards not bypassing the vector loop).
1022 MDBuilder MDB(Plan.getContext());
1023 MDNode *BranchWeights =
1024 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
1025 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1026 }
1027 }
1028
// NOTE(review): the first signature line (original 1029, function name plus
// the VPlan and Cond parameters) is elided; from the body this wraps an IR
// runtime-check block into the plan, inserts it before the vector loop and
// gives it a bypass branch — presumably VPlanTransforms::attachCheckBlock.
1030 BasicBlock *CheckBlock,
1031 bool AddBranchWeights) {
1032 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
1033 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
1034 insertCheckBlockBeforeVectorLoop(Plan, CheckBlockVPBB);
1035 addBypassBranch(Plan, CheckBlockVPBB, CondVPV, AddBranchWeights);
1036 }
1037
// NOTE(review): the first line of this definition (function name, original
// line 1038) is elided, as is the tail of the parameter list (original line
// 1042 — presumably PSE and DL, both used below); likely
// VPlanTransforms::addMinimumIterationCheck. Confirm in full source.
1039 VPlan &Plan, ElementCount VF, unsigned UF,
1040 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
1041 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
1043 // Generate code to check if the loop's trip count is less than VF * UF, or
1044 // equal to it in case a scalar epilogue is required; this implies that the
1045 // vector trip count is zero. This check also covers the case where adding one
1046 // to the backedge-taken count overflowed leading to an incorrect trip count
1047 // of zero. In this case we will also jump to the scalar loop.
1048 CmpInst::Predicate CmpPred =
1049 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1050 // If tail is to be folded, vector loop takes care of all iterations.
1051 VPValue *TripCountVPV = Plan.getTripCount();
1052 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
1053 Type *TripCountTy = TripCount->getType();
1054 ScalarEvolution &SE = *PSE.getSE();
// Returns max(MinProfitableTripCount, VF * UF) as a SCEV; the max is folded
// statically when both sides' known-min values already decide it.
1055 auto GetMinTripCount = [&]() -> const SCEV * {
1056 // Compute max(MinProfitableTripCount, UF * VF) and return it.
1057 const SCEV *VFxUF =
1058 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
1059 if (UF * VF.getKnownMinValue() >=
1060 MinProfitableTripCount.getKnownMinValue()) {
1061 // TODO: SCEV should be able to simplify test.
1062 return VFxUF;
1063 }
1064 const SCEV *MinProfitableTripCountSCEV =
1065 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
1066 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1067 };
1068
1069 VPBasicBlock *EntryVPBB = Plan.getEntry();
1070 VPBuilder Builder(EntryVPBB);
1071 VPValue *TripCountCheck = Plan.getFalse();
1072 const SCEV *Step = GetMinTripCount();
1073 // TripCountCheck = false, folding tail implies positive vector trip
1074 // count.
1075 if (!TailFolded) {
1076 // TODO: Emit unconditional branch to vector preheader instead of
1077 // conditional branch with known condition.
1078 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
1079 // Check if the trip count is < the step.
1080 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
1081 // TODO: Ensure step is at most the trip count when determining max VF and
1082 // UF, w/o tail folding.
1083 TripCountCheck = Plan.getTrue();
1084 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
1085 TripCount, Step)) {
1086 // Generate the minimum iteration check only if we cannot prove the
1087 // check is known to be true, or known to be false.
1088 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
1089 TripCountCheck = Builder.createICmp(
1090 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
1091 } // else step known to be < trip count, use TripCountCheck preset to false.
1092 }
1093 VPInstruction *Term =
1094 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
// NOTE(review): original line 1095 is elided here — presumably the guard
// `if (MinItersBypassWeights) {` opening the block closed at line 1100.
1096 MDBuilder MDB(Plan.getContext());
1097 MDNode *BranchWeights = MDB.createBranchWeights(
1098 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
1099 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1100 }
1101 }
1102
// NOTE(review): the first line of this definition (function name, original
// line 1103) is elided; likely
// VPlanTransforms::addMinimumVectorEpilogueIterationCheck — confirm.
1104 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
1105 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
1106 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE) {
1107 // Add the minimum iteration check for the epilogue vector loop.
1108 VPValue *TC = Plan.getOrAddLiveIn(TripCount);
1109 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
// Materialize EpilogueVF * EpilogueUF and the remaining iteration count
// (original trip count minus the main loop's vector trip count).
1110 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
1111 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
1112 VPValue *Count = Builder.createSub(TC, Plan.getOrAddLiveIn(VectorTripCount),
1113 DebugLoc::getUnknown(), "n.vec.remaining");
1114
1115 // Generate code to check if the loop's trip count is less than VF * UF of
1116 // the vector epilogue loop.
1117 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1118 auto *CheckMinIters = Builder.createICmp(
1119 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
1120 VPInstruction *Branch =
1121 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
1122
1123 // We assume the remaining `Count` is equally distributed in
1124 // [0, MainLoopStep)
1125 // So the probability for `Count < EpilogueLoopStep` should be
1126 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
1127 // TODO: Improve the estimate by taking the estimated trip count into
1128 // consideration.
1129 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1130 const uint32_t Weights[] = {EstimatedSkipCount,
1131 MainLoopStep - EstimatedSkipCount};
1132 MDBuilder MDB(Plan.getContext());
1133 MDNode *BranchWeights =
1134 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
1135 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1136 }
1137
1138 /// Find and return the final select instruction of the FindIV result pattern
1139 /// for the given \p BackedgeVal:
1140 /// select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),
1141 /// ComputeReductionResult(ReducedIV), Start.
// NOTE(review): the signature line (original 1142) is elided; from the doc
// comment and body this is presumably
// `static VPInstruction *findFindIVResult(VPValue *BackedgeVal)` — confirm.
1143 return cast<VPInstruction>(
// Search the recipes reachable from BackedgeVal for the one matching the
// FindIV result select pattern described above.
1144 vputils::findRecipe(BackedgeVal, [BackedgeVal](VPRecipeBase *R) {
1145 auto *VPI = dyn_cast<VPInstruction>(R);
1146 return VPI &&
1147 matchFindIVResult(VPI, m_Specific(BackedgeVal), m_VPValue());
1148 }));
1149 }
1150
// NOTE(review): the signature line (original 1151) is elided; from the body
// (FCMP_UNO NaN checks, "FMaxNum/FMinNum reduction" debug message) this is
// the transform that makes FMaxNum/FMinNum reductions exit the vector loop
// early when a NaN lane is seen and resume in the scalar loop — confirm the
// exact name in the full source. Returns false to abandon the (possibly
// partially modified) plan, true on success or when nothing needs doing.
// Returns the compare operand of RedPhiR's min/max backedge intrinsic (the
// operand that is not the phi itself), or nullptr if the backedge value does
// not match the expected min/max intrinsic.
1152 auto GetMinOrMaxCompareValue =
1153 [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
1154 auto *MinOrMaxR =
1155 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
1156 if (!MinOrMaxR)
1157 return nullptr;
1158
1159 // Check that MinOrMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1160 // with an intrinsic that matches the reduction kind.
1161 Intrinsic::ID ExpectedIntrinsicID =
1162 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
1163 if (!match(MinOrMaxR, m_Intrinsic(ExpectedIntrinsicID)))
1164 return nullptr;
1165
1166 if (MinOrMaxR->getOperand(0) == RedPhiR)
1167 return MinOrMaxR->getOperand(1);
1168
1169 assert(MinOrMaxR->getOperand(1) == RedPhiR &&
1170 "Reduction phi operand expected");
1171 return MinOrMaxR->getOperand(0);
1172 };
1173
// Collect all header phis that need handling. NOTE(review): several lines
// are elided in this view (1175: presumably the SmallVector declaration;
// 1179: a filter skipping non-phi/irrelevant recipes; 1187: the condition
// checking for the min-num/max-num recurrence kinds).
1174 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1176 MinOrMaxNumReductionsToHandle;
1177 bool HasUnsupportedPhi = false;
1178 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
1180 continue;
1181 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1182 if (!Cur) {
1183 // TODO: Also support fixed-order recurrence phis.
1184 HasUnsupportedPhi = true;
1185 continue;
1186 }
1188 Cur->getRecurrenceKind())) {
1189 HasUnsupportedPhi = true;
1190 continue;
1191 }
1192
1193 VPValue *MinOrMaxOp = GetMinOrMaxCompareValue(Cur);
1194 if (!MinOrMaxOp)
1195 return false;
1196
1197 MinOrMaxNumReductionsToHandle.emplace_back(Cur, MinOrMaxOp);
1198 }
1199
// Nothing to transform: the plan is fine as-is.
1200 if (MinOrMaxNumReductionsToHandle.empty())
1201 return true;
1202
1203 // We won't be able to resume execution in the scalar tail, if there are
1204 // unsupported header phis or there is no scalar tail at all, due to
1205 // tail-folding.
1206 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1207 return false;
1208
1209 /// Check if the vector loop of \p Plan can early exit and restart
1210 /// execution of last vector iteration in the scalar loop. This requires all
1211 /// recipes up to early exit point be side-effect free as they are
1212 /// re-executed. Currently we check that the loop is free of any recipe that
1213 /// may write to memory. Expected to operate on an early VPlan w/o nested
1214 /// regions.
// NOTE(review): the loop header iterating over the region's blocks
// (original lines 1215-1216) is elided here.
1217 auto *VPBB = cast<VPBasicBlock>(VPB);
1218 for (auto &R : *VPBB) {
1219 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1220 return false;
1221 }
1222 }
1223
// Build, in the latch, a mask of lanes whose min/max compare operand is NaN
// (FCMP_UNO x, x is true iff x is NaN), OR-ed across all handled reductions.
1224 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1225 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1226 VPValue *AllNaNLanes = nullptr;
1227 SmallPtrSet<VPValue *, 2> RdxResults;
1228 for (const auto &[_, MinOrMaxOp] : MinOrMaxNumReductionsToHandle) {
1229 VPValue *RedNaNLanes =
1230 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinOrMaxOp, MinOrMaxOp);
1231 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1232 : RedNaNLanes;
1233 }
1234
1235 VPValue *AnyNaNLane =
1236 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1237 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1238 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
// NOTE(review): the assert opening line (original 1240) is elided here.
1239 for (const auto &[RedPhiR, _] : MinOrMaxNumReductionsToHandle) {
1241 RedPhiR->getRecurrenceKind()) &&
1242 "unsupported reduction");
1243
1244 // If we exit early due to NaNs, compute the final reduction result based on
1245 // the reduction phi at the beginning of the last vector iteration.
1246 auto *RdxResult = vputils::findComputeReductionResult(RedPhiR);
1247 assert(RdxResult && "must find a ComputeReductionResult");
1248
1249 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1250 RdxResult->getOperand(0));
1251 RdxResult->setOperand(0, NewSel);
1252 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1253 RdxResults.insert(RdxResult);
1254 }
1255
// Replace the latch's BranchOnCount terminator with an explicit
// BranchOnCond on "latch exit taken OR any NaN lane seen", so the loop also
// exits as soon as a NaN is encountered.
1256 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1257 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1258 "Unexpected terminator");
1259 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1260 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1261 LatchExitingBranch->getOperand(1));
1262 auto *AnyExitTaken = LatchBuilder.createOr(AnyNaNLane, IsLatchExitTaken);
1263 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1264 LatchExitingBranch->eraseFromParent();
1265
1266 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane
1267 // is true, resume from the start of the last vector iteration via the
1268 // canonical IV, otherwise from the original value.
1269 auto IsTC = [&Plan](VPValue *V) {
1270 return V == &Plan.getVectorTripCount() || V == Plan.getTripCount();
1271 };
1272 for (auto &R : Plan.getScalarPreheader()->phis()) {
1273 auto *ResumeR = cast<VPPhi>(&R);
1274 VPValue *VecV = ResumeR->getOperand(0);
// Reduction resume values were already rewired above.
1275 if (RdxResults.contains(VecV))
1276 continue;
1277 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1278 VPValue *DIVTC = DerivedIV->getOperand(1);
1279 if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) {
// Feed the derived IV the canonical IV on NaN-exit, the trip count
// otherwise; move it after the select it now depends on.
1280 auto *NewSel = MiddleBuilder.createSelect(
1281 AnyNaNLane, LoopRegion->getCanonicalIV(), DIVTC);
1282 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1283 DerivedIV->setOperand(1, NewSel);
1284 continue;
1285 }
1286 }
1287 // Bail out and abandon the current, partially modified, VPlan if we
1288 // encounter resume phi that cannot be updated yet.
1289 if (!IsTC(VecV)) {
1290 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1291 "FMaxNum/FMinNum reduction.\n");
1292 return false;
1293 }
1294 auto *NewSel = MiddleBuilder.createSelect(
1295 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1296 ResumeR->setOperand(0, NewSel);
1297 }
1298
// On a NaN-triggered exit the middle block must branch to the scalar loop,
// so AND the existing condition with NOT(AnyNaNLane).
1299 auto *MiddleTerm = MiddleVPBB->getTerminator();
1300 MiddleBuilder.setInsertPoint(MiddleTerm);
1301 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1302 VPValue *NewCond =
1303 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1304 MiddleTerm->setOperand(0, NewCond);
1305 return true;
1306 }
1307
// NOTE(review): the signature line (original 1308) is elided; from the body
// this rewrites FindLast-style reductions into a mask-phi/data-select form
// whose final value is computed with an ExtractLastActive in the middle
// block — confirm the exact name in the full source. Returns false if the
// plan cannot be transformed, true otherwise.
1309 if (Plan.hasScalarVFOnly())
1310 return false;
1311
1312 // We want to create the following nodes:
1313 // vector.body:
1314 // ...new WidenPHI recipe introduced to keep the mask value for the latest
1315 // iteration where any lane was active.
1316 // mask.phi = phi [ ir<false>, vector.ph ], [ vp<new.mask>, vector.body ]
1317 // ...data.phi (a VPReductionPHIRecipe for a FindLast reduction) already
1318 // exists, but needs updating to use 'new.data' for the backedge value.
1319 // data.phi = phi ir<default.val>, vp<new.data>
1320 //
1321 // ...'data' and 'compare' created by existing nodes...
1322 //
1323 // ...new recipes introduced to determine whether to update the reduction
1324 // values or keep the current one.
1325 // any.active = i1 any-of ir<compare>
1326 // new.mask = select vp<any.active>, ir<compare>, vp<mask.phi>
1327 // new.data = select vp<any.active>, ir<data>, ir<data.phi>
1328 //
1329 // middle.block:
1330 // ...extract-last-active replaces compute-reduction-result.
1331 // result = extract-last-active vp<new.data>, vp<new.mask>, ir<default.val>
1332
1333 VPValue *HeaderMask = vputils::findHeaderMask(Plan);
// NOTE(review): the guard after the dyn_cast (original line 1336 —
// presumably checking PhiR is non-null with a FindLast recurrence kind) is
// elided in this view.
1334 for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
1335 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&Phi);
1337 PhiR->getRecurrenceKind()))
1338 continue;
1339
1340 // Find the condition for the select/blend.
1341 VPValue *BackedgeSelect = PhiR->getBackedgeValue();
1342 VPValue *CondSelect = BackedgeSelect;
1343
1344 // If there's a header mask, the backedge select will not be the find-last
1345 // select.
1346 if (HeaderMask && !match(BackedgeSelect,
1347 m_Select(m_Specific(HeaderMask),
1348 m_VPValue(CondSelect), m_Specific(PhiR))))
1349 llvm_unreachable("expected header mask select");
1350
1351 VPValue *Cond = nullptr, *Op1 = nullptr, *Op2 = nullptr;
1352
1353 // If we're matching a blend rather than a select, there should be one
1354 // incoming value which is the data, then all other incoming values should
1355 // be the phi.
1356 auto MatchBlend = [&](VPRecipeBase *R) {
1357 auto *Blend = dyn_cast<VPBlendRecipe>(R);
1358 if (!Blend)
1359 return false;
1360 assert(!Blend->isNormalized() && "must run before blend normalizaion");
// Count non-phi incomings; capture Cond/Op1/Op2 from the (expected single)
// data incoming so the code below can treat blend and select uniformly.
1361 unsigned NumIncomingDataValues = 0;
1362 for (unsigned I = 0; I < Blend->getNumIncomingValues(); ++I) {
1363 VPValue *Incoming = Blend->getIncomingValue(I);
1364 if (Incoming != PhiR) {
1365 ++NumIncomingDataValues;
1366 Cond = Blend->getMask(I);
1367 Op1 = Incoming;
1368 Op2 = PhiR;
1369 }
1370 }
1371 return NumIncomingDataValues == 1;
1372 };
1373
// NOTE(review): the initializer of SelectR (original line 1375) is elided —
// presumably the defining recipe of CondSelect.
1374 VPSingleDefRecipe *SelectR =
1376 if (!match(SelectR,
1377 m_Select(m_VPValue(Cond), m_VPValue(Op1), m_VPValue(Op2))) &&
1378 !MatchBlend(SelectR))
1379 return false;
1380
1381 assert(Cond != HeaderMask && "Cond must not be HeaderMask");
1382
1383 // Find final reduction computation and replace it with an
1384 // extract.last.active intrinsic.
// NOTE(review): the call initializing RdxResult (original line 1386) is
// elided — presumably findFindIVResult/findComputeReductionResult on
// BackedgeSelect.
1385 auto *RdxResult =
1387 BackedgeSelect);
1388 assert(RdxResult && "Could not find reduction result");
1389
1390 // Add mask phi.
1391 VPBuilder Builder = VPBuilder::getToInsertAfter(PhiR);
1392 auto *MaskPHI = new VPWidenPHIRecipe(nullptr, /*Start=*/Plan.getFalse());
1393 Builder.insert(MaskPHI);
1394
1395 // Add select for mask.
1396 Builder.setInsertPoint(SelectR);
1397
1398 if (Op1 == PhiR) {
1399 // Normalize to selecting the data operand when the condition is true by
1400 // swapping operands and negating the condition.
1401 std::swap(Op1, Op2);
1402 Cond = Builder.createNot(Cond);
1403 }
1404 assert(Op2 == PhiR && "data value must be selected if Cond is true");
1405
// Only lanes active under the header mask may update the tracked mask/data.
1406 if (HeaderMask)
1407 Cond = Builder.createLogicalAnd(HeaderMask, Cond);
1408
1409 VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, {Cond});
1410 VPValue *MaskSelect = Builder.createSelect(AnyOf, Cond, MaskPHI);
1411 MaskPHI->addOperand(MaskSelect);
1412
1413 // Replace select for data.
1414 VPValue *DataSelect =
1415 Builder.createSelect(AnyOf, Op1, Op2, SelectR->getDebugLoc());
1416 SelectR->replaceAllUsesWith(DataSelect);
1417 PhiR->setBackedgeValue(DataSelect);
1418 SelectR->eraseFromParent();
1419
// Replace the old reduction-result computation with extract-last-active on
// the tracked data/mask, falling back to the reduction's start value.
1420 Builder.setInsertPoint(RdxResult);
1421 auto *ExtractLastActive =
1422 Builder.createNaryOp(VPInstruction::ExtractLastActive,
1423 {DataSelect, MaskSelect, PhiR->getStartValue()},
1424 RdxResult->getDebugLoc());
1425 RdxResult->replaceAllUsesWith(ExtractLastActive);
1426 RdxResult->eraseFromParent();
1427 }
1428
1429 return true;
1430 }
1431
1432 /// Given a first argmin/argmax pattern with strict predicate consisting of
1433 /// 1) a MinOrMax reduction \p MinOrMaxPhiR producing \p MinOrMaxResult,
1434 /// 2) a wide induction \p WideIV,
1435 /// 3) a FindLastIV reduction \p FindLastIVPhiR using \p WideIV,
1436 /// return the smallest index of the FindLastIV reduction result using UMin,
1437 /// unless \p MinOrMaxResult equals the start value of its MinOrMax reduction.
1438 /// In that case, return the start value of the FindLastIV reduction instead.
1439 /// If \p WideIV is not canonical, a new canonical wide IV is added, and the
1440 /// final result is scaled back to the non-canonical \p WideIV.
1441 /// The final value of the FindLastIV reduction is originally computed using
1442 /// \p FindIVSelect, \p FindIVCmp, and \p FindIVRdxResult, which are replaced
1443 /// and removed.
1444 /// Returns true if the pattern was handled successfully, false otherwise.
// NOTE(review): the opening signature line (original 1445, with the static
// function's name) is elided in this view.
1446 VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR,
1447 VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV,
1448 VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect,
1449 VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult) {
1450 assert(!FindLastIVPhiR->isInLoop() && !FindLastIVPhiR->isOrdered() &&
1451 "inloop and ordered reductions not supported");
1452 assert(FindLastIVPhiR->getVFScaleFactor() == 1 &&
1453 "FindIV reduction must not be scaled");
1454
// NOTE(review): the declaration of Ty (original line 1455 — presumably the
// canonical IV's scalar type) is elided; the type check below requires the
// wide IV to have that exact type.
1456 // TODO: Support non (i.e., narrower than) canonical IV types.
1457 // TODO: Emit remarks for failed transformations.
1458 if (Ty != VPTypeAnalysis(Plan).inferScalarType(WideIV))
1459 return false;
1460
1461 auto *FindIVSelectR = cast<VPSingleDefRecipe>(
1462 FindLastIVPhiR->getBackedgeValue()->getDefiningRecipe());
1463 assert(
1464 match(FindIVSelectR, m_Select(m_VPValue(), m_VPValue(), m_VPValue())) &&
1465 "backedge value must be a select");
// The select must pick the wide IV directly; bail out otherwise.
1466 if (FindIVSelectR->getOperand(1) != WideIV &&
1467 FindIVSelectR->getOperand(2) != WideIV)
1468 return false;
1469
1470 // If the original wide IV is not canonical, create a new one. The canonical
1471 // wide IV is guaranteed to not wrap for all lanes that are active in the
1472 // vector loop.
1473 if (!WideIV->isCanonical()) {
1474 VPIRValue *Zero = Plan.getConstantInt(Ty, 0);
1475 VPIRValue *One = Plan.getConstantInt(Ty, 1);
1476 auto *WidenCanIV = new VPWidenIntOrFpInductionRecipe(
1477 nullptr, Zero, One, WideIV->getVFValue(),
1478 WideIV->getInductionDescriptor(),
1479 VPIRFlags::WrapFlagsTy(/*HasNUW=*/true, /*HasNSW=*/false),
1480 WideIV->getDebugLoc());
1481 WidenCanIV->insertBefore(WideIV);
1482
1483 // Update the select to use the wide canonical IV.
1484 FindIVSelectR->setOperand(FindIVSelectR->getOperand(1) == WideIV ? 1 : 2,
1485 WidenCanIV);
1486 }
// The old sentinel start value is no longer meaningful; poison it.
1487 FindLastIVPhiR->setOperand(0, Plan.getOrAddLiveIn(PoisonValue::get(Ty)));
1488
1489 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1490 // result:
1491 // 1. Find the first canonical indices corresponding to partial min/max
1492 // values, using loop reductions.
1493 // 2. Find which of the partial min/max values are equal to the overall
1494 // min/max value.
1495 // 3. Select among the canonical indices those corresponding to the overall
1496 // min/max value.
1497 // 4. Find the first canonical index of overall min/max and scale it back to
1498 // the original IV using VPDerivedIVRecipe.
1499 // 5. If the overall min/max equals the starting min/max, the condition in
1500 // the loop was always false, due to being strict; return the start value
1501 // of FindLastIVPhiR in that case.
1502 //
1503 // For example, we transform two independent reduction result computations
1504 // for
1505 //
1506 // <x1> vector loop: {
1507 // vector.body:
1508 // ...
1509 // ir<%iv> = WIDEN-INDUCTION nuw nsw ir<10>, ir<1>, vp<%0>
1510 // WIDEN-REDUCTION-PHI ir<%min.idx> = phi ir<sentinel.min.start>,
1511 // ir<%min.idx.next>
1512 // WIDEN-REDUCTION-PHI ir<%min.val> = phi ir<100>, ir<%min.val.next>
1513 // ....
1514 // WIDEN-INTRINSIC ir<%min.val.next> = call llvm.umin(ir<%min.val>, ir<%l>)
1515 // WIDEN ir<%min.idx.next> = select ir<%cmp>, ir<%iv>, ir<%min.idx>
1516 // ...
1517 // }
1518 // Successor(s): middle.block
1519 //
1520 // middle.block:
1521 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1522 // vp<%min.result> = compute-reduction-result (umin) ir<%min.val.next>
1523 // vp<%cmp> = icmp ne vp<%iv.rdx>, ir<sentinel.min.start>
1524 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<10>
1525 //
1526 //
1527 // Into:
1528 //
1529 // vp<%reduced.min> = compute-reduction-result (umin) ir<%min.val.next>
1530 // vp<%reduced.mins.mask> = icmp eq ir<%min.val.next>, vp<%reduced.min>
1531 // vp<%idxs2reduce> = select vp<%reduced.mins.mask>, ir<%min.idx.next>,
1532 // ir<MaxUInt>
1533 // vp<%reduced.idx> = compute-reduction-result (umin) vp<%idxs2reduce>
1534 // vp<%scaled.idx> = DERIVED-IV ir<20> + vp<%reduced.idx> * ir<1>
1535 // vp<%always.false> = icmp eq vp<%reduced.min>, ir<100>
1536 // vp<%final.idx> = select vp<%always.false>, ir<10>,
1537 // vp<%scaled.idx>
1538
// Step 2+3: mask lanes holding the overall min/max, replace all other
// indices with MaxUInt so the UMin index-reduction ignores them.
1539 VPBuilder Builder(FindIVRdxResult);
1540 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1541 auto *FinalMinOrMaxCmp =
1542 Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1543 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1544 VPValue *MaxIV =
1545 Plan.getConstantInt(APInt::getMaxValue(Ty->getIntegerBitWidth()));
1546 auto *FinalIVSelect =
1547 Builder.createSelect(FinalMinOrMaxCmp, LastIVExiting, MaxIV);
// Step 4 (first half): reduce the surviving indices with UMin to obtain the
// first (smallest) canonical index of the overall min/max.
1548 VPIRFlags RdxFlags(RecurKind::UMin, false, false, FastMathFlags());
1549 VPSingleDefRecipe *FinalCanIV = Builder.createNaryOp(
1550 VPInstruction::ComputeReductionResult, {FinalIVSelect}, RdxFlags,
1551 FindIVRdxResult->getDebugLoc());
1552
1553 // If we used a new wide canonical IV convert the reduction result back to the
1554 // original IV scale before the final select.
// NOTE(review): the `new VPDerivedIVRecipe(` line (original 1557) is elided
// in this view.
1555 if (!WideIV->isCanonical()) {
1556 auto *DerivedIVRecipe =
1558 nullptr, // No FPBinOp for integer induction
1559 WideIV->getStartValue(), FinalCanIV,
1560 WideIV->getStepValue(), "derived.iv.result");
1561 DerivedIVRecipe->insertBefore(&*Builder.getInsertPoint());
1562 FinalCanIV = DerivedIVRecipe;
1563 }
1564
1565 // If the final min/max value matches its start value, the condition in the
1566 // loop was always false, i.e. no induction value has been selected. If that's
1567 // the case, set the result of the IV reduction to its start value.
1568 VPValue *AlwaysFalse = Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxResult,
1569 MinOrMaxPhiR->getStartValue());
1570 VPValue *FinalIV = Builder.createSelect(
1571 AlwaysFalse, FindIVSelect->getOperand(2), FinalCanIV);
1572 FindIVSelect->replaceAllUsesWith(FinalIV);
1573
1574 // Erase the old FindIV result pattern which is now dead.
1575 FindIVSelect->eraseFromParent();
1576 FindIVCmp->eraseFromParent();
1577 FindIVRdxResult->eraseFromParent();
1578 return true;
1579 }
1580
1583 Loop *TheLoop) {
1584 for (auto &PhiR : make_early_inc_range(
1586 auto *MinOrMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
1587 // TODO: check for multi-uses in VPlan directly.
1588 if (!MinOrMaxPhiR || !MinOrMaxPhiR->hasUsesOutsideReductionChain())
1589 continue;
1590
1591 // MinOrMaxPhiR has users outside the reduction cycle in the loop. Check if
1592 // the only other user is a FindLastIV reduction. MinOrMaxPhiR must have
1593 // exactly 2 users:
1594 // 1) the min/max operation of the reduction cycle, and
1595 // 2) the compare of a FindLastIV reduction cycle. This compare must match
1596 // the min/max operation - comparing MinOrMaxPhiR with the operand of the
1597 // min/max operation, and be used only by the select of the FindLastIV
1598 // reduction cycle.
1599 RecurKind RdxKind = MinOrMaxPhiR->getRecurrenceKind();
1600 assert(
1602 "only min/max recurrences support users outside the reduction chain");
1603
1604 auto *MinOrMaxOp =
1605 dyn_cast<VPRecipeWithIRFlags>(MinOrMaxPhiR->getBackedgeValue());
1606 if (!MinOrMaxOp)
1607 return false;
1608
1609 // Check that MinOrMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1610 // with an intrinsic that matches the reduction kind.
1611 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
1612 if (!match(MinOrMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
1613 return false;
1614
1615 // MinOrMaxOp must have 2 users: 1) MinOrMaxPhiR and 2)
1616 // ComputeReductionResult.
1617 assert(MinOrMaxOp->getNumUsers() == 2 &&
1618 "MinOrMaxOp must have exactly 2 users");
1619 VPValue *MinOrMaxOpValue = MinOrMaxOp->getOperand(0);
1620 if (MinOrMaxOpValue == MinOrMaxPhiR)
1621 MinOrMaxOpValue = MinOrMaxOp->getOperand(1);
1622
1623 VPValue *CmpOpA;
1624 VPValue *CmpOpB;
1625 CmpPredicate Pred;
1627 MinOrMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1628 if (!Cmp || Cmp->getNumUsers() != 1 ||
1629 (CmpOpA != MinOrMaxOpValue && CmpOpB != MinOrMaxOpValue))
1630 return false;
1631
1632 if (MinOrMaxOpValue != CmpOpB)
1633 Pred = CmpInst::getSwappedPredicate(Pred);
1634
1635 // MinOrMaxPhiR must have exactly 2 users:
1636 // * MinOrMaxOp,
1637 // * Cmp (that's part of a FindLastIV chain).
1638 if (MinOrMaxPhiR->getNumUsers() != 2)
1639 return false;
1640
1641 VPInstruction *MinOrMaxResult =
1643 assert(is_contained(MinOrMaxPhiR->users(), MinOrMaxOp) &&
1644 "one user must be MinOrMaxOp");
1645 assert(MinOrMaxResult && "MinOrMaxResult must be a user of MinOrMaxOp");
1646
1647 // Cmp must be used by the select of a FindLastIV chain.
1648 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
1649 VPValue *IVOp, *FindIV;
1650 if (!Sel || Sel->getNumUsers() != 2 ||
1651 !match(Sel,
1653 return false;
1654
1656 std::swap(FindIV, IVOp);
1657 Pred = CmpInst::getInversePredicate(Pred);
1658 }
1659
1660 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
1662 FindIVPhiR->getRecurrenceKind()))
1663 return false;
1664
1665 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
1666 "cannot handle inloop/ordered reductions yet");
1667
1668 // Check if FindIVPhiR is a FindLast pattern by checking the MinMaxKind
1669 // on its ComputeReductionResult. SMax/UMax indicates FindLast.
1670 VPInstruction *FindIVResult =
1672 FindIVPhiR->getBackedgeValue());
1673 assert(FindIVResult &&
1674 "must be able to retrieve the FindIVResult VPInstruction");
1675 RecurKind FindIVMinMaxKind = FindIVResult->getRecurKind();
1676 if (FindIVMinMaxKind != RecurKind::SMax &&
1677 FindIVMinMaxKind != RecurKind::UMax)
1678 return false;
1679
1680 // TODO: Support cases where IVOp is the IV increment.
1681 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
1683 return false;
1684
1685 // Check if the predicate is compatible with the reduction kind.
1686 bool IsValidKindPred = [RdxKind, Pred]() {
1687 switch (RdxKind) {
1688 case RecurKind::UMin:
1689 return Pred == CmpInst::ICMP_UGE || Pred == CmpInst::ICMP_UGT;
1690 case RecurKind::UMax:
1691 return Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT;
1692 case RecurKind::SMax:
1693 return Pred == CmpInst::ICMP_SLE || Pred == CmpInst::ICMP_SLT;
1694 case RecurKind::SMin:
1695 return Pred == CmpInst::ICMP_SGE || Pred == CmpInst::ICMP_SGT;
1696 default:
1697 llvm_unreachable("unhandled recurrence kind");
1698 }
1699 }();
1700 if (!IsValidKindPred) {
1701 ORE->emit([&]() {
1703 DEBUG_TYPE, "VectorizationMultiUseReductionPredicate",
1704 TheLoop->getStartLoc(), TheLoop->getHeader())
1705 << "Multi-use reduction with predicate "
1707 << " incompatible with reduction kind";
1708 });
1709 return false;
1710 }
1711
1712 auto *FindIVSelect = findFindIVSelect(FindIVPhiR->getBackedgeValue());
1713 auto *FindIVCmp = FindIVSelect->getOperand(0)->getDefiningRecipe();
1714 auto *FindIVRdxResult = cast<VPInstruction>(FindIVCmp->getOperand(0));
1715 assert(FindIVSelect->getParent() == MinOrMaxResult->getParent() &&
1716 "both results must be computed in the same block");
1717 // Reducing to a scalar min or max value is placed right before reducing to
1718 // its scalar iteration, in order to generate instructions that use both
1719 // their operands.
1720 MinOrMaxResult->moveBefore(*FindIVRdxResult->getParent(),
1721 FindIVRdxResult->getIterator());
1722
1723 bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred);
1724 if (IsStrictPredicate) {
1725 return handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR,
1727 MinOrMaxResult, FindIVSelect, FindIVCmp,
1728 FindIVRdxResult);
1729 }
1730
1731 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1732 // result:
1733 // 1. We need to find the last IV for which the condition based on the
1734 // min/max recurrence is true,
1735 // 2. Compare the partial min/max reduction result to its final value and,
1736 // 3. Select the lanes of the partial FindLastIV reductions which
1737 // correspond to the lanes matching the min/max reduction result.
1738 //
1739 // For example, this transforms
1740 // vp<%min.result> = compute-reduction-result ir<%min.val.next>
1741 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1742 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
1743 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
1744 //
1745 // into:
1746 //
1747 // vp<min.result> = compute-reduction-result ir<%min.val.next>
1748 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
1749 // vp<%final.iv> = select vp<%final.min.cmp>, vp<%min.idx.next>, SENTINEL
1750 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%final.iv>
1751 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
1752 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
1753 //
1754 VPBuilder B(FindIVRdxResult);
1755 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1756 auto *FinalMinOrMaxCmp =
1757 B.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1758 VPValue *Sentinel = FindIVCmp->getOperand(1);
1759 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1760 auto *FinalIVSelect =
1761 B.createSelect(FinalMinOrMaxCmp, LastIVExiting, Sentinel);
1762 FindIVRdxResult->setOperand(0, FinalIVSelect);
1763 }
1764 return true;
1765}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
#define _
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition MD5.cpp:57
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static bool handleFirstArgMinOrMax(VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR, VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV, VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect, VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult)
Given a first argmin/argmax pattern with strict predicate consisting of 1) a MinOrMax reduction MinOr...
static VPHeaderPHIRecipe * createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPIRValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, DebugLoc DL)
Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe for Phi based on IndDesc.
static void insertCheckBlockBeforeVectorLoop(VPlan &Plan, VPBasicBlock *CheckBlockVPBB)
Insert CheckBlockVPBB on the edge leading to the vector preheader, connecting it to both vector and s...
static void simplifyLiveInsWithSCEV(VPlan &Plan, PredicatedScalarEvolution &PSE)
Check Plan's live-in and replace them with constants, if they can be simplified via SCEV.
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB, VPValue *Cond, bool AddBranchWeights)
Create a BranchOnCond terminator in CheckBlockVPBB.
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, Type *IdxTy, DebugLoc DL)
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static VPInstruction * findFindIVSelect(VPValue *BackedgeVal)
Find and return the final select instruction of the FindIV result pattern for the given BackedgeVal: ...
static constexpr uint32_t CheckBypassWeights[]
static void printAfterInitialConstruction(VPlan &)
To make RUN_VPLAN_PASS print initial VPlan.
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS_NO_VERIFY(PASS,...)
This file contains the declarations of the Vectorization Plan base classes:
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI StringRef getPredicateName(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
static FastMathFlags getFast()
Definition FMF.h:50
static bool isLT(Predicate P)
Return true if the predicate is SLT or ULT.
static bool isGT(Predicate P)
Return true if the predicate is SGT or UGT.
A struct for saving information about induction variables.
InductionKind getKind() const
const SCEV * getStep() const
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Value * getStartValue() const
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:632
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
iterator find(const KeyT &Key)
Definition MapVector.h:154
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
FastMathFlags getFastMathFlags() const
static bool isFPMinMaxNumRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point minnum/maxnum kind.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
op_range operands()
Definition User.h:267
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4181
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4256
iterator end()
Definition VPlan.h:4218
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4216
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4269
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:639
const VPRecipeBase & back() const
Definition VPlan.h:4230
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4247
bool empty() const
Definition VPlan.h:4227
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:202
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:170
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition VPlanUtils.h:290
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:221
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:239
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:259
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3756
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3926
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2232
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2274
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2263
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4334
Class to record and manage LLVM IR flags.
Definition VPlan.h:670
RecurKind getRecurKind() const
Definition VPlan.h:991
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1160
@ ExtractLastActive
Extracts the lane from the first operand corresponding to the last active (non-zero) lane in the mask...
Definition VPlan.h:1269
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1276
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
VPBasicBlock * getParent()
Definition VPlan.h:462
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:536
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition VPlan.h:2625
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2686
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2665
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2689
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2988
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4369
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4480
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4467
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:588
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:258
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:302
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:297
void addOperand(VPValue *Operand)
Definition VPlanValue.h:291
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:172
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1403
unsigned getNumUsers() const
Definition VPlanValue.h:104
user_range users()
Definition VPlanValue.h:125
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2329
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2349
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2380
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2427
bool isCanonical() const
Returns true if the induction is canonical, i.e.
A recipe for widened phis.
Definition VPlan.h:2516
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4499
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4795
LLVMContext & getContext() const
Definition VPlan.h:4690
VPBasicBlock * getEntry()
Definition VPlan.h:4591
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4688
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4681
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4649
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4774
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4798
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4639
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4678
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4751
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1033
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4656
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4616
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4821
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1252
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4771
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4831
bool hasScalarVFOnly() const
Definition VPlan.h:4719
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4630
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4635
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4596
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4875
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4777
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
bool matchFindIVResult(VPInstruction *VPI, Op0_t ReducedIV, Op1_t Start)
Match FindIV result pattern: select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),...
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
Definition VPlanUtils.h:94
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
Definition VPlanUtils.h:111
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
Definition VPlanUtils.h:132
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:841
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2612
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:262
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
Definition VPlanCFG.h:275
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A recipe for handling first-order recurrence phis.
Definition VPlan.h:2563
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:183
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static void createInLoopReductionRecipes(VPlan &Plan, const DenseSet< BasicBlock * > &BlocksNeedingPredication, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static LLVM_ABI_FOR_TEST void handleEarlyExits(VPlan &Plan, bool HasUncountableExit)
Update Plan to account for all early exits.
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static void createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *TripCount, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...