LLVM 23.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanDominatorTree.h"
19#include "VPlanHelpers.h"
20#include "VPlanPatternMatch.h"
21#include "VPlanTransforms.h"
22#include "VPlanUtils.h"
23#include "llvm/Analysis/Loads.h"
30#include "llvm/IR/InstrTypes.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/Support/Debug.h"
35
36#define DEBUG_TYPE "vplan"
37
38using namespace llvm;
39using namespace VPlanPatternMatch;
40
41namespace {
42// Class that is used to build the plain CFG for the incoming IR.
43class PlainCFGBuilder {
44 // The outermost loop of the input loop nest considered for vectorization.
45 Loop *TheLoop;
46
47 // Loop Info analysis.
48 LoopInfo *LI;
49
50 // Loop versioning for alias metadata.
51 LoopVersioning *LVer;
52
53 // Vectorization plan that we are working on.
54 std::unique_ptr<VPlan> Plan;
55
56 // Builder of the VPlan instruction-level representation.
57 VPBuilder VPIRBuilder;
58
59 // NOTE: The following maps are intentionally destroyed after the plain CFG
60 // construction because subsequent VPlan-to-VPlan transformation may
61 // invalidate them.
62 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
64 // Map incoming Value definitions to their newly-created VPValues.
65 DenseMap<Value *, VPValue *> IRDef2VPValue;
66
67 // Hold phi node's that need to be fixed once the plain CFG has been built.
69
70 // Utility functions.
71 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
72 void fixHeaderPhis();
73 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
74#ifndef NDEBUG
75 bool isExternalDef(Value *Val);
76#endif
77 VPValue *getOrCreateVPOperand(Value *IRVal);
78 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
79
80public:
81 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer, Type *IdxTy)
82 : TheLoop(Lp), LI(LI), LVer(LVer),
83 Plan(std::make_unique<VPlan>(Lp, IdxTy)) {}
84
85 /// Build plain CFG for TheLoop and connect it to Plan's entry.
86 std::unique_ptr<VPlan> buildPlainCFG();
87};
88} // anonymous namespace
89
90// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
91// must have no predecessors.
92void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
93 // Collect VPBB predecessors.
95 for (BasicBlock *Pred : predecessors(BB))
96 VPBBPreds.push_back(getOrCreateVPBB(Pred));
97 VPBB->setPredecessors(VPBBPreds);
98}
99
100static bool isHeaderBB(BasicBlock *BB, Loop *L) {
101 return L && BB == L->getHeader();
102}
103
104// Add operands to VPInstructions representing phi nodes from the input IR.
105void PlainCFGBuilder::fixHeaderPhis() {
106 for (auto *Phi : PhisToFix) {
107 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
108 VPValue *VPVal = IRDef2VPValue[Phi];
109 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
110 auto *PhiR = cast<VPPhi>(VPVal);
111 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
112 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
113 "Expected Phi in header block.");
114 assert(Phi->getNumOperands() == 2 &&
115 "header phi must have exactly 2 operands");
116 for (BasicBlock *Pred : predecessors(Phi->getParent()))
117 PhiR->addOperand(
118 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
119 }
120}
121
122// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
123// existing one if it was already created.
124VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
125 if (auto *VPBB = BB2VPBB.lookup(BB)) {
126 // Retrieve existing VPBB.
127 return VPBB;
128 }
129
130 // Create new VPBB.
131 StringRef Name = BB->getName();
132 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
133 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
134 BB2VPBB[BB] = VPBB;
135 return VPBB;
136}
137
138#ifndef NDEBUG
139// Return true if \p Val is considered an external definition. An external
140// definition is either:
141// 1. A Value that is not an Instruction. This will be refined in the future.
142// 2. An Instruction that is outside of the IR region represented in VPlan,
143// i.e., is not part of the loop nest.
144bool PlainCFGBuilder::isExternalDef(Value *Val) {
145 // All the Values that are not Instructions are considered external
146 // definitions for now.
148 if (!Inst)
149 return true;
150
151 // Check whether Instruction definition is in loop body.
152 return !TheLoop->contains(Inst);
153}
154#endif
155
156// Create a new VPValue or retrieve an existing one for the Instruction's
157// operand \p IRVal. This function must only be used to create/retrieve VPValues
158// for *Instruction's operands* and not to create regular VPInstruction's. For
159// the latter, please, look at 'createVPInstructionsForVPBB'.
160VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
161 auto VPValIt = IRDef2VPValue.find(IRVal);
162 if (VPValIt != IRDef2VPValue.end())
163 // Operand has an associated VPInstruction or VPValue that was previously
164 // created.
165 return VPValIt->second;
166
167 // Operand doesn't have a previously created VPInstruction/VPValue. This
168 // means that operand is:
169 // A) a definition external to VPlan,
170 // B) any other Value without specific representation in VPlan.
171 // For now, we use VPValue to represent A and B and classify both as external
172 // definitions. We may introduce specific VPValue subclasses for them in the
173 // future.
174 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
175
176 // A and B: Create VPValue and add it to the pool of external definitions and
177 // to the Value->VPValue map.
178 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
179 IRDef2VPValue[IRVal] = NewVPVal;
180 return NewVPVal;
181}
182
// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
// counterpart. This function must be invoked in RPO so that the operands of a
// VPInstruction in \p BB have been visited before (except for Phi nodes).
void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
                                                  BasicBlock *BB) {
  VPIRBuilder.setInsertPoint(VPBB);
  // TODO: Model and preserve debug intrinsics in VPlan.
  for (Instruction &InstRef : *BB) {
    Instruction *Inst = &InstRef;

    // There shouldn't be any VPValue for Inst at this point. Otherwise, we
    // visited Inst when we shouldn't, breaking the RPO traversal order.
    assert(!IRDef2VPValue.count(Inst) &&
           "Instruction shouldn't have been visited.");

    // Unconditional branches carry no information beyond the CFG shape, which
    // is modeled separately via VPBB successors.
    if (isa<UncondBrInst>(Inst))
      // Skip the rest of the Instruction processing for Branch instructions.
      continue;

    if (auto *Br = dyn_cast<CondBrInst>(Inst)) {
      // Conditional branch instruction are represented using BranchOnCond
      // recipes.
      VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
      VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
                               VPIRMetadata(*Inst), Inst->getDebugLoc());
      continue;
    }

    if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
      // Don't emit recipes for unconditional switch instructions.
      if (SI->getNumCases() == 0)
        continue;
      // Operand order: the switch condition first, then one case value per
      // case, matching the successor order set up in buildPlainCFG.
      SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
      for (auto Case : SI->cases())
        Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
      VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
                               VPIRMetadata(*Inst), Inst->getDebugLoc());
      continue;
    }

    VPSingleDefRecipe *NewR;
    if (auto *Phi = dyn_cast<PHINode>(Inst)) {
      // Phi node's operands may not have been visited at this point. We create
      // an empty VPInstruction that we will fix once the whole plain CFG has
      // been built.
      NewR =
          VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi", *Phi);
      NewR->setUnderlyingValue(Phi);
      if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
        // Header phis need to be fixed after the VPBB for the latch has been
        // created.
        PhisToFix.push_back(Phi);
      } else {
        // Add operands for VPPhi in the order matching its predecessors in
        // VPlan.
        DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
        for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
          VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
              getOrCreateVPOperand(Phi->getIncomingValue(I));
        }
        for (VPBlockBase *Pred : VPBB->getPredecessors())
          NewR->addOperand(
              VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
      }
    } else {
      // Build VPIRMetadata from the instruction and add loop versioning
      // metadata for loads and stores.
      VPIRMetadata MD(*Inst);
      if (isa<LoadInst, StoreInst>(Inst) && LVer) {
        const auto &[AliasScopeMD, NoAliasMD] =
            LVer->getNoAliasMetadataFor(Inst);
        if (AliasScopeMD)
          MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
        if (NoAliasMD)
          MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
      }

      // Translate LLVM-IR operands into VPValue operands and set them in the
      // new VPInstruction.
      SmallVector<VPValue *, 4> VPOperands;
      for (Value *Op : Inst->operands())
        VPOperands.push_back(getOrCreateVPOperand(Op));

      if (auto *CI = dyn_cast<CastInst>(Inst)) {
        NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
                                            CI->getType(), CI->getDebugLoc(),
                                            VPIRFlags(*CI), MD);
        NewR->setUnderlyingValue(CI);
      } else if (auto *LI = dyn_cast<LoadInst>(Inst)) {
        NewR = VPIRBuilder.createScalarLoad(LI->getType(), VPOperands[0],
                                            LI->getDebugLoc(), MD);
        NewR->setUnderlyingValue(LI);
      } else {
        // Build VPInstruction for any arbitrary Instruction without specific
        // representation in VPlan.
        NewR =
            VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
                                     VPIRFlags(*Inst), MD, Inst->getDebugLoc());
      }
    }

    // Record the mapping so later instructions can use Inst as an operand.
    IRDef2VPValue[Inst] = NewR;
  }
}
287
288// Main interface to build the plain CFG.
289std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
290 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
291 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
292 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
293 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
294
295 // 1. Scan the body of the loop in a topological order to visit each basic
296 // block after having visited its predecessor basic blocks. Create a VPBB for
297 // each BB and link it to its successor and predecessor VPBBs. Note that
298 // predecessors must be set in the same order as they are in the incomming IR.
299 // Otherwise, there might be problems with existing phi nodes and algorithm
300 // based on predecessors traversal.
301
302 // Loop PH needs to be explicitly visited since it's not taken into account by
303 // LoopBlocksDFS.
304 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
305 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
306 "Unexpected loop preheader");
307 for (auto &I : *ThePreheaderBB) {
308 if (I.getType()->isVoidTy())
309 continue;
310 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
311 }
312
313 LoopBlocksRPO RPO(TheLoop);
314 RPO.perform(LI);
315
316 for (BasicBlock *BB : RPO) {
317 // Create or retrieve the VPBasicBlock for this BB.
318 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
319 // Set VPBB predecessors in the same order as they are in the incoming BB.
320 setVPBBPredsFromBB(VPBB, BB);
321
322 // Create VPInstructions for BB.
323 createVPInstructionsForVPBB(VPBB, BB);
324
325 // Set VPBB successors. We create empty VPBBs for successors if they don't
326 // exist already. Recipes will be created when the successor is visited
327 // during the RPO traversal.
328 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
330 getOrCreateVPBB(SI->getDefaultDest())};
331 for (auto Case : SI->cases())
332 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
333 VPBB->setSuccessors(Succs);
334 continue;
335 }
336 if (auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator())) {
337 VPBB->setOneSuccessor(getOrCreateVPBB(BI->getSuccessor()));
338 continue;
339 }
340 auto *BI = cast<CondBrInst>(BB->getTerminator());
341 BasicBlock *IRSucc0 = BI->getSuccessor(0);
342 BasicBlock *IRSucc1 = BI->getSuccessor(1);
343 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
344 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
345 VPBB->setTwoSuccessors(Successor0, Successor1);
346 }
347
348 for (auto *EB : Plan->getExitBlocks())
349 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
350
351 // 2. The whole CFG has been built at this point so all the input Values must
352 // have a VPlan counterpart. Fix VPlan header phi by adding their
353 // corresponding VPlan operands.
354 fixHeaderPhis();
355
356 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
357 Plan->getEntry()->setPlan(&*Plan);
358
359 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
360 // VPIRInstructions wrapping them.
361 // // Note that the operand order corresponds to IR predecessor order, and may
362 // need adjusting when VPlan predecessors are added, if an exit block has
363 // multiple predecessor.
364 for (auto *EB : Plan->getExitBlocks()) {
365 for (VPRecipeBase &R : EB->phis()) {
366 auto *PhiR = cast<VPIRPhi>(&R);
367 PHINode &Phi = PhiR->getIRPhi();
368 assert(PhiR->getNumOperands() == 0 &&
369 "no phi operands should be added yet");
370 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
371 PhiR->addOperand(
372 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
373 }
374 }
375
376 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
377 return std::move(Plan);
378}
379
380/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
381/// has exactly 2 predecessors (preheader and latch), where the block
382/// dominates the latch and the preheader dominates the block. If it is a
383/// header block return true and canonicalize the predecessors of the header
384/// (making sure the preheader appears first and the latch second) and the
385/// successors of the latch (making sure the loop exit comes first). Otherwise
386/// return false.
388 const VPDominatorTree &VPDT) {
389 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
390 if (Preds.size() != 2)
391 return false;
392
393 auto *PreheaderVPBB = Preds[0];
394 auto *LatchVPBB = Preds[1];
395 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
396 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
397 std::swap(PreheaderVPBB, LatchVPBB);
398
399 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
400 !VPDT.dominates(HeaderVPB, LatchVPBB))
401 return false;
402
403 // Canonicalize predecessors of header so that preheader is first and
404 // latch second.
405 HeaderVPB->swapPredecessors();
406 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
407 R.swapOperands();
408 }
409
410 // The two successors of conditional branch match the condition, with the
411 // first successor corresponding to true and the second to false. We
412 // canonicalize the successors of the latch when introducing the region, such
413 // that the latch exits the region when its condition is true; invert the
414 // original condition if the original CFG branches to the header on true.
415 // Note that the exit edge is not yet connected for top-level loops.
416 if (LatchVPBB->getSingleSuccessor() ||
417 LatchVPBB->getSuccessors()[0] != HeaderVPB)
418 return true;
419
420 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
421 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
422 assert(cast<VPInstruction>(Term)->getOpcode() ==
424 "terminator must be a BranchOnCond");
425 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
426 Not->insertBefore(Term);
427 Term->setOperand(0, Not);
428 LatchVPBB->swapSuccessors();
429
430 return true;
431}
432
/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
  // Get type info and debug location from the scalar phi corresponding to the
  // canonical IV of the outermost (to be vectorized) loop. Only the outermost
  // header will have a canonical IV. Other, nested loops are assigned a
  // canonical IV of null type and debug location.
  Type *CanIVTy = nullptr;
  // NOTE(review): a declaration appears to be missing here (extraction
  // artifact) — `DL` is assigned and used below without a visible definition;
  // presumably "DebugLoc DL = DebugLoc::getUnknown();". TODO: restore from
  // upstream.
  // Entry's second successor reaches, via its single successor, the outermost
  // loop's header block in this plan's skeleton — presumably through the
  // vector preheader; confirm against the skeleton built in addInitialSkeleton.
  auto *OutermostHeaderVPBB = cast<VPBasicBlock>(
      Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
  VPPhi *OutermostVPPhi = nullptr;
  if (HeaderVPB == OutermostHeaderVPBB) {
    OutermostVPPhi = cast<VPPhi>(&OutermostHeaderVPBB->front());
    CanIVTy = OutermostVPPhi->getOperand(0)->getLiveInIRValue()->getType();
    DL = OutermostVPPhi->getDebugLoc();
  }

  auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
  auto *LatchVPBB = HeaderVPB->getPredecessors()[1];

  // Detach the header from both the preheader and the backedge before forming
  // the region; the region itself takes their place in the CFG.
  VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
  VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);

  // Create an empty region first and insert it between PreheaderVPBB and
  // the exit blocks, taking care to preserve the original predecessor &
  // successor order of blocks. Set region entry and exiting after both
  // HeaderVPB and LatchVPBB have been disconnected from their
  // predecessors/successors.
  auto *R = Plan.createLoopRegion(CanIVTy, DL);

  // Transfer latch's successors to the region.
  // NOTE(review): the statement performing this transfer is missing here
  // (extraction artifact). TODO: restore from upstream.

  VPBlockUtils::connectBlocks(PreheaderVPBB, R);
  R->setEntry(HeaderVPB);
  R->setExiting(LatchVPBB);

  // Update canonical IV users for the outermost loop only.
  if (OutermostVPPhi) {
    OutermostVPPhi->replaceAllUsesWith(R->getCanonicalIV());
    OutermostVPPhi->eraseFromParent();
  }

  // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
  for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
    VPBB->setParent(R);
}
480
481// Add the necessary canonical IV and branch recipes required to control the
482// loop.
483static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
484 VPBasicBlock *LatchVPBB, Type *IdxTy,
485 DebugLoc DL) {
486 // Add a VPPhi for the canonical IV starting at 0 as first recipe in header.
487 auto *CanonicalIVPHI = new VPPhi(Plan.getConstantInt(IdxTy, 0), {}, DL);
488 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
489
490 // We are about to replace the branch to exit the region. Remove the original
491 // BranchOnCond, if there is any.
492 DebugLoc LatchDL = DL;
493 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
494 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
495 LatchVPBB->getTerminator()->eraseFromParent();
496 }
497
498 VPBuilder Builder(LatchVPBB);
499 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
500 // Initially the induction increment is guaranteed to not wrap, but that may
501 // change later, e.g. when tail-folding, when the flags need to be dropped.
502 auto *CanonicalIVIncrement = Builder.createAdd(
503 CanonicalIVPHI, &Plan.getVFxUF(), DL, "index.next", {true, false});
504 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
505
506 // Add the BranchOnCount VPInstruction to the latch.
507 Builder.createNaryOp(VPInstruction::BranchOnCount,
508 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
509 LatchDL);
510}
511
512/// Creates extracts for values in \p Plan defined in a loop region and used
513/// outside a loop region.
514static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
515 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
516 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
517 if (!is_contained(EB->predecessors(), MiddleVPBB))
518 continue;
519
520 for (VPRecipeBase &R : EB->phis()) {
521 auto *ExitIRI = cast<VPIRPhi>(&R);
522 VPValue *Exiting = ExitIRI->getIncomingValueForBlock(MiddleVPBB);
523 if (isa<VPIRValue>(Exiting))
524 continue;
525 Exiting = B.createNaryOp(VPInstruction::ExtractLastPart, Exiting);
526 Exiting = B.createNaryOp(VPInstruction::ExtractLastLane, Exiting);
527 ExitIRI->setIncomingValueForBlock(MiddleVPBB, Exiting);
528 }
529 }
530}
531
532/// Return an iterator range to iterate over pairs of matching phi nodes in
533/// \p Header and \p ScalarHeader, skipping the canonical IV in the former.
535 VPBasicBlock *ScalarHeader) {
536 return zip_equal(drop_begin(Header->phis()), ScalarHeader->phis());
537}
538
539static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
540 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
541 VPDominatorTree VPDT(Plan);
542
543 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
544 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
545 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
546
547 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
548 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
549
550 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
551 // The canonical LatchVPBB has the header block as last successor. If it has
552 // another successor, this successor is an exit block - insert middle block on
553 // its edge. Otherwise, add middle block as another successor retaining header
554 // as last.
555 if (LatchVPBB->getNumSuccessors() == 2) {
556 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
557 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
558 } else {
559 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
560 LatchVPBB->swapSuccessors();
561 }
562
563 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
564
565 // Create SCEV and VPValue for the trip count.
566 // We use the symbolic max backedge-taken-count, which works also when
567 // vectorizing loops with uncountable early exits.
568 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
569 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
570 "Invalid backedge-taken count");
571 ScalarEvolution &SE = *PSE.getSE();
572 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
573 InductionTy, TheLoop);
575
576 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
578
579 // The connection order corresponds to the operands of the conditional branch,
580 // with the middle block already connected to the exit block.
581 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
582 // Also connect the entry block to the scalar preheader.
583 // TODO: Also introduce a branch recipe together with the minimum trip count
584 // check.
585 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
586 Plan.getEntry()->swapSuccessors();
587
588 createExtractsForLiveOuts(Plan, MiddleVPBB);
589
590 // Create resume phis in the scalar preheader for each phi in the scalar loop.
591 // Their incoming value from the vector loop will be the last lane of the
592 // corresponding vector loop header phi.
593 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
594 VPBuilder ScalarPHBuilder(ScalarPH);
595 assert(equal(ScalarPH->getPredecessors(),
596 ArrayRef<VPBlockBase *>({MiddleVPBB, Plan.getEntry()})) &&
597 "unexpected predecessor order of scalar ph");
598 for (const auto &[PhiR, ScalarPhiR] :
599 getMatchingPhisForScalarLoop(HeaderVPBB, Plan.getScalarHeader())) {
600 auto *VectorPhiR = cast<VPPhi>(&PhiR);
601 VPValue *BackedgeVal = VectorPhiR->getOperand(1);
602 VPValue *ResumeFromVectorLoop =
603 MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, BackedgeVal);
604 ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
605 VPInstruction::ExtractLastLane, ResumeFromVectorLoop);
606 // Create scalar resume phi, with the first operand being the incoming value
607 // from the middle block and the second operand coming from the entry block.
608 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
609 {ResumeFromVectorLoop, VectorPhiR->getOperand(0)},
610 VectorPhiR->getDebugLoc());
611 cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
612 }
613}
614
615/// Check \p Plan's live-in and replace them with constants, if they can be
616/// simplified via SCEV.
619 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
620 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
621 if (auto *C = dyn_cast<SCEVConstant>(Expr))
622 return Plan.getOrAddLiveIn(C->getValue());
623 return nullptr;
624 };
625
626 for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
627 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
628 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
629 }
630}
631
632/// To make RUN_VPLAN_PASS print initial VPlan.
634
635std::unique_ptr<VPlan>
636VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
638 LoopVersioning *LVer) {
639 PlainCFGBuilder Builder(TheLoop, &LI, LVer, InductionTy);
640 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
641 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
642 simplifyLiveInsWithSCEV(*VPlan0, PSE);
643
645 return VPlan0;
646}
647
/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
/// for \p Phi based on \p IndDesc.
static VPHeaderPHIRecipe *
// NOTE(review): the first line of the parameter list is missing here
// (extraction artifact); the body uses PhiR, Phi and Start, so it presumably
// read "createWidenInductionRecipes(VPPhi *PhiR, PHINode *Phi, VPValue *Start,"
// — TODO: restore from upstream.
                            const InductionDescriptor &IndDesc, VPlan &Plan,
                            PredicatedScalarEvolution &PSE, Loop &OrigLoop,
                            DebugLoc DL) {
  [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
         "step must be loop invariant");
  assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
          (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
           SE.getSCEV(IndDesc.getStartValue()) ==
               vputils::getSCEVExprForVPValue(Start, PSE))) &&
         "Start VPValue must match IndDesc's start value");

  VPValue *Step =
      // NOTE(review): the initializer is missing here (extraction artifact);
      // presumably the step is materialized from IndDesc.getStep() via a
      // vputils SCEV helper — TODO: restore from upstream.

  VPValue *BackedgeVal = PhiR->getOperand(1);
  // Replace live-out extracts of WideIV's backedge value by ExitingIVValue
  // recipes. optimizeInductionLiveOutUsers will later compute the proper
  // DerivedIV.
  auto ReplaceExtractsWithExitingIVValue = [&](VPHeaderPHIRecipe *WideIV) {
    for (VPUser *U : to_vector(BackedgeVal->users())) {
      // NOTE(review): the pattern-match condition is missing here (extraction
      // artifact); presumably it skips users that are not ExtractLastPart of
      // the backedge value — TODO: restore from upstream.
        continue;
      auto *ExtractLastPart = cast<VPInstruction>(U);
      VPUser *ExtractLastPartUser = ExtractLastPart->getSingleUser();
      assert(ExtractLastPartUser && "must have a single user");
      if (!match(ExtractLastPartUser, m_ExtractLastLane(m_VPValue())))
        continue;
      auto *ExtractLastLane = cast<VPInstruction>(ExtractLastPartUser);
      assert(is_contained(ExtractLastLane->getParent()->successors(),
                          Plan.getScalarPreheader()) &&
             "last lane must be extracted in the middle block");
      VPBuilder Builder(ExtractLastLane);
      ExtractLastLane->replaceAllUsesWith(
          Builder.createNaryOp(VPInstruction::ExitingIVValue, {WideIV}));
      ExtractLastLane->eraseFromParent();
      ExtractLastPart->eraseFromParent();
    }
  };

  // NOTE(review): the condition guarding the pointer-induction case is missing
  // here (extraction artifact); presumably it tests for
  // InductionDescriptor::IK_PtrInduction — TODO: restore from upstream.
    auto *WideIV = new VPWidenPointerInductionRecipe(
        Phi, Start, Step, &Plan.getVFxUF(), IndDesc, DL);
    ReplaceExtractsWithExitingIVValue(WideIV);
    return WideIV;
  }

  // NOTE(review): the first two lines of this assert are missing (extraction
  // artifact); per the message it checks for an integer or FP induction kind —
  // TODO: restore from upstream.
         "must have an integer or float induction at this point");

  // Update wide induction increments to use the same step as the corresponding
  // wide induction. This enables detecting induction increments directly in
  // VPlan and removes redundant splats.
  if (match(BackedgeVal, m_Add(m_Specific(PhiR), m_VPValue())))
    BackedgeVal->getDefiningRecipe()->setOperand(1, Step);

  // It is always safe to copy over the NoWrap and FastMath flags. In
  // particular, when folding tail by masking, the masked-off lanes are never
  // used, so it is safe.
  // NOTE(review): the declaration of `Flags` (used below) is missing here
  // (extraction artifact) — TODO: restore from upstream.

  auto *WideIV = new VPWidenIntOrFpInductionRecipe(
      Phi, Start, Step, &Plan.getVF(), IndDesc, Flags, DL);

  ReplaceExtractsWithExitingIVValue(WideIV);
  return WideIV;
}
720
721/// Try to sink users of \p FOR after \p Previous. \returns true if sinking
722/// succeeded or was not necessary, and false otherwise.
723static bool
725 VPRecipeBase *Previous,
726 VPDominatorTree &VPDT) {
727 // Collect recipes that need sinking.
730 Seen.insert(Previous);
731 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
732 // The previous value must not depend on the users of the recurrence phi.
733 // In that case, FOR is not a fixed order recurrence.
734 if (SinkCandidate == Previous)
735 return false;
736
737 if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
738 !Seen.insert(SinkCandidate).second ||
739 VPDT.properlyDominates(Previous, SinkCandidate))
740 return true;
741
742 if (vputils::cannotHoistOrSinkRecipe(*SinkCandidate, /*Sinking=*/true))
743 return false;
744
745 WorkList.push_back(SinkCandidate);
746 return true;
747 };
748
749 // Recursively sink users of FOR after Previous.
750 WorkList.push_back(FOR);
751 for (unsigned I = 0; I != WorkList.size(); ++I) {
752 VPRecipeBase *Current = WorkList[I];
753 assert(Current->getNumDefinedValues() == 1 &&
754 "only recipes with a single defined value expected");
755
756 for (VPUser *User : Current->getVPSingleValue()->users()) {
757 if (!TryToPushSinkCandidate(cast<VPRecipeBase>(User)))
758 return false;
759 }
760 }
761
762 // Keep recipes to sink ordered by dominance so earlier instructions are
763 // processed first.
764 sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
765 return VPDT.properlyDominates(A, B);
766 });
767
768 for (VPRecipeBase *SinkCandidate : WorkList) {
769 if (SinkCandidate == FOR)
770 continue;
771
772 SinkCandidate->moveAfter(Previous);
773 Previous = SinkCandidate;
774 }
775 return true;
776}
777
778/// Try to hoist \p Previous and its operands before all users of \p FOR.
779/// \returns true if hoisting succeeded or was not necessary, and false
780/// otherwise.
// NOTE(review): this listing was extracted from rendered docs; the opening
// signature line (doxygen 781, presumably
// `static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe
// *FOR, ...)`) and a few guard lines (784, 789, 832) are missing here --
// confirm against the checked-in source before relying on exact control flow.
 782 VPRecipeBase *Previous,
 783 VPDominatorTree &VPDT) {
 785 return false;
 786
 787 // Collect recipes that need hoisting.
 788 SmallVector<VPRecipeBase *> HoistCandidates;
 790 // Find the closest hoist point by looking at all users of FOR and selecting
 791 // the recipe dominating all other users.
 792 VPRecipeBase *HoistPoint = nullptr;
 793 for (VPUser *U : FOR->users()) {
 794 auto *R = cast<VPRecipeBase>(U);
 795 if (!HoistPoint || VPDT.properlyDominates(R, HoistPoint))
 796 HoistPoint = R;
 797 }
 798 assert(all_of(FOR->users(),
 799 [&VPDT, HoistPoint](VPUser *U) {
 800 auto *R = cast<VPRecipeBase>(U);
 801 return HoistPoint == R ||
 802 VPDT.properlyDominates(HoistPoint, R);
 803 }) &&
 804 "HoistPoint must dominate all users of FOR");
 805
// Returns the defining recipe of \p HoistCandidateV if it still needs to be
// hoisted above HoistPoint, or nullptr if no hoisting is required (live-in,
// already visited, or already dominating the hoist point).
 806 auto NeedsHoisting = [HoistPoint, &VPDT,
 807 &Visited](VPValue *HoistCandidateV) -> VPRecipeBase * {
 808 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
 809 if (!HoistCandidate)
 810 return nullptr;
 811 // Hoist candidate was already visited, no need to hoist.
 812 if (!Visited.insert(HoistCandidate).second)
 813 return nullptr;
 814 // If we reached a recipe that dominates HoistPoint, we don't need to
 815 // hoist the recipe.
 816 if (VPDT.properlyDominates(HoistCandidate, HoistPoint))
 817 return nullptr;
 818 return HoistCandidate;
 819 };
 820
 821 if (!NeedsHoisting(Previous->getVPSingleValue()))
 822 return true;
 823
 824 // Recursively try to hoist Previous and its operands before all users of
 825 // FOR.
 826 HoistCandidates.push_back(Previous);
 827
// Worklist-style traversal: candidates are appended while iterating, so the
// loop deliberately re-reads HoistCandidates.size() each iteration.
 828 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
 829 VPRecipeBase *Current = HoistCandidates[I];
 830 assert(Current->getNumDefinedValues() == 1 &&
 831 "only recipes with a single defined value expected");
 833 return false;
 834
 835 for (VPValue *Op : Current->operands()) {
 836 // If we reach FOR, it means the original Previous depends on some other
 837 // recurrence that in turn depends on FOR. If that is the case, we would
 838 // also need to hoist recipes involving the other FOR, which may break
 839 // dependencies.
 840 if (Op == FOR)
 841 return false;
 842
 843 if (auto *R = NeedsHoisting(Op)) {
 844 // Bail out if the recipe defines multiple values.
 845 // TODO: Hoisting such recipes requires additional handling.
 846 if (R->getNumDefinedValues() != 1)
 847 return false;
 848 HoistCandidates.push_back(R);
 849 }
 850 }
 851 }
 852
 853 // Order recipes to hoist by dominance so earlier instructions are processed
 854 // first.
 855 sort(HoistCandidates, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
 856 return VPDT.properlyDominates(A, B);
 857 });
 858
 859 for (VPRecipeBase *HoistCandidate : HoistCandidates) {
 860 HoistCandidate->moveBefore(*HoistPoint->getParent(),
 861 HoistPoint->getIterator());
 862 }
 863
 864 return true;
 865}
866
867/// Sink users of fixed-order recurrences past or hoist before the recipe
868/// defining the previous value, introduce FirstOrderRecurrenceSplice
869/// VPInstructions, and replace FOR uses. Returns false if hoisting or sinking
870/// fails.
// NOTE(review): several lines are missing from this extracted view (doxygen
// 871 signature, 874 the dyn_cast producing FOR, 881 SeenPhis declaration,
// 884 loop condition, 905 the splice-creating builder call) -- confirm
// against the checked-in source.
 872 VPDominatorTree &VPDT) {
 873 for (VPRecipeBase &R : HeaderVPBB->phis()) {
 875 if (!FOR)
 876 continue;
 877
 878 // Follow through FOR phi chains to find the actual Previous recipe.
 879 // Fixed-order recurrences do not contain cycles, so this loop is
 880 // guaranteed to terminate.
 882 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
 883 while (auto *PrevPhi =
 885 assert(PrevPhi->getParent() == FOR->getParent() &&
 886 "PrevPhi must be in same block as FOR");
 887 assert(SeenPhis.insert(PrevPhi).second &&
 888 "PrevPhi must not be visited multiple times");
 889 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
 890 }
 891
 892 assert(Previous && "Previous must be a recipe");
 893 // Sink FOR users after Previous or hoist Previous before FOR users.
 894 if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT) &&
 895 !hoistPreviousBeforeFORUsers(FOR, Previous, VPDT))
 896 return false;
 897
 898 // Create FirstOrderRecurrenceSplice and replace FOR uses.
// If Previous is a header phi, insert after all phis; otherwise insert
// directly after Previous so the splice sees its final value.
 899 VPBasicBlock *InsertBlock = Previous->getParent();
 900 auto InsertPt = isa<VPHeaderPHIRecipe>(Previous)
 901 ? InsertBlock->getFirstNonPhi()
 902 : std::next(Previous->getIterator());
 903 VPBuilder LoopBuilder(InsertBlock, InsertPt);
 904 auto *RecurSplice =
 906 {FOR, FOR->getBackedgeValue()});
// Replace all uses of FOR except the splice itself (which must keep
// consuming the original recurrence phi).
 907 FOR->replaceUsesWithIf(RecurSplice, [RecurSplice](VPUser &U, unsigned) {
 908 return &U != RecurSplice;
 909 });
 910 }
 911
 912 return true;
 913}
914
// Converts the plain-CFG header VPPhis into typed header-phi recipes
// (fixed-order recurrence, widened induction, or reduction phis) and names
// the corresponding resume phis in the scalar preheader.
// NOTE(review): the function name line (doxygen 915) and the two parameter
// lines declaring the Inductions/Reductions maps (917-918) are missing from
// this extracted view, as are a few interior lines (955, 967, 982) --
// confirm against the checked-in source.
 916 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
 919 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
 920 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
 921 // Retrieve the header manually from the initial plain-CFG VPlan.
 922 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
 923 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
 924 VPDominatorTree VPDT(Plan);
 925 assert(VPDT.dominates(HeaderVPBB, HeaderVPBB->getPredecessors()[1]) &&
 926 "header must dominate its latch");
 927
// Builds the replacement header-phi recipe for a single plain VPPhi,
// dispatching on whether the underlying IR phi is a fixed-order recurrence,
// an induction, or (the only remaining possibility) a reduction.
 928 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
 929 // TODO: Gradually replace uses of underlying instruction by analyses on
 930 // VPlan.
 931 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
 932 assert(PhiR->getNumOperands() == 2 &&
 933 "Must have 2 operands for header phis");
 934
 935 // Extract common values once.
 936 VPIRValue *Start = cast<VPIRValue>(PhiR->getOperand(0));
 937 VPValue *BackedgeValue = PhiR->getOperand(1);
 938
 939 if (FixedOrderRecurrences.contains(Phi)) {
 940 // TODO: Currently fixed-order recurrences are modeled as chains of
 941 // first-order recurrences. If there are no users of the intermediate
 942 // recurrences in the chain, the fixed order recurrence should be
 943 // modeled directly, enabling more efficient codegen.
 944 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
 945 }
 946
 947 auto InductionIt = Inductions.find(Phi);
 948 if (InductionIt != Inductions.end())
 949 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
 950 Plan, PSE, OrigLoop,
 951 PhiR->getDebugLoc());
 952
 953 assert(Reductions.contains(Phi) && "only reductions are expected now");
 954 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
 956 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
 957 "incoming value must match start value");
 958 // Will be updated later to >1 if reduction is partial.
 959 unsigned ScaleFactor = 1;
 960 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
 961 return new VPReductionPHIRecipe(
 962 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
 963 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
 964 ScaleFactor),
// Fast-math flags only make sense for FP reductions; integer reductions get
// empty flags.
 965 Phi->getType()->isFloatingPointTy() ? RdxDesc.getFastMathFlags()
 966 : VPIRFlags(),
 968 };
 969
// drop_begin skips the canonical IV phi, which stays a plain phi here.
 970 for (VPRecipeBase &R : make_early_inc_range(drop_begin(HeaderVPBB->phis()))) {
 971 auto *PhiR = cast<VPPhi>(&R);
 972 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
 973 HeaderPhiR->insertBefore(PhiR);
 974 PhiR->replaceAllUsesWith(HeaderPhiR);
 975 PhiR->eraseFromParent();
 976 }
 977
 978 if (!tryToSinkOrHoistRecurrenceUsers(HeaderVPBB, VPDT))
 979 return false;
 980
// Give the scalar-preheader resume phis their conventional IR names so the
// generated code matches the historical naming.
 981 for (const auto &[HeaderPhiR, ScalarPhiR] :
 983 auto *ResumePhiR = cast<VPPhi>(&ScalarPhiR);
 984 if (isa<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhiR)) {
 985 ResumePhiR->setName("scalar.recur.init");
 986 auto *ExtractLastLane = cast<VPInstruction>(ResumePhiR->getOperand(0));
 987 ExtractLastLane->setName("vector.recur.extract");
 988 continue;
 989 }
 990 ResumePhiR->setName(isa<VPWidenInductionRecipe>(HeaderPhiR)
 991 ? "bc.resume.val"
 992 : "bc.merge.rdx");
 993 }
 994 return true;
 995}
996
// Converts chains of recipes feeding in-loop reduction phis into
// VPReductionRecipes, folding blends into their non-phi operand and
// rewriting fmuladd / sub-based links into explicit fmul / negate forms.
// NOTE(review): the signature line (doxygen 997) and several interior lines
// (1000-1001, 1009-1011, 1015, 1020, 1076, 1104, 1108, 1150) are missing
// from this extracted view -- confirm against the checked-in source.
 998 ElementCount MinVF) {
 999 VPTypeAnalysis TypeInfo(Plan);
1002
1003 for (VPRecipeBase &R : Header->phis()) {
1004 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
1005 if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
1006 continue;
1007
1008 RecurKind Kind = PhiR->getRecurrenceKind();
1012 "AnyOf and Find reductions are not allowed for in-loop reductions");
1013
1014 bool IsFPRecurrence =
1016 FastMathFlags FMFs =
1017 IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();
1018
1019 // Collect the chain of "link" recipes for the reduction starting at PhiR.
// Worklist grows while iterating; size() is re-read on purpose.
1021 Worklist.insert(PhiR);
1022 for (unsigned I = 0; I != Worklist.size(); ++I) {
1023 VPSingleDefRecipe *Cur = Worklist[I];
1024 for (VPUser *U : Cur->users()) {
1025 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
1026 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
1027 assert((UserRecipe->getParent() == Plan.getMiddleBlock() ||
1028 UserRecipe->getParent() == Plan.getScalarPreheader()) &&
1029 "U must be either in the loop region, the middle block or the "
1030 "scalar preheader.");
1031 continue;
1032 }
1033
1034 // Stores using instructions will be sunk later.
1035 if (match(UserRecipe, m_VPInstruction<Instruction::Store>()))
1036 continue;
1037 Worklist.insert(UserRecipe);
1038 }
1039 }
1040
1041 // Visit operation "Links" along the reduction chain top-down starting from
1042 // the phi until LoopExitValue. We keep track of the previous item
1043 // (PreviousLink) to tell which of the two operands of a Link will remain
1044 // scalar and which will be reduced. For minmax by select(cmp), Link will be
1045 // the select instructions. Blend recipes of in-loop reduction phi's will
1046 // get folded to their non-phi operand, as the reduction recipe handles the
1047 // condition directly.
1048 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
1049 for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
1050 if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
1051 assert(Blend->getNumIncomingValues() == 2 &&
1052 "Blend must have 2 incoming values");
1053 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
1054 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
1055 "PhiR must be an operand of the blend");
1056 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
1057 continue;
1058 }
1059
// Intersect FMFs across every FP link (and the compare feeding a select) so
// the final reduction only carries flags valid for the whole chain.
1060 if (IsFPRecurrence) {
1061 FastMathFlags CurFMF =
1062 cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlags();
1063 if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
1064 CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
1065 ->getFastMathFlags();
1066 FMFs &= CurFMF;
1067 }
1068
1069 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
1070
1071 // Recognize a call to the llvm.fmuladd intrinsic.
1072 bool IsFMulAdd = Kind == RecurKind::FMulAdd;
1073 VPValue *VecOp;
1074 VPBasicBlock *LinkVPBB = CurrentLink->getParent();
1075 if (IsFMulAdd) {
1077 "Expected current VPInstruction to be a call to the "
1078 "llvm.fmuladd intrinsic");
1079 assert(CurrentLink->getOperand(2) == PreviousLink &&
1080 "expected a call where the previous link is the added operand");
1081
1082 // If the instruction is a call to the llvm.fmuladd intrinsic then we
1083 // need to create an fmul recipe (multiplying the first two operands of
1084 // the fmuladd together) to use as the vector operand for the fadd
1085 // reduction.
1086 auto *FMulRecipe = new VPInstruction(
1087 Instruction::FMul,
1088 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
1089 CurrentLinkI->getFastMathFlags());
1090 LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
1091 VecOp = FMulRecipe;
1092 } else if (Kind == RecurKind::AddChainWithSubs &&
1093 match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
// Rewrite `acc - x` as `acc + (0 - x)` so the link fits the add-reduction
// shape; keep the underlying IR instruction for metadata/debug purposes.
1094 Type *PhiTy = TypeInfo.inferScalarType(PhiR);
1095 auto *Zero = Plan.getConstantInt(PhiTy, 0);
1096 VPBuilder Builder(LinkVPBB, CurrentLink->getIterator());
1097 auto *Sub = Builder.createSub(Zero, CurrentLink->getOperand(1),
1098 CurrentLinkI->getDebugLoc());
1099 Sub->setUnderlyingValue(CurrentLinkI);
1100 VecOp = Sub;
1101 } else {
1102 // Index of the first operand which holds a non-mask vector operand.
1103 unsigned IndexOfFirstOperand = 0;
1105 if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
1106 continue;
1107 assert(match(CurrentLink,
1109 "must be a select recipe");
1110 IndexOfFirstOperand = 1;
1111 }
1112 // Note that for non-commutable operands (cmp-selects), the semantics of
1113 // the cmp-select are captured in the recurrence kind.
1114 unsigned VecOpId =
1115 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
1116 ? IndexOfFirstOperand + 1
1117 : IndexOfFirstOperand;
1118 VecOp = CurrentLink->getOperand(VecOpId);
1119 assert(
1120 VecOp != PreviousLink &&
1121 CurrentLink->getOperand(
1122 cast<VPInstruction>(CurrentLink)->getNumOperandsWithoutMask() -
1123 1 - (VecOpId - IndexOfFirstOperand)) == PreviousLink &&
1124 "PreviousLink must be the operand other than VecOp");
1125 }
1126
1127 assert(PhiR->getVFScaleFactor() == 1 &&
1128 "inloop reductions must be unscaled");
1129 VPValue *CondOp = cast<VPInstruction>(CurrentLink)->getMask();
1130 auto *RedRecipe = new VPReductionRecipe(
1131 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
1132 getReductionStyle(/*IsInLoop=*/true, PhiR->isOrdered(), 1),
1133 CurrentLinkI->getDebugLoc());
1134 // Append the recipe to the end of the VPBasicBlock because we need to
1135 // ensure that it comes after all of its inputs, including CondOp.
1136 // Delete CurrentLink as it will be invalid if its operand is replaced
1137 // with a reduction defined at the bottom of the block in the next link.
// In an exiting block, keep the reduction before the block terminator
// (hence the double std::prev); otherwise append at the very end.
1138 if (LinkVPBB->getNumSuccessors() == 0)
1139 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
1140 else
1141 LinkVPBB->appendRecipe(RedRecipe);
1142
1143 CurrentLink->replaceAllUsesWith(RedRecipe);
1144 // Move any store recipes using the RedRecipe that appear before it in the
1145 // same block to just after the RedRecipe.
1146 for (VPUser *U : make_early_inc_range(RedRecipe->users())) {
1147 auto *UserR = dyn_cast<VPRecipeBase>(U);
1148 if (!UserR || UserR->getParent() != LinkVPBB)
1149 continue;
1151 continue;
1152 UserR->moveAfter(RedRecipe);
1153 }
1154 ToDelete.push_back(CurrentLink);
1155 PreviousLink = RedRecipe;
1156 }
1157 }
1158
// Erase replaced links only after the whole loop so iteration stays valid.
1159 for (VPRecipeBase *R : ToDelete)
1160 R->eraseFromParent();
1161}
1162
1163/// Check if all loads in the loop are dereferenceable. Iterates over all blocks
1164/// reachable from \p HeaderVPBB, skipping \p MiddleVPBB. Returns false if any
1165/// non-dereferenceable load is found.
// NOTE(review): the signature line (doxygen 1166) and two interior lines
// (1168, 1172, 1198) are missing from this extracted view -- confirm against
// the checked-in source.
1167 VPBasicBlock *MiddleVPBB, Loop *TheLoop,
1169 DominatorTree &DT, AssumptionCache *AC) {
1170 ScalarEvolution &SE = *PSE.getSE();
1171 const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
1173 vp_depth_first_shallow(HeaderVPBB))) {
1174 // Skip blocks outside the loop (exit blocks and their successors).
1175 if (VPBB == MiddleVPBB || isa<VPIRBasicBlock>(VPBB))
1176 continue;
1177 for (VPRecipeBase &R : *VPBB) {
// Only plain load VPInstructions are expected to read memory at this stage.
1178 auto *VPI = dyn_cast<VPInstructionWithType>(&R);
1179 if (!VPI || VPI->getOpcode() != Instruction::Load) {
1180 assert(!R.mayReadFromMemory() && "unexpected recipe reading memory");
1181 continue;
1182 }
1183
1184 // Get the pointer SCEV for dereferenceability checking.
1185 VPValue *Ptr = VPI->getOperand(0);
1186 const SCEV *PtrSCEV = vputils::getSCEVExprForVPValue(Ptr, PSE, TheLoop);
1187 if (isa<SCEVCouldNotCompute>(PtrSCEV)) {
1188 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Found non-dereferenceable "
1189 "load with SCEVCouldNotCompute pointer\n");
1190 return false;
1191 }
1192
1193 // Check dereferenceability using the SCEV-based version.
1194 Type *LoadTy = VPI->getResultType();
1195 const SCEV *SizeSCEV =
1196 SE.getStoreSizeOfExpr(DL.getIndexType(PtrSCEV->getType()), LoadTy);
1197 auto *Load = cast<LoadInst>(VPI->getUnderlyingValue());
1199 if (isDereferenceableAndAlignedInLoop(PtrSCEV, Load->getAlign(), SizeSCEV,
1200 TheLoop, SE, DT, AC, &Preds))
1201 continue;
1202
1203 LLVM_DEBUG(
1204 dbgs() << "LV: Not vectorizing: Auto-vectorization of loops with "
1205 "potentially faulting load is not supported.\n");
1206 return false;
1207 }
1208 }
1209 return true;
1210}
1211
// Handles early exits of the initial VPlan: for uncountable early exits,
// verifies all loads are dereferenceable and rewires the exits via
// handleUncountableEarlyExits; countable early exits are disconnected so the
// loop has a single latch exit (left for a scalar epilogue). Returns false
// if the plan cannot be handled.
// NOTE(review): the signature line (doxygen 1212) and interior lines (1214,
// 1217, 1225, 1246) are missing from this extracted view -- confirm against
// the checked-in source.
1213 Loop *TheLoop,
1215 DominatorTree &DT, AssumptionCache *AC) {
1216 auto *MiddleVPBB = cast<VPBasicBlock>(
1218 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
1219 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
1220
1221 // TODO: We would like to detect uncountable exits and stores within loops
1222 // with such exits from the VPlan alone. Exit detection can be moved
1223 // here from handleUncountableEarlyExits, but we need to improve
1224 // detection of recipes which may write to memory.
1226 if (!areAllLoadsDereferenceable(HeaderVPBB, MiddleVPBB, TheLoop, PSE, DT,
1227 AC))
1228 return false;
1229 // TODO: Check target preference for style.
1230 handleUncountableEarlyExits(Plan, HeaderVPBB, LatchVPBB, MiddleVPBB, Style);
1231 return true;
1232 }
1233
1234 // Disconnect countable early exits from the loop, leaving it with a single
1235 // exit from the latch. Countable early exits are left for a scalar epilog.
1236 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
// to_vector: snapshot predecessors because they are modified while iterating.
1237 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
1238 if (Pred == MiddleVPBB)
1239 continue;
1240
1241 // Remove phi operands for the early exiting block.
1242 for (VPRecipeBase &R : EB->phis())
1243 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
1244 auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
1245 EarlyExitingVPBB->getTerminator()->eraseFromParent();
1247 }
1248 }
1249 return true;
1250}
1251
// Adds the middle-block terminator deciding whether the scalar remainder
// loop must run after the vector loop (or a no-op if the middle block
// unconditionally falls through to the scalar preheader).
// NOTE(review): doxygen line 1254 (the argument of the cast producing
// MiddleVPBB) is missing from this extracted view.
1252void VPlanTransforms::addMiddleCheck(VPlan &Plan, bool TailFolded) {
1253 auto *MiddleVPBB = cast<VPBasicBlock>(
1255 // If MiddleVPBB has a single successor then the original loop does not exit
1256 // via the latch and the single successor must be the scalar preheader.
1257 // There's no need to add a runtime check to MiddleVPBB.
1258 if (MiddleVPBB->getNumSuccessors() == 1) {
1259 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
1260 "must have ScalarPH as single successor");
1261 return;
1262 }
1263
1264 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
1265
1266 // Add a check in the middle block to see if we have completed all of the
1267 // iterations in the first vector loop.
1268 //
1269 // Three cases:
1270 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
1271 // condition to false.
1272 // 2) If (N - N%VF) == N, then we *don't* need to run the
1273 // remainder. Thus if tail is to be folded, we know we don't need to run
1274 // the remainder and we can set the condition to true.
1275 // 3) Otherwise, construct a runtime check.
1276
1277 // We use the same DebugLoc as the scalar loop latch terminator instead of
1278 // the corresponding compare because they may have ended up with different
1279 // line numbers and we want to avoid awkward line stepping while debugging.
1280 // E.g., if the compare has got a line number inside the loop.
1281 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
1282 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
1283 VPBuilder Builder(MiddleVPBB);
1284 VPValue *Cmp;
1285 if (TailFolded)
1286 Cmp = Plan.getTrue();
1287 else
1288 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
1289 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
1290 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
1291}
1292
// Wraps each canonical header/latch pair of the plain CFG into a
// VPRegionBlock, then names the resulting top-level vector loop region and
// its entry block.
// NOTE(review): the function signature (doxygen 1293) and the traversal
// declaration (1295) are missing from this extracted view -- confirm against
// the checked-in source.
1294 VPDominatorTree VPDT(Plan);
1296 Plan.getEntry());
1297 for (VPBlockBase *HeaderVPB : POT)
1298 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
1299 createLoopRegion(Plan, HeaderVPB);
1300
1301 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
1302 TopRegion->setName("vector loop");
1303 TopRegion->getEntryBasicBlock()->setName("vector.body");
1304}
1305
// Restructures the single-exit vector loop for tail folding: inserts a
// header mask (wide canonical IV <= backedge-taken count) branched on from
// the header, splits the latch at the IV update, inserts latch phis for
// values live past the masked-off tail, and rewrites last-element extracts
// in the middle block to extract from the last active lane.
// NOTE(review): the function name line (doxygen 1306) and several interior
// lines (1326, 1334, 1352, 1354, 1369, 1371, 1373, 1392) are missing from
// this extracted view -- confirm against the checked-in source.
1307 assert(Plan.getExitBlocks().size() == 1 &&
1308 "only a single-exit block is supported currently");
1309 assert(Plan.getExitBlocks().front()->getSinglePredecessor() ==
1310 Plan.getMiddleBlock() &&
1311 "the exit block must have middle block as single predecessor");
1312
1313 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1314 assert(LoopRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
1315 "The vector loop region must have the middle block as its single "
1316 "successor for now");
1317 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1318
1319 Header->splitAt(Header->getFirstNonPhi());
1320
1321 // Create the header mask, insert it in the header and branch on it.
1322 auto *IV =
1323 new VPWidenCanonicalIVRecipe(Header->getParent()->getCanonicalIV());
1324 VPBuilder Builder(Header, Header->getFirstNonPhi());
1325 Builder.insert(IV);
1327 VPValue *HeaderMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
1328 Builder.createNaryOp(VPInstruction::BranchOnCond, HeaderMask);
1329
// Sanity-check that the latch still ends in the canonical
// branch-on-count/IV-increment pattern before splitting it.
1330 VPBasicBlock *OrigLatch = LoopRegion->getExitingBasicBlock();
1331 VPValue *IVInc;
1332 [[maybe_unused]] bool TermBranchOnCount =
1333 match(OrigLatch->getTerminator(),
1335 m_Specific(&Plan.getVectorTripCount())));
1336 assert(TermBranchOnCount &&
1337 match(IVInc, m_Add(m_Specific(LoopRegion->getCanonicalIV()),
1338 m_Specific(&Plan.getVFxUF()))) &&
1339 std::next(IVInc->getDefiningRecipe()->getIterator()) ==
1340 OrigLatch->getTerminator()->getIterator() &&
1341 "Unexpected canonical iv increment");
1342
1343 // Split the latch at the IV update, and branch to it from the header mask.
1344 VPBasicBlock *Latch =
1345 OrigLatch->splitAt(IVInc->getDefiningRecipe()->getIterator());
1346 Latch->setName("vector.latch");
1347 VPBlockUtils::connectBlocks(Header, Latch);
1348
1349 // Collect any values defined in the loop that need a phi. Currently this
1350 // includes header phi backedges and live-outs extracted in the middle block.
1351 // TODO: Handle early exits via Plan.getExitBlocks()
1353 for (VPRecipeBase &R : Header->phis())
1355 NeedsPhi[cast<VPHeaderPHIRecipe>(R).getBackedgeValue()].push_back(&R);
1356
1357 VPValue *V;
1358 for (VPRecipeBase &R : *Plan.getMiddleBlock())
1359 if (match(&R, m_ExtractLastPart(m_VPValue(V))))
1360 NeedsPhi[V].push_back(&R);
1361
1362 // Insert phis for values coming past the end of the tail.
1363 Builder.setInsertPoint(Latch, Latch->begin());
1364 VPTypeAnalysis TypeInfo(Plan);
1365 for (const auto &[V, Users] : NeedsPhi) {
// Live-in IR values don't need a phi -- they are loop-invariant.
1366 if (isa<VPIRValue>(V))
1367 continue;
1368 VPValue *TailVal =
1370 VPIRFlags Flags;
1372 "Value used by more than two reduction phis?");
1374 auto *RdxPhi =
1375 RedIt != Users.end() ? cast<VPReductionPHIRecipe>(*RedIt) : nullptr;
// For out-of-loop reductions, carry the reduction phi itself (and its
// flags) through the masked-off tail.
1376 if (RdxPhi && !RdxPhi->isInLoop()) {
1377 TailVal = RdxPhi;
1378 Flags = *RdxPhi;
1379 }
1380
1381 VPInstruction *Phi = Builder.createScalarPhi({V, TailVal}, {}, "", Flags);
1382 for (VPUser *U : Users)
1383 U->replaceUsesOfWith(V, Phi);
1384 }
1385
1386 // Any extract of the last element must be updated to extract from the last
1387 // active lane of the header mask instead (i.e., the lane corresponding to the
1388 // last active iteration).
1389 Builder.setInsertPoint(Plan.getMiddleBlock()->getTerminator());
1390 for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
1391 VPValue *Op;
1393 continue;
1394
1395 // Compute the index of the last active lane.
1396 VPValue *LastActiveLane = Builder.createLastActiveLane(HeaderMask);
1397 auto *Ext =
1398 Builder.createNaryOp(VPInstruction::ExtractLane, {LastActiveLane, Op});
1399 R.getVPSingleValue()->replaceAllUsesWith(Ext);
1400 }
1401}
1402
1403/// Insert \p CheckBlockVPBB on the edge leading to the vector preheader,
1404/// connecting it to both vector and scalar preheaders. Updates scalar
1405/// preheader phis to account for the new predecessor.
// NOTE(review): the signature line (doxygen 1406, presumably
// `static void insertCheckBlockBeforeVectorLoop(VPlan &Plan,`) is missing
// from this extracted view.
1407 VPBasicBlock *CheckBlockVPBB) {
1408 VPBlockBase *VectorPH = Plan.getVectorPreheader();
1409 auto *ScalarPH = cast<VPBasicBlock>(Plan.getScalarPreheader());
1410 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
1411 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
1412 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
// Swap so the vector preheader stays the taken (first) successor.
1413 CheckBlockVPBB->swapSuccessors();
// The scalar preheader gained a predecessor: duplicate the phi operand that
// previously came from the old last predecessor.
1414 unsigned NumPreds = ScalarPH->getNumPredecessors();
1415 for (VPRecipeBase &R : ScalarPH->phis()) {
1416 auto *Phi = cast<VPPhi>(&R);
1417 assert(Phi->getNumIncoming() == NumPreds - 1 &&
1418 "must have incoming values for all predecessors");
1419 Phi->addOperand(Phi->getOperand(NumPreds - 2));
1420 }
1421}
1422
1423// Likelihood of bypassing the vectorized loop due to a runtime check block,
1424// including memory overlap checks block and wrapping/unit-stride checks block.
// Branch weights {bypass, continue}: 1-in-128 chance of taking the bypass.
1425static constexpr uint32_t CheckBypassWeights[] = {1, 127};
1426
1427/// Create a BranchOnCond terminator in \p CheckBlockVPBB. Optionally adds
1428/// branch weights.
// NOTE(review): doxygen lines 1431/1433 (the builder call creating the
// BranchOnCond terminator) are missing from this extracted view.
1429static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB,
1430 VPValue *Cond, bool AddBranchWeights) {
1432 auto *Term = VPBuilder(CheckBlockVPBB)
1434 if (AddBranchWeights) {
// Attach the fixed 1:127 bypass weights (see CheckBypassWeights).
1435 MDBuilder MDB(Plan.getContext());
1436 MDNode *BranchWeights =
1437 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
1438 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1439 }
1440}
1441
// Wraps an IR check block (with condition \p Cond) as a VPIRBasicBlock,
// inserts it before the vector loop, and terminates it with a bypass branch.
// NOTE(review): the signature line (doxygen 1442) is missing from this
// extracted view; judging by the parameters this is presumably
// VPlanTransforms::attachCheckBlock -- confirm against the checked-in source.
1443 BasicBlock *CheckBlock,
1444 bool AddBranchWeights) {
1445 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
1446 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
1447 insertCheckBlockBeforeVectorLoop(Plan, CheckBlockVPBB);
1448 addBypassBranch(Plan, CheckBlockVPBB, CondVPV, AddBranchWeights);
1449}
1450
1451/// Return an insert point in \p EntryVPBB after existing VPIRPhi,
1452/// VPIRInstruction and VPExpandSCEVRecipe recipes.
// NOTE(review): the signature line (doxygen 1453) and the isa<> condition of
// the while loop (1456) are missing from this extracted view.
1454 auto InsertPt = EntryVPBB->begin();
1455 while (InsertPt != EntryVPBB->end() &&
1457 ++InsertPt;
1458 return InsertPt;
1459}
1460
// Emits the minimum-iteration ("min.iters.check") guard deciding whether the
// vector loop can run at all, folding the check to a constant when SCEV can
// prove it, and optionally attaching bypass branch weights.
// NOTE(review): the function name line (doxygen 1461), the final parameter
// line (1465) and the `if (MinItersBypassWeights)` guard (1522) are missing
// from this extracted view -- confirm against the checked-in source.
1462 VPlan &Plan, ElementCount VF, unsigned UF,
1463 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
1464 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
1466 // Generate code to check if the loop's trip count is less than VF * UF, or
1467 // equal to it in case a scalar epilogue is required; this implies that the
1468 // vector trip count is zero. This check also covers the case where adding one
1469 // to the backedge-taken count overflowed leading to an incorrect trip count
1470 // of zero. In this case we will also jump to the scalar loop.
1471 CmpInst::Predicate CmpPred =
1472 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1473 // If tail is to be folded, vector loop takes care of all iterations.
1474 VPValue *TripCountVPV = Plan.getTripCount();
1475 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
1476 Type *TripCountTy = TripCount->getType();
1477 ScalarEvolution &SE = *PSE.getSE();
1478 auto GetMinTripCount = [&]() -> const SCEV * {
1479 // Compute max(MinProfitableTripCount, UF * VF) and return it.
1480 const SCEV *VFxUF =
1481 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
1482 if (UF * VF.getKnownMinValue() >=
1483 MinProfitableTripCount.getKnownMinValue()) {
1484 // TODO: SCEV should be able to simplify test.
1485 return VFxUF;
1486 }
1487 const SCEV *MinProfitableTripCountSCEV =
1488 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
1489 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1490 };
1491
1492 VPBasicBlock *EntryVPBB = Plan.getEntry();
1493 // Place compare and branch in CheckBlock if given, ExpandSCEVs in Entry.
1494 VPBasicBlock *CheckVPBB = CheckBlock ? CheckBlock : EntryVPBB;
1495 VPBuilder Builder(CheckVPBB);
1496 VPValue *TripCountCheck = Plan.getFalse();
1497 const SCEV *Step = GetMinTripCount();
1498 // TripCountCheck = false, folding tail implies positive vector trip
1499 // count.
1500 if (!TailFolded) {
1501 // TODO: Emit unconditional branch to vector preheader instead of
1502 // conditional branch with known condition.
1503 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
1504 // Check if the trip count is < the step.
1505 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
1506 // TODO: Ensure step is at most the trip count when determining max VF and
1507 // UF, w/o tail folding.
1508 TripCountCheck = Plan.getTrue();
1509 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
1510 TripCount, Step)) {
1511 // Generate the minimum iteration check only if we cannot prove the
1512 // check is known to be true, or known to be false.
1513 // ExpandSCEV must be placed in Entry.
1514 VPBuilder SCEVBuilder(EntryVPBB, getExpandSCEVInsertPt(EntryVPBB));
1515 VPValue *MinTripCountVPV = SCEVBuilder.createExpandSCEV(Step);
1516 TripCountCheck = Builder.createICmp(
1517 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
1518 } // else step known to be < trip count, use TripCountCheck preset to false.
1519 }
1520 VPInstruction *Term =
1521 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
1523 MDBuilder MDB(Plan.getContext());
1524 MDNode *BranchWeights = MDB.createBranchWeights(
1525 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
1526 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1527 }
1528}
1529
// Creates the "vector.main.loop.iter.check" block before the vector loop and
// delegates to the general minimum-iteration-check emitter (with tail
// folding disabled).
// NOTE(review): the function name line (doxygen 1530), the trailing
// parameter line (1533) and the first line of the delegated call (1536) are
// missing from this extracted view -- confirm against the checked-in source.
1531 VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue,
1532 Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL,
1534 auto *CheckBlock = Plan.createVPBasicBlock("vector.main.loop.iter.check");
1535 insertCheckBlockBeforeVectorLoop(Plan, CheckBlock);
1537 RequiresScalarEpilogue, /*TailFolded=*/false,
1538 OrigLoop, MinItersBypassWeights, DL, PSE,
1539 CheckBlock);
1540}
1541
// Emits the "min.epilog.iters.check" guard for the epilogue vector loop:
// compares the remaining iterations (trip count minus main vector trip
// count) against the epilogue's VF * UF and attaches estimated branch
// weights.
// NOTE(review): the function name line (doxygen 1542) is missing from this
// extracted view -- confirm against the checked-in source.
1543 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
1544 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
1545 unsigned EpilogueLoopStep, ScalarEvolution &SE) {
1546 // Add the minimum iteration check for the epilogue vector loop.
1547 VPValue *TC = Plan.getTripCount();
1548 Value *TripCount = TC->getLiveInIRValue();
1549 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
1550 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
1551 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
1552 VPValue *Count = Builder.createSub(TC, Plan.getOrAddLiveIn(VectorTripCount),
1553 DebugLoc::getUnknown(), "n.vec.remaining");
1554
1555 // Generate code to check if the loop's trip count is less than VF * UF of
1556 // the vector epilogue loop.
1557 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1558 auto *CheckMinIters = Builder.createICmp(
1559 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
1560 VPInstruction *Branch =
1561 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
1562
1563 // We assume the remaining `Count` is equally distributed in
1564 // [0, MainLoopStep)
1565 // So the probability for `Count < EpilogueLoopStep` should be
1566 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
1567 // TODO: Improve the estimate by taking the estimated trip count into
1568 // consideration.
1569 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1570 const uint32_t Weights[] = {EstimatedSkipCount,
1571 MainLoopStep - EstimatedSkipCount};
1572 MDBuilder MDB(Plan.getContext());
1573 MDNode *BranchWeights =
1574 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
1575 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1576}
1577
1578/// Find and return the final select instruction of the FindIV result pattern
1579/// for the given \p BackedgeVal:
1580/// select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),
1581/// ComputeReductionResult(ReducedIV), Start.
// NOTE(review): the function signature line (doxygen 1582) is missing from
// this extracted view -- confirm against the checked-in source.
1583 return cast<VPInstruction>(
1584 vputils::findRecipe(BackedgeVal, [BackedgeVal](VPRecipeBase *R) {
1585 auto *VPI = dyn_cast<VPInstruction>(R);
1586 return VPI &&
1587 matchFindIVResult(VPI, m_Specific(BackedgeVal), m_VPValue());
1588 }));
1589}
1590
1592 auto GetMinOrMaxCompareValue =
1593 [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
1594 auto *MinOrMaxR =
1595 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
1596 if (!MinOrMaxR)
1597 return nullptr;
1598
1599 // Check that MinOrMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1600 // with an intrinsic that matches the reduction kind.
1601 Intrinsic::ID ExpectedIntrinsicID =
1602 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
1603 if (!match(MinOrMaxR, m_Intrinsic(ExpectedIntrinsicID)))
1604 return nullptr;
1605
1606 if (MinOrMaxR->getOperand(0) == RedPhiR)
1607 return MinOrMaxR->getOperand(1);
1608
1609 assert(MinOrMaxR->getOperand(1) == RedPhiR &&
1610 "Reduction phi operand expected");
1611 return MinOrMaxR->getOperand(0);
1612 };
1613
1614 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1616 MinOrMaxNumReductionsToHandle;
1617 bool HasUnsupportedPhi = false;
1618 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
1620 continue;
1621 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1622 if (!Cur) {
1623 // TODO: Also support fixed-order recurrence phis.
1624 HasUnsupportedPhi = true;
1625 continue;
1626 }
1628 Cur->getRecurrenceKind())) {
1629 HasUnsupportedPhi = true;
1630 continue;
1631 }
1632
1633 VPValue *MinOrMaxOp = GetMinOrMaxCompareValue(Cur);
1634 if (!MinOrMaxOp)
1635 return false;
1636
1637 MinOrMaxNumReductionsToHandle.emplace_back(Cur, MinOrMaxOp);
1638 }
1639
1640 if (MinOrMaxNumReductionsToHandle.empty())
1641 return true;
1642
1643 // We won't be able to resume execution in the scalar tail, if there are
1644 // unsupported header phis or there is no scalar tail at all, due to
1645 // tail-folding.
1646 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1647 return false;
1648
1649 /// Check if the vector loop of \p Plan can early exit and restart
1650 /// execution of last vector iteration in the scalar loop. This requires all
1651 /// recipes up to early exit point be side-effect free as they are
1652 /// re-executed. Currently we check that the loop is free of any recipe that
1653 /// may write to memory. Expected to operate on an early VPlan w/o nested
1654 /// regions.
1657 auto *VPBB = cast<VPBasicBlock>(VPB);
1658 for (auto &R : *VPBB) {
1659 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1660 return false;
1661 }
1662 }
1663
1664 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1665 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1666 VPValue *AllNaNLanes = nullptr;
1667 SmallPtrSet<VPValue *, 2> RdxResults;
1668 for (const auto &[_, MinOrMaxOp] : MinOrMaxNumReductionsToHandle) {
1669 VPValue *RedNaNLanes =
1670 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinOrMaxOp, MinOrMaxOp);
1671 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1672 : RedNaNLanes;
1673 }
1674
1675 VPValue *AnyNaNLane =
1676 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1677 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1678 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
1679 for (const auto &[RedPhiR, _] : MinOrMaxNumReductionsToHandle) {
1681 RedPhiR->getRecurrenceKind()) &&
1682 "unsupported reduction");
1683
1684 // If we exit early due to NaNs, compute the final reduction result based on
1685 // the reduction phi at the beginning of the last vector iteration.
1686 auto *RdxResult = vputils::findComputeReductionResult(RedPhiR);
1687 assert(RdxResult && "must find a ComputeReductionResult");
1688
1689 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1690 RdxResult->getOperand(0));
1691 RdxResult->setOperand(0, NewSel);
1692 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1693 RdxResults.insert(RdxResult);
1694 }
1695
1696 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1697 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1698 "Unexpected terminator");
1699 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1700 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1701 LatchExitingBranch->getOperand(1));
1702 auto *AnyExitTaken = LatchBuilder.createOr(AnyNaNLane, IsLatchExitTaken);
1703 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1704 LatchExitingBranch->eraseFromParent();
1705
1706 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
1707 // true, the resume from the start of the last vector iteration via the
1708 // canonical IV, otherwise from the original value.
1709 auto IsTC = [&Plan](VPValue *V) {
1710 return V == &Plan.getVectorTripCount() || V == Plan.getTripCount();
1711 };
1712 for (auto &R : Plan.getScalarPreheader()->phis()) {
1713 auto *ResumeR = cast<VPPhi>(&R);
1714 VPValue *VecV = ResumeR->getOperand(0);
1715 if (RdxResults.contains(VecV))
1716 continue;
1717 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1718 VPValue *DIVTC = DerivedIV->getOperand(1);
1719 if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) {
1720 auto *NewSel = MiddleBuilder.createSelect(
1721 AnyNaNLane, LoopRegion->getCanonicalIV(), DIVTC);
1722 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1723 DerivedIV->setOperand(1, NewSel);
1724 continue;
1725 }
1726 }
1727 // Bail out and abandon the current, partially modified, VPlan if we
1728 // encounter resume phi that cannot be updated yet.
1729 if (!IsTC(VecV)) {
1730 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1731 "FMaxNum/FMinNum reduction.\n");
1732 return false;
1733 }
1734 auto *NewSel = MiddleBuilder.createSelect(
1735 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1736 ResumeR->setOperand(0, NewSel);
1737 }
1738
1739 auto *MiddleTerm = MiddleVPBB->getTerminator();
1740 MiddleBuilder.setInsertPoint(MiddleTerm);
1741 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1742 VPValue *NewCond =
1743 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1744 MiddleTerm->setOperand(0, NewCond);
1745 return true;
1746}
1747
1749 if (Plan.hasScalarVFOnly())
1750 return false;
1751
1752 // We want to create the following nodes:
1753 // vector.body:
1754 // ...new WidenPHI recipe introduced to keep the mask value for the latest
1755 // iteration where any lane was active.
1756 // mask.phi = phi [ ir<false>, vector.ph ], [ vp<new.mask>, vector.body ]
1757 // ...data.phi (a VPReductionPHIRecipe for a FindLast reduction) already
1758 // exists, but needs updating to use 'new.data' for the backedge value.
1759 // data.phi = phi ir<default.val>, vp<new.data>
1760 //
1761 // ...'data' and 'compare' created by existing nodes...
1762 //
1763 // ...new recipes introduced to determine whether to update the reduction
1764 // values or keep the current one.
1765 // any.active = i1 any-of ir<compare>
1766 // new.mask = select vp<any.active>, ir<compare>, vp<mask.phi>
1767 // new.data = select vp<any.active>, ir<data>, ir<data.phi>
1768 //
1769 // middle.block:
1770 // ...extract-last-active replaces compute-reduction-result.
1771 // result = extract-last-active vp<new.data>, vp<new.mask>, ir<default.val>
1772
1774 for (VPRecipeBase &Phi :
1776 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&Phi);
1778 PhiR->getRecurrenceKind()))
1779 Phis.push_back(PhiR);
1780 }
1781
1782 if (Phis.empty())
1783 return true;
1784
1785 VPValue *HeaderMask = vputils::findHeaderMask(Plan);
1786 for (VPReductionPHIRecipe *PhiR : Phis) {
1787 // Find the condition for the select/blend.
1788 VPValue *BackedgeSelect = PhiR->getBackedgeValue();
1789 VPValue *CondSelect = BackedgeSelect;
1790
1791 // If there's a header mask, the backedge select will not be the find-last
1792 // select.
1793 if (HeaderMask && !match(BackedgeSelect,
1794 m_Select(m_Specific(HeaderMask),
1795 m_VPValue(CondSelect), m_Specific(PhiR))))
1796 llvm_unreachable("expected header mask select");
1797
1798 VPValue *Cond = nullptr, *Op1 = nullptr, *Op2 = nullptr;
1799
1800 // If we're matching a blend rather than a select, there should be one
1801 // incoming value which is the data, then all other incoming values should
1802 // be the phi.
1803 auto MatchBlend = [&](VPRecipeBase *R) {
1804 auto *Blend = dyn_cast<VPBlendRecipe>(R);
1805 if (!Blend)
1806 return false;
1807 assert(!Blend->isNormalized() && "must run before blend normalizaion");
1808 unsigned NumIncomingDataValues = 0;
1809 for (unsigned I = 0; I < Blend->getNumIncomingValues(); ++I) {
1810 VPValue *Incoming = Blend->getIncomingValue(I);
1811 if (Incoming != PhiR) {
1812 ++NumIncomingDataValues;
1813 Cond = Blend->getMask(I);
1814 Op1 = Incoming;
1815 Op2 = PhiR;
1816 }
1817 }
1818 return NumIncomingDataValues == 1;
1819 };
1820
1821 VPSingleDefRecipe *SelectR =
1823 if (!match(SelectR,
1824 m_Select(m_VPValue(Cond), m_VPValue(Op1), m_VPValue(Op2))) &&
1825 !MatchBlend(SelectR))
1826 return false;
1827
1828 assert(Cond != HeaderMask && "Cond must not be HeaderMask");
1829
1830 // Find final reduction computation and replace it with an
1831 // extract.last.active intrinsic.
1832 auto *RdxResult =
1834 BackedgeSelect);
1835 assert(RdxResult && "Could not find reduction result");
1836
1837 // Add mask phi.
1838 VPBuilder Builder = VPBuilder::getToInsertAfter(PhiR);
1839 auto *MaskPHI = Builder.createWidenPhi(Plan.getFalse());
1840
1841 // Add select for mask.
1842 Builder.setInsertPoint(SelectR);
1843
1844 if (Op1 == PhiR) {
1845 // Normalize to selecting the data operand when the condition is true by
1846 // swapping operands and negating the condition.
1847 std::swap(Op1, Op2);
1848 Cond = Builder.createNot(Cond);
1849 }
1850 assert(Op2 == PhiR && "data value must be selected if Cond is true");
1851
1852 if (HeaderMask)
1853 Cond = Builder.createLogicalAnd(HeaderMask, Cond);
1854
1855 VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, {Cond});
1856 VPValue *MaskSelect = Builder.createSelect(AnyOf, Cond, MaskPHI);
1857 MaskPHI->addOperand(MaskSelect);
1858
1859 // Replace select for data.
1860 VPValue *DataSelect =
1861 Builder.createSelect(AnyOf, Op1, Op2, SelectR->getDebugLoc());
1862 SelectR->replaceAllUsesWith(DataSelect);
1863 PhiR->setBackedgeValue(DataSelect);
1864 SelectR->eraseFromParent();
1865
1866 Builder.setInsertPoint(RdxResult);
1867 auto *ExtractLastActive =
1868 Builder.createNaryOp(VPInstruction::ExtractLastActive,
1869 {PhiR->getStartValue(), DataSelect, MaskSelect},
1870 RdxResult->getDebugLoc());
1871 RdxResult->replaceAllUsesWith(ExtractLastActive);
1872 RdxResult->eraseFromParent();
1873 }
1874
1875 return true;
1876}
1877
1878/// Given a first argmin/argmax pattern with strict predicate consisting of
1879/// 1) a MinOrMax reduction \p MinOrMaxPhiR producing \p MinOrMaxResult,
1880/// 2) a wide induction \p WideIV,
1881/// 3) a FindLastIV reduction \p FindLastIVPhiR using \p WideIV,
1882/// return the smallest index of the FindLastIV reduction result using UMin,
1883/// unless \p MinOrMaxResult equals the start value of its MinOrMax reduction.
1884/// In that case, return the start value of the FindLastIV reduction instead.
1885/// If \p WideIV is not canonical, a new canonical wide IV is added, and the
1886/// final result is scaled back to the non-canonical \p WideIV.
1887/// The final value of the FindLastIV reduction is originally computed using
1888/// \p FindIVSelect, \p FindIVCmp, and \p FindIVRdxResult, which are replaced
1889/// and removed.
1890/// Returns true if the pattern was handled successfully, false otherwise.
1892 VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR,
1893 VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV,
1894 VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect,
1895 VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult) {
1896 assert(!FindLastIVPhiR->isInLoop() && !FindLastIVPhiR->isOrdered() &&
1897 "inloop and ordered reductions not supported");
1898 assert(FindLastIVPhiR->getVFScaleFactor() == 1 &&
1899 "FindIV reduction must not be scaled");
1900
1902 // TODO: Support non (i.e., narrower than) canonical IV types.
1903 // TODO: Emit remarks for failed transformations.
1904 if (Ty != VPTypeAnalysis(Plan).inferScalarType(WideIV))
1905 return false;
1906
1907 auto *FindIVSelectR = cast<VPSingleDefRecipe>(
1908 FindLastIVPhiR->getBackedgeValue()->getDefiningRecipe());
1909 assert(
1910 match(FindIVSelectR, m_Select(m_VPValue(), m_VPValue(), m_VPValue())) &&
1911 "backedge value must be a select");
1912 if (FindIVSelectR->getOperand(1) != WideIV &&
1913 FindIVSelectR->getOperand(2) != WideIV)
1914 return false;
1915
1916 // If the original wide IV is not canonical, create a new one. The canonical
1917 // wide IV is guaranteed to not wrap for all lanes that are active in the
1918 // vector loop.
1919 if (!WideIV->isCanonical()) {
1920 VPIRValue *Zero = Plan.getConstantInt(Ty, 0);
1921 VPIRValue *One = Plan.getConstantInt(Ty, 1);
1922 auto *WidenCanIV = new VPWidenIntOrFpInductionRecipe(
1923 nullptr, Zero, One, WideIV->getVFValue(),
1924 WideIV->getInductionDescriptor(),
1925 VPIRFlags::WrapFlagsTy(/*HasNUW=*/true, /*HasNSW=*/false),
1926 WideIV->getDebugLoc());
1927 WidenCanIV->insertBefore(WideIV);
1928
1929 // Update the select to use the wide canonical IV.
1930 FindIVSelectR->setOperand(FindIVSelectR->getOperand(1) == WideIV ? 1 : 2,
1931 WidenCanIV);
1932 }
1933 FindLastIVPhiR->setOperand(0, Plan.getOrAddLiveIn(PoisonValue::get(Ty)));
1934
1935 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1936 // result:
1937 // 1. Find the first canonical indices corresponding to partial min/max
1938 // values, using loop reductions.
1939 // 2. Find which of the partial min/max values are equal to the overall
1940 // min/max value.
1941 // 3. Select among the canonical indices those corresponding to the overall
1942 // min/max value.
1943 // 4. Find the first canonical index of overall min/max and scale it back to
1944 // the original IV using VPDerivedIVRecipe.
1945 // 5. If the overall min/max equals the starting min/max, the condition in
1946 // the loop was always false, due to being strict; return the start value
1947 // of FindLastIVPhiR in that case.
1948 //
1949 // For example, we transforms two independent reduction result computations
1950 // for
1951 //
1952 // <x1> vector loop: {
1953 // vector.body:
1954 // ...
1955 // ir<%iv> = WIDEN-INDUCTION nuw nsw ir<10>, ir<1>, vp<%0>
1956 // WIDEN-REDUCTION-PHI ir<%min.idx> = phi ir<sentinel.min.start>,
1957 // ir<%min.idx.next>
1958 // WIDEN-REDUCTION-PHI ir<%min.val> = phi ir<100>, ir<%min.val.next>
1959 // ....
1960 // WIDEN-INTRINSIC ir<%min.val.next> = call llvm.umin(ir<%min.val>, ir<%l>)
1961 // WIDEN ir<%min.idx.next> = select ir<%cmp>, ir<%iv>, ir<%min.idx>
1962 // ...
1963 // }
1964 // Successor(s): middle.block
1965 //
1966 // middle.block:
1967 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1968 // vp<%min.result> = compute-reduction-result (umin) ir<%min.val.next>
1969 // vp<%cmp> = icmp ne vp<%iv.rdx>, ir<sentinel.min.start>
1970 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<10>
1971 //
1972 //
1973 // Into:
1974 //
1975 // vp<%reduced.min> = compute-reduction-result (umin) ir<%min.val.next>
1976 // vp<%reduced.mins.mask> = icmp eq ir<%min.val.next>, vp<%reduced.min>
1977 // vp<%idxs2reduce> = select vp<%reduced.mins.mask>, ir<%min.idx.next>,
1978 // ir<MaxUInt>
1979 // vp<%reduced.idx> = compute-reduction-result (umin) vp<%idxs2reduce>
1980 // vp<%scaled.idx> = DERIVED-IV ir<20> + vp<%reduced.idx> * ir<1>
1981 // vp<%always.false> = icmp eq vp<%reduced.min>, ir<100>
1982 // vp<%final.idx> = select vp<%always.false>, ir<10>,
1983 // vp<%scaled.idx>
1984
1985 VPBuilder Builder(FindIVRdxResult);
1986 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1987 auto *FinalMinOrMaxCmp =
1988 Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1989 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1990 VPValue *MaxIV =
1991 Plan.getConstantInt(APInt::getMaxValue(Ty->getIntegerBitWidth()));
1992 auto *FinalIVSelect =
1993 Builder.createSelect(FinalMinOrMaxCmp, LastIVExiting, MaxIV);
1994 VPIRFlags RdxFlags(RecurKind::UMin, false, false, FastMathFlags());
1995 VPSingleDefRecipe *FinalCanIV = Builder.createNaryOp(
1996 VPInstruction::ComputeReductionResult, {FinalIVSelect}, RdxFlags,
1997 FindIVRdxResult->getDebugLoc());
1998
1999 // If we used a new wide canonical IV convert the reduction result back to the
2000 // original IV scale before the final select.
2001 if (!WideIV->isCanonical()) {
2002 auto *DerivedIVRecipe = new VPDerivedIVRecipe(
2004 nullptr, // No FPBinOp for integer induction
2005 WideIV->getStartValue(), FinalCanIV, WideIV->getStepValue());
2006 DerivedIVRecipe->insertBefore(&*Builder.getInsertPoint());
2007 FinalCanIV = DerivedIVRecipe;
2008 }
2009
2010 // If the final min/max value matches its start value, the condition in the
2011 // loop was always false, i.e. no induction value has been selected. If that's
2012 // the case, set the result of the IV reduction to its start value.
2013 VPValue *AlwaysFalse = Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxResult,
2014 MinOrMaxPhiR->getStartValue());
2015 VPValue *FinalIV = Builder.createSelect(
2016 AlwaysFalse, FindIVSelect->getOperand(2), FinalCanIV);
2017 FindIVSelect->replaceAllUsesWith(FinalIV);
2018
2019 // Erase the old FindIV result pattern which is now dead.
2020 FindIVSelect->eraseFromParent();
2021 FindIVCmp->eraseFromParent();
2022 FindIVRdxResult->eraseFromParent();
2023 return true;
2024}
2025
2028 Loop *TheLoop) {
2029 for (auto &PhiR : make_early_inc_range(
2031 auto *MinOrMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
2032 // TODO: check for multi-uses in VPlan directly.
2033 if (!MinOrMaxPhiR || !MinOrMaxPhiR->hasUsesOutsideReductionChain())
2034 continue;
2035
2036 // MinOrMaxPhiR has users outside the reduction cycle in the loop. Check if
2037 // the only other user is a FindLastIV reduction. MinOrMaxPhiR must have
2038 // exactly 2 users:
2039 // 1) the min/max operation of the reduction cycle, and
2040 // 2) the compare of a FindLastIV reduction cycle. This compare must match
2041 // the min/max operation - comparing MinOrMaxPhiR with the operand of the
2042 // min/max operation, and be used only by the select of the FindLastIV
2043 // reduction cycle.
2044 RecurKind RdxKind = MinOrMaxPhiR->getRecurrenceKind();
2045 assert(
2047 "only min/max recurrences support users outside the reduction chain");
2048
2049 auto *MinOrMaxOp =
2050 dyn_cast<VPRecipeWithIRFlags>(MinOrMaxPhiR->getBackedgeValue());
2051 if (!MinOrMaxOp)
2052 return false;
2053
2054 // Check that MinOrMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
2055 // with an intrinsic that matches the reduction kind.
2056 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
2057 if (!match(MinOrMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
2058 return false;
2059
2060 // MinOrMaxOp must have 2 users: 1) MinOrMaxPhiR and 2)
2061 // ComputeReductionResult.
2062 assert(MinOrMaxOp->getNumUsers() == 2 &&
2063 "MinOrMaxOp must have exactly 2 users");
2064 VPValue *MinOrMaxOpValue = MinOrMaxOp->getOperand(0);
2065 if (MinOrMaxOpValue == MinOrMaxPhiR)
2066 MinOrMaxOpValue = MinOrMaxOp->getOperand(1);
2067
2068 VPValue *CmpOpA;
2069 VPValue *CmpOpB;
2070 CmpPredicate Pred;
2072 MinOrMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
2073 if (!Cmp || Cmp->getNumUsers() != 1 ||
2074 (CmpOpA != MinOrMaxOpValue && CmpOpB != MinOrMaxOpValue))
2075 return false;
2076
2077 if (MinOrMaxOpValue != CmpOpB)
2078 Pred = CmpInst::getSwappedPredicate(Pred);
2079
2080 // MinOrMaxPhiR must have exactly 2 users:
2081 // * MinOrMaxOp,
2082 // * Cmp (that's part of a FindLastIV chain).
2083 if (MinOrMaxPhiR->getNumUsers() != 2)
2084 return false;
2085
2086 VPInstruction *MinOrMaxResult =
2088 assert(is_contained(MinOrMaxPhiR->users(), MinOrMaxOp) &&
2089 "one user must be MinOrMaxOp");
2090 assert(MinOrMaxResult && "MinOrMaxResult must be a user of MinOrMaxOp");
2091
2092 // Cmp must be used by the select of a FindLastIV chain.
2093 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
2094 VPValue *IVOp, *FindIV;
2095 if (!Sel || Sel->getNumUsers() != 2 ||
2096 !match(Sel,
2098 return false;
2099
2101 std::swap(FindIV, IVOp);
2102 Pred = CmpInst::getInversePredicate(Pred);
2103 }
2104
2105 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
2107 FindIVPhiR->getRecurrenceKind()))
2108 return false;
2109
2110 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
2111 "cannot handle inloop/ordered reductions yet");
2112
2113 // Check if FindIVPhiR is a FindLast pattern by checking the MinMaxKind
2114 // on its ComputeReductionResult. SMax/UMax indicates FindLast.
2115 VPInstruction *FindIVResult =
2117 FindIVPhiR->getBackedgeValue());
2118 assert(FindIVResult &&
2119 "must be able to retrieve the FindIVResult VPInstruction");
2120 RecurKind FindIVMinMaxKind = FindIVResult->getRecurKind();
2121 if (FindIVMinMaxKind != RecurKind::SMax &&
2122 FindIVMinMaxKind != RecurKind::UMax)
2123 return false;
2124
2125 // TODO: Support cases where IVOp is the IV increment.
2126 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
2128 return false;
2129
2130 // Check if the predicate is compatible with the reduction kind.
2131 bool IsValidKindPred = [RdxKind, Pred]() {
2132 switch (RdxKind) {
2133 case RecurKind::UMin:
2134 return Pred == CmpInst::ICMP_UGE || Pred == CmpInst::ICMP_UGT;
2135 case RecurKind::UMax:
2136 return Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT;
2137 case RecurKind::SMax:
2138 return Pred == CmpInst::ICMP_SLE || Pred == CmpInst::ICMP_SLT;
2139 case RecurKind::SMin:
2140 return Pred == CmpInst::ICMP_SGE || Pred == CmpInst::ICMP_SGT;
2141 default:
2142 llvm_unreachable("unhandled recurrence kind");
2143 }
2144 }();
2145 if (!IsValidKindPred) {
2146 ORE->emit([&]() {
2148 DEBUG_TYPE, "VectorizationMultiUseReductionPredicate",
2149 TheLoop->getStartLoc(), TheLoop->getHeader())
2150 << "Multi-use reduction with predicate "
2152 << " incompatible with reduction kind";
2153 });
2154 return false;
2155 }
2156
2157 auto *FindIVSelect = findFindIVSelect(FindIVPhiR->getBackedgeValue());
2158 auto *FindIVCmp = FindIVSelect->getOperand(0)->getDefiningRecipe();
2159 auto *FindIVRdxResult = cast<VPInstruction>(FindIVCmp->getOperand(0));
2160 assert(FindIVSelect->getParent() == MinOrMaxResult->getParent() &&
2161 "both results must be computed in the same block");
2162 // Reducing to a scalar min or max value is placed right before reducing to
2163 // its scalar iteration, in order to generate instructions that use both
2164 // their operands.
2165 MinOrMaxResult->moveBefore(*FindIVRdxResult->getParent(),
2166 FindIVRdxResult->getIterator());
2167
2168 bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred);
2169 if (IsStrictPredicate) {
2170 if (!handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR,
2172 MinOrMaxResult, FindIVSelect, FindIVCmp,
2173 FindIVRdxResult))
2174 return false;
2175 continue;
2176 }
2177
2178 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
2179 // result:
2180 // 1. We need to find the last IV for which the condition based on the
2181 // min/max recurrence is true,
2182 // 2. Compare the partial min/max reduction result to its final value and,
2183 // 3. Select the lanes of the partial FindLastIV reductions which
2184 // correspond to the lanes matching the min/max reduction result.
2185 //
2186 // For example, this transforms
2187 // vp<%min.result> = compute-reduction-result ir<%min.val.next>
2188 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
2189 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
2190 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
2191 //
2192 // into:
2193 //
2194 // vp<min.result> = compute-reduction-result ir<%min.val.next>
2195 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
2196 // vp<%final.iv> = select vp<%final.min.cmp>, vp<%min.idx.next>, SENTINEL
2197 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%final.iv>
2198 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
2199 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
2200 //
2201 VPBuilder B(FindIVRdxResult);
2202 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
2203 auto *FinalMinOrMaxCmp =
2204 B.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
2205 VPValue *Sentinel = FindIVCmp->getOperand(1);
2206 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
2207 auto *FinalIVSelect =
2208 B.createSelect(FinalMinOrMaxCmp, LastIVExiting, Sentinel);
2209 FindIVRdxResult->setOperand(0, FinalIVSelect);
2210 }
2211 return true;
2212}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
#define _
iv Induction Variable Users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition MD5.cpp:57
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static bool handleFirstArgMinOrMax(VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR, VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV, VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect, VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult)
Given a first argmin/argmax pattern with strict predicate consisting of 1) a MinOrMax reduction MinOr...
static VPHeaderPHIRecipe * createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPIRValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, DebugLoc DL)
Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe for Phi based on IndDesc.
static void insertCheckBlockBeforeVectorLoop(VPlan &Plan, VPBasicBlock *CheckBlockVPBB)
Insert CheckBlockVPBB on the edge leading to the vector preheader, connecting it to both vector and s...
static void simplifyLiveInsWithSCEV(VPlan &Plan, PredicatedScalarEvolution &PSE)
Check Plan's live-in and replace them with constants, if they can be simplified via SCEV.
static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Try to sink users of FOR after Previous.
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static bool tryToSinkOrHoistRecurrenceUsers(VPBasicBlock *HeaderVPBB, VPDominatorTree &VPDT)
Sink users of fixed-order recurrences past or hoist before the recipe defining the previous value,...
static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB, VPValue *Cond, bool AddBranchWeights)
Create a BranchOnCond terminator in CheckBlockVPBB.
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, Type *IdxTy, DebugLoc DL)
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Try to hoist Previous and its operands before all users of FOR.
static VPInstruction * findFindIVSelect(VPValue *BackedgeVal)
Find and return the final select instruction of the FindIV result pattern for the given BackedgeVal: ...
static constexpr uint32_t CheckBypassWeights[]
static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB, VPBasicBlock *MiddleVPBB, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Check if all loads in the loop are dereferenceable.
static void printAfterInitialConstruction(VPlan &)
To make RUN_VPLAN_PASS print initial VPlan.
static VPBasicBlock::iterator getExpandSCEVInsertPt(VPBasicBlock *EntryVPBB)
Return an insert point in EntryVPBB after existing VPIRPhi, VPIRInstruction and VPExpandSCEVRecipe re...
static auto getMatchingPhisForScalarLoop(VPBasicBlock *Header, VPBasicBlock *ScalarHeader)
Return an iterator range to iterate over pairs of matching phi nodes in Header and ScalarHeader,...
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS_NO_VERIFY(PASS,...)
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI StringRef getPredicateName(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
static FastMathFlags getFast()
Definition FMF.h:53
static bool isLT(Predicate P)
Return true if the predicate is SLT or ULT.
static bool isGT(Predicate P)
Return true if the predicate is SGT or UGT.
A struct for saving information about induction variables.
InductionKind getKind() const
const SCEV * getStep() const
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Value * getStartValue() const
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:659
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
iterator find(const KeyT &Key)
Definition MapVector.h:156
iterator end()
Definition MapVector.h:69
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
FastMathFlags getFastMathFlags() const
static bool isFPMinMaxNumRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point minnum/maxnum kind.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class represents an analyzed expression in the program.
static constexpr auto FlagNUW
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
op_range operands()
Definition User.h:267
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4146
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4221
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4173
iterator end()
Definition VPlan.h:4183
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4181
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4234
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:233
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:551
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:630
const VPRecipeBase & back() const
Definition VPlan.h:4195
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4212
bool empty() const
Definition VPlan.h:4192
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:97
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:318
VPRegionBlock * getParent()
Definition VPlan.h:189
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:203
void setName(const Twine &newName)
Definition VPlan.h:182
size_t getNumSuccessors() const
Definition VPlan.h:240
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:340
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:309
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:225
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:300
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:236
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:332
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:183
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:289
void setParent(VPRegionBlock *P)
Definition VPlan.h:200
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:230
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:214
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:193
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition VPlanUtils.h:322
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:241
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:259
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:295
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:279
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition VPlanValue.h:504
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition VPlanValue.h:477
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3895
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2299
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2341
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2330
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4299
Class to record and manage LLVM IR flags.
Definition VPlan.h:691
RecurKind getRecurKind() const
Definition VPlan.h:1054
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1332
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1323
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1339
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1269
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:405
VPBasicBlock * getParent()
Definition VPlan.h:479
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:557
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition VPlan.h:2685
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2746
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2725
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2749
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3040
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4356
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4476
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4468
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
Definition VPlanValue.h:228
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:609
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:335
operand_range operands()
Definition VPlanValue.h:403
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:379
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:374
void addOperand(VPValue *Operand)
Definition VPlanValue.h:368
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:138
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:128
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:202
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1480
unsigned getNumUsers() const
Definition VPlanValue.h:113
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1486
user_range users()
Definition VPlanValue.h:155
A Recipe for widening the canonical induction variable of the vector loop.
Definition VPlan.h:3854
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2396
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2416
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2447
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2494
bool isCanonical() const
Returns true if the induction is canonical, i.e.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4504
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4829
LLVMContext & getContext() const
Definition VPlan.h:4705
VPBasicBlock * getEntry()
Definition VPlan.h:4600
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4663
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4684
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4800
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4703
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4832
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4653
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4693
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4777
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:4866
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1067
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4670
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4629
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4855
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1317
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4797
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4605
bool hasScalarVFOnly() const
Definition VPlan.h:4745
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4643
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4649
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4696
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:4910
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4811
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
bool matchFindIVResult(VPInstruction *VPI, Op0_t ReducedIV, Op1_t Start)
Match FindIV result pattern: select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),...
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
Definition VPlanUtils.h:99
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
Definition VPlanUtils.h:116
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
Definition VPlanUtils.h:137
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2672
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:82
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2145
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition Loads.cpp:290
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
A recipe for handling first-order recurrence phis.
Definition VPlan.h:2623
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock=nullptr)
static bool createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be ...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...