LLVM 22.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanCFG.h"
17#include "VPlanDominatorTree.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanTransforms.h"
23#include "llvm/IR/InstrTypes.h"
24#include "llvm/IR/MDBuilder.h"
27
28#define DEBUG_TYPE "vplan"
29
30using namespace llvm;
31using namespace VPlanPatternMatch;
32
33namespace {
// Class that is used to build the plain CFG for the incoming IR.
class PlainCFGBuilder {
  // The outermost loop of the input loop nest considered for vectorization.
  Loop *TheLoop;

  // Loop Info analysis.
  LoopInfo *LI;

  // Loop versioning for alias metadata.
  LoopVersioning *LVer;

  // Vectorization plan that we are working on.
  std::unique_ptr<VPlan> Plan;

  // Builder of the VPlan instruction-level representation.
  VPBuilder VPIRBuilder;

  // NOTE: The following maps are intentionally destroyed after the plain CFG
  // construction because subsequent VPlan-to-VPlan transformation may
  // invalidate them.
  // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
  // NOTE(review): the declaration of the BasicBlock-to-VPBasicBlock map used
  // below as BB2VPBB appears to be missing from this excerpt — confirm
  // against the original source.
  // Map incoming Value definitions to their newly-created VPValues.
  DenseMap<Value *, VPValue *> IRDef2VPValue;

  // Hold phi node's that need to be fixed once the plain CFG has been built.
  // NOTE(review): the declaration of the container used below as PhisToFix
  // appears to be missing from this excerpt — confirm against the original
  // source.

  // Utility functions.
  // Set predecessors of \p VPBB in the same order as in the IR block \p BB.
  void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
  // Add incoming operands to the header-phi recipes recorded in PhisToFix.
  void fixHeaderPhis();
  // Create or retrieve the VPBasicBlock corresponding to \p BB.
  VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
  // Return true if \p Val is defined outside the loop nest (debug-only).
  bool isExternalDef(Value *Val);
#endif
  // Create or retrieve the VPValue representing operand \p IRVal.
  VPValue *getOrCreateVPOperand(Value *IRVal);
  // Translate the instructions of \p BB into recipes appended to \p VPBB.
  void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);

public:
  PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer)
      : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique<VPlan>(Lp)) {}

  /// Build plain CFG for TheLoop and connect it to Plan's entry.
  std::unique_ptr<VPlan> buildPlainCFG();
};
79} // anonymous namespace
80
81// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
82// must have no predecessors.
83void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
84 // Collect VPBB predecessors.
86 for (BasicBlock *Pred : predecessors(BB))
87 VPBBPreds.push_back(getOrCreateVPBB(Pred));
88 VPBB->setPredecessors(VPBBPreds);
89}
90
91static bool isHeaderBB(BasicBlock *BB, Loop *L) {
92 return L && BB == L->getHeader();
93}
94
95// Add operands to VPInstructions representing phi nodes from the input IR.
96void PlainCFGBuilder::fixHeaderPhis() {
97 for (auto *Phi : PhisToFix) {
98 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
99 VPValue *VPVal = IRDef2VPValue[Phi];
100 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
101 auto *PhiR = cast<VPPhi>(VPVal);
102 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
103 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
104 "Expected Phi in header block.");
105 assert(Phi->getNumOperands() == 2 &&
106 "header phi must have exactly 2 operands");
107 for (BasicBlock *Pred : predecessors(Phi->getParent()))
108 PhiR->addOperand(
109 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
110 }
111}
112
113// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
114// existing one if it was already created.
115VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
116 if (auto *VPBB = BB2VPBB.lookup(BB)) {
117 // Retrieve existing VPBB.
118 return VPBB;
119 }
120
121 // Create new VPBB.
122 StringRef Name = BB->getName();
123 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
124 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
125 BB2VPBB[BB] = VPBB;
126 return VPBB;
127}
128
129#ifndef NDEBUG
130// Return true if \p Val is considered an external definition. An external
131// definition is either:
132// 1. A Value that is not an Instruction. This will be refined in the future.
133// 2. An Instruction that is outside of the IR region represented in VPlan,
134// i.e., is not part of the loop nest.
135bool PlainCFGBuilder::isExternalDef(Value *Val) {
136 // All the Values that are not Instructions are considered external
137 // definitions for now.
139 if (!Inst)
140 return true;
141
142 // Check whether Instruction definition is in loop body.
143 return !TheLoop->contains(Inst);
144}
145#endif
146
147// Create a new VPValue or retrieve an existing one for the Instruction's
148// operand \p IRVal. This function must only be used to create/retrieve VPValues
149// for *Instruction's operands* and not to create regular VPInstruction's. For
150// the latter, please, look at 'createVPInstructionsForVPBB'.
151VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
152 auto VPValIt = IRDef2VPValue.find(IRVal);
153 if (VPValIt != IRDef2VPValue.end())
154 // Operand has an associated VPInstruction or VPValue that was previously
155 // created.
156 return VPValIt->second;
157
158 // Operand doesn't have a previously created VPInstruction/VPValue. This
159 // means that operand is:
160 // A) a definition external to VPlan,
161 // B) any other Value without specific representation in VPlan.
162 // For now, we use VPValue to represent A and B and classify both as external
163 // definitions. We may introduce specific VPValue subclasses for them in the
164 // future.
165 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
166
167 // A and B: Create VPValue and add it to the pool of external definitions and
168 // to the Value->VPValue map.
169 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
170 IRDef2VPValue[IRVal] = NewVPVal;
171 return NewVPVal;
172}
173
// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
// counterpart. This function must be invoked in RPO so that the operands of a
// VPInstruction in \p BB have been visited before (except for Phi nodes).
// Side effect: every translated instruction is registered in IRDef2VPValue so
// later blocks can resolve their operands.
void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
                                                  BasicBlock *BB) {
  VPIRBuilder.setInsertPoint(VPBB);
  // TODO: Model and preserve debug intrinsics in VPlan.
  for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
    Instruction *Inst = &InstRef;

    // There shouldn't be any VPValue for Inst at this point. Otherwise, we
    // visited Inst when we shouldn't, breaking the RPO traversal order.
    assert(!IRDef2VPValue.count(Inst) &&
           "Instruction shouldn't have been visited.");

    if (auto *Br = dyn_cast<BranchInst>(Inst)) {
      // Conditional branch instruction are represented using BranchOnCond
      // recipes.
      if (Br->isConditional()) {
        VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
        VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
                                 VPIRMetadata(*Inst), Inst->getDebugLoc());
      }

      // Skip the rest of the Instruction processing for Branch instructions.
      // Unconditional branches produce no recipe at all; successor edges are
      // modeled separately when the CFG is connected.
      continue;
    }

    if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
      // Don't emit recipes for unconditional switch instructions.
      if (SI->getNumCases() == 0)
        continue;
      // Operands are the switch condition followed by all case values.
      SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
      for (auto Case : SI->cases())
        Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
      VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
                               VPIRMetadata(*Inst), Inst->getDebugLoc());
      continue;
    }

    VPSingleDefRecipe *NewR;
    if (auto *Phi = dyn_cast<PHINode>(Inst)) {
      // Phi node's operands may not have been visited at this point. We create
      // an empty VPInstruction that we will fix once the whole plain CFG has
      // been built.
      NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi");
      NewR->setUnderlyingValue(Phi);
      if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
        // Header phis need to be fixed after the VPBB for the latch has been
        // created.
        PhisToFix.push_back(Phi);
      } else {
        // Add operands for VPPhi in the order matching its predecessors in
        // VPlan. Map each VPlan predecessor block to the incoming value of
        // the IR phi for that block, then emit operands in VPlan predecessor
        // order.
        DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
        for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
          VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
              getOrCreateVPOperand(Phi->getIncomingValue(I));
        }
        for (VPBlockBase *Pred : VPBB->getPredecessors())
          NewR->addOperand(
              VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
      }
    } else {
      // Build VPIRMetadata from the instruction and add loop versioning
      // metadata for loads and stores.
      VPIRMetadata MD(*Inst);
      if (isa<LoadInst, StoreInst>(Inst) && LVer) {
        const auto &[AliasScopeMD, NoAliasMD] =
            LVer->getNoAliasMetadataFor(Inst);
        if (AliasScopeMD)
          MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
        if (NoAliasMD)
          MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
      }

      // Translate LLVM-IR operands into VPValue operands and set them in the
      // new VPInstruction.
      SmallVector<VPValue *, 4> VPOperands;
      for (Value *Op : Inst->operands())
        VPOperands.push_back(getOrCreateVPOperand(Op));

      if (auto *CI = dyn_cast<CastInst>(Inst)) {
        NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
                                            CI->getType(), CI->getDebugLoc(),
                                            VPIRFlags(*CI), MD);
        NewR->setUnderlyingValue(CI);
      } else {
        // Build VPInstruction for any arbitrary Instruction without specific
        // representation in VPlan.
        NewR =
            VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
                                     VPIRFlags(*Inst), MD, Inst->getDebugLoc());
      }
    }

    // Record the translation so later instructions (and fixHeaderPhis) can
    // resolve Inst as an operand.
    IRDef2VPValue[Inst] = NewR;
  }
}
273
274// Main interface to build the plain CFG.
275std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
276 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
277 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
278 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
279 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
280
281 // 1. Scan the body of the loop in a topological order to visit each basic
282 // block after having visited its predecessor basic blocks. Create a VPBB for
283 // each BB and link it to its successor and predecessor VPBBs. Note that
284 // predecessors must be set in the same order as they are in the incomming IR.
285 // Otherwise, there might be problems with existing phi nodes and algorithm
286 // based on predecessors traversal.
287
288 // Loop PH needs to be explicitly visited since it's not taken into account by
289 // LoopBlocksDFS.
290 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
291 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
292 "Unexpected loop preheader");
293 for (auto &I : *ThePreheaderBB) {
294 if (I.getType()->isVoidTy())
295 continue;
296 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
297 }
298
299 LoopBlocksRPO RPO(TheLoop);
300 RPO.perform(LI);
301
302 for (BasicBlock *BB : RPO) {
303 // Create or retrieve the VPBasicBlock for this BB.
304 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
305 // Set VPBB predecessors in the same order as they are in the incoming BB.
306 setVPBBPredsFromBB(VPBB, BB);
307
308 // Create VPInstructions for BB.
309 createVPInstructionsForVPBB(VPBB, BB);
310
311 // Set VPBB successors. We create empty VPBBs for successors if they don't
312 // exist already. Recipes will be created when the successor is visited
313 // during the RPO traversal.
314 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
316 getOrCreateVPBB(SI->getDefaultDest())};
317 for (auto Case : SI->cases())
318 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
319 VPBB->setSuccessors(Succs);
320 continue;
321 }
322 auto *BI = cast<BranchInst>(BB->getTerminator());
323 unsigned NumSuccs = succ_size(BB);
324 if (NumSuccs == 1) {
325 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
326 continue;
327 }
328 assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
329 "block must have conditional branch with 2 successors");
330
331 BasicBlock *IRSucc0 = BI->getSuccessor(0);
332 BasicBlock *IRSucc1 = BI->getSuccessor(1);
333 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
334 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
335 VPBB->setTwoSuccessors(Successor0, Successor1);
336 }
337
338 for (auto *EB : Plan->getExitBlocks())
339 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
340
341 // 2. The whole CFG has been built at this point so all the input Values must
342 // have a VPlan counterpart. Fix VPlan header phi by adding their
343 // corresponding VPlan operands.
344 fixHeaderPhis();
345
346 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
347 Plan->getEntry()->setPlan(&*Plan);
348
349 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
350 // VPIRInstructions wrapping them.
351 // // Note that the operand order corresponds to IR predecessor order, and may
352 // need adjusting when VPlan predecessors are added, if an exit block has
353 // multiple predecessor.
354 for (auto *EB : Plan->getExitBlocks()) {
355 for (VPRecipeBase &R : EB->phis()) {
356 auto *PhiR = cast<VPIRPhi>(&R);
357 PHINode &Phi = PhiR->getIRPhi();
358 assert(PhiR->getNumOperands() == 0 &&
359 "no phi operands should be added yet");
360 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
361 PhiR->addOperand(
362 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
363 }
364 }
365
366 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
367 return std::move(Plan);
368}
369
370/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
371/// has exactly 2 predecessors (preheader and latch), where the block
372/// dominates the latch and the preheader dominates the block. If it is a
373/// header block return true and canonicalize the predecessors of the header
374/// (making sure the preheader appears first and the latch second) and the
375/// successors of the latch (making sure the loop exit comes first). Otherwise
376/// return false.
378 const VPDominatorTree &VPDT) {
379 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
380 if (Preds.size() != 2)
381 return false;
382
383 auto *PreheaderVPBB = Preds[0];
384 auto *LatchVPBB = Preds[1];
385 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
386 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
387 std::swap(PreheaderVPBB, LatchVPBB);
388
389 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
390 !VPDT.dominates(HeaderVPB, LatchVPBB))
391 return false;
392
393 // Canonicalize predecessors of header so that preheader is first and
394 // latch second.
395 HeaderVPB->swapPredecessors();
396 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
397 R.swapOperands();
398 }
399
400 // The two successors of conditional branch match the condition, with the
401 // first successor corresponding to true and the second to false. We
402 // canonicalize the successors of the latch when introducing the region, such
403 // that the latch exits the region when its condition is true; invert the
404 // original condition if the original CFG branches to the header on true.
405 // Note that the exit edge is not yet connected for top-level loops.
406 if (LatchVPBB->getSingleSuccessor() ||
407 LatchVPBB->getSuccessors()[0] != HeaderVPB)
408 return true;
409
410 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
411 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
414 "terminator must be a BranchOnCond");
415 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
416 Not->insertBefore(Term);
417 Term->setOperand(0, Not);
418 LatchVPBB->swapSuccessors();
419
420 return true;
421}
422
423/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
424static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
425 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
426 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
427
428 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
429 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
430 VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor();
431 assert(LatchExitVPB && "Latch expected to be left with a single successor");
432
433 // Create an empty region first and insert it between PreheaderVPBB and
434 // LatchExitVPB, taking care to preserve the original predecessor & successor
435 // order of blocks. Set region entry and exiting after both HeaderVPB and
436 // LatchVPBB have been disconnected from their predecessors/successors.
437 auto *R = Plan.createLoopRegion();
438 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
439 VPBlockUtils::disconnectBlocks(LatchVPBB, R);
440 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
441 R->setEntry(HeaderVPB);
442 R->setExiting(LatchVPBB);
443
444 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
445 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
446 VPBB->setParent(R);
447}
448
449// Add the necessary canonical IV and branch recipes required to control the
450// loop.
451static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
452 VPBasicBlock *LatchVPBB, Type *IdxTy,
453 DebugLoc DL) {
454 Value *StartIdx = ConstantInt::get(IdxTy, 0);
455 auto *StartV = Plan.getOrAddLiveIn(StartIdx);
456
457 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
458 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
459 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
460
461 // We are about to replace the branch to exit the region. Remove the original
462 // BranchOnCond, if there is any.
463 DebugLoc LatchDL = DL;
464 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
465 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
466 LatchVPBB->getTerminator()->eraseFromParent();
467 }
468
469 VPBuilder Builder(LatchVPBB);
470 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
471 // Initially the induction increment is guaranteed to not wrap, but that may
472 // change later, e.g. when tail-folding, when the flags need to be dropped.
473 auto *CanonicalIVIncrement = Builder.createOverflowingOp(
474 Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
475 "index.next");
476 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
477
478 // Add the BranchOnCount VPInstruction to the latch.
479 Builder.createNaryOp(VPInstruction::BranchOnCount,
480 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
481 LatchDL);
482}
483
484/// Creates extracts for values in \p Plan defined in a loop region and used
485/// outside a loop region.
486static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
487 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
488 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
489 if (EB->getSinglePredecessor() != MiddleVPBB)
490 continue;
491
492 for (VPRecipeBase &R : EB->phis()) {
493 auto *ExitIRI = cast<VPIRPhi>(&R);
494 for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
495 VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
496 if (!Inc)
497 continue;
498 assert(ExitIRI->getNumOperands() == 1 &&
499 ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
500 "exit values from early exits must be fixed when branch to "
501 "early-exit is added");
502 ExitIRI->extractLastLaneOfFirstOperand(B);
503 }
504 }
505 }
506}
507
// Add the initial skeleton around the plain CFG: vector preheader, middle
// block, scalar preheader, canonical IV recipes and the trip-count SCEV.
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
                               PredicatedScalarEvolution &PSE, Loop *TheLoop) {
  VPDominatorTree VPDT(Plan);

  // Canonicalize the top-level header/latch so predecessor 0 is the preheader
  // and predecessor 1 the latch.
  auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
  canonicalHeaderAndLatch(HeaderVPBB, VPDT);
  auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);

  VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
  VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());

  VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
  // The canonical LatchVPBB has the header block as last successor. If it has
  // another successor, this successor is an exit block - insert middle block on
  // its edge. Otherwise, add middle block as another successor retaining header
  // as last.
  if (LatchVPBB->getNumSuccessors() == 2) {
    VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
    VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
  } else {
    VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
    LatchVPBB->swapSuccessors();
  }

  addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);

  // Create SCEV and VPValue for the trip count.
  // We use the symbolic max backedge-taken-count, which works also when
  // vectorizing loops with uncountable early exits.
  const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
  assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
         "Invalid backedge-taken count");
  ScalarEvolution &SE = *PSE.getSE();
  const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
                                                       InductionTy, TheLoop);
  // NOTE(review): a statement appears to be missing here in this excerpt
  // (presumably registering TripCount with the plan) — confirm against the
  // original source.

  VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
  // NOTE(review): a statement appears to be missing here in this excerpt
  // (presumably connecting ScalarPH to the scalar header) — confirm against
  // the original source.

  // The connection order corresponds to the operands of the conditional branch,
  // with the middle block already connected to the exit block.
  VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
  // Also connect the entry block to the scalar preheader.
  // TODO: Also introduce a branch recipe together with the minimum trip count
  // check.
  VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
  Plan.getEntry()->swapSuccessors();

  createExtractsForLiveOuts(Plan, MiddleVPBB);

  // Create resume phis in the scalar preheader for each header phi (skipping
  // the canonical IV via drop_begin), and wire them into the scalar header
  // phis.
  VPBuilder ScalarPHBuilder(ScalarPH);
  for (const auto &[PhiR, ScalarPhiR] : zip_equal(
           drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
    auto *VectorPhiR = cast<VPPhi>(&PhiR);
    auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
        {VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
    cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
  }
}
568
569std::unique_ptr<VPlan>
570VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
572 LoopVersioning *LVer) {
573 PlainCFGBuilder Builder(TheLoop, &LI, LVer);
574 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
575 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
576 return VPlan0;
577}
578
// NOTE(review): the first line of this definition (return type, name and the
// leading parameters) is missing from this excerpt — confirm against the
// original source.
                                       bool HasUncountableEarlyExit) {
  auto *MiddleVPBB = cast<VPBasicBlock>(
      // NOTE(review): the argument expression of this cast is missing from
      // this excerpt — confirm against the original source.
  auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
  VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);

  // Disconnect all early exits from the loop leaving it with a single exit from
  // the latch. Early exits that are countable are left for a scalar epilog. The
  // condition of uncountable early exits (currently at most one is supported)
  // is fused into the latch exit, and used to branch from middle block to the
  // early exit destination.
  [[maybe_unused]] bool HandledUncountableEarlyExit = false;
  for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
    for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
      if (Pred == MiddleVPBB)
        continue;
      if (HasUncountableEarlyExit) {
        assert(!HandledUncountableEarlyExit &&
               "can handle exactly one uncountable early exit");
        // NOTE(review): the start of the call below (presumably handling the
        // uncountable early exit) is missing from this excerpt — confirm
        // against the original source.
            cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
        HandledUncountableEarlyExit = true;
      } else {
        // Countable early exits: drop the incoming values for the exiting
        // predecessor; the scalar epilogue handles these exits.
        for (VPRecipeBase &R : EB->phis())
          cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
      }
      cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
      // NOTE(review): a statement appears to be missing here in this excerpt
      // (presumably disconnecting Pred from EB) — confirm against the
      // original source.
    }
  }

  assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
         "missed an uncountable exit that must be handled");
}
614
// NOTE(review): the first line of this definition (return type, name and the
// leading parameters) is missing from this excerpt — confirm against the
// original source.
                                     bool RequiresScalarEpilogueCheck,
                                     bool TailFolded) {
  auto *MiddleVPBB = cast<VPBasicBlock>(
      // NOTE(review): the argument expression of this cast is missing from
      // this excerpt — confirm against the original source.
  // If MiddleVPBB has a single successor then the original loop does not exit
  // via the latch and the single successor must be the scalar preheader.
  // There's no need to add a runtime check to MiddleVPBB.
  if (MiddleVPBB->getNumSuccessors() == 1) {
    assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
           "must have ScalarPH as single successor");
    return;
  }

  assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");

  // Add a check in the middle block to see if we have completed all of the
  // iterations in the first vector loop.
  //
  // Three cases:
  // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
  //    condition to false.
  // 2) If (N - N%VF) == N, then we *don't* need to run the
  //    remainder. Thus if tail is to be folded, we know we don't need to run
  //    the remainder and we can set the condition to true.
  // 3) Otherwise, construct a runtime check.

  // We use the same DebugLoc as the scalar loop latch terminator instead of
  // the corresponding compare because they may have ended up with different
  // line numbers and we want to avoid awkward line stepping while debugging.
  // E.g., if the compare has got a line number inside the loop.
  auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
  DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
  VPBuilder Builder(MiddleVPBB);
  VPValue *Cmp;
  if (!RequiresScalarEpilogueCheck)
    // Case 1: always take the scalar epilogue.
    Cmp = Plan.getFalse();
  else if (TailFolded)
    // Case 2: tail folded, never take the scalar epilogue.
    Cmp = Plan.getTrue();
  else
    // Case 3: runtime comparison of trip count vs vector trip count.
    Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
                             &Plan.getVectorTripCount(), LatchDL, "cmp.n");
  Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
}
659
661 VPDominatorTree VPDT(Plan);
662 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
663 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
664 createLoopRegion(Plan, HeaderVPB);
665
666 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
667 TopRegion->setName("vector loop");
668 TopRegion->getEntryBasicBlock()->setName("vector.body");
669}
670
// Likelihood of bypassing the vectorized loop due to a runtime check block,
// including memory overlap checks block and wrapping/unit-stride checks block.
static constexpr uint32_t CheckBypassWeights[] = {1, 127};
674
// NOTE(review): the first line of this definition (return type, name and the
// leading parameters, including Plan and Cond used below) is missing from
// this excerpt — confirm against the original source.
                                    BasicBlock *CheckBlock,
                                    bool AddBranchWeights) {
  // Wrap the IR check block and splice it between the vector preheader's
  // predecessor and the vector preheader; also branch to the scalar
  // preheader.
  VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
  VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
  VPBlockBase *VectorPH = Plan.getVectorPreheader();
  VPBlockBase *ScalarPH = Plan.getScalarPreheader();
  VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
  VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
  VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
  CheckBlockVPBB->swapSuccessors();

  // We just connected a new block to the scalar preheader. Update all
  // VPPhis by adding an incoming value for it, replicating the last value.
  unsigned NumPredecessors = ScalarPH->getNumPredecessors();
  for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
    assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
    assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
           "must have incoming values for all operands");
    R.addOperand(R.getOperand(NumPredecessors - 2));
  }

  VPIRMetadata VPBranchWeights;
  auto *Term =
      VPBuilder(CheckBlockVPBB)
          // NOTE(review): the recipe-creation call here (presumably a
          // conditional branch on CondVPV) is missing from this excerpt —
          // confirm against the original source.
              Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc());
  if (AddBranchWeights) {
    // Annotate the new branch with the bypass weights so the optimizer knows
    // the check rarely fails.
    MDBuilder MDB(Plan.getContext());
    MDNode *BranchWeights =
        MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
    Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
  }
}
710
// NOTE(review): the first line of this definition (return type and name) is
// missing from this excerpt — confirm against the original source.
    VPlan &Plan, ElementCount VF, unsigned UF,
    ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
    bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
    // NOTE(review): a parameter-list line appears to be missing here
    // (presumably declaring SE and DL, both used below) — confirm against
    // the original source.
  // Generate code to check if the loop's trip count is less than VF * UF, or
  // equal to it in case a scalar epilogue is required; this implies that the
  // vector trip count is zero. This check also covers the case where adding one
  // to the backedge-taken count overflowed leading to an incorrect trip count
  // of zero. In this case we will also jump to the scalar loop.
  CmpInst::Predicate CmpPred =
      RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
  // If tail is to be folded, vector loop takes care of all iterations.
  VPValue *TripCountVPV = Plan.getTripCount();
  const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, SE);
  Type *TripCountTy = TripCount->getType();
  auto GetMinTripCount = [&]() -> const SCEV * {
    // Compute max(MinProfitableTripCount, UF * VF) and return it.
    const SCEV *VFxUF =
        SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
    if (UF * VF.getKnownMinValue() >=
        MinProfitableTripCount.getKnownMinValue()) {
      // TODO: SCEV should be able to simplify test.
      return VFxUF;
    }
    const SCEV *MinProfitableTripCountSCEV =
        SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
    return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
  };

  VPBasicBlock *EntryVPBB = Plan.getEntry();
  VPBuilder Builder(EntryVPBB);
  // Default: never bypass the vector loop.
  VPValue *TripCountCheck = Plan.getFalse();
  const SCEV *Step = GetMinTripCount();
  if (TailFolded) {
    if (CheckNeededWithTailFolding) {
      // vscale is not necessarily a power-of-2, which means we cannot guarantee
      // an overflow to zero when updating induction variables and so an
      // additional overflow check is required before entering the vector loop.

      // Get the maximum unsigned value for the type.
      VPValue *MaxUIntTripCount =
          Plan.getConstantInt(cast<IntegerType>(TripCountTy)->getMask());
      VPValue *DistanceToMax = Builder.createNaryOp(
          Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
          // NOTE(review): the trailing arguments of this call (presumably a
          // debug location and/or name) are missing from this excerpt —
          // confirm against the original source.

      // Don't execute the vector loop if (UMax - n) < (VF * UF).
      // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
      // minProfitableTripCount).
      TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
                                          Builder.createExpandSCEV(Step), DL);
    } else {
      // TripCountCheck = false, folding tail implies positive vector trip
      // count.
    }
  } else {
    // TODO: Emit unconditional branch to vector preheader instead of
    // conditional branch with known condition.
    TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
    // Check if the trip count is < the step.
    if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
      // TODO: Ensure step is at most the trip count when determining max VF and
      // UF, w/o tail folding.
      TripCountCheck = Plan.getTrue();
    } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
                                    TripCount, Step)) {
      // Generate the minimum iteration check only if we cannot prove the
      // check is known to be true, or known to be false.
      VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
      TripCountCheck = Builder.createICmp(
          CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
    } // else step known to be < trip count, use TripCountCheck preset to false.
  }
  VPInstruction *Term =
      Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
  // NOTE(review): a guard line appears to be missing here (the brace closed
  // below suggests a conditional wrapping the branch-weight emission) —
  // confirm against the original source.
    MDBuilder MDB(Plan.getContext());
    MDNode *BranchWeights = MDB.createBranchWeights(
        ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
    Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
  }
}
794
796 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
797 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
798 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE) {
799 // Add the minimum iteration check for the epilogue vector loop.
800 VPValue *TC = Plan.getOrAddLiveIn(TripCount);
801 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
802 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
803 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
804 VPValue *Count = Builder.createNaryOp(
805 Instruction::Sub, {TC, Plan.getOrAddLiveIn(VectorTripCount)},
806 DebugLoc::getUnknown(), "n.vec.remaining");
807
808 // Generate code to check if the loop's trip count is less than VF * UF of
809 // the vector epilogue loop.
810 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
811 auto *CheckMinIters = Builder.createICmp(
812 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
813 VPInstruction *Branch =
814 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
815
816 // We assume the remaining `Count` is equally distributed in
817 // [0, MainLoopStep)
818 // So the probability for `Count < EpilogueLoopStep` should be
819 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
820 // TODO: Improve the estimate by taking the estimated trip count into
821 // consideration.
822 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
823 const uint32_t Weights[] = {EstimatedSkipCount,
824 MainLoopStep - EstimatedSkipCount};
825 MDBuilder MDB(Plan.getContext());
826 MDNode *BranchWeights =
827 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
828 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
829}
830
831/// If \p V is used by a recipe matching pattern \p P, return the matching
832/// user recipe; otherwise return nullptr.
833template <typename MatchT>
834static VPRecipeBase *findUserOf(VPValue *V, const MatchT &P) {
835 auto It = find_if(V->users(), match_fn(P));
836 return It == V->user_end() ? nullptr : cast<VPRecipeBase>(*It);
837}
838
839/// If \p V is used by a VPInstruction with \p Opcode, return it. Otherwise
840/// return nullptr.
841template <unsigned Opcode> static VPInstruction *findUserOf(VPValue *V) {
// NOTE(review): the function body (original line 842) is missing from this
// extraction; presumably it delegates to the pattern-based findUserOf overload
// above with an opcode matcher — confirm against the upstream source.
843}
844
846 auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
847 auto *MinMaxR =
848 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
849 if (!MinMaxR)
850 return nullptr;
851
852 // Check that MinMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
853 // with an intrinsic that matches the reduction kind.
854 Intrinsic::ID ExpectedIntrinsicID =
855 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
856 if (!match(MinMaxR, m_Intrinsic(ExpectedIntrinsicID)))
857 return nullptr;
858
859 if (MinMaxR->getOperand(0) == RedPhiR)
860 return MinMaxR->getOperand(1);
861
862 assert(MinMaxR->getOperand(1) == RedPhiR &&
863 "Reduction phi operand expected");
864 return MinMaxR->getOperand(0);
865 };
866
867 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
869 MinMaxNumReductionsToHandle;
870 bool HasUnsupportedPhi = false;
871 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
873 continue;
874 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
875 if (!Cur) {
876 // TODO: Also support fixed-order recurrence phis.
877 HasUnsupportedPhi = true;
878 continue;
879 }
881 Cur->getRecurrenceKind())) {
882 HasUnsupportedPhi = true;
883 continue;
884 }
885
886 VPValue *MinMaxOp = GetMinMaxCompareValue(Cur);
887 if (!MinMaxOp)
888 return false;
889
890 MinMaxNumReductionsToHandle.emplace_back(Cur, MinMaxOp);
891 }
892
893 if (MinMaxNumReductionsToHandle.empty())
894 return true;
895
896 // We won't be able to resume execution in the scalar tail, if there are
897 // unsupported header phis or there is no scalar tail at all, due to
898 // tail-folding.
899 if (HasUnsupportedPhi || !Plan.hasScalarTail())
900 return false;
901
902 /// Check if the vector loop of \p Plan can early exit and restart
903 /// execution of last vector iteration in the scalar loop. This requires all
904 /// recipes up to early exit point be side-effect free as they are
905 /// re-executed. Currently we check that the loop is free of any recipe that
906 /// may write to memory. Expected to operate on an early VPlan w/o nested
907 /// regions.
910 auto *VPBB = cast<VPBasicBlock>(VPB);
911 for (auto &R : *VPBB) {
912 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
913 return false;
914 }
915 }
916
917 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
918 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
919 VPValue *AllNaNLanes = nullptr;
920 SmallPtrSet<VPValue *, 2> RdxResults;
921 for (const auto &[_, MinMaxOp] : MinMaxNumReductionsToHandle) {
922 VPValue *RedNaNLanes =
923 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
924 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
925 : RedNaNLanes;
926 }
927
928 VPValue *AnyNaNLane =
929 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
930 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
931 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
932 for (const auto &[RedPhiR, _] : MinMaxNumReductionsToHandle) {
934 RedPhiR->getRecurrenceKind()) &&
935 "unsupported reduction");
936
937 // If we exit early due to NaNs, compute the final reduction result based on
938 // the reduction phi at the beginning of the last vector iteration.
939 auto *RdxResult =
941
942 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
943 RdxResult->getOperand(1));
944 RdxResult->setOperand(1, NewSel);
945 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
946 RdxResults.insert(RdxResult);
947 }
948
949 auto *LatchExitingBranch = LatchVPBB->getTerminator();
950 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
951 "Unexpected terminator");
952 auto *IsLatchExitTaken = LatchBuilder.createICmp(
953 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
954 LatchExitingBranch->getOperand(1));
955 auto *AnyExitTaken = LatchBuilder.createNaryOp(
956 Instruction::Or, {AnyNaNLane, IsLatchExitTaken});
957 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
958 LatchExitingBranch->eraseFromParent();
959
960 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
961 // true, the resume from the start of the last vector iteration via the
962 // canonical IV, otherwise from the original value.
963 for (auto &R : Plan.getScalarPreheader()->phis()) {
964 auto *ResumeR = cast<VPPhi>(&R);
965 VPValue *VecV = ResumeR->getOperand(0);
966 if (RdxResults.contains(VecV))
967 continue;
968 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
969 if (DerivedIV->getNumUsers() == 1 &&
970 DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
971 auto *NewSel =
972 MiddleBuilder.createSelect(AnyNaNLane, LoopRegion->getCanonicalIV(),
973 &Plan.getVectorTripCount());
974 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
975 DerivedIV->setOperand(1, NewSel);
976 continue;
977 }
978 }
979 // Bail out and abandon the current, partially modified, VPlan if we
980 // encounter resume phi that cannot be updated yet.
981 if (VecV != &Plan.getVectorTripCount()) {
982 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
983 "FMaxNum/FMinNum reduction.\n");
984 return false;
985 }
986 auto *NewSel = MiddleBuilder.createSelect(
987 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
988 ResumeR->setOperand(0, NewSel);
989 }
990
991 auto *MiddleTerm = MiddleVPBB->getTerminator();
992 MiddleBuilder.setInsertPoint(MiddleTerm);
993 VPValue *MiddleCond = MiddleTerm->getOperand(0);
994 VPValue *NewCond =
995 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
996 MiddleTerm->setOperand(0, NewCond);
997 return true;
998}
999
1001 for (auto &PhiR : make_early_inc_range(
1003 auto *MinMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
1004 // TODO: check for multi-uses in VPlan directly.
1005 if (!MinMaxPhiR || !MinMaxPhiR->hasUsesOutsideReductionChain())
1006 continue;
1007
1008 // MinMaxPhiR has users outside the reduction cycle in the loop. Check if
1009 // the only other user is a FindLastIV reduction. MinMaxPhiR must have
1010 // exactly 3 users: 1) the min/max operation, the compare of a FindLastIV
1011 reduction and ComputeReductionResult. The comparison must compare
1012 // MinMaxPhiR against the min/max operand used for the min/max reduction
1013 // and only be used by the select of the FindLastIV reduction.
1014 RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind();
1015 assert(
1017 "only min/max recurrences support users outside the reduction chain");
1018
1019 auto *MinMaxOp =
1020 dyn_cast<VPRecipeWithIRFlags>(MinMaxPhiR->getBackedgeValue());
1021 if (!MinMaxOp)
1022 return false;
1023
1024 // Check that MinMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1025 // with an intrinsic that matches the reduction kind.
1026 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
1027 if (!match(MinMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
1028 return false;
1029
1030 // MinMaxOp must have 2 users: 1) MinMaxPhiR and 2) ComputeReductionResult
1031 // (asserted below).
1032 assert(MinMaxOp->getNumUsers() == 2 &&
1033 "MinMaxOp must have exactly 2 users");
1034 VPValue *MinMaxOpValue = MinMaxOp->getOperand(0);
1035 if (MinMaxOpValue == MinMaxPhiR)
1036 MinMaxOpValue = MinMaxOp->getOperand(1);
1037
1038 VPValue *CmpOpA;
1039 VPValue *CmpOpB;
1040 CmpPredicate Pred;
1042 MinMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1043 if (!Cmp || Cmp->getNumUsers() != 1 ||
1044 (CmpOpA != MinMaxOpValue && CmpOpB != MinMaxOpValue))
1045 return false;
1046
1047 if (MinMaxOpValue != CmpOpB)
1048 Pred = CmpInst::getSwappedPredicate(Pred);
1049
1050 // MinMaxPhiR must have exactly 3 users:
1051 // * MinMaxOp,
1052 // * Cmp (that's part of a FindLastIV chain),
1053 // * ComputeReductionResult.
1054 if (MinMaxPhiR->getNumUsers() != 3)
1055 return false;
1056
1057 VPInstruction *MinMaxResult =
1059 assert(is_contained(MinMaxPhiR->users(), MinMaxOp) &&
1060 "one user must be MinMaxOp");
1061 assert(MinMaxResult && "MinMaxResult must be a user of MinMaxPhiR");
1062 assert(is_contained(MinMaxOp->users(), MinMaxResult) &&
1063 "MinMaxResult must be a user of MinMaxOp (and of MinMaxPhiR");
1064
1065 // Cmp must be used by the select of a FindLastIV chain.
1066 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
1067 VPValue *IVOp, *FindIV;
1068 if (!Sel || Sel->getNumUsers() != 2 ||
1069 !match(Sel,
1070 m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))))
1071 return false;
1072
1073 if (!isa<VPReductionPHIRecipe>(FindIV)) {
1074 std::swap(FindIV, IVOp);
1075 Pred = CmpInst::getInversePredicate(Pred);
1076 }
1077
1078 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
1080 FindIVPhiR->getRecurrenceKind()))
1081 return false;
1082
1083 // TODO: Support cases where IVOp is the IV increment.
1084 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
1086 return false;
1087
1088 CmpInst::Predicate RdxPredicate = [RdxKind]() {
1089 switch (RdxKind) {
1090 case RecurKind::UMin:
1091 return CmpInst::ICMP_UGE;
1092 case RecurKind::UMax:
1093 return CmpInst::ICMP_ULE;
1094 case RecurKind::SMax:
1095 return CmpInst::ICMP_SLE;
1096 case RecurKind::SMin:
1097 return CmpInst::ICMP_SGE;
1098 default:
1099 llvm_unreachable("unhandled recurrence kind");
1100 }
1101 }();
1102
1103 // TODO: Strict predicates need to find the first IV value for which the
1104 // predicate holds, not the last.
1105 if (Pred != RdxPredicate)
1106 return false;
1107
1108 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
1109 "cannot handle inloop/ordered reductions yet");
1110
1111 // The reduction using MinMaxPhiR needs adjusting to compute the correct
1112 // result:
1113 // 1. We need to find the last IV for which the condition based on the
1114 // min/max recurrence is true,
1115 // 2. Compare the partial min/max reduction result to its final value and,
1116 // 3. Select the lanes of the partial FindLastIV reductions which
1117 // correspond to the lanes matching the min/max reduction result.
1118 //
1119 // For example, this transforms
1120 // vp<%min.result> = compute-reduction-result ir<%min.val>,
1121 // ir<%min.val.next>
1122 // vp<%find.iv.result = compute-find-iv-result ir<%min.idx>, ir<0>,
1123 // SENTINEL, vp<%min.idx.next>
1124 //
1125 // into:
1126 //
1127 // vp<min.result> = compute-reduction-result ir<%min.val>, ir<%min.val.next>
1128 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
1129 // vp<%final.iv> = select vp<%final.min.cmp>, ir<%min.idx.next>, SENTINEL
1130 // vp<%find.iv.result> = compute-find-iv-result ir<%min.idx>, ir<0>,
1131 // SENTINEL, vp<%final.iv>
1132 VPInstruction *FindIVResult =
1134 assert(FindIVResult->getParent() == MinMaxResult->getParent() &&
1135 "both results must be computed in the same block");
1136 MinMaxResult->moveBefore(*FindIVResult->getParent(),
1137 FindIVResult->getIterator());
1138
1139 VPBuilder B(FindIVResult);
1140 VPValue *MinMaxExiting = MinMaxResult->getOperand(1);
1141 auto *FinalMinMaxCmp =
1142 B.createICmp(CmpInst::ICMP_EQ, MinMaxExiting, MinMaxResult);
1143 VPValue *Sentinel = FindIVResult->getOperand(2);
1144 VPValue *LastIVExiting = FindIVResult->getOperand(3);
1145 auto *FinalIVSelect =
1146 B.createSelect(FinalMinMaxCmp, LastIVExiting, Sentinel);
1147 FindIVResult->setOperand(3, FinalIVSelect);
1148 }
1149 return true;
1150}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define _
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
#define LLVM_DEBUG(...)
Definition Debug.h:114
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, Type *IdxTy, DebugLoc DL)
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static constexpr uint32_t CheckBypassWeights[]
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
static bool isFPMinMaxNumRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point minnum/maxnum kind.
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
op_range operands()
Definition User.h:292
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3971
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4006
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4059
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:623
const VPRecipeBase & back() const
Definition VPlan.h:4020
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4037
bool empty() const
Definition VPlan.h:4017
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:300
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:186
void setName(const Twine &newName)
Definition VPlan.h:166
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:114
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition VPlanUtils.h:232
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:170
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:189
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3552
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4124
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1031
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition VPlan.h:2432
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4159
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4257
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:251
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:246
void addOperand(VPValue *Operand)
Definition VPlanValue.h:240
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:193
unsigned getNumUsers() const
Definition VPlanValue.h:113
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4289
LLVMContext & getContext() const
Definition VPlan.h:4482
VPBasicBlock * getEntry()
Definition VPlan.h:4382
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4473
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4480
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4444
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4551
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4434
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4557
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1011
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4451
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4407
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4606
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1226
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4554
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4536
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4616
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4425
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4430
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4387
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4660
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
Definition VPlanCFG.h:229
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
static bool handleMultiUseReductions(VPlan &Plan)
Try to legalize reductions with multiple in-loop uses.
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static LLVM_ABI_FOR_TEST void handleEarlyExits(VPlan &Plan, bool HasUncountableExit)
Update Plan to account for all early exits.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, ScalarEvolution &SE)
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB)
Update Plan to account for the uncountable early exit from EarlyExitingVPBB to EarlyExitVPBB by.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *TripCount, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle block.