LLVM 22.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanDominatorTree.h"
19#include "VPlanPatternMatch.h"
20#include "VPlanTransforms.h"
25#include "llvm/IR/InstrTypes.h"
26#include "llvm/IR/MDBuilder.h"
29
30#define DEBUG_TYPE "vplan"
31
32using namespace llvm;
33using namespace VPlanPatternMatch;
34
35namespace {
36// Class that is used to build the plain CFG for the incoming IR.
37class PlainCFGBuilder {
38 // The outermost loop of the input loop nest considered for vectorization.
39 Loop *TheLoop;
40
41 // Loop Info analysis.
42 LoopInfo *LI;
43
44 // Loop versioning for alias metadata.
45 LoopVersioning *LVer;
46
47 // Vectorization plan that we are working on.
48 std::unique_ptr<VPlan> Plan;
49
50 // Builder of the VPlan instruction-level representation.
51 VPBuilder VPIRBuilder;
52
53 // NOTE: The following maps are intentionally destroyed after the plain CFG
54 // construction because subsequent VPlan-to-VPlan transformation may
55 // invalidate them.
56 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
58 // Map incoming Value definitions to their newly-created VPValues.
59 DenseMap<Value *, VPValue *> IRDef2VPValue;
60
61 // Hold phi node's that need to be fixed once the plain CFG has been built.
63
64 // Utility functions.
65 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
66 void fixHeaderPhis();
67 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
68#ifndef NDEBUG
69 bool isExternalDef(Value *Val);
70#endif
71 VPValue *getOrCreateVPOperand(Value *IRVal);
72 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
73
74public:
75 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer)
76 : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique<VPlan>(Lp)) {}
77
78 /// Build plain CFG for TheLoop and connect it to Plan's entry.
79 std::unique_ptr<VPlan> buildPlainCFG();
80};
81} // anonymous namespace
82
83// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
84// must have no predecessors.
85void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
86 // Collect VPBB predecessors.
88 for (BasicBlock *Pred : predecessors(BB))
89 VPBBPreds.push_back(getOrCreateVPBB(Pred));
90 VPBB->setPredecessors(VPBBPreds);
91}
92
93static bool isHeaderBB(BasicBlock *BB, Loop *L) {
94 return L && BB == L->getHeader();
95}
96
97// Add operands to VPInstructions representing phi nodes from the input IR.
98void PlainCFGBuilder::fixHeaderPhis() {
99 for (auto *Phi : PhisToFix) {
100 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
101 VPValue *VPVal = IRDef2VPValue[Phi];
102 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
103 auto *PhiR = cast<VPPhi>(VPVal);
104 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
105 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
106 "Expected Phi in header block.");
107 assert(Phi->getNumOperands() == 2 &&
108 "header phi must have exactly 2 operands");
109 for (BasicBlock *Pred : predecessors(Phi->getParent()))
110 PhiR->addOperand(
111 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
112 }
113}
114
115// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
116// existing one if it was already created.
117VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
118 if (auto *VPBB = BB2VPBB.lookup(BB)) {
119 // Retrieve existing VPBB.
120 return VPBB;
121 }
122
123 // Create new VPBB.
124 StringRef Name = BB->getName();
125 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
126 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
127 BB2VPBB[BB] = VPBB;
128 return VPBB;
129}
130
131#ifndef NDEBUG
132// Return true if \p Val is considered an external definition. An external
133// definition is either:
134// 1. A Value that is not an Instruction. This will be refined in the future.
135// 2. An Instruction that is outside of the IR region represented in VPlan,
136// i.e., is not part of the loop nest.
137bool PlainCFGBuilder::isExternalDef(Value *Val) {
138 // All the Values that are not Instructions are considered external
139 // definitions for now.
141 if (!Inst)
142 return true;
143
144 // Check whether Instruction definition is in loop body.
145 return !TheLoop->contains(Inst);
146}
147#endif
148
149// Create a new VPValue or retrieve an existing one for the Instruction's
150// operand \p IRVal. This function must only be used to create/retrieve VPValues
151// for *Instruction's operands* and not to create regular VPInstruction's. For
152// the latter, please, look at 'createVPInstructionsForVPBB'.
153VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
154 auto VPValIt = IRDef2VPValue.find(IRVal);
155 if (VPValIt != IRDef2VPValue.end())
156 // Operand has an associated VPInstruction or VPValue that was previously
157 // created.
158 return VPValIt->second;
159
160 // Operand doesn't have a previously created VPInstruction/VPValue. This
161 // means that operand is:
162 // A) a definition external to VPlan,
163 // B) any other Value without specific representation in VPlan.
164 // For now, we use VPValue to represent A and B and classify both as external
165 // definitions. We may introduce specific VPValue subclasses for them in the
166 // future.
167 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
168
169 // A and B: Create VPValue and add it to the pool of external definitions and
170 // to the Value->VPValue map.
171 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
172 IRDef2VPValue[IRVal] = NewVPVal;
173 return NewVPVal;
174}
175
176// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
177// counterpart. This function must be invoked in RPO so that the operands of a
178// VPInstruction in \p BB have been visited before (except for Phi nodes).
179void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
180 BasicBlock *BB) {
181 VPIRBuilder.setInsertPoint(VPBB);
182 // TODO: Model and preserve debug intrinsics in VPlan.
183 for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
184 Instruction *Inst = &InstRef;
185
186 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
187 // visited Inst when we shouldn't, breaking the RPO traversal order.
188 assert(!IRDef2VPValue.count(Inst) &&
189 "Instruction shouldn't have been visited.");
190
191 if (auto *Br = dyn_cast<BranchInst>(Inst)) {
192 // Conditional branch instruction are represented using BranchOnCond
193 // recipes.
194 if (Br->isConditional()) {
195 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
196 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
197 VPIRMetadata(*Inst), Inst->getDebugLoc());
198 }
199
200 // Skip the rest of the Instruction processing for Branch instructions.
201 continue;
202 }
203
204 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
205 // Don't emit recipes for unconditional switch instructions.
206 if (SI->getNumCases() == 0)
207 continue;
208 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
209 for (auto Case : SI->cases())
210 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
211 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
212 VPIRMetadata(*Inst), Inst->getDebugLoc());
213 continue;
214 }
215
216 VPSingleDefRecipe *NewR;
217 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
218 // Phi node's operands may not have been visited at this point. We create
219 // an empty VPInstruction that we will fix once the whole plain CFG has
220 // been built.
221 NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi");
222 NewR->setUnderlyingValue(Phi);
223 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
224 // Header phis need to be fixed after the VPBB for the latch has been
225 // created.
226 PhisToFix.push_back(Phi);
227 } else {
228 // Add operands for VPPhi in the order matching its predecessors in
229 // VPlan.
230 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
231 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
232 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
233 getOrCreateVPOperand(Phi->getIncomingValue(I));
234 }
235 for (VPBlockBase *Pred : VPBB->getPredecessors())
236 NewR->addOperand(
237 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
238 }
239 } else {
240 // Build VPIRMetadata from the instruction and add loop versioning
241 // metadata for loads and stores.
242 VPIRMetadata MD(*Inst);
243 if (isa<LoadInst, StoreInst>(Inst) && LVer) {
244 const auto &[AliasScopeMD, NoAliasMD] =
245 LVer->getNoAliasMetadataFor(Inst);
246 if (AliasScopeMD)
247 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
248 if (NoAliasMD)
249 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
250 }
251
252 // Translate LLVM-IR operands into VPValue operands and set them in the
253 // new VPInstruction.
254 SmallVector<VPValue *, 4> VPOperands;
255 for (Value *Op : Inst->operands())
256 VPOperands.push_back(getOrCreateVPOperand(Op));
257
258 if (auto *CI = dyn_cast<CastInst>(Inst)) {
259 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
260 CI->getType(), CI->getDebugLoc(),
261 VPIRFlags(*CI), MD);
262 NewR->setUnderlyingValue(CI);
263 } else {
264 // Build VPInstruction for any arbitrary Instruction without specific
265 // representation in VPlan.
266 NewR =
267 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
268 VPIRFlags(*Inst), MD, Inst->getDebugLoc());
269 }
270 }
271
272 IRDef2VPValue[Inst] = NewR;
273 }
274}
275
276// Main interface to build the plain CFG.
277std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
278 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
279 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
280 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
281 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
282
283 // 1. Scan the body of the loop in a topological order to visit each basic
284 // block after having visited its predecessor basic blocks. Create a VPBB for
285 // each BB and link it to its successor and predecessor VPBBs. Note that
286 // predecessors must be set in the same order as they are in the incomming IR.
287 // Otherwise, there might be problems with existing phi nodes and algorithm
288 // based on predecessors traversal.
289
290 // Loop PH needs to be explicitly visited since it's not taken into account by
291 // LoopBlocksDFS.
292 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
293 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
294 "Unexpected loop preheader");
295 for (auto &I : *ThePreheaderBB) {
296 if (I.getType()->isVoidTy())
297 continue;
298 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
299 }
300
301 LoopBlocksRPO RPO(TheLoop);
302 RPO.perform(LI);
303
304 for (BasicBlock *BB : RPO) {
305 // Create or retrieve the VPBasicBlock for this BB.
306 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
307 // Set VPBB predecessors in the same order as they are in the incoming BB.
308 setVPBBPredsFromBB(VPBB, BB);
309
310 // Create VPInstructions for BB.
311 createVPInstructionsForVPBB(VPBB, BB);
312
313 // Set VPBB successors. We create empty VPBBs for successors if they don't
314 // exist already. Recipes will be created when the successor is visited
315 // during the RPO traversal.
316 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
318 getOrCreateVPBB(SI->getDefaultDest())};
319 for (auto Case : SI->cases())
320 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
321 VPBB->setSuccessors(Succs);
322 continue;
323 }
324 auto *BI = cast<BranchInst>(BB->getTerminator());
325 unsigned NumSuccs = succ_size(BB);
326 if (NumSuccs == 1) {
327 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
328 continue;
329 }
330 assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
331 "block must have conditional branch with 2 successors");
332
333 BasicBlock *IRSucc0 = BI->getSuccessor(0);
334 BasicBlock *IRSucc1 = BI->getSuccessor(1);
335 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
336 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
337 VPBB->setTwoSuccessors(Successor0, Successor1);
338 }
339
340 for (auto *EB : Plan->getExitBlocks())
341 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
342
343 // 2. The whole CFG has been built at this point so all the input Values must
344 // have a VPlan counterpart. Fix VPlan header phi by adding their
345 // corresponding VPlan operands.
346 fixHeaderPhis();
347
348 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
349 Plan->getEntry()->setPlan(&*Plan);
350
351 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
352 // VPIRInstructions wrapping them.
353 // // Note that the operand order corresponds to IR predecessor order, and may
354 // need adjusting when VPlan predecessors are added, if an exit block has
355 // multiple predecessor.
356 for (auto *EB : Plan->getExitBlocks()) {
357 for (VPRecipeBase &R : EB->phis()) {
358 auto *PhiR = cast<VPIRPhi>(&R);
359 PHINode &Phi = PhiR->getIRPhi();
360 assert(PhiR->getNumOperands() == 0 &&
361 "no phi operands should be added yet");
362 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
363 PhiR->addOperand(
364 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
365 }
366 }
367
368 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
369 return std::move(Plan);
370}
371
372/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
373/// has exactly 2 predecessors (preheader and latch), where the block
374/// dominates the latch and the preheader dominates the block. If it is a
375/// header block return true and canonicalize the predecessors of the header
376/// (making sure the preheader appears first and the latch second) and the
377/// successors of the latch (making sure the loop exit comes first). Otherwise
378/// return false.
380 const VPDominatorTree &VPDT) {
381 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
382 if (Preds.size() != 2)
383 return false;
384
385 auto *PreheaderVPBB = Preds[0];
386 auto *LatchVPBB = Preds[1];
387 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
388 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
389 std::swap(PreheaderVPBB, LatchVPBB);
390
391 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
392 !VPDT.dominates(HeaderVPB, LatchVPBB))
393 return false;
394
395 // Canonicalize predecessors of header so that preheader is first and
396 // latch second.
397 HeaderVPB->swapPredecessors();
398 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
399 R.swapOperands();
400 }
401
402 // The two successors of conditional branch match the condition, with the
403 // first successor corresponding to true and the second to false. We
404 // canonicalize the successors of the latch when introducing the region, such
405 // that the latch exits the region when its condition is true; invert the
406 // original condition if the original CFG branches to the header on true.
407 // Note that the exit edge is not yet connected for top-level loops.
408 if (LatchVPBB->getSingleSuccessor() ||
409 LatchVPBB->getSuccessors()[0] != HeaderVPB)
410 return true;
411
412 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
413 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
416 "terminator must be a BranchOnCond");
417 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
418 Not->insertBefore(Term);
419 Term->setOperand(0, Not);
420 LatchVPBB->swapSuccessors();
421
422 return true;
423}
424
425/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
426static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
427 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
428 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
429
430 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
431 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
432
433 // Create an empty region first and insert it between PreheaderVPBB and
434 // the exit blocks, taking care to preserve the original predecessor &
435 // successor order of blocks. Set region entry and exiting after both
436 // HeaderVPB and LatchVPBB have been disconnected from their
437 // predecessors/successors.
438 auto *R = Plan.createLoopRegion();
439
440 // Transfer latch's successors to the region.
442
443 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
444 R->setEntry(HeaderVPB);
445 R->setExiting(LatchVPBB);
446
447 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
448 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
449 VPBB->setParent(R);
450}
451
452// Add the necessary canonical IV and branch recipes required to control the
453// loop.
454static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
455 VPBasicBlock *LatchVPBB, Type *IdxTy,
456 DebugLoc DL) {
457 Value *StartIdx = ConstantInt::get(IdxTy, 0);
458 auto *StartV = Plan.getOrAddLiveIn(StartIdx);
459
460 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
461 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
462 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
463
464 // We are about to replace the branch to exit the region. Remove the original
465 // BranchOnCond, if there is any.
466 DebugLoc LatchDL = DL;
467 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
468 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
469 LatchVPBB->getTerminator()->eraseFromParent();
470 }
471
472 VPBuilder Builder(LatchVPBB);
473 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
474 // Initially the induction increment is guaranteed to not wrap, but that may
475 // change later, e.g. when tail-folding, when the flags need to be dropped.
476 auto *CanonicalIVIncrement = Builder.createOverflowingOp(
477 Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
478 "index.next");
479 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
480
481 // Add the BranchOnCount VPInstruction to the latch.
482 Builder.createNaryOp(VPInstruction::BranchOnCount,
483 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
484 LatchDL);
485}
486
487/// Creates extracts for values in \p Plan defined in a loop region and used
488/// outside a loop region.
489static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
490 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
491 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
492 if (EB->getSinglePredecessor() != MiddleVPBB)
493 continue;
494
495 for (VPRecipeBase &R : EB->phis()) {
496 auto *ExitIRI = cast<VPIRPhi>(&R);
497 for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
498 VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
499 if (!Inc)
500 continue;
501 assert(ExitIRI->getNumOperands() == 1 &&
502 ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
503 "exit values from early exits must be fixed when branch to "
504 "early-exit is added");
505 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(B);
506 }
507 }
508 }
509}
510
511static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
512 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
513 VPDominatorTree VPDT(Plan);
514
515 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
516 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
517 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
518
519 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
520 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
521
522 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
523 // The canonical LatchVPBB has the header block as last successor. If it has
524 // another successor, this successor is an exit block - insert middle block on
525 // its edge. Otherwise, add middle block as another successor retaining header
526 // as last.
527 if (LatchVPBB->getNumSuccessors() == 2) {
528 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
529 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
530 } else {
531 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
532 LatchVPBB->swapSuccessors();
533 }
534
535 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
536
537 // Create SCEV and VPValue for the trip count.
538 // We use the symbolic max backedge-taken-count, which works also when
539 // vectorizing loops with uncountable early exits.
540 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
541 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
542 "Invalid backedge-taken count");
543 ScalarEvolution &SE = *PSE.getSE();
544 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
545 InductionTy, TheLoop);
547
548 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
550
551 // The connection order corresponds to the operands of the conditional branch,
552 // with the middle block already connected to the exit block.
553 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
554 // Also connect the entry block to the scalar preheader.
555 // TODO: Also introduce a branch recipe together with the minimum trip count
556 // check.
557 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
558 Plan.getEntry()->swapSuccessors();
559
560 createExtractsForLiveOuts(Plan, MiddleVPBB);
561
562 VPBuilder ScalarPHBuilder(ScalarPH);
563 for (const auto &[PhiR, ScalarPhiR] : zip_equal(
564 drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
565 auto *VectorPhiR = cast<VPPhi>(&PhiR);
566 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
567 {VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
568 cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
569 }
570}
571
572/// Check \p Plan's live-in and replace them with constants, if they can be
573/// simplified via SCEV.
576 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
577 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
578 if (auto *C = dyn_cast<SCEVConstant>(Expr))
579 return Plan.getOrAddLiveIn(C->getValue());
580 return nullptr;
581 };
582
583 for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
584 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
585 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
586 }
587}
588
589std::unique_ptr<VPlan>
590VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
592 LoopVersioning *LVer) {
593 PlainCFGBuilder Builder(TheLoop, &LI, LVer);
594 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
595 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
596 simplifyLiveInsWithSCEV(*VPlan0, PSE);
597 return VPlan0;
598}
599
600/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
601/// for \p Phi based on \p IndDesc.
602static VPHeaderPHIRecipe *
604 const InductionDescriptor &IndDesc, VPlan &Plan,
605 PredicatedScalarEvolution &PSE, Loop &OrigLoop,
606 DebugLoc DL) {
607 [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
608 assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
609 "step must be loop invariant");
610 assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
611 (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
612 SE.getSCEV(IndDesc.getStartValue()) ==
613 vputils::getSCEVExprForVPValue(Start, PSE))) &&
614 "Start VPValue must match IndDesc's start value");
615
616 VPValue *Step =
618
620 return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
621 IndDesc, DL);
622
625 "must have an integer or float induction at this point");
626
627 // Update wide induction increments to use the same step as the corresponding
628 // wide induction. This enables detecting induction increments directly in
629 // VPlan and removes redundant splats.
630 using namespace llvm::VPlanPatternMatch;
631 if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
632 PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
633
634 // It is always safe to copy over the NoWrap and FastMath flags. In
635 // particular, when folding tail by masking, the masked-off lanes are never
636 // used, so it is safe.
638
639 return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
640 IndDesc, Flags, DL);
641}
642
644 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
647 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
648 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
649 // Retrieve the header manually from the intial plain-CFG VPlan.
650 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
651 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
652 assert(VPDominatorTree(Plan).dominates(HeaderVPBB,
653 HeaderVPBB->getPredecessors()[1]) &&
654 "header must dominate its latch");
655
656 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
657 // TODO: Gradually replace uses of underlying instruction by analyses on
658 // VPlan.
659 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
660 assert(PhiR->getNumOperands() == 2 &&
661 "Must have 2 operands for header phis");
662
663 // Extract common values once.
664 VPIRValue *Start = cast<VPIRValue>(PhiR->getOperand(0));
665 VPValue *BackedgeValue = PhiR->getOperand(1);
666
667 if (FixedOrderRecurrences.contains(Phi)) {
668 // TODO: Currently fixed-order recurrences are modeled as chains of
669 // first-order recurrences. If there are no users of the intermediate
670 // recurrences in the chain, the fixed order recurrence should be
671 // modeled directly, enabling more efficient codegen.
672 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
673 }
674
675 auto InductionIt = Inductions.find(Phi);
676 if (InductionIt != Inductions.end())
677 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
678 Plan, PSE, OrigLoop,
679 PhiR->getDebugLoc());
680
681 assert(Reductions.contains(Phi) && "only reductions are expected now");
682 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
684 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
685 "incoming value must match start value");
686 // Will be updated later to >1 if reduction is partial.
687 unsigned ScaleFactor = 1;
688 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
689 return new VPReductionPHIRecipe(
690 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
691 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
692 ScaleFactor),
694 };
695
696 for (VPRecipeBase &R : make_early_inc_range(HeaderVPBB->phis())) {
698 continue;
699 auto *PhiR = cast<VPPhi>(&R);
700 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
701 HeaderPhiR->insertBefore(PhiR);
702 PhiR->replaceAllUsesWith(HeaderPhiR);
703 PhiR->eraseFromParent();
704 }
705}
706
708 VPlan &Plan, const DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
709 const DenseSet<BasicBlock *> &BlocksNeedingPredication,
710 ElementCount MinVF) {
711 VPTypeAnalysis TypeInfo(Plan);
714
715 for (VPRecipeBase &R : Header->phis()) {
716 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
717 if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
718 continue;
719
720 RecurKind Kind = PhiR->getRecurrenceKind();
721 assert(
724 "AnyOf and FindIV reductions are not allowed for in-loop reductions");
725
726 bool IsFPRecurrence =
728 FastMathFlags FMFs =
729 IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();
730
731 // Collect the chain of "link" recipes for the reduction starting at PhiR.
733 Worklist.insert(PhiR);
734 for (unsigned I = 0; I != Worklist.size(); ++I) {
735 VPSingleDefRecipe *Cur = Worklist[I];
736 for (VPUser *U : Cur->users()) {
737 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
738 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
739 assert((UserRecipe->getParent() == Plan.getMiddleBlock() ||
740 UserRecipe->getParent() == Plan.getScalarPreheader()) &&
741 "U must be either in the loop region, the middle block or the "
742 "scalar preheader.");
743 continue;
744 }
745
746 // Stores using instructions will be sunk later.
748 continue;
749 Worklist.insert(UserRecipe);
750 }
751 }
752
753 // Visit operation "Links" along the reduction chain top-down starting from
754 // the phi until LoopExitValue. We keep track of the previous item
755 // (PreviousLink) to tell which of the two operands of a Link will remain
756 // scalar and which will be reduced. For minmax by select(cmp), Link will be
757 // the select instructions. Blend recipes of in-loop reduction phi's will
758 // get folded to their non-phi operand, as the reduction recipe handles the
759 // condition directly.
760 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
761 for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
762 if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
763 assert(Blend->getNumIncomingValues() == 2 &&
764 "Blend must have 2 incoming values");
765 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
766 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
767 "PhiR must be an operand of the blend");
768 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
769 continue;
770 }
771
772 if (IsFPRecurrence) {
773 FastMathFlags CurFMF =
774 cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlags();
775 if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
776 CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
777 ->getFastMathFlags();
778 FMFs &= CurFMF;
779 }
780
781 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
782
783 // Recognize a call to the llvm.fmuladd intrinsic.
784 bool IsFMulAdd = Kind == RecurKind::FMulAdd;
785 VPValue *VecOp;
786 VPBasicBlock *LinkVPBB = CurrentLink->getParent();
787 if (IsFMulAdd) {
789 "Expected current VPInstruction to be a call to the "
790 "llvm.fmuladd intrinsic");
791 assert(CurrentLink->getOperand(2) == PreviousLink &&
792 "expected a call where the previous link is the added operand");
793
794 // If the instruction is a call to the llvm.fmuladd intrinsic then we
795 // need to create an fmul recipe (multiplying the first two operands of
796 // the fmuladd together) to use as the vector operand for the fadd
797 // reduction.
798 auto *FMulRecipe = new VPInstruction(
799 Instruction::FMul,
800 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
801 CurrentLinkI->getFastMathFlags());
802 LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
803 VecOp = FMulRecipe;
804 } else if (Kind == RecurKind::AddChainWithSubs &&
805 match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
806 Type *PhiTy = TypeInfo.inferScalarType(PhiR);
807 auto *Zero = Plan.getConstantInt(PhiTy, 0);
808 auto *Sub = new VPInstruction(Instruction::Sub,
809 {Zero, CurrentLink->getOperand(1)}, {},
810 {}, CurrentLinkI->getDebugLoc());
811 Sub->setUnderlyingValue(CurrentLinkI);
812 LinkVPBB->insert(Sub, CurrentLink->getIterator());
813 VecOp = Sub;
814 } else {
815 // Index of the first operand which holds a non-mask vector operand.
816 unsigned IndexOfFirstOperand = 0;
818 if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
819 continue;
820 assert(match(CurrentLink,
822 "must be a select recipe");
823 IndexOfFirstOperand = 1;
824 }
825 // Note that for non-commutable operands (cmp-selects), the semantics of
826 // the cmp-select are captured in the recurrence kind.
827 unsigned VecOpId =
828 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
829 ? IndexOfFirstOperand + 1
830 : IndexOfFirstOperand;
831 VecOp = CurrentLink->getOperand(VecOpId);
832 assert(VecOp != PreviousLink &&
833 CurrentLink->getOperand(CurrentLink->getNumOperands() - 1 -
834 (VecOpId - IndexOfFirstOperand)) ==
835 PreviousLink &&
836 "PreviousLink must be the operand other than VecOp");
837 }
838
839 // Get block mask from BlockMaskCache if the block needs predication.
840 VPValue *CondOp = nullptr;
841 if (BlocksNeedingPredication.contains(CurrentLinkI->getParent()))
842 CondOp = BlockMaskCache.lookup(LinkVPBB);
843
844 assert(PhiR->getVFScaleFactor() == 1 &&
845 "inloop reductions must be unscaled");
846 auto *RedRecipe = new VPReductionRecipe(
847 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
848 getReductionStyle(/*IsInLoop=*/true, PhiR->isOrdered(), 1),
849 CurrentLinkI->getDebugLoc());
850 // Append the recipe to the end of the VPBasicBlock because we need to
851 // ensure that it comes after all of it's inputs, including CondOp.
852 // Delete CurrentLink as it will be invalid if its operand is replaced
853 // with a reduction defined at the bottom of the block in the next link.
854 if (LinkVPBB->getNumSuccessors() == 0)
855 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
856 else
857 LinkVPBB->appendRecipe(RedRecipe);
858
859 CurrentLink->replaceAllUsesWith(RedRecipe);
860 ToDelete.push_back(CurrentLink);
861 PreviousLink = RedRecipe;
862 }
863 }
864
865 for (VPRecipeBase *R : ToDelete)
866 R->eraseFromParent();
867}
868
870 bool HasUncountableEarlyExit) {
871 auto *MiddleVPBB = cast<VPBasicBlock>(
873 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
874 VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
875
876 // Disconnect all early exits from the loop leaving it with a single exit from
877 // the latch. Early exits that are countable are left for a scalar epilog. The
878 // condition of uncountable early exits (currently at most one is supported)
879 // is fused into the latch exit, and used to branch from middle block to the
880 // early exit destination.
881 [[maybe_unused]] bool HandledUncountableEarlyExit = false;
882 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
883 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
884 if (Pred == MiddleVPBB)
885 continue;
886 if (HasUncountableEarlyExit) {
887 assert(!HandledUncountableEarlyExit &&
888 "can handle exactly one uncountable early exit");
890 cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
891 HandledUncountableEarlyExit = true;
892 } else {
893 for (VPRecipeBase &R : EB->phis())
894 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
895 }
896 cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
898 }
899 }
900
901 assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
902 "missed an uncountable exit that must be handled");
903}
904
906 bool RequiresScalarEpilogueCheck,
907 bool TailFolded) {
908 auto *MiddleVPBB = cast<VPBasicBlock>(
910 // If MiddleVPBB has a single successor then the original loop does not exit
911 // via the latch and the single successor must be the scalar preheader.
912 // There's no need to add a runtime check to MiddleVPBB.
913 if (MiddleVPBB->getNumSuccessors() == 1) {
914 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
915 "must have ScalarPH as single successor");
916 return;
917 }
918
919 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
920
921 // Add a check in the middle block to see if we have completed all of the
922 // iterations in the first vector loop.
923 //
924 // Three cases:
925 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
926 // condition to false.
927 // 2) If (N - N%VF) == N, then we *don't* need to run the
928 // remainder. Thus if tail is to be folded, we know we don't need to run
929 // the remainder and we can set the condition to true.
930 // 3) Otherwise, construct a runtime check.
931
932 // We use the same DebugLoc as the scalar loop latch terminator instead of
933 // the corresponding compare because they may have ended up with different
934 // line numbers and we want to avoid awkward line stepping while debugging.
935 // E.g., if the compare has got a line number inside the loop.
936 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
937 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
938 VPBuilder Builder(MiddleVPBB);
939 VPValue *Cmp;
940 if (!RequiresScalarEpilogueCheck)
941 Cmp = Plan.getFalse();
942 else if (TailFolded)
943 Cmp = Plan.getTrue();
944 else
945 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
946 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
947 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
948}
949
951 VPDominatorTree VPDT(Plan);
952 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
953 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
954 createLoopRegion(Plan, HeaderVPB);
955
956 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
957 TopRegion->setName("vector loop");
958 TopRegion->getEntryBasicBlock()->setName("vector.body");
959}
960
961// Likelyhood of bypassing the vectorized loop due to a runtime check block,
962// including memory overlap checks block and wrapping/unit-stride checks block.
963static constexpr uint32_t CheckBypassWeights[] = {1, 127};
964
966 BasicBlock *CheckBlock,
967 bool AddBranchWeights) {
968 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
969 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
970 VPBlockBase *VectorPH = Plan.getVectorPreheader();
971 VPBlockBase *ScalarPH = Plan.getScalarPreheader();
972 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
973 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
974 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
975 CheckBlockVPBB->swapSuccessors();
976
977 // We just connected a new block to the scalar preheader. Update all
978 // VPPhis by adding an incoming value for it, replicating the last value.
979 unsigned NumPredecessors = ScalarPH->getNumPredecessors();
980 for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
981 assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
982 assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
983 "must have incoming values for all operands");
984 R.addOperand(R.getOperand(NumPredecessors - 2));
985 }
986
987 VPIRMetadata VPBranchWeights;
988 auto *Term =
989 VPBuilder(CheckBlockVPBB)
992 Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc());
993 if (AddBranchWeights) {
994 MDBuilder MDB(Plan.getContext());
995 MDNode *BranchWeights =
996 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
997 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
998 }
999}
1000
1002 VPlan &Plan, ElementCount VF, unsigned UF,
1003 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
1004 bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
1007 // Generate code to check if the loop's trip count is less than VF * UF, or
1008 // equal to it in case a scalar epilogue is required; this implies that the
1009 // vector trip count is zero. This check also covers the case where adding one
1010 // to the backedge-taken count overflowed leading to an incorrect trip count
1011 // of zero. In this case we will also jump to the scalar loop.
1012 CmpInst::Predicate CmpPred =
1013 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1014 // If tail is to be folded, vector loop takes care of all iterations.
1015 VPValue *TripCountVPV = Plan.getTripCount();
1016 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
1017 Type *TripCountTy = TripCount->getType();
1018 ScalarEvolution &SE = *PSE.getSE();
1019 auto GetMinTripCount = [&]() -> const SCEV * {
1020 // Compute max(MinProfitableTripCount, UF * VF) and return it.
1021 const SCEV *VFxUF =
1022 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
1023 if (UF * VF.getKnownMinValue() >=
1024 MinProfitableTripCount.getKnownMinValue()) {
1025 // TODO: SCEV should be able to simplify test.
1026 return VFxUF;
1027 }
1028 const SCEV *MinProfitableTripCountSCEV =
1029 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
1030 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1031 };
1032
1033 VPBasicBlock *EntryVPBB = Plan.getEntry();
1034 VPBuilder Builder(EntryVPBB);
1035 VPValue *TripCountCheck = Plan.getFalse();
1036 const SCEV *Step = GetMinTripCount();
1037 if (TailFolded) {
1038 if (CheckNeededWithTailFolding) {
1039 // vscale is not necessarily a power-of-2, which means we cannot guarantee
1040 // an overflow to zero when updating induction variables and so an
1041 // additional overflow check is required before entering the vector loop.
1042
1043 // Get the maximum unsigned value for the type.
1044 VPValue *MaxUIntTripCount =
1045 Plan.getConstantInt(cast<IntegerType>(TripCountTy)->getMask());
1046 VPValue *DistanceToMax = Builder.createNaryOp(
1047 Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
1049
1050 // Don't execute the vector loop if (UMax - n) < (VF * UF).
1051 // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
1052 // minProfitableTripCount).
1053 TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
1054 Builder.createExpandSCEV(Step), DL);
1055 } else {
1056 // TripCountCheck = false, folding tail implies positive vector trip
1057 // count.
1058 }
1059 } else {
1060 // TODO: Emit unconditional branch to vector preheader instead of
1061 // conditional branch with known condition.
1062 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
1063 // Check if the trip count is < the step.
1064 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
1065 // TODO: Ensure step is at most the trip count when determining max VF and
1066 // UF, w/o tail folding.
1067 TripCountCheck = Plan.getTrue();
1068 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
1069 TripCount, Step)) {
1070 // Generate the minimum iteration check only if we cannot prove the
1071 // check is known to be true, or known to be false.
1072 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
1073 TripCountCheck = Builder.createICmp(
1074 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
1075 } // else step known to be < trip count, use TripCountCheck preset to false.
1076 }
1077 VPInstruction *Term =
1078 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
1080 MDBuilder MDB(Plan.getContext());
1081 MDNode *BranchWeights = MDB.createBranchWeights(
1082 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
1083 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1084 }
1085}
1086
1088 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
1089 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
1090 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE) {
1091 // Add the minimum iteration check for the epilogue vector loop.
1092 VPValue *TC = Plan.getOrAddLiveIn(TripCount);
1093 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
1094 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
1095 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
1096 VPValue *Count = Builder.createNaryOp(
1097 Instruction::Sub, {TC, Plan.getOrAddLiveIn(VectorTripCount)},
1098 DebugLoc::getUnknown(), "n.vec.remaining");
1099
1100 // Generate code to check if the loop's trip count is less than VF * UF of
1101 // the vector epilogue loop.
1102 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1103 auto *CheckMinIters = Builder.createICmp(
1104 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
1105 VPInstruction *Branch =
1106 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
1107
1108 // We assume the remaining `Count` is equally distributed in
1109 // [0, MainLoopStep)
1110 // So the probability for `Count < EpilogueLoopStep` should be
1111 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
1112 // TODO: Improve the estimate by taking the estimated trip count into
1113 // consideration.
1114 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1115 const uint32_t Weights[] = {EstimatedSkipCount,
1116 MainLoopStep - EstimatedSkipCount};
1117 MDBuilder MDB(Plan.getContext());
1118 MDNode *BranchWeights =
1119 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
1120 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1121}
1122
1123/// If \p V is used by a recipe matching pattern \p P, return it. Otherwise
1124/// return nullptr;
1125template <typename MatchT>
1126static VPRecipeBase *findUserOf(VPValue *V, const MatchT &P) {
1127 auto It = find_if(V->users(), match_fn(P));
1128 return It == V->user_end() ? nullptr : cast<VPRecipeBase>(*It);
1129}
1130
1131/// If \p V is used by a VPInstruction with \p Opcode, return it. Otherwise
1132/// return nullptr.
1133template <unsigned Opcode> static VPInstruction *findUserOf(VPValue *V) {
1135}
1136
1138 auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
1139 auto *MinMaxR =
1140 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
1141 if (!MinMaxR)
1142 return nullptr;
1143
1144 // Check that MinMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1145 // with an intrinsic that matches the reduction kind.
1146 Intrinsic::ID ExpectedIntrinsicID =
1147 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
1148 if (!match(MinMaxR, m_Intrinsic(ExpectedIntrinsicID)))
1149 return nullptr;
1150
1151 if (MinMaxR->getOperand(0) == RedPhiR)
1152 return MinMaxR->getOperand(1);
1153
1154 assert(MinMaxR->getOperand(1) == RedPhiR &&
1155 "Reduction phi operand expected");
1156 return MinMaxR->getOperand(0);
1157 };
1158
1159 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1161 MinMaxNumReductionsToHandle;
1162 bool HasUnsupportedPhi = false;
1163 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
1165 continue;
1166 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1167 if (!Cur) {
1168 // TODO: Also support fixed-order recurrence phis.
1169 HasUnsupportedPhi = true;
1170 continue;
1171 }
1173 Cur->getRecurrenceKind())) {
1174 HasUnsupportedPhi = true;
1175 continue;
1176 }
1177
1178 VPValue *MinMaxOp = GetMinMaxCompareValue(Cur);
1179 if (!MinMaxOp)
1180 return false;
1181
1182 MinMaxNumReductionsToHandle.emplace_back(Cur, MinMaxOp);
1183 }
1184
1185 if (MinMaxNumReductionsToHandle.empty())
1186 return true;
1187
1188 // We won't be able to resume execution in the scalar tail, if there are
1189 // unsupported header phis or there is no scalar tail at all, due to
1190 // tail-folding.
1191 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1192 return false;
1193
1194 /// Check if the vector loop of \p Plan can early exit and restart
1195 /// execution of last vector iteration in the scalar loop. This requires all
1196 /// recipes up to early exit point be side-effect free as they are
1197 /// re-executed. Currently we check that the loop is free of any recipe that
1198 /// may write to memory. Expected to operate on an early VPlan w/o nested
1199 /// regions.
1202 auto *VPBB = cast<VPBasicBlock>(VPB);
1203 for (auto &R : *VPBB) {
1204 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1205 return false;
1206 }
1207 }
1208
1209 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1210 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1211 VPValue *AllNaNLanes = nullptr;
1212 SmallPtrSet<VPValue *, 2> RdxResults;
1213 for (const auto &[_, MinMaxOp] : MinMaxNumReductionsToHandle) {
1214 VPValue *RedNaNLanes =
1215 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
1216 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1217 : RedNaNLanes;
1218 }
1219
1220 VPValue *AnyNaNLane =
1221 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1222 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1223 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
1224 for (const auto &[RedPhiR, _] : MinMaxNumReductionsToHandle) {
1226 RedPhiR->getRecurrenceKind()) &&
1227 "unsupported reduction");
1228
1229 // If we exit early due to NaNs, compute the final reduction result based on
1230 // the reduction phi at the beginning of the last vector iteration.
1231 auto *RdxResult =
1233
1234 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1235 RdxResult->getOperand(1));
1236 RdxResult->setOperand(1, NewSel);
1237 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1238 RdxResults.insert(RdxResult);
1239 }
1240
1241 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1242 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1243 "Unexpected terminator");
1244 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1245 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1246 LatchExitingBranch->getOperand(1));
1247 auto *AnyExitTaken = LatchBuilder.createNaryOp(
1248 Instruction::Or, {AnyNaNLane, IsLatchExitTaken});
1249 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1250 LatchExitingBranch->eraseFromParent();
1251
1252 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
1253 // true, the resume from the start of the last vector iteration via the
1254 // canonical IV, otherwise from the original value.
1255 for (auto &R : Plan.getScalarPreheader()->phis()) {
1256 auto *ResumeR = cast<VPPhi>(&R);
1257 VPValue *VecV = ResumeR->getOperand(0);
1258 if (RdxResults.contains(VecV))
1259 continue;
1260 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1261 if (DerivedIV->getNumUsers() == 1 &&
1262 DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
1263 auto *NewSel =
1264 MiddleBuilder.createSelect(AnyNaNLane, LoopRegion->getCanonicalIV(),
1265 &Plan.getVectorTripCount());
1266 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1267 DerivedIV->setOperand(1, NewSel);
1268 continue;
1269 }
1270 }
1271 // Bail out and abandon the current, partially modified, VPlan if we
1272 // encounter resume phi that cannot be updated yet.
1273 if (VecV != &Plan.getVectorTripCount()) {
1274 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1275 "FMaxNum/FMinNum reduction.\n");
1276 return false;
1277 }
1278 auto *NewSel = MiddleBuilder.createSelect(
1279 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1280 ResumeR->setOperand(0, NewSel);
1281 }
1282
1283 auto *MiddleTerm = MiddleVPBB->getTerminator();
1284 MiddleBuilder.setInsertPoint(MiddleTerm);
1285 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1286 VPValue *NewCond =
1287 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1288 MiddleTerm->setOperand(0, NewCond);
1289 return true;
1290}
1291
1293 for (auto &PhiR : make_early_inc_range(
1295 auto *MinMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
1296 // TODO: check for multi-uses in VPlan directly.
1297 if (!MinMaxPhiR || !MinMaxPhiR->hasUsesOutsideReductionChain())
1298 continue;
1299
1300 // MinMaxPhiR has users outside the reduction cycle in the loop. Check if
1301 // the only other user is a FindLastIV reduction. MinMaxPhiR must have
1302 // exactly 3 users: 1) the min/max operation, the compare of a FindLastIV
1303 // reduction and ComputeReductionResult. The comparisom must compare
1304 // MinMaxPhiR against the min/max operand used for the min/max reduction
1305 // and only be used by the select of the FindLastIV reduction.
1306 RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind();
1307 assert(
1309 "only min/max recurrences support users outside the reduction chain");
1310
1311 auto *MinMaxOp =
1312 dyn_cast<VPRecipeWithIRFlags>(MinMaxPhiR->getBackedgeValue());
1313 if (!MinMaxOp)
1314 return false;
1315
1316 // Check that MinMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1317 // with an intrinsic that matches the reduction kind.
1318 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
1319 if (!match(MinMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
1320 return false;
1321
1322 // MinMaxOp must have 2 users: 1) MinMaxPhiR and 2) ComputeReductionResult
1323 // (asserted below).
1324 assert(MinMaxOp->getNumUsers() == 2 &&
1325 "MinMaxOp must have exactly 2 users");
1326 VPValue *MinMaxOpValue = MinMaxOp->getOperand(0);
1327 if (MinMaxOpValue == MinMaxPhiR)
1328 MinMaxOpValue = MinMaxOp->getOperand(1);
1329
1330 VPValue *CmpOpA;
1331 VPValue *CmpOpB;
1332 CmpPredicate Pred;
1334 MinMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1335 if (!Cmp || Cmp->getNumUsers() != 1 ||
1336 (CmpOpA != MinMaxOpValue && CmpOpB != MinMaxOpValue))
1337 return false;
1338
1339 if (MinMaxOpValue != CmpOpB)
1340 Pred = CmpInst::getSwappedPredicate(Pred);
1341
1342 // MinMaxPhiR must have exactly 3 users:
1343 // * MinMaxOp,
1344 // * Cmp (that's part of a FindLastIV chain),
1345 // * ComputeReductionResult.
1346 if (MinMaxPhiR->getNumUsers() != 3)
1347 return false;
1348
1349 VPInstruction *MinMaxResult =
1351 assert(is_contained(MinMaxPhiR->users(), MinMaxOp) &&
1352 "one user must be MinMaxOp");
1353 assert(MinMaxResult && "MinMaxResult must be a user of MinMaxPhiR");
1354 assert(is_contained(MinMaxOp->users(), MinMaxResult) &&
1355 "MinMaxResult must be a user of MinMaxOp (and of MinMaxPhiR");
1356
1357 // Cmp must be used by the select of a FindLastIV chain.
1358 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
1359 VPValue *IVOp, *FindIV;
1360 if (!Sel || Sel->getNumUsers() != 2 ||
1361 !match(Sel,
1362 m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))))
1363 return false;
1364
1365 if (!isa<VPReductionPHIRecipe>(FindIV)) {
1366 std::swap(FindIV, IVOp);
1367 Pred = CmpInst::getInversePredicate(Pred);
1368 }
1369
1370 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
1372 FindIVPhiR->getRecurrenceKind()))
1373 return false;
1374
1375 // TODO: Support cases where IVOp is the IV increment.
1376 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
1378 return false;
1379
1380 CmpInst::Predicate RdxPredicate = [RdxKind]() {
1381 switch (RdxKind) {
1382 case RecurKind::UMin:
1383 return CmpInst::ICMP_UGE;
1384 case RecurKind::UMax:
1385 return CmpInst::ICMP_ULE;
1386 case RecurKind::SMax:
1387 return CmpInst::ICMP_SLE;
1388 case RecurKind::SMin:
1389 return CmpInst::ICMP_SGE;
1390 default:
1391 llvm_unreachable("unhandled recurrence kind");
1392 }
1393 }();
1394
1395 // TODO: Strict predicates need to find the first IV value for which the
1396 // predicate holds, not the last.
1397 if (Pred != RdxPredicate)
1398 return false;
1399
1400 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
1401 "cannot handle inloop/ordered reductions yet");
1402
1403 // The reduction using MinMaxPhiR needs adjusting to compute the correct
1404 // result:
1405 // 1. We need to find the last IV for which the condition based on the
1406 // min/max recurrence is true,
1407 // 2. Compare the partial min/max reduction result to its final value and,
1408 // 3. Select the lanes of the partial FindLastIV reductions which
1409 // correspond to the lanes matching the min/max reduction result.
1410 //
1411 // For example, this transforms
1412 // vp<%min.result> = compute-reduction-result ir<%min.val>,
1413 // ir<%min.val.next>
1414 // vp<%find.iv.result = compute-find-iv-result ir<%min.idx>, ir<0>,
1415 // SENTINEL, vp<%min.idx.next>
1416 //
1417 // into:
1418 //
1419 // vp<min.result> = compute-reduction-result ir<%min.val>, ir<%min.val.next>
1420 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
1421 // vp<%final.iv> = select vp<%final.min.cmp>, ir<%min.idx.next>, SENTINEL
1422 // vp<%find.iv.result> = compute-find-iv-result ir<%min.idx>, ir<0>,
1423 // SENTINEL, vp<%final.iv>
1424 VPInstruction *FindIVResult =
1426 assert(FindIVResult->getParent() == MinMaxResult->getParent() &&
1427 "both results must be computed in the same block");
1428 MinMaxResult->moveBefore(*FindIVResult->getParent(),
1429 FindIVResult->getIterator());
1430
1431 VPBuilder B(FindIVResult);
1432 VPValue *MinMaxExiting = MinMaxResult->getOperand(1);
1433 auto *FinalMinMaxCmp =
1434 B.createICmp(CmpInst::ICMP_EQ, MinMaxExiting, MinMaxResult);
1435 VPValue *Sentinel = FindIVResult->getOperand(2);
1436 VPValue *LastIVExiting = FindIVResult->getOperand(3);
1437 auto *FinalIVSelect =
1438 B.createSelect(FinalMinMaxCmp, LastIVExiting, Sentinel);
1439 FindIVResult->setOperand(3, FinalIVSelect);
1440 }
1441 return true;
1442}
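For illustration, a scalar loop of the shape this legalization targets might look as follows in C++ (a sketch under assumed naming, not code from this file):

// Returns the last index at which the minimum of A[0..N) occurs: the value
// update is an unsigned-min reduction (RecurKind::UMin), the index update a
// FindLastIV reduction, and the non-strict compare corresponds to the
// RdxPredicate mapping above (UMin -> ICMP_UGE, with the phi as the first
// operand).
static unsigned lastMinIndex(const unsigned *A, unsigned N) {
  unsigned MinVal = ~0u;
  unsigned MinIdx = 0;
  for (unsigned I = 0; I != N; ++I) {
    if (MinVal >= A[I]) { // 'min.phi uge x': ties also update the index.
      MinVal = A[I];
      MinIdx = I;
    }
  }
  return MinIdx;
}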