LLVM 23.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanDominatorTree.h"
19#include "VPlanHelpers.h"
20#include "VPlanPatternMatch.h"
21#include "VPlanTransforms.h"
22#include "VPlanUtils.h"
24#include "llvm/Analysis/Loads.h"
31#include "llvm/IR/InstrTypes.h"
32#include "llvm/IR/MDBuilder.h"
33#include "llvm/Support/Debug.h"
37
38#define DEBUG_TYPE "vplan"
39
40using namespace llvm;
41using namespace LoopVectorizationUtils;
42using namespace VPlanPatternMatch;
43
44namespace {
45// Class that is used to build the plain CFG for the incoming IR.
46class PlainCFGBuilder {
47 // The outermost loop of the input loop nest considered for vectorization.
48 Loop *TheLoop;
49
50 // Loop Info analysis.
51 LoopInfo *LI;
52
53 // Loop versioning for alias metadata.
54 LoopVersioning *LVer;
55
56 // Vectorization plan that we are working on.
57 std::unique_ptr<VPlan> Plan;
58
59 // Builder of the VPlan instruction-level representation.
60 VPBuilder VPIRBuilder;
61
62 // NOTE: The following maps are intentionally destroyed after the plain CFG
63 // construction because subsequent VPlan-to-VPlan transformation may
64 // invalidate them.
65 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
67 // Map incoming Value definitions to their newly-created VPValues.
68 DenseMap<Value *, VPValue *> IRDef2VPValue;
69
70 // Hold phi node's that need to be fixed once the plain CFG has been built.
72
73 // Utility functions.
74 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
75 void fixHeaderPhis();
76 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
77#ifndef NDEBUG
78 bool isExternalDef(Value *Val);
79#endif
80 VPValue *getOrCreateVPOperand(Value *IRVal);
81 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
82
83public:
84 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer, Type *IdxTy)
85 : TheLoop(Lp), LI(LI), LVer(LVer),
86 Plan(std::make_unique<VPlan>(Lp, IdxTy)) {}
87
88 /// Build plain CFG for TheLoop and connect it to Plan's entry.
89 std::unique_ptr<VPlan> buildPlainCFG();
90};
91} // anonymous namespace
92
93// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
94// must have no predecessors.
95void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
96 // Collect VPBB predecessors.
98 for (BasicBlock *Pred : predecessors(BB))
99 VPBBPreds.push_back(getOrCreateVPBB(Pred));
100 VPBB->setPredecessors(VPBBPreds);
101}
102
103static bool isHeaderBB(BasicBlock *BB, Loop *L) {
104 return L && BB == L->getHeader();
105}
106
107// Add operands to VPInstructions representing phi nodes from the input IR.
108void PlainCFGBuilder::fixHeaderPhis() {
109 for (auto *Phi : PhisToFix) {
110 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
111 VPValue *VPVal = IRDef2VPValue[Phi];
112 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
113 auto *PhiR = cast<VPPhi>(VPVal);
114 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
115 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
116 "Expected Phi in header block.");
117 assert(Phi->getNumOperands() == 2 &&
118 "header phi must have exactly 2 operands");
119 for (BasicBlock *Pred : predecessors(Phi->getParent()))
120 PhiR->addIncoming(
121 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
122 }
123}
124
125// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
126// existing one if it was already created.
127VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
128 if (auto *VPBB = BB2VPBB.lookup(BB)) {
129 // Retrieve existing VPBB.
130 return VPBB;
131 }
132
133 // Create new VPBB.
134 StringRef Name = BB->getName();
135 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
136 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
137 BB2VPBB[BB] = VPBB;
138 return VPBB;
139}
140
141#ifndef NDEBUG
142// Return true if \p Val is considered an external definition. An external
143// definition is either:
144// 1. A Value that is not an Instruction. This will be refined in the future.
145// 2. An Instruction that is outside of the IR region represented in VPlan,
146// i.e., is not part of the loop nest.
147bool PlainCFGBuilder::isExternalDef(Value *Val) {
148 // All the Values that are not Instructions are considered external
149 // definitions for now.
151 if (!Inst)
152 return true;
153
154 // Check whether Instruction definition is in loop body.
155 return !TheLoop->contains(Inst);
156}
157#endif
158
159// Create a new VPValue or retrieve an existing one for the Instruction's
160// operand \p IRVal. This function must only be used to create/retrieve VPValues
161// for *Instruction's operands* and not to create regular VPInstruction's. For
162// the latter, please, look at 'createVPInstructionsForVPBB'.
163VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
164 auto VPValIt = IRDef2VPValue.find(IRVal);
165 if (VPValIt != IRDef2VPValue.end())
166 // Operand has an associated VPInstruction or VPValue that was previously
167 // created.
168 return VPValIt->second;
169
170 // Operand doesn't have a previously created VPInstruction/VPValue. This
171 // means that operand is:
172 // A) a definition external to VPlan,
173 // B) any other Value without specific representation in VPlan.
174 // For now, we use VPValue to represent A and B and classify both as external
175 // definitions. We may introduce specific VPValue subclasses for them in the
176 // future.
177 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
178
179 // A and B: Create VPValue and add it to the pool of external definitions and
180 // to the Value->VPValue map.
181 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
182 IRDef2VPValue[IRVal] = NewVPVal;
183 return NewVPVal;
184}
185
186// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
187// counterpart. This function must be invoked in RPO so that the operands of a
188// VPInstruction in \p BB have been visited before (except for Phi nodes).
189void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
190 BasicBlock *BB) {
191 VPIRBuilder.setInsertPoint(VPBB);
192 // TODO: Model and preserve debug intrinsics in VPlan.
193 for (Instruction &InstRef : *BB) {
194 Instruction *Inst = &InstRef;
195
196 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
197 // visited Inst when we shouldn't, breaking the RPO traversal order.
198 assert(!IRDef2VPValue.count(Inst) &&
199 "Instruction shouldn't have been visited.");
200
201 if (isa<UncondBrInst>(Inst))
202 // Skip the rest of the Instruction processing for Branch instructions.
203 continue;
204
205 if (auto *Br = dyn_cast<CondBrInst>(Inst)) {
206 // Conditional branch instruction are represented using BranchOnCond
207 // recipes.
208 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
209 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
210 VPIRMetadata(*Inst), Inst->getDebugLoc());
211 continue;
212 }
213
214 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
215 // Don't emit recipes for unconditional switch instructions.
216 if (SI->getNumCases() == 0)
217 continue;
218 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
219 for (auto Case : SI->cases())
220 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
221 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
222 VPIRMetadata(*Inst), Inst->getDebugLoc());
223 continue;
224 }
225
226 VPSingleDefRecipe *NewR;
227 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
228 // Phi node's operands may not have been visited at this point. We create
229 // an empty VPInstruction that we will fix once the whole plain CFG has
230 // been built.
231 NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi",
232 *Phi, Phi->getType());
233 NewR->setUnderlyingValue(Phi);
234 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
235 // Header phis need to be fixed after the VPBB for the latch has been
236 // created.
237 PhisToFix.push_back(Phi);
238 } else {
239 // Add operands for VPPhi in the order matching its predecessors in
240 // VPlan.
241 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
242 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
243 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
244 getOrCreateVPOperand(Phi->getIncomingValue(I));
245 }
246 for (VPBlockBase *Pred : VPBB->getPredecessors())
247 cast<VPPhi>(NewR)->addIncoming(
248 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
249 }
250 } else {
251 // Build VPIRMetadata from the instruction and add loop versioning
252 // metadata for loads and stores.
253 VPIRMetadata MD(*Inst);
254 if (isa<LoadInst, StoreInst>(Inst) && LVer) {
255 const auto &[AliasScopeMD, NoAliasMD] =
256 LVer->getNoAliasMetadataFor(Inst);
257 if (AliasScopeMD)
258 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
259 if (NoAliasMD)
260 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
261 }
262
263 // Translate LLVM-IR operands into VPValue operands and set them in the
264 // new VPInstruction.
265 SmallVector<VPValue *, 4> VPOperands;
266 for (Value *Op : Inst->operands())
267 VPOperands.push_back(getOrCreateVPOperand(Op));
268
269 if (auto *CI = dyn_cast<CastInst>(Inst)) {
270 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
271 CI->getType(), CI->getDebugLoc(),
272 VPIRFlags(*CI), MD);
273 NewR->setUnderlyingValue(CI);
274 } else if (auto *LI = dyn_cast<LoadInst>(Inst)) {
275 NewR = VPIRBuilder.createScalarLoad(LI->getType(), VPOperands[0],
276 LI->getDebugLoc(), MD);
277 NewR->setUnderlyingValue(LI);
278 } else {
279 // Build VPInstruction for any arbitrary Instruction without specific
280 // representation in VPlan.
281 NewR = VPIRBuilder.createNaryOp(
282 Inst->getOpcode(), VPOperands, Inst, VPIRFlags(*Inst), MD,
283 Inst->getDebugLoc(), "", Inst->getType());
284 }
285 }
286
287 IRDef2VPValue[Inst] = NewR;
288 }
289}
290
291// Main interface to build the plain CFG.
292std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
293 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
294 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
295 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
296 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
297
298 // 1. Scan the body of the loop in a topological order to visit each basic
299 // block after having visited its predecessor basic blocks. Create a VPBB for
300 // each BB and link it to its successor and predecessor VPBBs. Note that
301 // predecessors must be set in the same order as they are in the incomming IR.
302 // Otherwise, there might be problems with existing phi nodes and algorithm
303 // based on predecessors traversal.
304
305 // Loop PH needs to be explicitly visited since it's not taken into account by
306 // LoopBlocksDFS.
307 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
308 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
309 "Unexpected loop preheader");
310 for (auto &I : *ThePreheaderBB) {
311 if (I.getType()->isVoidTy())
312 continue;
313 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
314 }
315
316 LoopBlocksRPO RPO(TheLoop);
317 RPO.perform(LI);
318
319 for (BasicBlock *BB : RPO) {
320 // Create or retrieve the VPBasicBlock for this BB.
321 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
322 // Set VPBB predecessors in the same order as they are in the incoming BB.
323 setVPBBPredsFromBB(VPBB, BB);
324
325 // Create VPInstructions for BB.
326 createVPInstructionsForVPBB(VPBB, BB);
327
328 // Set VPBB successors. We create empty VPBBs for successors if they don't
329 // exist already. Recipes will be created when the successor is visited
330 // during the RPO traversal.
331 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
333 getOrCreateVPBB(SI->getDefaultDest())};
334 for (auto Case : SI->cases())
335 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
336 VPBB->setSuccessors(Succs);
337 continue;
338 }
339 if (auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator())) {
340 VPBB->setOneSuccessor(getOrCreateVPBB(BI->getSuccessor()));
341 continue;
342 }
343 auto *BI = cast<CondBrInst>(BB->getTerminator());
344 BasicBlock *IRSucc0 = BI->getSuccessor(0);
345 BasicBlock *IRSucc1 = BI->getSuccessor(1);
346 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
347 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
348 VPBB->setTwoSuccessors(Successor0, Successor1);
349 }
350
351 for (auto *EB : Plan->getExitBlocks())
352 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
353
354 // 2. The whole CFG has been built at this point so all the input Values must
355 // have a VPlan counterpart. Fix VPlan header phi by adding their
356 // corresponding VPlan operands.
357 fixHeaderPhis();
358
359 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
360 Plan->getEntry()->setPlan(&*Plan);
361
362 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
363 // VPIRInstructions wrapping them.
364 // // Note that the operand order corresponds to IR predecessor order, and may
365 // need adjusting when VPlan predecessors are added, if an exit block has
366 // multiple predecessor.
367 for (auto *EB : Plan->getExitBlocks()) {
368 for (VPRecipeBase &R : EB->phis()) {
369 auto *PhiR = cast<VPIRPhi>(&R);
370 PHINode &Phi = PhiR->getIRPhi();
371 assert(PhiR->getNumOperands() == 0 &&
372 "no phi operands should be added yet");
373 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
374 PhiR->addIncoming(
375 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
376 }
377 }
378
379 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
380 return std::move(Plan);
381}
382
383/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
384/// has exactly 2 predecessors (preheader and latch), where the block
385/// dominates the latch and the preheader dominates the block. If it is a
386/// header block return true and canonicalize the predecessors of the header
387/// (making sure the preheader appears first and the latch second) and the
388/// successors of the latch (making sure the loop exit comes first). Otherwise
389/// return false.
391 const VPDominatorTree &VPDT) {
392 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
393 if (Preds.size() != 2)
394 return false;
395
396 auto *PreheaderVPBB = Preds[0];
397 auto *LatchVPBB = Preds[1];
398 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
399 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
400 std::swap(PreheaderVPBB, LatchVPBB);
401
402 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
403 !VPDT.dominates(HeaderVPB, LatchVPBB))
404 return false;
405
406 // Canonicalize predecessors of header so that preheader is first and
407 // latch second.
408 HeaderVPB->swapPredecessors();
409 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
410 R.swapOperands();
411 }
412
413 // The two successors of conditional branch match the condition, with the
414 // first successor corresponding to true and the second to false. We
415 // canonicalize the successors of the latch when introducing the region, such
416 // that the latch exits the region when its condition is true; invert the
417 // original condition if the original CFG branches to the header on true.
418 // Note that the exit edge is not yet connected for top-level loops.
419 if (LatchVPBB->getSingleSuccessor() ||
420 LatchVPBB->getSuccessors()[0] != HeaderVPB)
421 return true;
422
423 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
424 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
427 "terminator must be a BranchOnCond");
428 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
429 Not->insertBefore(Term);
430 Term->setOperand(0, Not);
431 LatchVPBB->swapSuccessors();
432
433 return true;
434}
435
436/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB. For the
437/// outermost loop adjust the regions exiting terminator to be based on the
438/// canonical IV.
439static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB, DebugLoc DL) {
440 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
441 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPB->getPredecessors()[1]);
442 auto *OutermostHeaderVPBB =
444
445 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
446 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
447
448 // Create an empty region first and insert it between PreheaderVPBB and
449 // the exit blocks, taking care to preserve the original predecessor &
450 // successor order of blocks. Set region entry and exiting after both
451 // HeaderVPB and LatchVPBB have been disconnected from their
452 // predecessors/successors. Only the outermost loop has a canonical IV. Nested
453 // loops are assigned a canonical IV of null type and unknown debug location.
454 bool IsOutermost = HeaderVPB == OutermostHeaderVPBB;
455 Type *CanIVTy = nullptr;
456 if (IsOutermost)
457 CanIVTy = Plan.getVectorTripCount().getType();
458 else
460 auto *R = Plan.createLoopRegion(CanIVTy, DL);
461
462 // Transfer latch's successors to the region.
464
465 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
466 R->setEntry(HeaderVPB);
467 R->setExiting(LatchVPBB);
468
469 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
470 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
471 VPBB->setParent(R);
472
473 if (!IsOutermost)
474 return;
475
476 auto *LatchTerm = LatchVPBB->getTerminator();
477 VPBuilder Builder(LatchTerm);
478 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
479 // Initially the induction increment is guaranteed to not wrap, but that may
480 // change later, e.g. when tail-folding, when the flags need to be dropped.
481 auto *CanonicalIVIncrement = Builder.createAdd(
482 R->getCanonicalIV(), &Plan.getVFxUF(), DL, "index.next", {true, false});
483
484 if (match(LatchTerm, m_BranchOnTwoConds())) {
485 auto *IsLatchExitTaken = Builder.createICmp(
486 CmpInst::ICMP_EQ, CanonicalIVIncrement, &Plan.getVectorTripCount());
487 LatchTerm->setOperand(1, IsLatchExitTaken);
488 } else {
489 // We are replacing the branch to exit the region. Remove the original
490 // BranchOnCond.
491 assert(match(LatchTerm, m_BranchOnCond()) && "Unexpected terminator");
492 DebugLoc LatchDL = LatchTerm->getDebugLoc();
493 Builder.createNaryOp(VPInstruction::BranchOnCount,
494 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
495 LatchDL);
496 LatchTerm->eraseFromParent();
497 }
498}
499
500/// Creates extracts for values in \p Plan defined in a loop region and used
501/// outside a loop region.
502static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
503 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
504 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
505 if (!is_contained(EB->predecessors(), MiddleVPBB))
506 continue;
507
508 for (VPRecipeBase &R : EB->phis()) {
509 auto *ExitIRI = cast<VPIRPhi>(&R);
510 VPValue *Exiting = ExitIRI->getIncomingValueForBlock(MiddleVPBB);
511 if (isa<VPIRValue>(Exiting))
512 continue;
513 Exiting = B.createNaryOp(VPInstruction::ExtractLastPart, Exiting);
514 Exiting = B.createNaryOp(VPInstruction::ExtractLastLane, Exiting);
515 ExitIRI->setIncomingValueForBlock(MiddleVPBB, Exiting);
516 }
517 }
518}
519
520static void addInitialSkeleton(VPlan &Plan, Type *InductionTy,
521 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
522 VPDominatorTree VPDT(Plan);
523
524 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
525 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
526 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
527
528 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
529 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
530
531 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
532 // The canonical LatchVPBB has the header block as last successor. If it has
533 // another successor, this successor is an exit block - insert middle block on
534 // its edge. Otherwise, add middle block as another successor retaining header
535 // as last. In the latter case, the latch has no conditional terminator yet,
536 // so insert a placeholder BranchOnCond that always continues to the header.
537 // It will be canonicalized to a BranchOnCount later
538 if (LatchVPBB->getNumSuccessors() == 2) {
539 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
540 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
541 } else {
542 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
543 LatchVPBB->swapSuccessors();
545 {Plan.getFalse()});
546 }
547
548 // Create SCEV and VPValue for the trip count.
549 // We use the symbolic max backedge-taken-count, which works also when
550 // vectorizing loops with uncountable early exits.
551 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
552 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
553 "Invalid backedge-taken count");
554 ScalarEvolution &SE = *PSE.getSE();
555 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
556 InductionTy, TheLoop);
558
559 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
561
562 // The connection order corresponds to the operands of the conditional branch,
563 // with the middle block already connected to the exit block.
564 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
565 // Also connect the entry block to the scalar preheader.
566 // TODO: Also introduce a branch recipe together with the minimum trip count
567 // check.
568 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
569 Plan.getEntry()->swapSuccessors();
570
571 createExtractsForLiveOuts(Plan, MiddleVPBB);
572
573 // Create resume phis in the scalar preheader for each phi in the scalar loop.
574 // Their incoming value from the vector loop will be the last lane of the
575 // corresponding vector loop header phi.
576 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
577 VPBuilder ScalarPHBuilder(ScalarPH);
578 assert(equal(ScalarPH->getPredecessors(),
579 ArrayRef<VPBlockBase *>({MiddleVPBB, Plan.getEntry()})) &&
580 "unexpected predecessor order of scalar ph");
581 for (const auto &[PhiR, ScalarPhiR] :
582 zip_equal(HeaderVPBB->phis(), Plan.getScalarHeader()->phis())) {
583 auto *VectorPhiR = cast<VPPhi>(&PhiR);
584 VPValue *BackedgeVal = VectorPhiR->getOperand(1);
585 VPValue *ResumeFromVectorLoop =
586 MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, BackedgeVal);
587 ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
588 VPInstruction::ExtractLastLane, ResumeFromVectorLoop);
589 // Create scalar resume phi, with the first operand being the incoming value
590 // from the middle block and the second operand coming from the entry block.
591 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
592 {ResumeFromVectorLoop, VectorPhiR->getOperand(0)},
593 VectorPhiR->getDebugLoc());
594 cast<VPIRPhi>(&ScalarPhiR)->addIncoming(ResumePhiR);
595 }
596}
597
598/// Check \p Plan's live-in and replace them with constants, if they can be
599/// simplified via SCEV.
602 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
603 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
604 if (auto *C = dyn_cast<SCEVConstant>(Expr))
605 return Plan.getOrAddLiveIn(C->getValue());
606 return nullptr;
607 };
608
609 for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
610 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
611 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
612 }
613}
614
615/// To make RUN_VPLAN_PASS print initial VPlan.
617
618std::unique_ptr<VPlan>
619VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
621 LoopVersioning *LVer) {
622 PlainCFGBuilder Builder(TheLoop, &LI, LVer, InductionTy);
623 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
624 addInitialSkeleton(*VPlan0, InductionTy, PSE, TheLoop);
625 simplifyLiveInsWithSCEV(*VPlan0, PSE);
626
628 return VPlan0;
629}
630
631/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
632/// for \p Phi based on \p IndDesc.
633static VPHeaderPHIRecipe *
635 const InductionDescriptor &IndDesc, VPlan &Plan,
636 PredicatedScalarEvolution &PSE, Loop &OrigLoop,
637 DebugLoc DL) {
638 [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
639 assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
640 "step must be loop invariant");
641 assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
642 (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
643 PSE.getSCEV(IndDesc.getStartValue()) ==
644 vputils::getSCEVExprForVPValue(Start, PSE))) &&
645 "Start VPValue must match IndDesc's start value");
646
647 VPValue *Step =
649
650 VPValue *BackedgeVal = PhiR->getOperand(1);
651 // Replace live-out extracts of WideIV's backedge value by ExitingIVValue
652 // recipes. optimizeInductionLiveOutUsers will later compute the proper
653 // DerivedIV.
654 auto ReplaceExtractsWithExitingIVValue = [&](VPHeaderPHIRecipe *WideIV) {
655 for (VPUser *U : to_vector(BackedgeVal->users())) {
657 continue;
658 auto *ExtractLastPart = cast<VPInstruction>(U);
659 VPUser *ExtractLastPartUser = ExtractLastPart->getSingleUser();
660 assert(ExtractLastPartUser && "must have a single user");
661 if (!match(ExtractLastPartUser, m_ExtractLastLane(m_VPValue())))
662 continue;
663 auto *ExtractLastLane = cast<VPInstruction>(ExtractLastPartUser);
664 assert(is_contained(ExtractLastLane->getParent()->successors(),
665 Plan.getScalarPreheader()) &&
666 "last lane must be extracted in the middle block");
667 VPBuilder Builder(ExtractLastLane);
668 ExtractLastLane->replaceAllUsesWith(
669 Builder.createNaryOp(VPInstruction::ExitingIVValue, {WideIV}));
670 ExtractLastLane->eraseFromParent();
671 ExtractLastPart->eraseFromParent();
672 }
673 };
674
676 auto *WideIV = new VPWidenPointerInductionRecipe(
677 Phi, Start, Step, &Plan.getVFxUF(), IndDesc, DL);
678 ReplaceExtractsWithExitingIVValue(WideIV);
679 return WideIV;
680 }
681
684 "must have an integer or float induction at this point");
685
686 // Update wide induction increments to use the same step as the corresponding
687 // wide induction. This enables detecting induction increments directly in
688 // VPlan and removes redundant splats.
689 if (match(BackedgeVal, m_Add(m_Specific(PhiR), m_VPValue())))
690 BackedgeVal->getDefiningRecipe()->setOperand(1, Step);
691
692 // It is always safe to copy over the NoWrap and FastMath flags. In
693 // particular, when folding tail by masking, the masked-off lanes are never
694 // used, so it is safe.
696
697 auto *WideIV = new VPWidenIntOrFpInductionRecipe(
698 Phi, Start, Step, &Plan.getVF(), IndDesc, Flags, DL);
699
700 ReplaceExtractsWithExitingIVValue(WideIV);
701 return WideIV;
702}
703
704/// Try to sink users of \p FOR after \p Previous. \returns true if sinking
705/// succeeded or was not necessary, and false otherwise.
706static bool
708 VPRecipeBase *Previous,
709 const VPDominatorTree &VPDT) {
710 // Collect recipes that need sinking.
713 Seen.insert(Previous);
714 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
715 // The previous value must not depend on the users of the recurrence phi.
716 // In that case, FOR is not a fixed order recurrence.
717 if (SinkCandidate == Previous)
718 return false;
719
720 if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
721 !Seen.insert(SinkCandidate).second ||
722 VPDT.properlyDominates(Previous, SinkCandidate))
723 return true;
724
725 if (vputils::cannotHoistOrSinkRecipe(*SinkCandidate, /*Sinking=*/true))
726 return false;
727
728 WorkList.push_back(SinkCandidate);
729 return true;
730 };
731
732 // Recursively sink users of FOR after Previous.
733 WorkList.push_back(FOR);
734 for (unsigned I = 0; I != WorkList.size(); ++I) {
735 VPRecipeBase *Current = WorkList[I];
736 assert(Current->getNumDefinedValues() == 1 &&
737 "only recipes with a single defined value expected");
738
739 for (VPUser *User : Current->getVPSingleValue()->users()) {
740 if (!TryToPushSinkCandidate(cast<VPRecipeBase>(User)))
741 return false;
742 }
743 }
744
745 // Keep recipes to sink ordered by dominance so earlier instructions are
746 // processed first.
747 sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
748 return VPDT.properlyDominates(A, B);
749 });
750
751 for (VPRecipeBase *SinkCandidate : WorkList) {
752 if (SinkCandidate == FOR)
753 continue;
754
755 SinkCandidate->moveAfter(Previous);
756 Previous = SinkCandidate;
757 }
758 return true;
759}
760
761/// Try to hoist \p Previous and its operands before all users of \p FOR.
762/// \returns true if hoisting succeeded or was not necessary, and false
763/// otherwise.
765 VPRecipeBase *Previous,
766 const VPDominatorTree &VPDT) {
768 return false;
769
770 // Collect recipes that need hoisting.
771 SmallVector<VPRecipeBase *> HoistCandidates;
773 // Find the closest hoist point by looking at all users of FOR and selecting
774 // the recipe dominating all other users.
775 VPRecipeBase *HoistPoint = nullptr;
776 for (VPUser *U : FOR->users()) {
777 auto *R = cast<VPRecipeBase>(U);
778 if (!HoistPoint || VPDT.properlyDominates(R, HoistPoint))
779 HoistPoint = R;
780 }
781 assert(all_of(FOR->users(),
782 [&VPDT, HoistPoint](VPUser *U) {
783 auto *R = cast<VPRecipeBase>(U);
784 return HoistPoint == R ||
785 VPDT.properlyDominates(HoistPoint, R);
786 }) &&
787 "HoistPoint must dominate all users of FOR");
788
789 auto NeedsHoisting = [HoistPoint, &VPDT,
790 &Visited](VPValue *HoistCandidateV) -> VPRecipeBase * {
791 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
792 if (!HoistCandidate)
793 return nullptr;
794 // Hoist candidate was already visited, no need to hoist.
795 if (!Visited.insert(HoistCandidate).second)
796 return nullptr;
797 // If we reached a recipe that dominates HoistPoint, we don't need to
798 // hoist the recipe.
799 if (VPDT.properlyDominates(HoistCandidate, HoistPoint))
800 return nullptr;
801 return HoistCandidate;
802 };
803
804 if (!NeedsHoisting(Previous->getVPSingleValue()))
805 return true;
806
807 // Recursively try to hoist Previous and its operands before all users of
808 // FOR.
809 HoistCandidates.push_back(Previous);
810
811 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
812 VPRecipeBase *Current = HoistCandidates[I];
813 assert(Current->getNumDefinedValues() == 1 &&
814 "only recipes with a single defined value expected");
816 return false;
817
818 for (VPValue *Op : Current->operands()) {
819 // If we reach FOR, it means the original Previous depends on some other
820 // recurrence that in turn depends on FOR. If that is the case, we would
821 // also need to hoist recipes involving the other FOR, which may break
822 // dependencies.
823 if (Op == FOR)
824 return false;
825
826 if (auto *R = NeedsHoisting(Op)) {
827 // Bail out if the recipe defines multiple values.
828 // TODO: Hoisting such recipes requires additional handling.
829 if (R->getNumDefinedValues() != 1)
830 return false;
831 HoistCandidates.push_back(R);
832 }
833 }
834 }
835
836 // Order recipes to hoist by dominance so earlier instructions are processed
837 // first.
838 sort(HoistCandidates, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
839 return VPDT.properlyDominates(A, B);
840 });
841
842 for (VPRecipeBase *HoistCandidate : HoistCandidates) {
843 HoistCandidate->moveBefore(*HoistPoint->getParent(),
844 HoistPoint->getIterator());
845 }
846
847 return true;
848}
849
850/// Sink users of fixed-order recurrences past or hoist before the recipe
851/// defining the previous value, introduce FirstOrderRecurrenceSplice
852/// VPInstructions, and replace FOR uses. Returns false if hoisting or sinking
853/// fails.
855 const VPDominatorTree &VPDT) {
856 auto FORs =
859 [](VPRecipeBase &R) {
860 return cast<VPFirstOrderRecurrencePHIRecipe>(&R);
861 });
862 for (VPFirstOrderRecurrencePHIRecipe *FOR : FORs) {
863 // Follow through FOR phi chains to find the actual Previous recipe.
864 // Fixed-order recurrences do not contain cycles, so this loop is
865 // guaranteed to terminate.
867 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
868 while (auto *PrevPhi =
870 assert(PrevPhi->getParent() == FOR->getParent() &&
871 "PrevPhi must be in same block as FOR");
872 assert(SeenPhis.insert(PrevPhi).second &&
873 "PrevPhi must not be visited multiple times");
874 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
875 }
876
877 VPBasicBlock *InsertBlock = FOR->getParent();
878 VPBasicBlock::iterator InsertPt = InsertBlock->getFirstNonPhi();
879 if (Previous) {
880 // Sink FOR users after Previous or hoist Previous before FOR users.
881 if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT) &&
882 !hoistPreviousBeforeFORUsers(FOR, Previous, VPDT))
883 return false;
884 InsertBlock = Previous->getParent();
885 InsertPt = isa<VPHeaderPHIRecipe>(Previous)
886 ? InsertBlock->getFirstNonPhi()
887 : std::next(Previous->getIterator());
888 }
889
890 // Create FirstOrderRecurrenceSplice and replace FOR uses.
891 VPBuilder LoopBuilder(InsertBlock, InsertPt);
892 auto *RecurSplice =
894 {FOR, FOR->getBackedgeValue()});
895 FOR->replaceUsesWithIf(RecurSplice, [RecurSplice](VPUser &U, unsigned) {
896 return &U != RecurSplice;
897 });
898 }
899
900 return true;
901}
902
904 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
905 const VPDominatorTree &VPDT,
908 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
909 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
910 // Retrieve the header manually from the intial plain-CFG VPlan.
911 auto [HeaderVPBB, LatchVPBB] = VPBlockUtils::getPlainCFGHeaderAndLatch(Plan);
912 assert(VPDT.dominates(HeaderVPBB, LatchVPBB) &&
913 "header must dominate its latch");
914
915 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
916 // TODO: Gradually replace uses of underlying instruction by analyses on
917 // VPlan.
918 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
919 assert(PhiR->getNumOperands() == 2 &&
920 "Must have 2 operands for header phis");
921
922 // Extract common values once.
923 VPIRValue *Start = cast<VPIRValue>(PhiR->getOperand(0));
924 VPValue *BackedgeValue = PhiR->getOperand(1);
925
926 if (FixedOrderRecurrences.contains(Phi)) {
927 // TODO: Currently fixed-order recurrences are modeled as chains of
928 // first-order recurrences. If there are no users of the intermediate
929 // recurrences in the chain, the fixed order recurrence should be
930 // modeled directly, enabling more efficient codegen.
931 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
932 }
933
934 auto InductionIt = Inductions.find(Phi);
935 if (InductionIt != Inductions.end())
936 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
937 Plan, PSE, OrigLoop,
938 PhiR->getDebugLoc());
939
940 assert(Reductions.contains(Phi) && "only reductions are expected now");
941 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
943 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
944 "incoming value must match start value");
945 // Will be updated later to >1 if reduction is partial.
946 unsigned ScaleFactor = 1;
947 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
948 return new VPReductionPHIRecipe(
949 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
950 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
951 ScaleFactor),
952 Phi->getType()->isFloatingPointTy() ? RdxDesc.getFastMathFlags()
953 : VPIRFlags(),
955 };
956
957 for (VPRecipeBase &R : make_early_inc_range(HeaderVPBB->phis())) {
958 auto *PhiR = cast<VPPhi>(&R);
959 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
960 HeaderPhiR->insertBefore(PhiR);
961 PhiR->replaceAllUsesWith(HeaderPhiR);
962 PhiR->eraseFromParent();
963 }
964
965 if (!tryToSinkOrHoistRecurrenceUsers(HeaderVPBB, VPDT))
966 return false;
967
968 // Skip renaming resume phi recipes, if any header phi has been removed.
969 if (range_size(HeaderVPBB->phis()) !=
971 return true;
972 for (const auto &[HeaderPhiR, ScalarPhiR] :
973 zip_equal(HeaderVPBB->phis(), Plan.getScalarPreheader()->phis())) {
974 auto *ResumePhiR = cast<VPPhi>(&ScalarPhiR);
975 if (isa<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhiR)) {
976 ResumePhiR->setName("scalar.recur.init");
977 auto *ExtractLastLane = cast<VPInstruction>(ResumePhiR->getOperand(0));
978 ExtractLastLane->setName("vector.recur.extract");
979 continue;
980 }
981 ResumePhiR->setName(isa<VPWidenInductionRecipe>(HeaderPhiR)
982 ? "bc.resume.val"
983 : "bc.merge.rdx");
984 }
985 return true;
986}
987
990 bool OptForSize,
991 unsigned SCEVCheckThreshold,
993 Loop *TheLoop) {
994 // Collect which wide IVs have predicates and add them to PSE.
995 auto [HeaderVPBB, _] = VPBlockUtils::getPlainCFGHeaderAndLatch(Plan);
997 for (auto &R : HeaderVPBB->phis()) {
998 auto *WideIV = dyn_cast<VPWidenInductionRecipe>(&R);
999 if (!WideIV || WideIV->getNoWrapPredicates().empty())
1000 continue;
1001 PredicatedIVs.insert(WideIV);
1002 for (const auto *P : WideIV->getNoWrapPredicates())
1003 PSE.addPredicate(*P);
1004 }
1005
1006 // Bail out if exit phis use predicated IVs via ExitingIVValue, as the
1007 // predicated SCEV may not hold outside the loop (PR33706). Check each IV's
1008 // predicates directly, regardless of whether PSE was already non-trivial
1009 // from other sources (e.g., LAI predicates).
1010 // TODO: Overly conservative; the pre-computed exit values are not correct
1011 // outside the loop, but the exit values could be extracted from the vector
1012 // loop.
1013 if (!PredicatedIVs.empty())
1014 for (auto *EB : Plan.getExitBlocks())
1015 for (VPRecipeBase &R : EB->phis())
1016 for (VPValue *Op : R.operands()) {
1017 VPValue *Inner;
1018 if (!match(Op, m_ExitingIVValue(m_VPValue(Inner))))
1019 continue;
1020 auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Inner);
1021 if (!WideIV || !PredicatedIVs.contains(WideIV))
1022 continue;
1023 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Predicated IV has "
1024 "outside-loop use via ExitingIVValue\n");
1025 return false;
1026 }
1027
1028 unsigned TotalComplexity = PSE.getPredicate().getComplexity();
1029 if (TotalComplexity && OptForSize) {
1030 LLVM_DEBUG(
1031 dbgs() << "LV: Not vectorizing: SCEV predicates needed for induction "
1032 "but optimizing for size\n");
1034 "Runtime SCEV check is required with -Os/-Oz",
1035 "runtime SCEV checks needed but optimizing for size",
1036 "CantVersionLoopWithOptForSize", ORE, TheLoop);
1037 return false;
1038 }
1039
1040 if (TotalComplexity > SCEVCheckThreshold) {
1041 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Too many SCEV checks needed ("
1042 << TotalComplexity << " > " << SCEVCheckThreshold
1043 << ")\n");
1045 "Too many SCEV checks needed",
1046 "Too many SCEV assumptions need to be made and checked at runtime",
1047 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1048 return false;
1049 }
1050
1051 return true;
1052}
1053
1055 ElementCount MinVF) {
1058
1059 for (VPRecipeBase &R : Header->phis()) {
1060 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
1061 if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
1062 continue;
1063
1064 RecurKind Kind = PhiR->getRecurrenceKind();
1068 "AnyOf and Find reductions are not allowed for in-loop reductions");
1069
1070 bool IsFPRecurrence =
1072 FastMathFlags FMFs =
1073 IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();
1074
1075 // Collect the chain of "link" recipes for the reduction starting at PhiR.
1077 Worklist.insert(PhiR);
1078 for (unsigned I = 0; I != Worklist.size(); ++I) {
1079 VPSingleDefRecipe *Cur = Worklist[I];
1080 for (VPUser *U : Cur->users()) {
1081 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
1082 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
1083 assert((UserRecipe->getParent() == Plan.getMiddleBlock() ||
1084 UserRecipe->getParent() == Plan.getScalarPreheader()) &&
1085 "U must be either in the loop region, the middle block or the "
1086 "scalar preheader.");
1087 continue;
1088 }
1089
1090 // Stores using instructions will be sunk later.
1091 if (match(UserRecipe, m_VPInstruction<Instruction::Store>()))
1092 continue;
1093 Worklist.insert(UserRecipe);
1094 }
1095 }
1096
1097 // Visit operation "Links" along the reduction chain top-down starting from
1098 // the phi until LoopExitValue. We keep track of the previous item
1099 // (PreviousLink) to tell which of the two operands of a Link will remain
1100 // scalar and which will be reduced. For minmax by select(cmp), Link will be
1101 // the select instructions. Blend recipes of in-loop reduction phi's will
1102 // get folded to their non-phi operand, as the reduction recipe handles the
1103 // condition directly.
1104 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
1105 for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
1106 if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
1107 assert(Blend->getNumIncomingValues() == 2 &&
1108 "Blend must have 2 incoming values");
1109 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
1110 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
1111 "PhiR must be an operand of the blend");
1112 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
1113 continue;
1114 }
1115
1116 if (IsFPRecurrence) {
1117 FastMathFlags CurFMF =
1118 cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlagsOrNone();
1119 if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
1120 CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
1121 ->getFastMathFlagsOrNone();
1122 FMFs &= CurFMF;
1123 }
1124
1125 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
1126
1127 // Recognize a call to the llvm.fmuladd intrinsic.
1128 bool IsFMulAdd = Kind == RecurKind::FMulAdd;
1129 VPValue *VecOp;
1130 VPBasicBlock *LinkVPBB = CurrentLink->getParent();
1131 if (IsFMulAdd) {
1133 "Expected current VPInstruction to be a call to the "
1134 "llvm.fmuladd intrinsic");
1135 assert(CurrentLink->getOperand(2) == PreviousLink &&
1136 "expected a call where the previous link is the added operand");
1137
1138 // If the instruction is a call to the llvm.fmuladd intrinsic then we
1139 // need to create an fmul recipe (multiplying the first two operands of
1140 // the fmuladd together) to use as the vector operand for the fadd
1141 // reduction.
1142 auto *FMulRecipe = new VPInstruction(
1143 Instruction::FMul,
1144 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
1145 CurrentLinkI->getFastMathFlags());
1146 LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
1147 VecOp = FMulRecipe;
1148 } else if (Kind == RecurKind::AddChainWithSubs &&
1149 match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
1150 Type *PhiTy = PhiR->getScalarType();
1151 auto *Zero = Plan.getConstantInt(PhiTy, 0);
1152 VPBuilder Builder(LinkVPBB, CurrentLink->getIterator());
1153 auto *Sub = Builder.createSub(Zero, CurrentLink->getOperand(1),
1154 CurrentLinkI->getDebugLoc());
1155 Sub->setUnderlyingValue(CurrentLinkI);
1156 VecOp = Sub;
1157 } else {
1158 // Index of the first operand which holds a non-mask vector operand.
1159 unsigned IndexOfFirstOperand = 0;
1161 if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
1162 continue;
1163 assert(match(CurrentLink,
1165 "must be a select recipe");
1166 IndexOfFirstOperand = 1;
1167 }
1168 // Note that for non-commutable operands (cmp-selects), the semantics of
1169 // the cmp-select are captured in the recurrence kind.
1170 unsigned VecOpId =
1171 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
1172 ? IndexOfFirstOperand + 1
1173 : IndexOfFirstOperand;
1174 VecOp = CurrentLink->getOperand(VecOpId);
1175 assert(
1176 VecOp != PreviousLink &&
1177 CurrentLink->getOperand(
1178 cast<VPInstruction>(CurrentLink)->getNumOperandsWithoutMask() -
1179 1 - (VecOpId - IndexOfFirstOperand)) == PreviousLink &&
1180 "PreviousLink must be the operand other than VecOp");
1181 }
1182
1183 assert(PhiR->getVFScaleFactor() == 1 &&
1184 "inloop reductions must be unscaled");
1185 VPValue *CondOp = cast<VPInstruction>(CurrentLink)->getMask();
1186 auto *RedRecipe = new VPReductionRecipe(
1187 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
1188 getReductionStyle(/*IsInLoop=*/true, PhiR->isOrdered(), 1),
1189 CurrentLinkI->getDebugLoc());
1190 // Append the recipe to the end of the VPBasicBlock because we need to
1191 // ensure that it comes after all of it's inputs, including CondOp.
1192 // Delete CurrentLink as it will be invalid if its operand is replaced
1193 // with a reduction defined at the bottom of the block in the next link.
1194 if (LinkVPBB->getNumSuccessors() == 0)
1195 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
1196 else
1197 LinkVPBB->appendRecipe(RedRecipe);
1198
1199 CurrentLink->replaceAllUsesWith(RedRecipe);
1200 // Move any store recipes using the RedRecipe that appear before it in the
1201 // same block to just after the RedRecipe.
1202 for (VPUser *U : make_early_inc_range(RedRecipe->users())) {
1203 auto *UserR = dyn_cast<VPRecipeBase>(U);
1204 if (!UserR || UserR->getParent() != LinkVPBB)
1205 continue;
1207 continue;
1208 UserR->moveAfter(RedRecipe);
1209 }
1210 ToDelete.push_back(CurrentLink);
1211 PreviousLink = RedRecipe;
1212 }
1213 }
1214
1215 for (VPRecipeBase *R : ToDelete)
1216 R->eraseFromParent();
1217}
1218
1219/// Check if all loads in the loop are dereferenceable. Iterates over the
1220/// loop body blocks reachable from \p HeaderVPBB. Returns false if any
1221/// non-dereferenceable load is found.
1222static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB, Loop *TheLoop,
1224 DominatorTree &DT, AssumptionCache *AC) {
1225 ScalarEvolution &SE = *PSE.getSE();
1226 const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
1227 for (VPBasicBlock *VPBB : vp_rpo_plain_cfg_loop_body(HeaderVPBB)) {
1228 for (VPRecipeBase &R : *VPBB) {
1229 auto *VPI = dyn_cast<VPInstructionWithType>(&R);
1230 if (!VPI || VPI->getOpcode() != Instruction::Load) {
1231 assert(!R.mayReadFromMemory() && "unexpected recipe reading memory");
1232 continue;
1233 }
1234
1235 // Get the pointer SCEV for dereferenceability checking.
1236 VPValue *Ptr = VPI->getOperand(0);
1237 const SCEV *PtrSCEV = vputils::getSCEVExprForVPValue(Ptr, PSE, TheLoop);
1238 if (isa<SCEVCouldNotCompute>(PtrSCEV)) {
1239 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Found non-dereferenceable "
1240 "load with SCEVCouldNotCompute pointer\n");
1241 return false;
1242 }
1243
1244 // Check dereferenceability using the SCEV-based version.
1245 Type *LoadTy = VPI->getScalarType();
1246 const SCEV *SizeSCEV =
1247 SE.getStoreSizeOfExpr(DL.getIndexType(PtrSCEV->getType()), LoadTy);
1248 auto *Load = cast<LoadInst>(VPI->getUnderlyingValue());
1250 if (isDereferenceableAndAlignedInLoop(PtrSCEV, Load->getAlign(), SizeSCEV,
1251 TheLoop, SE, DT, AC, &Preds))
1252 continue;
1253
1254 LLVM_DEBUG(
1255 dbgs() << "LV: Not vectorizing: Auto-vectorization of loops with "
1256 "potentially faulting load is not supported.\n");
1257 return false;
1258 }
1259 }
1260 return true;
1261}
1262
1264 Loop *TheLoop,
1266 DominatorTree &DT, AssumptionCache *AC) {
1267 auto *MiddleVPBB = VPBlockUtils::getPlainCFGMiddleBlock(Plan);
1268 auto [HeaderVPBB, LatchVPBB] = VPBlockUtils::getPlainCFGHeaderAndLatch(Plan);
1269
1270 // TODO: We would like to detect uncountable exits and stores within loops
1271 // with such exits from the VPlan alone. Exit detection can be moved
1272 // here from handleUncountableEarlyExits, but we need to improve
1273 // detection of recipes which may write to memory.
1275 // Dereferenceability is checked separately for uncountable exit loops with
1276 // stores, as only the loads contributing to the exit condition need to
1277 // be checked.
1278 if (Style == UncountableExitStyle::ReadOnly &&
1279 !areAllLoadsDereferenceable(HeaderVPBB, TheLoop, PSE, DT, AC))
1280 return false;
1281 // TODO: Check target preference for style.
1282 return handleUncountableEarlyExits(Plan, HeaderVPBB, LatchVPBB, MiddleVPBB,
1283 TheLoop, PSE, DT, AC, Style);
1284 }
1285
1286 // Disconnect countable early exits from the loop, leaving it with a single
1287 // exit from the latch. Countable early exits are left for a scalar epilog.
1288 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
1289 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
1290 if (Pred == MiddleVPBB)
1291 continue;
1292
1293 // Remove phi operands for the early exiting block.
1294 for (VPRecipeBase &R : EB->phis())
1295 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
1296 auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
1297 EarlyExitingVPBB->getTerminator()->eraseFromParent();
1299 }
1300 }
1301 return true;
1302}
1303
1304void VPlanTransforms::addMiddleCheck(VPlan &Plan, bool TailFolded) {
1305 auto *MiddleVPBB = VPBlockUtils::getPlainCFGMiddleBlock(Plan);
1306 // If MiddleVPBB has a single successor then the original loop does not exit
1307 // via the latch and the single successor must be the scalar preheader.
1308 // There's no need to add a runtime check to MiddleVPBB.
1309 if (MiddleVPBB->getNumSuccessors() == 1) {
1310 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
1311 "must have ScalarPH as single successor");
1312 return;
1313 }
1314
1315 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
1316
1317 // Add a check in the middle block to see if we have completed all of the
1318 // iterations in the first vector loop.
1319 //
1320 // Three cases:
1321 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
1322 // condition to false.
1323 // 2) If (N - N%VF) == N, then we *don't* need to run the
1324 // remainder. Thus if tail is to be folded, we know we don't need to run
1325 // the remainder and we can set the condition to true.
1326 // 3) Otherwise, construct a runtime check.
1327
1328 // We use the same DebugLoc as the scalar loop latch terminator instead of
1329 // the corresponding compare because they may have ended up with different
1330 // line numbers and we want to avoid awkward line stepping while debugging.
1331 // E.g., if the compare has got a line number inside the loop.
1332 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
1333 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
1334 VPBuilder Builder(MiddleVPBB);
1335 VPValue *Cmp;
1336 if (TailFolded)
1337 Cmp = Plan.getTrue();
1338 else
1339 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
1340 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
1341 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
1342}
1343
1345 VPDominatorTree VPDT(Plan);
1347 Plan.getEntry());
1348 for (VPBlockBase *HeaderVPB : POT)
1349 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
1350 createLoopRegion(Plan, HeaderVPB, DL);
1351
1352 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
1353 TopRegion->setName("vector loop");
1354 TopRegion->getEntryBasicBlock()->setName("vector.body");
1355}
1356
1358 assert(Plan.getExitBlocks().size() == 1 &&
1359 "only a single-exit block is supported currently");
1360 assert(Plan.getExitBlocks().front()->getSinglePredecessor() ==
1361 Plan.getMiddleBlock() &&
1362 "the exit block must have middle block as single predecessor");
1363
1364 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1365 assert(LoopRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
1366 "The vector loop region must have the middle block as its single "
1367 "successor for now");
1368 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1369
1370 Header->splitAt(Header->getFirstNonPhi());
1371
1372 // Create the header mask, insert it in the header and branch on it.
1373 auto *IV = new VPWidenCanonicalIVRecipe(
1374 LoopRegion->getCanonicalIV(),
1375 VPIRFlags::WrapFlagsTy(LoopRegion->hasCanonicalIVNUW(), false));
1376 VPBuilder Builder(Header, Header->getFirstNonPhi());
1377 Builder.insert(IV);
1379 VPValue *HeaderMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
1380 Builder.createNaryOp(VPInstruction::BranchOnCond, HeaderMask);
1381
1382 VPBasicBlock *OrigLatch = LoopRegion->getExitingBasicBlock();
1383 VPValue *IVInc;
1384 [[maybe_unused]] bool TermBranchOnCount =
1385 match(OrigLatch->getTerminator(),
1387 m_Specific(&Plan.getVectorTripCount())));
1388 assert(TermBranchOnCount &&
1389 match(IVInc, m_Add(m_Specific(LoopRegion->getCanonicalIV()),
1390 m_Specific(&Plan.getVFxUF()))) &&
1391 std::next(IVInc->getDefiningRecipe()->getIterator()) ==
1392 OrigLatch->getTerminator()->getIterator() &&
1393 "Unexpected canonical iv increment");
1394
1395 // Split the latch at the IV update, and branch to it from the header mask.
1396 VPBasicBlock *Latch =
1397 OrigLatch->splitAt(IVInc->getDefiningRecipe()->getIterator());
1398 Latch->setName("vector.latch");
1399 VPBlockUtils::connectBlocks(Header, Latch);
1400
1401 // Collect any values defined in the loop that need a phi. Currently this
1402 // includes header phi backedges and live-outs extracted in the middle block.
1403 // TODO: Handle early exits via Plan.getExitBlocks()
1405 for (VPRecipeBase &R : Header->phis())
1407 NeedsPhi[cast<VPHeaderPHIRecipe>(R).getBackedgeValue()].push_back(&R);
1408
1409 VPValue *V;
1410 for (VPRecipeBase &R : *Plan.getMiddleBlock())
1411 if (match(&R, m_ExtractLastPart(m_VPValue(V))))
1412 NeedsPhi[V].push_back(&R);
1413
1414 // Insert phis for values coming past the end of the tail.
1415 Builder.setInsertPoint(Latch, Latch->begin());
1416 for (const auto &[V, Users] : NeedsPhi) {
1417 if (isa<VPIRValue>(V))
1418 continue;
1419 VPValue *TailVal =
1420 Plan.getOrAddLiveIn(PoisonValue::get(V->getScalarType()));
1421 VPIRFlags Flags;
1423 "Value used by more than two reduction phis?");
1425 auto *RdxPhi =
1426 RedIt != Users.end() ? cast<VPReductionPHIRecipe>(*RedIt) : nullptr;
1427 if (RdxPhi && !RdxPhi->isInLoop()) {
1428 TailVal = RdxPhi;
1429 Flags = *RdxPhi;
1430 }
1431
1432 VPInstruction *Phi = Builder.createScalarPhi({V, TailVal}, {}, "", Flags);
1433 for (VPUser *U : Users)
1434 U->replaceUsesOfWith(V, Phi);
1435 }
1436
1437 // Any extract of the last element must be updated to extract from the last
1438 // active lane of the header mask instead (i.e., the lane corresponding to the
1439 // last active iteration).
1440 Builder.setInsertPoint(Plan.getMiddleBlock()->getTerminator());
1441 for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
1442 VPValue *Op;
1444 continue;
1445
1446 // Compute the index of the last active lane.
1447 VPValue *LastActiveLane = Builder.createLastActiveLane(HeaderMask);
1448 auto *Ext =
1449 Builder.createNaryOp(VPInstruction::ExtractLane, {LastActiveLane, Op});
1450 R.getVPSingleValue()->replaceAllUsesWith(Ext);
1451 }
1452}
1453
1454/// Insert \p CheckBlockVPBB on the edge leading to the vector preheader,
1455/// connecting it to both vector and scalar preheaders. Updates scalar
1456/// preheader phis to account for the new predecessor.
1458 VPBasicBlock *CheckBlockVPBB) {
1459 VPBlockBase *VectorPH = Plan.getVectorPreheader();
1460 auto *ScalarPH = cast<VPBasicBlock>(Plan.getScalarPreheader());
1461 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
1462 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
1463 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
1464 CheckBlockVPBB->swapSuccessors();
1465 unsigned NumPreds = ScalarPH->getNumPredecessors();
1466 for (VPRecipeBase &R : ScalarPH->phis()) {
1467 auto *Phi = cast<VPPhi>(&R);
1468 assert(Phi->getNumIncoming() == NumPreds - 1 &&
1469 "must have incoming values for all predecessors");
1470 Phi->addIncoming(Phi->getOperand(NumPreds - 2));
1471 }
1472}
1473
1474// Likelyhood of bypassing the vectorized loop due to a runtime check block,
1475// including memory overlap checks block and wrapping/unit-stride checks block.
1476static constexpr uint32_t CheckBypassWeights[] = {1, 127};
1477
1478/// Create a BranchOnCond terminator in \p CheckBlockVPBB. Optionally adds
1479/// branch weights.
1480static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB,
1481 VPValue *Cond, bool AddBranchWeights) {
1483 auto *Term = VPBuilder(CheckBlockVPBB)
1485 if (AddBranchWeights) {
1486 MDBuilder MDB(Plan.getContext());
1487 MDNode *BranchWeights =
1488 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
1489 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1490 }
1491}
1492
1494 VPBasicBlock *CheckBlock,
1495 bool AddBranchWeights) {
1496 insertCheckBlockBeforeVectorLoop(Plan, CheckBlock);
1497 addBypassBranch(Plan, CheckBlock, Cond, AddBranchWeights);
1498}
1499
1501 BasicBlock *CheckBlock,
1502 bool AddBranchWeights) {
1503 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
1504 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
1505 attachVPCheckBlock(Plan, CondVPV, CheckBlockVPBB, AddBranchWeights);
1506}
1507
1509 VPlan &Plan, ElementCount VF, unsigned UF,
1510 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
1511 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
1513 // Generate code to check if the loop's trip count is less than VF * UF, or
1514 // equal to it in case a scalar epilogue is required; this implies that the
1515 // vector trip count is zero. This check also covers the case where adding one
1516 // to the backedge-taken count overflowed leading to an incorrect trip count
1517 // of zero. In this case we will also jump to the scalar loop.
1518 CmpInst::Predicate CmpPred =
1519 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1520 // If tail is to be folded, vector loop takes care of all iterations.
1521 VPValue *TripCountVPV = Plan.getTripCount();
1522 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
1523 Type *TripCountTy = TripCount->getType();
1524 ScalarEvolution &SE = *PSE.getSE();
1525 auto GetMinTripCount = [&]() -> const SCEV * {
1526 // Compute max(MinProfitableTripCount, UF * VF) and return it.
1527 const SCEV *VFxUF =
1528 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
1529 if (UF * VF.getKnownMinValue() >=
1530 MinProfitableTripCount.getKnownMinValue()) {
1531 // TODO: SCEV should be able to simplify test.
1532 return VFxUF;
1533 }
1534 const SCEV *MinProfitableTripCountSCEV =
1535 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
1536 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1537 };
1538
1539 VPBuilder Builder(CheckBlock);
1540 VPValue *TripCountCheck = Plan.getFalse();
1541 const SCEV *Step = GetMinTripCount();
1542 // TripCountCheck = false, folding tail implies positive vector trip
1543 // count.
1544 if (!TailFolded) {
1545 // TODO: Emit unconditional branch to vector preheader instead of
1546 // conditional branch with known condition.
1547 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
1548 // Check if the trip count is < the step.
1549 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
1550 // TODO: Ensure step is at most the trip count when determining max VF and
1551 // UF, w/o tail folding.
1552 TripCountCheck = Plan.getTrue();
1553 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
1554 TripCount, Step)) {
1555 // Generate the minimum iteration check only if we cannot prove the
1556 // check is known to be true, or known to be false.
1557 // Try to expand Step into VPInstructions in CheckBlock; otherwise fall
1558 // back to a VPExpandSCEV recipe in the plan's entry block.
1559 VPValue *MinTripCountVPV =
1560 VPSCEVExpander(Builder, *PSE.getSE(), DL).tryToExpand(Step);
1561 if (!MinTripCountVPV)
1562 MinTripCountVPV = VPBuilder(Plan.getEntry()).createExpandSCEV(Step);
1563 TripCountCheck = Builder.createICmp(
1564 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
1565 } // else step known to be < trip count, use TripCountCheck preset to false.
1566 }
1567 VPInstruction *Term =
1568 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
1570 MDBuilder MDB(Plan.getContext());
1571 MDNode *BranchWeights = MDB.createBranchWeights(
1572 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
1573 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1574 }
1575}
1576
1578 VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue,
1579 Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL,
1581 auto *CheckBlock = Plan.createVPBasicBlock("vector.main.loop.iter.check");
1582 insertCheckBlockBeforeVectorLoop(Plan, CheckBlock);
1584 RequiresScalarEpilogue, /*TailFolded=*/false,
1585 OrigLoop, MinItersBypassWeights, DL, PSE,
1586 CheckBlock);
1587}
1588
1590 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
1591 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
1592 unsigned EpilogueLoopStep, ScalarEvolution &SE) {
1593 // Add the minimum iteration check for the epilogue vector loop.
1594 VPValue *TC = Plan.getTripCount();
1595 Value *TripCount = TC->getLiveInIRValue();
1596 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
1597 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
1598 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
1599 VPValue *Count = Builder.createSub(TC, Plan.getOrAddLiveIn(VectorTripCount),
1600 DebugLoc::getUnknown(), "n.vec.remaining");
1601
1602 // Generate code to check if the loop's trip count is less than VF * UF of
1603 // the vector epilogue loop.
1604 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1605 auto *CheckMinIters = Builder.createICmp(
1606 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
1607 VPInstruction *Branch =
1608 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
1609
1610 // We assume the remaining `Count` is equally distributed in
1611 // [0, MainLoopStep)
1612 // So the probability for `Count < EpilogueLoopStep` should be
1613 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
1614 // TODO: Improve the estimate by taking the estimated trip count into
1615 // consideration.
1616 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1617 const uint32_t Weights[] = {EstimatedSkipCount,
1618 MainLoopStep - EstimatedSkipCount};
1619 MDBuilder MDB(Plan.getContext());
1620 MDNode *BranchWeights =
1621 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
1622 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1623}
1624
1625/// Find and return the final select instruction of the FindIV result pattern
1626/// for the given \p BackedgeVal:
1627/// select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),
1628/// ComputeReductionResult(ReducedIV), Start.
1630 return cast<VPInstruction>(
1631 vputils::findRecipe(BackedgeVal, [BackedgeVal](VPRecipeBase *R) {
1632 auto *VPI = dyn_cast<VPInstruction>(R);
1633 return VPI &&
1634 matchFindIVResult(VPI, m_Specific(BackedgeVal), m_VPValue());
1635 }));
1636}
1637
1639 auto GetMinOrMaxCompareValue =
1640 [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
1641 auto *MinOrMaxR =
1642 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
1643 if (!MinOrMaxR)
1644 return nullptr;
1645
1646 // Check that MinOrMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1647 // with an intrinsic that matches the reduction kind.
1648 Intrinsic::ID ExpectedIntrinsicID =
1649 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
1650 if (!match(MinOrMaxR, m_Intrinsic(ExpectedIntrinsicID)))
1651 return nullptr;
1652
1653 if (MinOrMaxR->getOperand(0) == RedPhiR)
1654 return MinOrMaxR->getOperand(1);
1655
1656 assert(MinOrMaxR->getOperand(1) == RedPhiR &&
1657 "Reduction phi operand expected");
1658 return MinOrMaxR->getOperand(0);
1659 };
1660
1661 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1663 MinOrMaxNumReductionsToHandle;
1664 bool HasUnsupportedPhi = false;
1665 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
1667 continue;
1668 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1669 if (!Cur) {
1670 // TODO: Also support fixed-order recurrence phis.
1671 HasUnsupportedPhi = true;
1672 continue;
1673 }
1675 Cur->getRecurrenceKind())) {
1676 HasUnsupportedPhi = true;
1677 continue;
1678 }
1679
1680 VPValue *MinOrMaxOp = GetMinOrMaxCompareValue(Cur);
1681 if (!MinOrMaxOp)
1682 return false;
1683
1684 MinOrMaxNumReductionsToHandle.emplace_back(Cur, MinOrMaxOp);
1685 }
1686
1687 if (MinOrMaxNumReductionsToHandle.empty())
1688 return true;
1689
1690 // We won't be able to resume execution in the scalar tail, if there are
1691 // unsupported header phis or there is no scalar tail at all, due to
1692 // tail-folding.
1693 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1694 return false;
1695
1696 /// Check if the vector loop of \p Plan can early exit and restart
1697 /// execution of last vector iteration in the scalar loop. This requires all
1698 /// recipes up to early exit point be side-effect free as they are
1699 /// re-executed. Currently we check that the loop is free of any recipe that
1700 /// may write to memory. Expected to operate on an early VPlan w/o nested
1701 /// regions.
1704 auto *VPBB = cast<VPBasicBlock>(VPB);
1705 for (auto &R : *VPBB) {
1706 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1707 return false;
1708 }
1709 }
1710
1711 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1712 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1713 VPValue *AllNaNLanes = nullptr;
1714 SmallPtrSet<VPValue *, 2> RdxResults;
1715 for (const auto &[_, MinOrMaxOp] : MinOrMaxNumReductionsToHandle) {
1716 VPValue *RedNaNLanes =
1717 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinOrMaxOp, MinOrMaxOp);
1718 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1719 : RedNaNLanes;
1720 }
1721
1722 VPValue *AnyNaNLane =
1723 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1724 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1725 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
1726 for (const auto &[RedPhiR, _] : MinOrMaxNumReductionsToHandle) {
1728 RedPhiR->getRecurrenceKind()) &&
1729 "unsupported reduction");
1730
1731 // If we exit early due to NaNs, compute the final reduction result based on
1732 // the reduction phi at the beginning of the last vector iteration.
1733 auto *RdxResult = vputils::findComputeReductionResult(RedPhiR);
1734 assert(RdxResult && "must find a ComputeReductionResult");
1735
1736 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1737 RdxResult->getOperand(0));
1738 RdxResult->setOperand(0, NewSel);
1739 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1740 RdxResults.insert(RdxResult);
1741 }
1742
1743 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1744 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1745 "Unexpected terminator");
1746 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1747 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1748 LatchExitingBranch->getOperand(1));
1749 auto *AnyExitTaken = LatchBuilder.createOr(AnyNaNLane, IsLatchExitTaken);
1750 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1751 LatchExitingBranch->eraseFromParent();
1752
1753 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
1754 // true, the resume from the start of the last vector iteration via the
1755 // canonical IV, otherwise from the original value.
1756 auto IsTC = [&Plan](VPValue *V) {
1757 return V == &Plan.getVectorTripCount() || V == Plan.getTripCount();
1758 };
1759 for (auto &R : Plan.getScalarPreheader()->phis()) {
1760 auto *ResumeR = cast<VPPhi>(&R);
1761 VPValue *VecV = ResumeR->getOperand(0);
1762 if (RdxResults.contains(VecV))
1763 continue;
1764 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1765 VPValue *DIVTC = DerivedIV->getOperand(1);
1766 if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) {
1767 auto *NewSel = MiddleBuilder.createSelect(
1768 AnyNaNLane, LoopRegion->getCanonicalIV(), DIVTC);
1769 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1770 DerivedIV->setOperand(1, NewSel);
1771 continue;
1772 }
1773 }
1774 // Bail out and abandon the current, partially modified, VPlan if we
1775 // encounter resume phi that cannot be updated yet.
1776 if (!IsTC(VecV)) {
1777 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1778 "FMaxNum/FMinNum reduction.\n");
1779 return false;
1780 }
1781 auto *NewSel = MiddleBuilder.createSelect(
1782 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1783 ResumeR->setOperand(0, NewSel);
1784 }
1785
1786 auto *MiddleTerm = MiddleVPBB->getTerminator();
1787 MiddleBuilder.setInsertPoint(MiddleTerm);
1788 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1789 VPValue *NewCond =
1790 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1791 MiddleTerm->setOperand(0, NewCond);
1792 return true;
1793}
1794
1796 if (Plan.hasScalarVFOnly())
1797 return false;
1798
1799 // We want to create the following nodes:
1800 // vector.body:
1801 // ...new WidenPHI recipe introduced to keep the mask value for the latest
1802 // iteration where any lane was active.
1803 // mask.phi = phi [ ir<false>, vector.ph ], [ vp<new.mask>, vector.body ]
1804 // ...data.phi (a VPReductionPHIRecipe for a FindLast reduction) already
1805 // exists, but needs updating to use 'new.data' for the backedge value.
1806 // data.phi = phi ir<default.val>, vp<new.data>
1807 //
1808 // ...'data' and 'compare' created by existing nodes...
1809 //
1810 // ...new recipes introduced to determine whether to update the reduction
1811 // values or keep the current one.
1812 // any.active = i1 any-of ir<compare>
1813 // new.mask = select vp<any.active>, ir<compare>, vp<mask.phi>
1814 // new.data = select vp<any.active>, ir<data>, ir<data.phi>
1815 //
1816 // middle.block:
1817 // ...extract-last-active replaces compute-reduction-result.
1818 // result = extract-last-active vp<new.data>, vp<new.mask>, ir<default.val>
1819
1821 for (VPRecipeBase &Phi :
1823 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&Phi);
1825 PhiR->getRecurrenceKind()))
1826 Phis.push_back(PhiR);
1827 }
1828
1829 if (Phis.empty())
1830 return true;
1831
1832 VPValue *HeaderMask = vputils::findHeaderMask(Plan);
1833 for (VPReductionPHIRecipe *PhiR : Phis) {
1834 // Find the condition for the select/blend.
1835 VPValue *BackedgeSelect = PhiR->getBackedgeValue();
1836 VPValue *CondSelect = BackedgeSelect;
1837
1838 // If there's a header mask, the backedge select will not be the find-last
1839 // select.
1840 if (HeaderMask &&
1841 !match(BackedgeSelect,
1842 m_SelectLike(m_Specific(HeaderMask), m_VPValue(CondSelect),
1843 m_Specific(PhiR))))
1844 return false;
1845
1846 VPValue *Cond = nullptr, *Op1 = nullptr, *Op2 = nullptr;
1847
1848 // If we're matching a blend rather than a select, there should be one
1849 // incoming value which is the data, then all other incoming values should
1850 // be the phi.
1851 auto MatchBlend = [&](VPRecipeBase *R) {
1852 auto *Blend = dyn_cast<VPBlendRecipe>(R);
1853 if (!Blend)
1854 return false;
1855 assert(!Blend->isNormalized() && "must run before blend normalizaion");
1856 unsigned NumIncomingDataValues = 0;
1857 for (unsigned I = 0; I < Blend->getNumIncomingValues(); ++I) {
1858 VPValue *Incoming = Blend->getIncomingValue(I);
1859 if (Incoming != PhiR) {
1860 ++NumIncomingDataValues;
1861 Cond = Blend->getMask(I);
1862 Op1 = Incoming;
1863 Op2 = PhiR;
1864 }
1865 }
1866 return NumIncomingDataValues == 1;
1867 };
1868
1869 VPSingleDefRecipe *SelectR =
1871 if (!match(SelectR,
1872 m_Select(m_VPValue(Cond), m_VPValue(Op1), m_VPValue(Op2))) &&
1873 !MatchBlend(SelectR))
1874 return false;
1875
1876 assert(Cond != HeaderMask && "Cond must not be HeaderMask");
1877
1878 // Find final reduction computation and replace it with an
1879 // extract.last.active intrinsic.
1880 auto *RdxResult =
1882 assert(RdxResult && "Could not find reduction result");
1883
1884 // Add mask phi.
1885 VPBuilder Builder = VPBuilder::getToInsertAfter(PhiR);
1886 auto *MaskPHI = Builder.createWidenPhi(Plan.getFalse());
1887
1888 // Add select for mask.
1889 Builder.setInsertPoint(SelectR);
1890
1891 if (Op1 == PhiR) {
1892 // Normalize to selecting the data operand when the condition is true by
1893 // swapping operands and negating the condition.
1894 std::swap(Op1, Op2);
1895 Cond = Builder.createNot(Cond);
1896 }
1897 assert(Op2 == PhiR && "data value must be selected if Cond is true");
1898
1899 if (HeaderMask)
1900 Cond = Builder.createLogicalAnd(HeaderMask, Cond);
1901
1902 VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, {Cond});
1903 VPValue *MaskSelect = Builder.createSelect(AnyOf, Cond, MaskPHI);
1904 MaskPHI->addIncoming(MaskSelect);
1905
1906 // Replace select for data.
1907 VPValue *DataSelect =
1908 Builder.createSelect(AnyOf, Op1, Op2, SelectR->getDebugLoc());
1909 SelectR->replaceAllUsesWith(DataSelect);
1910 PhiR->setBackedgeValue(DataSelect);
1911 SelectR->eraseFromParent();
1912
1913 Builder.setInsertPoint(RdxResult);
1914 auto *ExtractLastActive =
1915 Builder.createNaryOp(VPInstruction::ExtractLastActive,
1916 {PhiR->getStartValue(), DataSelect, MaskSelect},
1917 RdxResult->getDebugLoc());
1918 RdxResult->replaceAllUsesWith(ExtractLastActive);
1919 RdxResult->eraseFromParent();
1920 }
1921
1922 return true;
1923}
1924
1925/// Given a first argmin/argmax pattern with strict predicate consisting of
1926/// 1) a MinOrMax reduction \p MinOrMaxPhiR producing \p MinOrMaxResult,
1927/// 2) a wide induction \p WideIV,
1928/// 3) a FindLastIV reduction \p FindLastIVPhiR using \p WideIV,
1929/// return the smallest index of the FindLastIV reduction result using UMin,
1930/// unless \p MinOrMaxResult equals the start value of its MinOrMax reduction.
1931/// In that case, return the start value of the FindLastIV reduction instead.
1932/// If \p WideIV is not canonical, a new canonical wide IV is added, and the
1933/// final result is scaled back to the non-canonical \p WideIV.
1934/// The final value of the FindLastIV reduction is originally computed using
1935/// \p FindIVSelect, \p FindIVCmp, and \p FindIVRdxResult, which are replaced
1936/// and removed.
1937/// Returns true if the pattern was handled successfully, false otherwise.
1939 VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR,
1940 VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV,
1941 VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect,
1942 VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult) {
1943 assert(!FindLastIVPhiR->isInLoop() && !FindLastIVPhiR->isOrdered() &&
1944 "inloop and ordered reductions not supported");
1945 assert(FindLastIVPhiR->getVFScaleFactor() == 1 &&
1946 "FindIV reduction must not be scaled");
1947
1949 // TODO: Support non (i.e., narrower than) canonical IV types.
1950 // TODO: Emit remarks for failed transformations.
1951 if (Ty != WideIV->getScalarType())
1952 return false;
1953
1954 auto *FindIVSelectR = cast<VPSingleDefRecipe>(
1955 FindLastIVPhiR->getBackedgeValue()->getDefiningRecipe());
1956 assert(
1957 match(FindIVSelectR, m_Select(m_VPValue(), m_VPValue(), m_VPValue())) &&
1958 "backedge value must be a select");
1959 if (FindIVSelectR->getOperand(1) != WideIV &&
1960 FindIVSelectR->getOperand(2) != WideIV)
1961 return false;
1962
1963 // If the original wide IV is not canonical, create a new one. The canonical
1964 // wide IV is guaranteed to not wrap for all lanes that are active in the
1965 // vector loop.
1966 if (!WideIV->isCanonical()) {
1967 VPIRValue *Zero = Plan.getConstantInt(Ty, 0);
1968 VPIRValue *One = Plan.getConstantInt(Ty, 1);
1969 auto *WidenCanIV = new VPWidenIntOrFpInductionRecipe(
1970 nullptr, Zero, One, WideIV->getVFValue(),
1971 WideIV->getInductionDescriptor(),
1972 VPIRFlags::WrapFlagsTy(/*HasNUW=*/true, /*HasNSW=*/false),
1973 WideIV->getDebugLoc());
1974 WidenCanIV->insertBefore(WideIV);
1975
1976 // Update the select to use the wide canonical IV.
1977 FindIVSelectR->setOperand(FindIVSelectR->getOperand(1) == WideIV ? 1 : 2,
1978 WidenCanIV);
1979 }
1980 FindLastIVPhiR->setOperand(0, Plan.getOrAddLiveIn(PoisonValue::get(Ty)));
1981
1982 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1983 // result:
1984 // 1. Find the first canonical indices corresponding to partial min/max
1985 // values, using loop reductions.
1986 // 2. Find which of the partial min/max values are equal to the overall
1987 // min/max value.
1988 // 3. Select among the canonical indices those corresponding to the overall
1989 // min/max value.
1990 // 4. Find the first canonical index of overall min/max and scale it back to
1991 // the original IV using VPDerivedIVRecipe.
1992 // 5. If the overall min/max equals the starting min/max, the condition in
1993 // the loop was always false, due to being strict; return the start value
1994 // of FindLastIVPhiR in that case.
1995 //
1996 // For example, we transforms two independent reduction result computations
1997 // for
1998 //
1999 // <x1> vector loop: {
2000 // vector.body:
2001 // ...
2002 // ir<%iv> = WIDEN-INDUCTION nuw nsw ir<10>, ir<1>, vp<%0>
2003 // WIDEN-REDUCTION-PHI ir<%min.idx> = phi ir<sentinel.min.start>,
2004 // ir<%min.idx.next>
2005 // WIDEN-REDUCTION-PHI ir<%min.val> = phi ir<100>, ir<%min.val.next>
2006 // ....
2007 // WIDEN-INTRINSIC ir<%min.val.next> = call llvm.umin(ir<%min.val>, ir<%l>)
2008 // WIDEN ir<%min.idx.next> = select ir<%cmp>, ir<%iv>, ir<%min.idx>
2009 // ...
2010 // }
2011 // Successor(s): middle.block
2012 //
2013 // middle.block:
2014 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
2015 // vp<%min.result> = compute-reduction-result (umin) ir<%min.val.next>
2016 // vp<%cmp> = icmp ne vp<%iv.rdx>, ir<sentinel.min.start>
2017 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<10>
2018 //
2019 //
2020 // Into:
2021 //
2022 // vp<%reduced.min> = compute-reduction-result (umin) ir<%min.val.next>
2023 // vp<%reduced.mins.mask> = icmp eq ir<%min.val.next>, vp<%reduced.min>
2024 // vp<%idxs2reduce> = select vp<%reduced.mins.mask>, ir<%min.idx.next>,
2025 // ir<MaxUInt>
2026 // vp<%reduced.idx> = compute-reduction-result (umin) vp<%idxs2reduce>
2027 // vp<%scaled.idx> = DERIVED-IV ir<20> + vp<%reduced.idx> * ir<1>
2028 // vp<%always.false> = icmp eq vp<%reduced.min>, ir<100>
2029 // vp<%final.idx> = select vp<%always.false>, ir<10>,
2030 // vp<%scaled.idx>
2031
2032 VPBuilder Builder(FindIVRdxResult);
2033 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
2034 auto *FinalMinOrMaxCmp =
2035 Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
2036 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
2037 VPValue *MaxIV =
2038 Plan.getConstantInt(APInt::getMaxValue(Ty->getIntegerBitWidth()));
2039 auto *FinalIVSelect =
2040 Builder.createSelect(FinalMinOrMaxCmp, LastIVExiting, MaxIV);
2041 VPIRFlags RdxFlags(RecurKind::UMin, false, false, FastMathFlags());
2042 VPSingleDefRecipe *FinalCanIV = Builder.createNaryOp(
2043 VPInstruction::ComputeReductionResult, {FinalIVSelect}, RdxFlags,
2044 FindIVRdxResult->getDebugLoc());
2045
2046 // If we used a new wide canonical IV convert the reduction result back to the
2047 // original IV scale before the final select.
2048 if (!WideIV->isCanonical()) {
2049 auto *DerivedIVRecipe = new VPDerivedIVRecipe(
2051 nullptr, // No FPBinOp for integer induction
2052 WideIV->getStartValue(), FinalCanIV, WideIV->getStepValue());
2053 DerivedIVRecipe->insertBefore(&*Builder.getInsertPoint());
2054 FinalCanIV = DerivedIVRecipe;
2055 }
2056
2057 // If the final min/max value matches its start value, the condition in the
2058 // loop was always false, i.e. no induction value has been selected. If that's
2059 // the case, set the result of the IV reduction to its start value.
2060 VPValue *AlwaysFalse = Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxResult,
2061 MinOrMaxPhiR->getStartValue());
2062 VPValue *FinalIV = Builder.createSelect(
2063 AlwaysFalse, FindIVSelect->getOperand(2), FinalCanIV);
2064 FindIVSelect->replaceAllUsesWith(FinalIV);
2065
2066 // Erase the old FindIV result pattern which is now dead.
2067 FindIVSelect->eraseFromParent();
2068 FindIVCmp->eraseFromParent();
2069 FindIVRdxResult->eraseFromParent();
2070 return true;
2071}
2072
2075 Loop *TheLoop) {
2076 for (auto &PhiR : make_early_inc_range(
2078 auto *MinOrMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
2079 // TODO: check for multi-uses in VPlan directly.
2080 if (!MinOrMaxPhiR || !MinOrMaxPhiR->hasUsesOutsideReductionChain())
2081 continue;
2082
2083 // MinOrMaxPhiR has users outside the reduction cycle in the loop. Check if
2084 // the only other user is a FindLastIV reduction. MinOrMaxPhiR must have
2085 // exactly 2 users:
2086 // 1) the min/max operation of the reduction cycle, and
2087 // 2) the compare of a FindLastIV reduction cycle. This compare must match
2088 // the min/max operation - comparing MinOrMaxPhiR with the operand of the
2089 // min/max operation, and be used only by the select of the FindLastIV
2090 // reduction cycle.
2091 RecurKind RdxKind = MinOrMaxPhiR->getRecurrenceKind();
2092 assert(
2094 "only min/max recurrences support users outside the reduction chain");
2095
2096 auto *MinOrMaxOp =
2097 dyn_cast<VPRecipeWithIRFlags>(MinOrMaxPhiR->getBackedgeValue());
2098 if (!MinOrMaxOp)
2099 return false;
2100
2101 // Check that MinOrMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
2102 // with an intrinsic that matches the reduction kind.
2103 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
2104 if (!match(MinOrMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
2105 return false;
2106
2107 // MinOrMaxOp must have 2 users: 1) MinOrMaxPhiR and 2)
2108 // ComputeReductionResult.
2109 assert(MinOrMaxOp->getNumUsers() == 2 &&
2110 "MinOrMaxOp must have exactly 2 users");
2111 VPValue *MinOrMaxOpValue = MinOrMaxOp->getOperand(0);
2112 if (MinOrMaxOpValue == MinOrMaxPhiR)
2113 MinOrMaxOpValue = MinOrMaxOp->getOperand(1);
2114
2115 VPValue *CmpOpA;
2116 VPValue *CmpOpB;
2117 CmpPredicate Pred;
2119 MinOrMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
2120 if (!Cmp || Cmp->getNumUsers() != 1 ||
2121 (CmpOpA != MinOrMaxOpValue && CmpOpB != MinOrMaxOpValue))
2122 return false;
2123
2124 if (MinOrMaxOpValue != CmpOpB)
2125 Pred = CmpInst::getSwappedPredicate(Pred);
2126
2127 // MinOrMaxPhiR must have exactly 2 users:
2128 // * MinOrMaxOp,
2129 // * Cmp (that's part of a FindLastIV chain).
2130 if (MinOrMaxPhiR->getNumUsers() != 2)
2131 return false;
2132
2133 VPInstruction *MinOrMaxResult =
2135 assert(is_contained(MinOrMaxPhiR->users(), MinOrMaxOp) &&
2136 "one user must be MinOrMaxOp");
2137 assert(MinOrMaxResult && "MinOrMaxResult must be a user of MinOrMaxOp");
2138
2139 // Cmp must be used by the select of a FindLastIV chain.
2140 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
2141 VPValue *IVOp, *FindIV;
2142 if (!Sel || Sel->getNumUsers() != 2 ||
2143 !match(Sel,
2145 return false;
2146
2148 std::swap(FindIV, IVOp);
2149 Pred = CmpInst::getInversePredicate(Pred);
2150 }
2151
2152 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
2154 FindIVPhiR->getRecurrenceKind()))
2155 return false;
2156
2157 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
2158 "cannot handle inloop/ordered reductions yet");
2159
2160 // Check if FindIVPhiR is a FindLast pattern by checking the MinMaxKind
2161 // on its ComputeReductionResult. SMax/UMax indicates FindLast.
2162 VPInstruction *FindIVResult =
2164 FindIVPhiR->getBackedgeValue());
2165 assert(FindIVResult &&
2166 "must be able to retrieve the FindIVResult VPInstruction");
2167 RecurKind FindIVMinMaxKind = FindIVResult->getRecurKind();
2168 if (FindIVMinMaxKind != RecurKind::SMax &&
2169 FindIVMinMaxKind != RecurKind::UMax)
2170 return false;
2171
2172 // TODO: Support cases where IVOp is the IV increment.
2173 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
2175 return false;
2176
2177 // Check if the predicate is compatible with the reduction kind.
2178 bool IsValidKindPred = [RdxKind, Pred]() {
2179 switch (RdxKind) {
2180 case RecurKind::UMin:
2181 return Pred == CmpInst::ICMP_UGE || Pred == CmpInst::ICMP_UGT;
2182 case RecurKind::UMax:
2183 return Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT;
2184 case RecurKind::SMax:
2185 return Pred == CmpInst::ICMP_SLE || Pred == CmpInst::ICMP_SLT;
2186 case RecurKind::SMin:
2187 return Pred == CmpInst::ICMP_SGE || Pred == CmpInst::ICMP_SGT;
2188 default:
2189 llvm_unreachable("unhandled recurrence kind");
2190 }
2191 }();
2192 if (!IsValidKindPred) {
2193 ORE->emit([&]() {
2195 DEBUG_TYPE, "VectorizationMultiUseReductionPredicate",
2196 TheLoop->getStartLoc(), TheLoop->getHeader())
2197 << "Multi-use reduction with predicate "
2199 << " incompatible with reduction kind";
2200 });
2201 return false;
2202 }
2203
2204 auto *FindIVSelect = findFindIVSelect(FindIVPhiR->getBackedgeValue());
2205 auto *FindIVCmp = FindIVSelect->getOperand(0)->getDefiningRecipe();
2206 auto *FindIVRdxResult = cast<VPInstruction>(FindIVCmp->getOperand(0));
2207 assert(FindIVSelect->getParent() == MinOrMaxResult->getParent() &&
2208 "both results must be computed in the same block");
2209 // Reducing to a scalar min or max value is placed right before reducing to
2210 // its scalar iteration, in order to generate instructions that use both
2211 // their operands.
2212 MinOrMaxResult->moveBefore(*FindIVRdxResult->getParent(),
2213 FindIVRdxResult->getIterator());
2214
2215 bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred);
2216 if (IsStrictPredicate) {
2217 if (!handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR,
2219 MinOrMaxResult, FindIVSelect, FindIVCmp,
2220 FindIVRdxResult))
2221 return false;
2222 continue;
2223 }
2224
2225 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
2226 // result:
2227 // 1. We need to find the last IV for which the condition based on the
2228 // min/max recurrence is true,
2229 // 2. Compare the partial min/max reduction result to its final value and,
2230 // 3. Select the lanes of the partial FindLastIV reductions which
2231 // correspond to the lanes matching the min/max reduction result.
2232 //
2233 // For example, this transforms
2234 // vp<%min.result> = compute-reduction-result ir<%min.val.next>
2235 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
2236 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
2237 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
2238 //
2239 // into:
2240 //
2241 // vp<min.result> = compute-reduction-result ir<%min.val.next>
2242 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
2243 // vp<%final.iv> = select vp<%final.min.cmp>, vp<%min.idx.next>, SENTINEL
2244 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%final.iv>
2245 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
2246 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
2247 //
2248 VPBuilder B(FindIVRdxResult);
2249 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
2250 auto *FinalMinOrMaxCmp =
2251 B.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
2252 VPValue *Sentinel = FindIVCmp->getOperand(1);
2253 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
2254 auto *FinalIVSelect =
2255 B.createSelect(FinalMinOrMaxCmp, LastIVExiting, Sentinel);
2256 FindIVRdxResult->setOperand(0, FinalIVSelect);
2257 }
2258 return true;
2259}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
#define _
iv Induction Variable Users
Definition IVUsers.cpp:48
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition MD5.cpp:57
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file defines less commonly used SmallVector utilities.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static bool handleFirstArgMinOrMax(VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR, VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV, VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect, VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult)
Given a first argmin/argmax pattern with strict predicate consisting of 1) a MinOrMax reduction MinOr...
static VPHeaderPHIRecipe * createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPIRValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, DebugLoc DL)
Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe for Phi based on IndDesc.
static void insertCheckBlockBeforeVectorLoop(VPlan &Plan, VPBasicBlock *CheckBlockVPBB)
Insert CheckBlockVPBB on the edge leading to the vector preheader, connecting it to both vector and s...
static void simplifyLiveInsWithSCEV(VPlan &Plan, PredicatedScalarEvolution &PSE)
Check Plan's live-in and replace them with constants, if they can be simplified via SCEV.
static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB, VPValue *Cond, bool AddBranchWeights)
Create a BranchOnCond terminator in CheckBlockVPBB.
static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, const VPDominatorTree &VPDT)
Try to sink users of FOR after Previous.
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, const VPDominatorTree &VPDT)
Try to hoist Previous and its operands before all users of FOR.
static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Check if all loads in the loop are dereferenceable.
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB, DebugLoc DL)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static VPInstruction * findFindIVSelect(VPValue *BackedgeVal)
Find and return the final select instruction of the FindIV result pattern for the given BackedgeVal: ...
static bool tryToSinkOrHoistRecurrenceUsers(VPBasicBlock *HeaderVPBB, const VPDominatorTree &VPDT)
Sink users of fixed-order recurrences past or hoist before the recipe defining the previous value,...
static constexpr uint32_t CheckBypassWeights[]
static void printAfterInitialConstruction(VPlan &)
To make RUN_VPLAN_PASS print initial VPlan.
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS_NO_VERIFY(PASS,...)
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:750
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
static LLVM_ABI StringRef getPredicateName(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:151
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
static FastMathFlags getFast()
Definition FMF.h:50
static bool isLT(Predicate P)
Return true if the predicate is SLT or ULT.
static bool isGT(Predicate P)
Return true if the predicate is SGT or UGT.
A struct for saving information about induction variables.
InductionKind getKind() const
const SCEV * getStep() const
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Value * getStartValue() const
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
LLVM_ABI std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:659
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1069
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
iterator find(const KeyT &Key)
Definition MapVector.h:156
iterator end()
Definition MapVector.h:69
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEVPredicate & getPredicate() const
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
FastMathFlags getFastMathFlags() const
static bool isFPMinMaxNumRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point minnum/maxnum kind.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
This class represents an analyzed expression in the program.
static constexpr auto FlagNUW
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
op_range operands()
Definition User.h:267
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4404
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4479
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4431
iterator end()
Definition VPlan.h:4441
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4439
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4492
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:560
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:639
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4470
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:94
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:315
VPRegionBlock * getParent()
Definition VPlan.h:186
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:236
void setName(const Twine &newName)
Definition VPlan.h:179
size_t getNumSuccessors() const
Definition VPlan.h:237
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:337
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:306
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:222
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:297
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:233
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:329
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:216
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:286
void setParent(VPRegionBlock *P)
Definition VPlan.h:197
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:227
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:211
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:222
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition VPlanUtils.h:362
static VPBasicBlock * getPlainCFGMiddleBlock(const VPlan &Plan)
Returns the middle block of Plan in plain CFG form (before regions are formed).
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:270
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:288
static std::pair< VPBasicBlock *, VPBasicBlock * > getPlainCFGHeaderAndLatch(const VPlan &Plan)
Returns the header and latch of the outermost loop of Plan in plain CFG form (before regions are form...
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:308
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition VPlanValue.h:561
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition VPlanValue.h:534
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:4182
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B) const
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2436
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2483
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2472
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4557
Class to record and manage LLVM IR flags.
Definition VPlan.h:695
RecurKind getRecurKind() const
Definition VPlan.h:1058
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1328
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1319
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1332
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1272
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
Definition VPlan.h:1665
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
VPBasicBlock * getParent()
Definition VPlan.h:477
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:555
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
Definition VPlanValue.h:337
A recipe for handling reduction phis.
Definition VPlan.h:2864
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2924
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2908
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2927
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3233
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4614
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4734
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4739
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4726
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
Definition VPlanValue.h:234
Lightweight SCEV-to-VPlan expander.
Definition VPlanUtils.h:190
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:609
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
operand_range operands()
Definition VPlanValue.h:457
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:430
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:425
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:208
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1481
unsigned getNumUsers() const
Definition VPlanValue.h:115
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1487
user_range users()
Definition VPlanValue.h:157
A Recipe for widening the canonical induction variable of the vector loop.
Definition VPlan.h:4125
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2567
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2587
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2623
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2670
bool isCanonical() const
Returns true if the induction is canonical, i.e.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4762
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:5087
LLVMContext & getContext() const
Definition VPlan.h:4963
VPBasicBlock * getEntry()
Definition VPlan.h:4858
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4921
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4942
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:5058
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4961
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:5090
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4911
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4951
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:5035
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:5124
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1068
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4928
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4887
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:5113
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1318
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:5055
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4863
bool hasScalarVFOnly() const
Definition VPlan.h:5003
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4901
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4907
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4954
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:5168
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5069
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
bool matchFindIVResult(VPInstruction *VPI, Op0_t ReducedIV, Op1_t Start)
Match FindIV result pattern: select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),...
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
Definition VPlanUtils.h:123
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
Definition VPlanUtils.h:140
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
SmallVector< VPBasicBlock * > vp_rpo_plain_cfg_loop_body(VPBasicBlock *Header)
Returns the VPBasicBlocks forming the loop body of a plain (pre-region) VPlan in reverse post-order s...
Definition VPlanCFG.h:265
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2851
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr size_t range_size(R &&Range)
Returns the size of the Range, i.e., the number of elements.
Definition STLExtras.h:1693
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:79
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
Definition VPlan.h:84
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:551
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2145
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition Loads.cpp:300
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
A recipe for handling first-order recurrence phis.
Definition VPlan.h:2802
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
Type * getType() const
Returns the scalar type of this symbolic value.
Definition VPlanValue.h:294
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan, DebugLoc DL)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static bool createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const VPDominatorTree &VPDT, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
static bool finalizeSCEVPredicates(VPlan &Plan, PredicatedScalarEvolution &PSE, bool OptForSize, unsigned SCEVCheckThreshold, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Finalize SCEV predicates by adding induction predicates from Plan to PSE and checking constraints.
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be ...
static bool handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock)
static void attachVPCheckBlock(VPlan &Plan, VPValue *Cond, VPBasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...