LLVM 23.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/CFG.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Value.h"
44#include "llvm/Support/Debug.h"
50#include <cassert>
51#include <string>
52
53using namespace llvm;
54using namespace llvm::VPlanPatternMatch;
55
56namespace llvm {
58} // namespace llvm
59
60/// @{
61/// Metadata attribute names
62const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
64 "llvm.loop.vectorize.followup_vectorized";
66 "llvm.loop.vectorize.followup_epilogue";
67/// @}
68
70
72
74 "vplan-print-in-dot-format", cl::Hidden,
75 cl::desc("Use dot format instead of plain text when dumping VPlans"));
76
77#define DEBUG_TYPE "loop-vectorize"
78
79#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
81 const VPBasicBlock *Parent = R.getParent();
82 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
83 R.print(OS, "", SlotTracker);
84 return OS;
85}
86#endif
87
89 const ElementCount &VF) const {
90 switch (LaneKind) {
92 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
93 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
94 Builder.getInt32(VF.getKnownMinValue() - Lane));
96 return Builder.getInt64(Lane);
97 }
98 llvm_unreachable("Unknown lane kind");
99}
100
101#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
103 if (const VPRecipeBase *R = getDefiningRecipe())
104 R->print(OS, "", SlotTracker);
105 else
107}
108
109void VPValue::dump() const {
110 const VPRecipeBase *Instr = getDefiningRecipe();
112 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
114 dbgs() << "\n";
115}
116
117void VPRecipeBase::dump() const {
118 VPSlotTracker SlotTracker(getParent() ? getParent()->getPlan() : nullptr);
119 print(dbgs(), "", SlotTracker);
120 dbgs() << "\n";
121}
122#endif
123
124#if !defined(NDEBUG)
125bool VPRecipeValue::isDefinedBy(const VPDef *D) const {
126 return getDefiningRecipe() == D;
127}
128#endif
129
131 auto *RecipeValue = dyn_cast<VPRecipeValue>(this);
132 if (!RecipeValue)
133 return nullptr;
134 if (auto *MultiDef = dyn_cast<VPMultiDefValue>(RecipeValue))
135 return MultiDef->getDef();
136 return static_cast<VPSingleDefRecipe *>(RecipeValue);
137}
138
140 return const_cast<VPValue *>(this)->getDefiningRecipe();
141}
142
144 return cast<VPIRValue>(this)->getValue();
145}
146
148
150 assert(Users.empty() &&
151 "trying to delete a VPRecipeValue with remaining users");
152 getDefiningRecipe()->removeDefinedValue(this);
153}
154
157 assert(Def && "VPSingleDefValue requires a defining recipe");
158 Def->addDefinedValue(this);
159}
160
162 : VPRecipeValue(VPVMultiDefValueSC, UV), Def(Def) {
163 assert(Def && "VPMultiDefValue requires a defining recipe");
164 Def->addDefinedValue(this);
165}
166
167// Get the top-most entry block of \p Start. This is the entry block of the
168// containing VPlan. This function is templated to support both const and non-const blocks
169template <typename T> static T *getPlanEntry(T *Start) {
170 T *Next = Start;
171 T *Current = Start;
172 while ((Next = Next->getParent()))
173 Current = Next;
174
175 SmallSetVector<T *, 8> WorkList;
176 WorkList.insert(Current);
177
178 for (unsigned i = 0; i < WorkList.size(); i++) {
179 T *Current = WorkList[i];
180 if (!Current->hasPredecessors())
181 return Current;
182 auto &Predecessors = Current->getPredecessors();
183 WorkList.insert_range(Predecessors);
184 }
185
186 llvm_unreachable("VPlan without any entry node without predecessors");
187}
188
189VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
190
191const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
192
193/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
200
207
208void VPBlockBase::setPlan(VPlan *ParentPlan) {
209 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
210 Plan = ParentPlan;
211}
212
213/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
215 const VPBlockBase *Block = this;
217 Block = Region->getExiting();
219}
220
227
229 if (!Successors.empty() || !Parent)
230 return this;
231 assert(Parent->getExiting() == this &&
232 "Block w/o successors not the exiting block of its parent.");
233 return Parent->getEnclosingBlockWithSuccessors();
234}
235
237 if (!Predecessors.empty() || !Parent)
238 return this;
239 assert(Parent->getEntry() == this &&
240 "Block w/o predecessors not the entry of its parent.");
241 return Parent->getEnclosingBlockWithPredecessors();
242}
243
245 iterator It = begin();
246 while (It != end() && It->isPhi())
247 It++;
248 return It;
249}
250
258
259Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
261 return Def->getUnderlyingValue();
262
263 if (hasScalarValue(Def, Lane))
264 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
265
266 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
268 return Data.VPV2Scalars[Def][0];
269 }
270
271 // Look through BuildVector to avoid redundant extracts.
272 // TODO: Remove once replicate regions are unrolled explicitly.
273 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
274 auto *BuildVector = cast<VPInstruction>(Def);
275 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
276 }
277
279 auto *VecPart = Data.VPV2Vector[Def];
280 if (!VecPart->getType()->isVectorTy()) {
281 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
282 return VecPart;
283 }
284 // TODO: Cache created scalar values.
285 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
286 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
287 // set(Def, Extract, Instance);
288 return Extract;
289}
290
291Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
292 if (NeedsScalar) {
293 assert((VF.isScalar() || isa<VPIRValue, VPSymbolicValue>(Def) ||
295 (hasScalarValue(Def, VPLane(0)) &&
296 Data.VPV2Scalars[Def].size() == 1)) &&
297 "Trying to access a single scalar per part but has multiple scalars "
298 "per part.");
299 return get(Def, VPLane(0));
300 }
301
302 // If Values have been set for this Def return the one relevant for \p Part.
303 if (hasVectorValue(Def))
304 return Data.VPV2Vector[Def];
305
306 auto GetBroadcastInstrs = [this](Value *V) {
307 if (VF.isScalar())
308 return V;
309 // Broadcast the scalar into all locations in the vector.
310 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
311 return Shuf;
312 };
313
314 if (!hasScalarValue(Def, {0})) {
315 Value *IRV = Def->getLiveInIRValue();
316 Value *B = GetBroadcastInstrs(IRV);
317 set(Def, B);
318 return B;
319 }
320
321 Value *ScalarValue = get(Def, VPLane(0));
322 // If we aren't vectorizing, we can just copy the scalar map values over
323 // to the vector map.
324 if (VF.isScalar()) {
325 set(Def, ScalarValue);
326 return ScalarValue;
327 }
328
329 bool IsSingleScalar = vputils::isSingleScalar(Def);
330 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
331
332 // We need to construct the vector value for a single-scalar value by
333 // broadcasting the scalar to all lanes.
334 // TODO: Replace by introducing Broadcast VPInstructions.
335 assert(IsSingleScalar && "must be a single-scalar at this point");
336 // Set the insert point after the last scalarized instruction or after the
337 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
338 // will directly follow the scalar definitions.
339 auto OldIP = Builder.saveIP();
340 auto *LastInst = cast<Instruction>(get(Def, LastLane));
341 auto NewIP = isa<PHINode>(LastInst)
342 ? LastInst->getParent()->getFirstNonPHIIt()
343 : std::next(BasicBlock::iterator(LastInst));
344 Builder.SetInsertPoint(&*NewIP);
345 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
346 set(Def, VectorValue);
347 Builder.restoreIP(OldIP);
348 return VectorValue;
349}
350
352 const DILocation *DIL = DL;
353 // When a FSDiscriminator is enabled, we don't need to add the multiply
354 // factors to the discriminators.
355 if (DIL &&
356 Builder.GetInsertBlock()
357 ->getParent()
358 ->shouldEmitDebugInfoForProfiling() &&
360 // FIXME: For scalable vectors, assume vscale=1.
361 unsigned UF = Plan->getConcreteUF();
362 auto NewDIL =
363 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
364 if (NewDIL)
365 Builder.SetCurrentDebugLocation(*NewDIL);
366 else
367 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
368 << DIL->getFilename() << " Line: " << DIL->getLine());
369 } else
370 Builder.SetCurrentDebugLocation(DL);
371}
372
374 Value *WideValue,
375 const VPLane &Lane) {
376 Value *ScalarInst = get(Def, Lane);
377 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
378 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
379 // We must handle each element of a vectorized struct type.
380 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
381 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
382 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
383 VectorValue =
384 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
385 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
386 }
387 } else {
388 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
389 }
390 return WideValue;
391}
392
393BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
394 auto &CFG = State.CFG;
395 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
396 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
397 BasicBlock *PrevBB = CFG.PrevBB;
398 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
399 PrevBB->getParent(), CFG.ExitBB);
400 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
401
402 return NewBB;
403}
404
406 auto &CFG = State.CFG;
407 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
408
409 // Register NewBB in its loop. In innermost loops its the same for all
410 // BB's.
411 Loop *ParentLoop = State.CurrentParentLoop;
412 // If this block has a sole successor that is an exit block or is an exit
413 // block itself then it needs adding to the same parent loop as the exit
414 // block.
415 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
416 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
417 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
418 ParentLoop = State.LI->getLoopFor(
419 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
420 }
421
422 if (ParentLoop && !State.LI->getLoopFor(NewBB))
423 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
424
426 if (VPBlockUtils::isHeader(this, State.VPDT)) {
427 // There's no block for the latch yet, connect to the preheader only.
428 Preds = {getPredecessors()[0]};
429 } else {
430 Preds = to_vector(getPredecessors());
431 }
432
433 // Hook up the new basic block to its predecessors.
434 for (VPBlockBase *PredVPBlock : Preds) {
435 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
436 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
437 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
438 "Predecessor basic-block not found building successor.");
439 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
440 auto *PredBBTerminator = PredBB->getTerminator();
441 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
442
443 if (isa<UnreachableInst>(PredBBTerminator)) {
444 assert(PredVPSuccessors.size() == 1 &&
445 "Predecessor ending w/o branch must have single successor.");
446 DebugLoc DL = PredBBTerminator->getDebugLoc();
447 PredBBTerminator->eraseFromParent();
448 auto *Br = UncondBrInst::Create(NewBB, PredBB);
449 Br->setDebugLoc(DL);
450 } else if (auto *UBI = dyn_cast<UncondBrInst>(PredBBTerminator)) {
451 UBI->setSuccessor(NewBB);
452 } else {
453 // Set each forward successor here when it is created, excluding
454 // backedges. A backward successor is set when the branch is created.
455 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
456 // in the original IR, except when the predecessor is the entry block.
457 // This enables including SCEV and memory runtime check blocks in VPlan.
458 // TODO: Remove exception by modeling the terminator of entry block using
459 // BranchOnCond.
460 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
461 auto *TermBr = cast<CondBrInst>(PredBBTerminator);
462 assert((!TermBr->getSuccessor(idx) ||
463 (isa<VPIRBasicBlock>(this) &&
464 (TermBr->getSuccessor(idx) == NewBB ||
465 PredVPBlock == getPlan()->getEntry()))) &&
466 "Trying to reset an existing successor block.");
467 TermBr->setSuccessor(idx, NewBB);
468 }
469 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
470 }
471}
472
475 "VPIRBasicBlock can have at most two successors at the moment!");
476 // Move completely disconnected blocks to their final position.
477 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
478 IRBB->moveAfter(State->CFG.PrevBB);
479 State->Builder.SetInsertPoint(IRBB->getTerminator());
480 State->CFG.PrevBB = IRBB;
481 State->CFG.VPBB2IRBB[this] = IRBB;
482 executeRecipes(State, IRBB);
483 // Create a branch instruction to terminate IRBB if one was not created yet
484 // and is needed.
485 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
486 auto *Br = State->Builder.CreateBr(IRBB);
487 Br->setOperand(0, nullptr);
488 IRBB->getTerminator()->eraseFromParent();
489 } else {
490 assert((getNumSuccessors() == 0 ||
491 isa<UncondBrInst, CondBrInst>(IRBB->getTerminator())) &&
492 "other blocks must be terminated by a branch");
493 }
494
495 connectToPredecessors(*State);
496}
497
498VPIRBasicBlock *VPIRBasicBlock::clone() {
499 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
500 for (VPRecipeBase &R : Recipes)
501 NewBlock->appendRecipe(R.clone());
502 return NewBlock;
503}
504
506 if (VPBlockUtils::isHeader(this, State->VPDT)) {
507 // Create and register the new vector loop.
508 Loop *PrevParentLoop = State->CurrentParentLoop;
509 State->CurrentParentLoop = State->LI->AllocateLoop();
510
511 // Insert the new loop into the loop nest and register the new basic blocks
512 // before calling any utilities such as SCEV that require valid LoopInfo.
513 if (PrevParentLoop)
514 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
515 else
516 State->LI->addTopLevelLoop(State->CurrentParentLoop);
517 }
518
519 // 1. Create an IR basic block.
520 BasicBlock *NewBB = createEmptyBasicBlock(*State);
521
522 State->Builder.SetInsertPoint(NewBB);
523 // Temporarily terminate with unreachable until CFG is rewired.
524 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
525 State->Builder.SetInsertPoint(Terminator);
526
527 State->CFG.PrevBB = NewBB;
528 State->CFG.VPBB2IRBB[this] = NewBB;
529 connectToPredecessors(*State);
530
531 // 2. Fill the IR basic block with IR instructions.
532 executeRecipes(State, NewBB);
533
534 // If this block is a latch, update CurrentParentLoop.
535 if (VPBlockUtils::isLatch(this, State->VPDT))
536 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
537}
538
539VPBasicBlock *VPBasicBlock::clone() {
540 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
541 for (VPRecipeBase &R : *this)
542 NewBlock->appendRecipe(R.clone());
543 return NewBlock;
544}
545
547 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
548 << " in BB: " << BB->getName() << '\n');
549
550 State->CFG.PrevVPBB = this;
551
552 for (VPRecipeBase &Recipe : Recipes) {
553 State->setDebugLocFrom(Recipe.getDebugLoc());
554 Recipe.execute(*State);
555 }
556
557 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
558}
559
560VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
561 assert((SplitAt == end() || SplitAt->getParent() == this) &&
562 "can only split at a position in the same block");
563
564 // Create new empty block after the block to split.
565 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
567
568 // If this is the exiting block, make the split the new exiting block.
569 auto *ParentRegion = getParent();
570 if (ParentRegion && ParentRegion->getExiting() == this)
571 ParentRegion->setExiting(SplitBlock);
572
573 // Finally, move the recipes starting at SplitAt to new block.
574 for (VPRecipeBase &ToMove :
575 make_early_inc_range(make_range(SplitAt, this->end())))
576 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
577
578 return SplitBlock;
579}
580
581/// Return the enclosing loop region for region \p P. The templated version is
582/// used to support both const and non-const block arguments.
583template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
584 if (P && P->isReplicator()) {
585 P = P->getParent();
586 // Multiple loop regions can be nested, but replicate regions can only be
587 // nested inside a loop region or must be outside any other region.
588 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
589 }
590 return P;
591}
592
596
600
601static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
602 if (VPBB->empty()) {
603 assert(
604 VPBB->getNumSuccessors() < 2 &&
605 "block with multiple successors doesn't have a recipe as terminator");
606 return false;
607 }
608
609 const VPRecipeBase *R = &VPBB->back();
610 [[maybe_unused]] bool IsSwitch =
612 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
613 [[maybe_unused]] bool IsBranchOnTwoConds = match(R, m_BranchOnTwoConds());
614 [[maybe_unused]] bool IsCondBranch =
617 if (VPBB->getNumSuccessors() == 2 ||
618 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
619 assert((IsCondBranch || IsSwitch || IsBranchOnTwoConds) &&
620 "block with multiple successors not terminated by "
621 "conditional branch nor switch recipe");
622
623 return true;
624 }
625
626 if (VPBB->getNumSuccessors() > 2) {
627 assert((IsSwitch || IsBranchOnTwoConds) &&
628 "block with more than 2 successors not terminated by a switch or "
629 "branch-on-two-conds recipe");
630 return true;
631 }
632
633 assert(
634 !IsCondBranch && !IsBranchOnTwoConds &&
635 "block with 0 or 1 successors terminated by conditional branch recipe");
636 return false;
637}
638
640 if (hasConditionalTerminator(this))
641 return &back();
642 return nullptr;
643}
644
646 if (hasConditionalTerminator(this))
647 return &back();
648 return nullptr;
649}
650
652 return getParent() && getParent()->getExitingBasicBlock() == this;
653}
654
655#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
660
661void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
662 if (!hasSuccessors()) {
663 O << Indent << "No successors\n";
664 } else {
665 O << Indent << "Successor(s): ";
666 ListSeparator LS;
667 for (auto *Succ : getSuccessors())
668 O << LS << Succ->getName();
669 O << '\n';
670 }
671}
672
673void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
674 VPSlotTracker &SlotTracker) const {
675 O << Indent << getName() << ":\n";
676
677 auto RecipeIndent = Indent + " ";
678 for (const VPRecipeBase &Recipe : *this) {
679 Recipe.print(O, RecipeIndent, SlotTracker);
680 O << '\n';
681 }
682
683 printSuccessors(O, Indent);
684}
685#endif
686
687std::pair<VPBlockBase *, VPBlockBase *>
690 VPBlockBase *Exiting = nullptr;
691 bool InRegion = Entry->getParent();
692 // First, clone blocks reachable from Entry.
693 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
694 VPBlockBase *NewBB = BB->clone();
695 Old2NewVPBlocks[BB] = NewBB;
696 if (InRegion && BB->getNumSuccessors() == 0) {
697 assert(!Exiting && "Multiple exiting blocks?");
698 Exiting = BB;
699 }
700 }
701 assert((!InRegion || Exiting) && "regions must have a single exiting block");
702
703 // Second, update the predecessors & successors of the cloned blocks.
704 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
705 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
707 for (VPBlockBase *Pred : BB->getPredecessors()) {
708 NewPreds.push_back(Old2NewVPBlocks[Pred]);
709 }
710 NewBB->setPredecessors(NewPreds);
712 for (VPBlockBase *Succ : BB->successors()) {
713 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
714 }
715 NewBB->setSuccessors(NewSuccs);
716 }
717
718#if !defined(NDEBUG)
719 // Verify that the order of predecessors and successors matches in the cloned
720 // version.
721 for (const auto &[OldBB, NewBB] :
723 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
724 for (const auto &[OldPred, NewPred] :
725 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
726 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
727
728 for (const auto &[OldSucc, NewSucc] :
729 zip(OldBB->successors(), NewBB->successors()))
730 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
731 }
732#endif
733
734 return std::make_pair(Old2NewVPBlocks[Entry],
735 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
736}
737
738VPRegionBlock *VPRegionBlock::clone() {
739 const auto &[NewEntry, NewExiting] = VPBlockUtils::cloneFrom(getEntry());
740 VPlan &Plan = *getPlan();
741 VPRegionValue *CanIV = getCanonicalIV();
742 VPRegionBlock *NewRegion =
743 CanIV ? Plan.createLoopRegion(CanIV->getType(), CanIV->getDebugLoc(),
744 getName(), NewEntry, NewExiting)
745 : Plan.createReplicateRegion(NewEntry, NewExiting, getName());
746
747 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
748 Block->setParent(NewRegion);
749 return NewRegion;
750}
751
753 llvm_unreachable("regions must get dissolved before ::execute");
754}
755
758 for (VPRecipeBase &R : Recipes)
759 Cost += R.cost(VF, Ctx);
760 return Cost;
761}
762
763const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
764 const VPBlockBase *Pred = nullptr;
765 if (hasPredecessors()) {
766 Pred = getPredecessors()[Idx];
767 } else {
768 auto *Region = getParent();
769 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
770 "must be in the entry block of a non-replicate region");
771 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
772 "loop region has a single predecessor (preheader), its entry block "
773 "has 2 incoming blocks");
774
775 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
776 // region itself whose exiting block feeds the phi across the backedge.
777 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
778 }
779 return Pred->getExitingBasicBlock();
780}
781
783 if (!isReplicator()) {
784 // Neglect the cost of canonical IV, matching the legacy cost model.
787 Cost += Block->cost(VF, Ctx);
788 InstructionCost BackedgeCost =
789 ForceTargetInstructionCost.getNumOccurrences()
791 : Ctx.TTI.getCFInstrCost(Instruction::UncondBr, Ctx.CostKind);
792 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
793 << ": vector loop backedge\n");
794 Cost += BackedgeCost;
795 return Cost;
796 }
797
798 // Compute the cost of a replicate region. Replicating isn't supported for
799 // scalable vectors, return an invalid cost for them.
800 // TODO: Discard scalable VPlans with replicate recipes earlier after
801 // construction.
802 if (VF.isScalable())
804
805 // Compute and return the cost of the conditionally executed recipes.
806 assert(VF.isVector() && "Can only compute vector cost at the moment.");
808 return Then->cost(VF, Ctx);
809}
810
811#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
813 VPSlotTracker &SlotTracker) const {
814 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
815 auto NewIndent = Indent + " ";
816 if (auto *CanIV = getCanonicalIV()) {
817 O << '\n';
818 CanIV->print(O, SlotTracker);
819 O << " = CANONICAL-IV\n";
820 }
821 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
822 O << '\n';
823 BlockBase->print(O, NewIndent, SlotTracker);
824 }
825 O << Indent << "}\n";
826
827 printSuccessors(O, Indent);
828}
829#endif
830
832 auto *Header = cast<VPBasicBlock>(getEntry());
833 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
834 auto *CanIV = getCanonicalIV();
835 if (CanIV->getNumUsers() > 0) {
836 VPlan &Plan = *getPlan();
837 auto *Zero = Plan.getZero(CanIV->getType());
838 DebugLoc DL = CanIV->getDebugLoc();
840 VPBuilder HeaderBuilder(Header, Header->begin());
841 auto *ScalarR =
842 HeaderBuilder.createScalarPhi({Zero, CanIVInc}, DL, "index");
843 CanIV->replaceAllUsesWith(ScalarR);
844 }
845
846 VPBlockBase *Preheader = getSinglePredecessor();
847 VPBlockUtils::disconnectBlocks(Preheader, this);
848
849 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
850 VPB->setParent(getParent());
851
852 VPBlockUtils::connectBlocks(Preheader, Header);
853 VPBlockUtils::transferSuccessors(this, ExitingLatch);
854 VPBlockUtils::connectBlocks(ExitingLatch, Header);
855}
856
858 // TODO: Represent the increment as VPRegionValue as well.
859 VPRegionValue *CanIV = getCanonicalIV();
860 assert(CanIV && "Expected a canonical IV");
861
862 if (auto *Inc = vputils::findCanonicalIVIncrement(*getPlan()))
863 return Inc;
864
865 assert(!getPlan()->getVFxUF().isMaterialized() &&
866 "VFxUF can be used only before it is materialized.");
867 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
868 return VPBuilder(ExitingLatch->getTerminator())
869 .createOverflowingOp(Instruction::Add, {CanIV, &getPlan()->getVFxUF()},
870 {hasCanonicalIVNUW(), /* HasNSW */ false},
871 CanIV->getDebugLoc(), "index.next");
872}
873
874VPlan::VPlan(Loop *L, Type *IdxTy)
875 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
876 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
877 ScalarHeader = createVPIRBasicBlock(L->getHeader());
878
879 SmallVector<BasicBlock *> IRExitBlocks;
880 L->getUniqueExitBlocks(IRExitBlocks);
881 for (BasicBlock *EB : IRExitBlocks)
882 ExitBlocks.push_back(createVPIRBasicBlock(EB));
883}
884
886 VPSymbolicValue DummyValue(nullptr);
887
888 for (auto *VPB : CreatedBlocks) {
889 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
890 // Replace all operands of recipes and all VPValues defined in VPBB with
891 // DummyValue so the block can be deleted.
892 for (VPRecipeBase &R : *VPBB) {
893 for (auto *Def : R.definedValues())
894 Def->replaceAllUsesWith(&DummyValue);
895
896 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
897 R.setOperand(I, &DummyValue);
898 }
899 } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
900 CanIV->replaceAllUsesWith(&DummyValue);
901 }
902
903 delete VPB;
904 }
905 for (VPValue *VPV : getLiveIns())
906 delete VPV;
907 delete BackedgeTakenCount;
908}
909
911 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
912 return VPIRBB->getIRBasicBlock() == IRBB;
913 });
914 assert(Iter != getExitBlocks().end() && "no exit block found");
915 return *Iter;
916}
917
919 return is_contained(ExitBlocks, VPBB);
920}
921
922/// To make RUN_VPLAN_PASS print final VPlan.
923static void printFinalVPlan(VPlan &) {}
924
925/// Generate the code inside the preheader and body of the vectorized loop.
926/// Assumes a single pre-header basic-block was created for this. Introduce
927/// additional basic-blocks as needed, and fill them all.
930 "all region blocks must be dissolved before ::execute");
931
932 // Initialize CFG state.
933 State->CFG.PrevVPBB = nullptr;
934 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
935
936 // Update VPDominatorTree since VPBasicBlock may be removed after State was
937 // constructed.
938 State->VPDT.recalculate(*this);
939
940 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
941 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
942 cast<UncondBrInst>(VectorPreHeader->getTerminator())->setSuccessor(nullptr);
943 State->CFG.DTU.applyUpdates(
944 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
945
946 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
947 << ", UF=" << getConcreteUF() << '\n');
948 setName("Final VPlan");
949 // TODO: RUN_VPLAN_PASS/VPlanTransforms::runPass should automatically dump
950 // VPlans after some specific stages when "-debug" is specified, but that
951 // hasn't been implemented yet. For now, just do both:
952 LLVM_DEBUG(dump());
954
955 BasicBlock *ScalarPh = State->CFG.ExitBB;
956 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
957 if (ScalarPhVPBB) {
958 // Disconnect scalar preheader and scalar header, as the dominator tree edge
959 // will be updated as part of VPlan execution. This allows keeping the DTU
960 // logic generic during VPlan execution.
961 State->CFG.DTU.applyUpdates(
962 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
963 }
965 Entry);
966 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
967 // successor blocks including the middle, exit and scalar preheader blocks.
968 for (VPBlockBase *Block : RPOT)
969 Block->execute(State);
970
971 if (hasEarlyExit()) {
972 // Fix up LoopInfo for extra dispatch blocks when vectorizing loops with
973 // early exits. For dispatch blocks, we need to find the smallest common
974 // loop of all successors that are in a loop. Note: we only need to update
975 // loop info for blocks after the middle block, but there is no easy way to
976 // get those at this point.
977 for (VPBlockBase *VPB : reverse(RPOT)) {
978 auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
979 if (!VPBB || isa<VPIRBasicBlock>(VPBB))
980 continue;
981 BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
982 Loop *L = State->LI->getLoopFor(BB);
983 if (!L || any_of(successors(BB),
984 [L](BasicBlock *Succ) { return L->contains(Succ); }))
985 continue;
986 // Find the innermost loop containing all successors that are in a loop.
987 // Successors not in any loop don't constrain the target loop.
988 Loop *Target = nullptr;
989 for (BasicBlock *Succ : successors(BB)) {
990 Loop *SuccLoop = State->LI->getLoopFor(Succ);
991 if (!SuccLoop)
992 continue;
993 if (!Target)
994 Target = SuccLoop;
995 else
996 Target = State->LI->getSmallestCommonLoop(Target, SuccLoop);
997 }
998 State->LI->removeBlock(BB);
999 if (Target)
1000 Target->addBasicBlockToLoop(BB, *State->LI);
1001 }
1002 }
1003
1004 // If the original loop is unreachable, delete it and all its blocks.
1005 if (!ScalarPhVPBB) {
1006 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
1007 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
1008 // IR instructions.
1009 for (VPIRBasicBlock *EB : getExitBlocks()) {
1010 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
1011 if (R.getNumOperands() == 1)
1012 R.eraseFromParent();
1013 }
1014 }
1015
1016 Loop *OrigLoop =
1017 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
1018 auto Blocks = OrigLoop->getBlocksVector();
1019 Blocks.push_back(ScalarPh);
1020 while (!OrigLoop->isInnermost())
1021 State->LI->erase(*OrigLoop->begin());
1022 State->LI->erase(OrigLoop);
1023 for (auto *BB : Blocks)
1024 State->LI->removeBlock(BB);
1025 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
1026 }
1027
1028 State->CFG.DTU.flush();
1029
1030 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
1031 if (!Header)
1032 return;
1033
1034 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
1035 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1036
1037 // Fix the latch value of canonical, reduction and first-order recurrences
1038 // phis in the vector loop.
1039 for (VPRecipeBase &R : Header->phis()) {
1040 // Skip phi-like recipes that generate their backedege values themselves.
1041 if (isa<VPWidenPHIRecipe>(&R))
1042 continue;
1043
1044 auto *PhiR = cast<VPSingleDefRecipe>(&R);
1045 // VPInstructions currently model scalar Phis only.
1046 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1048 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1049
1050 Value *Phi = State->get(PhiR, NeedsScalar);
1051 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1052 // not.
1053 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1054 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1055 }
1056}
1057
1059 // For now only return the cost of the vector loop region, ignoring any other
1060 // blocks, like the preheader or middle blocks, expect for checking them for
1061 // recipes with invalid costs.
1063
1064 // If the cost of the loop region is invalid or any recipe in the skeleton
1065 // outside loop regions are invalid return an invalid cost.
1068 [&VF, &Ctx](VPBasicBlock *VPBB) {
1069 return !VPBB->cost(VF, Ctx).isValid();
1070 }))
1072
1073 return Cost;
1074}
1075
1077 // TODO: Cache if possible.
1079 if (auto *R = dyn_cast<VPRegionBlock>(B))
1080 return R->isReplicator() ? nullptr : R;
1081 return nullptr;
1082}
1083
1086 if (auto *R = dyn_cast<VPRegionBlock>(B))
1087 return R->isReplicator() ? nullptr : R;
1088 return nullptr;
1089}
1090
1092 const VPRegionBlock *LoopRegion = getVectorLoopRegion();
1093 assert(LoopRegion && "expected a vector loop region");
1095 vp_depth_first_shallow(LoopRegion->getEntry())),
1096 [](const VPRegionBlock *R) { return !R->isReplicator(); });
1097}
1098
1099#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1102
1103 if (VF.getNumUsers() > 0) {
1104 O << "\nLive-in ";
1105 VF.printAsOperand(O, SlotTracker);
1106 O << " = VF";
1107 }
1108
1109 if (UF.getNumUsers() > 0) {
1110 O << "\nLive-in ";
1111 UF.printAsOperand(O, SlotTracker);
1112 O << " = UF";
1113 }
1114
1115 if (VFxUF.getNumUsers() > 0) {
1116 O << "\nLive-in ";
1117 VFxUF.printAsOperand(O, SlotTracker);
1118 O << " = VF * UF";
1119 }
1120
1121 if (VectorTripCount.getNumUsers() > 0) {
1122 O << "\nLive-in ";
1123 VectorTripCount.printAsOperand(O, SlotTracker);
1124 O << " = vector-trip-count";
1125 }
1126
1127 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1128 O << "\nLive-in ";
1129 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1130 O << " = backedge-taken count";
1131 }
1132
1133 O << "\n";
1134 if (TripCount) {
1135 if (isa<VPIRValue>(TripCount))
1136 O << "Live-in ";
1137 TripCount->printAsOperand(O, SlotTracker);
1138 O << " = original trip-count";
1139 O << "\n";
1140 }
1141}
1142
1146
1147 O << "VPlan '" << getName() << "' {";
1148
1149 printLiveIns(O);
1150
1152 RPOT(getEntry());
1153 for (const VPBlockBase *Block : RPOT) {
1154 O << '\n';
1155 Block->print(O, "", SlotTracker);
1156 }
1157
1158 O << "}\n";
1159}
1160
1161std::string VPlan::getName() const {
1162 std::string Out;
1163 raw_string_ostream RSO(Out);
1164 RSO << Name << " for ";
1165 if (!VFs.empty()) {
1166 RSO << "VF={" << VFs[0];
1167 for (ElementCount VF : drop_begin(VFs))
1168 RSO << "," << VF;
1169 RSO << "},";
1170 }
1171
1172 if (UFs.empty()) {
1173 RSO << "UF>=1";
1174 } else {
1175 RSO << "UF={" << UFs[0];
1176 for (unsigned UF : drop_begin(UFs))
1177 RSO << "," << UF;
1178 RSO << "}";
1179 }
1180
1181 return Out;
1182}
1183
1186 VPlanPrinter Printer(O, *this);
1187 Printer.dump();
1188}
1189
1191void VPlan::dump() const { print(dbgs()); }
1192#endif
1193
1194static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1195 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1196 // Update the operands of all cloned recipes starting at NewEntry. This
1197 // traverses all reachable blocks. This is done in two steps, to handle cycles
1198 // in PHI recipes.
1200 OldDeepRPOT(Entry);
1202 NewDeepRPOT(NewEntry);
1203 // First, collect all mappings from old to new VPValues defined by cloned
1204 // recipes.
1205 for (const auto &[OldBB, NewBB] :
1208 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1209 "blocks must have the same number of recipes");
1210 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1211 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1212 "recipes must have the same number of operands");
1213 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1214 "recipes must define the same number of operands");
1215 for (const auto &[OldV, NewV] :
1216 zip(OldR.definedValues(), NewR.definedValues()))
1217 Old2NewVPValues[OldV] = NewV;
1218 }
1219 }
1220
1221 // Update all operands to use cloned VPValues.
1222 for (VPBasicBlock *NewBB :
1224 for (VPRecipeBase &NewR : *NewBB)
1225 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1226 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1227 NewR.setOperand(I, NewOp);
1228 }
1229 }
1230}
1231
1233 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1234 // Clone blocks.
1235 const auto &[NewEntry, __] = VPBlockUtils::cloneFrom(Entry);
1236
1237 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1238 VPIRBasicBlock *NewScalarHeader = nullptr;
1239 if (getScalarHeader()->hasPredecessors()) {
1240 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1241 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1242 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1243 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1244 }));
1245 } else {
1246 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1247 }
1248 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1249 auto *NewPlan =
1250 new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader, getIndexType());
1251 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1252 for (VPIRValue *OldLiveIn : getLiveIns())
1253 Old2NewVPValues[OldLiveIn] = NewPlan->getOrAddLiveIn(OldLiveIn);
1254
1255 if (auto *TripCountIRV = dyn_cast_or_null<VPIRValue>(TripCount))
1256 Old2NewVPValues[TripCountIRV] = NewPlan->getOrAddLiveIn(TripCountIRV);
1257 // else NewTripCount will be created and inserted into Old2NewVPValues when
1258 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1259
1260 if (auto *LoopRegion = getVectorLoopRegion()) {
1261 auto *OldCanIV = LoopRegion->getCanonicalIV();
1262 auto *NewCanIV = NewPlan->getVectorLoopRegion()->getCanonicalIV();
1263 assert(OldCanIV && NewCanIV &&
1264 "Loop regions of both plans must have canonical IVs.");
1265 Old2NewVPValues[OldCanIV] = NewCanIV;
1266 }
1267
1268 assert(none_of(Old2NewVPValues.keys(), IsaPred<VPSymbolicValue>) &&
1269 "All VPSymbolicValues must be handled below");
1270
1271 if (BackedgeTakenCount)
1272 NewPlan->BackedgeTakenCount =
1273 new VPSymbolicValue(BackedgeTakenCount->getType());
1274
1275 // Map and propagate materialized state for symbolic values.
1276 for (auto [OldSV, NewSV] :
1277 {std::pair{&VectorTripCount, &NewPlan->VectorTripCount},
1278 {&VF, &NewPlan->VF},
1279 {&UF, &NewPlan->UF},
1280 {&VFxUF, &NewPlan->VFxUF},
1281 {BackedgeTakenCount, NewPlan->BackedgeTakenCount}}) {
1282 if (!OldSV)
1283 continue;
1284 Old2NewVPValues[OldSV] = NewSV;
1285 if (OldSV->isMaterialized())
1286 NewSV->markMaterialized();
1287 }
1288
1289 remapOperands(Entry, NewEntry, Old2NewVPValues);
1290
1291 // Initialize remaining fields of cloned VPlan.
1292 NewPlan->VFs = VFs;
1293 NewPlan->UFs = UFs;
1294 // TODO: Adjust names.
1295 NewPlan->Name = Name;
1296 if (TripCount) {
1297 assert(Old2NewVPValues.contains(TripCount) &&
1298 "TripCount must have been added to Old2NewVPValues");
1299 NewPlan->TripCount = Old2NewVPValues[TripCount];
1300 }
1301
1302 // Transfer all cloned blocks (the second half of all current blocks) from
1303 // current to new VPlan.
1304 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1305 for (unsigned I :
1306 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1307 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1308 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1309
1310 // Update ExitBlocks of the new plan.
1311 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1312 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1313 VPB != NewScalarHeader)
1314 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1315 }
1316
1317 return NewPlan;
1318}
1319
1321 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1322 CreatedBlocks.push_back(VPIRBB);
1323 return VPIRBB;
1324}
1325
1327 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1328 for (Instruction &I :
1329 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1330 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1331 return VPIRBB;
1332}
1333
1334#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1335
1336Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1337 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1338 Twine(getOrCreateBID(Block));
1339}
1340
1341Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1342 const std::string &Name = Block->getName();
1343 if (!Name.empty())
1344 return Name;
1345 return "VPB" + Twine(getOrCreateBID(Block));
1346}
1347
1349 Depth = 1;
1350 bumpIndent(0);
1351 OS << "digraph VPlan {\n";
1352 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1353 if (!Plan.getName().empty())
1354 OS << "\\n" << DOT::EscapeString(Plan.getName());
1355
1356 {
1357 // Print live-ins.
1358 std::string Str;
1359 raw_string_ostream SS(Str);
1360 Plan.printLiveIns(SS);
1362 StringRef(Str).rtrim('\n').split(Lines, "\n");
1363 for (auto Line : Lines)
1364 OS << DOT::EscapeString(Line.str()) << "\\n";
1365 }
1366
1367 OS << "\"]\n";
1368 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1369 OS << "edge [fontname=Courier, fontsize=30]\n";
1370 OS << "compound=true\n";
1371
1372 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1373 dumpBlock(Block);
1374
1375 OS << "}\n";
1376}
1377
1378void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1380 dumpBasicBlock(BasicBlock);
1382 dumpRegion(Region);
1383 else
1384 llvm_unreachable("Unsupported kind of VPBlock.");
1385}
1386
1387void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1388 bool Hidden, const Twine &Label) {
1389 // Due to "dot" we print an edge between two regions as an edge between the
1390 // exiting basic block and the entry basic of the respective regions.
1391 const VPBlockBase *Tail = From->getExitingBasicBlock();
1392 const VPBlockBase *Head = To->getEntryBasicBlock();
1393 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1394 OS << " [ label=\"" << Label << '\"';
1395 if (Tail != From)
1396 OS << " ltail=" << getUID(From);
1397 if (Head != To)
1398 OS << " lhead=" << getUID(To);
1399 if (Hidden)
1400 OS << "; splines=none";
1401 OS << "]\n";
1402}
1403
1404void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1405 auto &Successors = Block->getSuccessors();
1406 if (Successors.size() == 1)
1407 drawEdge(Block, Successors.front(), false, "");
1408 else if (Successors.size() == 2) {
1409 drawEdge(Block, Successors.front(), false, "T");
1410 drawEdge(Block, Successors.back(), false, "F");
1411 } else {
1412 unsigned SuccessorNumber = 0;
1413 for (auto *Successor : Successors)
1414 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1415 }
1416}
1417
1418void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1419 // Implement dot-formatted dump by performing plain-text dump into the
1420 // temporary storage followed by some post-processing.
1421 OS << Indent << getUID(BasicBlock) << " [label =\n";
1422 bumpIndent(1);
1423 std::string Str;
1424 raw_string_ostream SS(Str);
1425 // Use no indentation as we need to wrap the lines into quotes ourselves.
1426 BasicBlock->print(SS, "", SlotTracker);
1427
1428 // We need to process each line of the output separately, so split
1429 // single-string plain-text dump.
1431 StringRef(Str).rtrim('\n').split(Lines, "\n");
1432
1433 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1434 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1435 };
1436
1437 // Don't need the "+" after the last line.
1438 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1439 EmitLine(Line, " +\n");
1440 EmitLine(Lines.back(), "\n");
1441
1442 bumpIndent(-1);
1443 OS << Indent << "]\n";
1444
1445 dumpEdges(BasicBlock);
1446}
1447
1448void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1449 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1450 bumpIndent(1);
1451 OS << Indent << "fontname=Courier\n"
1452 << Indent << "label=\""
1453 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1454 << DOT::EscapeString(Region->getName()) << "\"\n";
1455
1456 if (auto *CanIV = Region->getCanonicalIV()) {
1457 OS << Indent << "\"";
1458 std::string Op;
1459 raw_string_ostream S(Op);
1460 CanIV->printAsOperand(S, SlotTracker);
1461 OS << DOT::EscapeString(Op);
1462 OS << " = CANONICAL-IV\"\n";
1463 }
1464
1465 // Dump the blocks of the region.
1466 assert(Region->getEntry() && "Region contains no inner blocks.");
1467 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1468 dumpBlock(Block);
1469 bumpIndent(-1);
1470 OS << Indent << "}\n";
1471 dumpEdges(Region);
1472}
1473
1474#endif
1475
1476/// Returns true if there is a vector loop region and \p VPV is defined in a
1477/// loop region.
1478static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1479 if (isa<VPRegionValue>(VPV))
1480 return true;
1481 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1482 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1484}
1485
1490 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1491 if (auto *SV = dyn_cast<VPSymbolicValue>(this))
1492 SV->markMaterialized();
1493}
1494
1496 VPValue *New,
1497 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1499 // Note that this early exit is required for correctness; the implementation
1500 // below relies on the number of users for this VPValue to decrease, which
1501 // isn't the case if this == New.
1502 if (this == New)
1503 return;
1504
1505 for (unsigned J = 0; J < getNumUsers();) {
1506 VPUser *User = Users[J];
1507 bool RemovedUser = false;
1508 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1509 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1510 continue;
1511
1512 RemovedUser = true;
1513 User->setOperand(I, New);
1514 }
1515 // If a user got removed after updating the current user, the next user to
1516 // update will be moved to the current position, so we only need to
1517 // increment the index if the number of users did not change.
1518 if (!RemovedUser)
1519 J++;
1520 }
1521}
1522
1524 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1525 if (getOperand(Idx) == From)
1526 setOperand(Idx, To);
1527 }
1528}
1529
1530#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1532 OS << Tracker.getOrCreateName(this);
1533}
1534
1537 Op->printAsOperand(O, SlotTracker);
1538 });
1539}
1540#endif
1541
1542void VPSlotTracker::assignName(const VPValue *V) {
1543 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1544 auto *UV = V->getUnderlyingValue();
1545 auto *VPI = dyn_cast_or_null<VPInstruction>(V);
1546 if (!UV && !(VPI && !VPI->getName().empty())) {
1547 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1548 NextSlot++;
1549 return;
1550 }
1551
1552 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1553 // appending ".Number" to the name if there are multiple uses.
1554 std::string Name;
1555 if (UV)
1556 Name = getName(UV);
1557 else
1558 Name = VPI->getName();
1559
1560 assert(!Name.empty() && "Name cannot be empty.");
1561 StringRef Prefix = UV ? "ir<" : "vp<%";
1562 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1563
1564 // First assign the base name for V.
1565 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1566 // Integer or FP constants with different types will result in the same string
1567 // due to stripping types.
1569 return;
1570
1571 // If it is already used by C > 0 other VPValues, increase the version counter
1572 // C and use it for V.
1573 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1574 if (!UseInserted) {
1575 C->second++;
1576 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1577 }
1578}
1579
1580void VPSlotTracker::assignNames(const VPlan &Plan) {
1581 if (Plan.VF.getNumUsers() > 0)
1582 assignName(&Plan.VF);
1583 if (Plan.UF.getNumUsers() > 0)
1584 assignName(&Plan.UF);
1585 if (Plan.VFxUF.getNumUsers() > 0)
1586 assignName(&Plan.VFxUF);
1587 assignName(&Plan.VectorTripCount);
1588 if (Plan.BackedgeTakenCount)
1589 assignName(Plan.BackedgeTakenCount);
1590 for (VPValue *LI : Plan.getLiveIns())
1591 assignName(LI);
1592
1593 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1594 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1595 for (const VPBlockBase *VPB : RPOT) {
1596 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB))
1597 assignNames(VPBB);
1598 else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
1599 assignName(CanIV);
1600 }
1601}
1602
1603void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1604 for (const VPRecipeBase &Recipe : *VPBB)
1605 for (VPValue *Def : Recipe.definedValues())
1606 assignName(Def);
1607}
1608
1609std::string VPSlotTracker::getName(const Value *V) {
1610 std::string Name;
1611 raw_string_ostream S(Name);
1612 if (V->hasName() || !isa<Instruction>(V)) {
1613 V->printAsOperand(S, false);
1614 return Name;
1615 }
1616
1617 if (!MST) {
1618 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1619 // instruction.
1620 auto *I = cast<Instruction>(V);
1621 // This check is required to support unit tests with incomplete IR.
1622 if (I->getParent()) {
1623 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1624 MST->incorporateFunction(*I->getFunction());
1625 } else {
1626 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1627 }
1628 }
1629 V->printAsOperand(S, false, *MST);
1630 return Name;
1631}
1632
1633std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1634 std::string Name = VPValue2Name.lookup(V);
1635 if (!Name.empty())
1636 return Name;
1637
1638 // If no name was assigned, no VPlan was provided when creating the slot
1639 // tracker or it is not reachable from the provided VPlan. This can happen,
1640 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1641 // in a debugger.
1642 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1643 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1644 // here.
1645 const VPRecipeBase *DefR = V->getDefiningRecipe();
1646 (void)DefR;
1647 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1648 "VPValue defined by a recipe in a VPlan?");
1649
1650 // Use the underlying value's name, if there is one.
1651 if (auto *UV = V->getUnderlyingValue()) {
1652 std::string Name;
1653 raw_string_ostream S(Name);
1654 UV->printAsOperand(S, false);
1655 return (Twine("ir<") + Name + ">").str();
1656 }
1657
1658 return "<badref>";
1659}
1660
1662 VPValue *TrueVal,
1663 VPValue *FalseVal, DebugLoc DL) {
1664 assert(VPTypeAnalysis(*getInsertBlock()->getPlan())
1665 .inferScalarType(ChainOp)
1666 ->isIntegerTy(1) &&
1667 "ChainOp must be i1 for AnyOf reduction");
1668 VPIRFlags Flags(RecurKind::Or, /*IsOrdered=*/false, /*IsInLoop=*/false,
1669 FastMathFlags());
1670 auto *OrReduce =
1672 auto *Freeze = createNaryOp(Instruction::Freeze, {OrReduce}, DL);
1673 return createSelect(Freeze, TrueVal, FalseVal, DL, "rdx.select");
1674}
1675
1677 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1678 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1679 bool PredicateAtRangeStart = Predicate(Range.Start);
1680
1681 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1682 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1683 Range.End = TmpVF;
1684 break;
1685 }
1686
1687 return PredicateAtRangeStart;
1688}
1689
1691 assert(count_if(VPlans,
1692 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1693 1 &&
1694 "Multiple VPlans for VF.");
1695
1696 for (const VPlanPtr &Plan : VPlans) {
1697 if (Plan->hasVF(VF))
1698 return *Plan.get();
1699 }
1700 llvm_unreachable("No plan found!");
1701}
1702
1705 // Reserve first location for self reference to the LoopID metadata node.
1706 MDs.push_back(nullptr);
1707 bool IsUnrollMetadata = false;
1708 MDNode *LoopID = L->getLoopID();
1709 if (LoopID) {
1710 // First find existing loop unrolling disable metadata.
1711 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1712 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1713 if (MD) {
1714 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1715 if (!S)
1716 continue;
1717 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1718 continue;
1719 IsUnrollMetadata =
1720 S->getString().starts_with("llvm.loop.unroll.disable");
1721 }
1722 MDs.push_back(LoopID->getOperand(I));
1723 }
1724 }
1725
1726 if (!IsUnrollMetadata) {
1727 // Add runtime unroll disable metadata.
1728 LLVMContext &Context = L->getHeader()->getContext();
1729 SmallVector<Metadata *, 1> DisableOperands;
1730 DisableOperands.push_back(
1731 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1732 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1733 MDs.push_back(DisableNode);
1734 MDNode *NewLoopID = MDNode::get(Context, MDs);
1735 // Set operand 0 to refer to the loop id itself.
1736 NewLoopID->replaceOperandWith(0, NewLoopID);
1737 L->setLoopID(NewLoopID);
1738 }
1739}
1740
1742 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1743 bool VectorizingEpilogue, MDNode *OrigLoopID,
1744 std::optional<unsigned> OrigAverageTripCount,
1745 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1746 bool DisableRuntimeUnroll) {
1747 // Update the metadata of the scalar loop. Skip the update when vectorizing
1748 // the epilogue loop to ensure it is updated only once. Also skip the update
1749 // when the scalar loop became unreachable.
1750 auto *ScalarPH = Plan.getScalarPreheader();
1751 if (ScalarPH && !VectorizingEpilogue) {
1752 std::optional<MDNode *> RemainderLoopID =
1755 if (RemainderLoopID) {
1756 OrigLoop->setLoopID(*RemainderLoopID);
1757 } else {
1758 if (DisableRuntimeUnroll)
1760
1761 LoopVectorizeHints Hints(OrigLoop, /*InterleaveOnlyWhenForced*/ false,
1762 *ORE);
1763 Hints.setAlreadyVectorized();
1764 }
1765 }
1766 // Tag the scalar remainder so downstream passes (e.g. the unroller and
1767 // WarnMissedTransforms) can produce more informative remarks. Only emit
1768 // when remarks are enabled.
1769 if (ORE->enabled() && ScalarPH && ScalarPH->hasPredecessors())
1770 OrigLoop->addIntLoopAttribute("llvm.loop.vectorize.epilogue", 1);
1771
1772 if (!VectorLoop)
1773 return;
1774
1775 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1776 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1778 VectorLoop->setLoopID(*VectorizedLoopID);
1779 } else {
1780 // Keep all loop hints from the original loop on the vector loop (we'll
1781 // replace the vectorizer-specific hints below).
1782 if (OrigLoopID)
1783 VectorLoop->setLoopID(OrigLoopID);
1784
1785 if (!VectorizingEpilogue) {
1786 LoopVectorizeHints Hints(VectorLoop, /*InterleaveOnlyWhenForced*/ false,
1787 *ORE);
1788 Hints.setAlreadyVectorized();
1789 }
1790 }
1791 // Tag the vector loop body so downstream passes can identify it. Only
1792 // emit when remarks are enabled.
1793 if (ORE->enabled())
1794 VectorLoop->addIntLoopAttribute("llvm.loop.vectorize.body", 1);
1796 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1797 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1799
1800 // Set/update profile weights for the vector and remainder loops as original
1801 // loop iterations are now distributed among them. Note that original loop
1802 // becomes the scalar remainder loop after vectorization.
1803 //
1804 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1805 // end up getting slightly roughened result but that should be OK since
1806 // profile is not inherently precise anyway. Note also possible bypass of
1807 // vector code caused by legality checks is ignored, assigning all the weight
1808 // to the vector loop, optimistically.
1809 //
1810 // For scalable vectorization we can't know at compile time how many
1811 // iterations of the loop are handled in one vector iteration, so instead
1812 // use the value of vscale used for tuning.
1813 unsigned AverageVectorTripCount = 0;
1814 unsigned RemainderAverageTripCount = 0;
1815 auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount();
1816 auto IsProfiled = EC && EC->getCount();
1817 if (!OrigAverageTripCount) {
1818 if (!IsProfiled)
1819 return;
1820 auto &SE = *PSE.getSE();
1821 AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
1822 if (ProfcheckDisableMetadataFixes || !AverageVectorTripCount)
1823 return;
1824 if (ScalarPH)
1825 RemainderAverageTripCount =
1826 SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
1827 // Setting to 1 should be sufficient to generate the correct branch weights.
1828 OrigLoopInvocationWeight = 1;
1829 } else {
1830 // Calculate number of iterations in unrolled loop.
1831 AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1832 // Calculate number of iterations for remainder loop.
1833 RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1834 }
1835 if (HeaderVPBB) {
1836 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1837 OrigLoopInvocationWeight);
1838 }
1839
1840 if (ScalarPH) {
1841 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1842 OrigLoopInvocationWeight);
1843 }
1844}
1845
1846#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1848 if (VPlans.empty()) {
1849 O << "LV: No VPlans built.\n";
1850 return;
1851 }
1852 for (const auto &Plan : VPlans)
1854 Plan->printDOT(O);
1855 else
1856 Plan->print(O);
1857}
1858#endif
1859
1860bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1862 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1863 unsigned WideSize = C->getBitWidth();
1864 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1865 ? TruncatedVal.sext(WideSize)
1866 : TruncatedVal.zext(WideSize);
1867 return ExtendedVal == *C;
1868}
1869
1872 if (auto *IRV = dyn_cast<VPIRValue>(V))
1873 return TTI::getOperandInfo(IRV->getValue());
1874
1875 return {};
1876}
1877
1879 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1880 TTI::VectorInstrContext VIC, bool AlwaysIncludeReplicatingR) {
1881 if (VF.isScalar())
1882 return 0;
1883
1884 assert(!VF.isScalable() &&
1885 "Scalarization overhead not supported for scalable vectors");
1886
1887 InstructionCost ScalarizationCost = 0;
1888 // Compute the cost of scalarizing the result if needed.
1889 if (!ResultTy->isVoidTy()) {
1890 for (Type *VectorTy :
1891 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1892 ScalarizationCost += TTI.getScalarizationOverhead(
1894 /*Insert=*/true, /*Extract=*/false, CostKind,
1895 /*ForPoisonSrc=*/true, {}, VIC);
1896 }
1897 }
1898 // Compute the cost of scalarizing the operands, skipping ones that do not
1899 // require extraction/scalarization and do not incur any overhead.
1900 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1902 for (auto *Op : Operands) {
1903 if (isa<VPIRValue>(Op) ||
1904 (!AlwaysIncludeReplicatingR &&
1907 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1908 !UniqueOperands.insert(Op).second)
1909 continue;
1910 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1911 }
1912 return ScalarizationCost +
1913 TTI.getOperandsScalarizationOverhead(Tys, CostKind, VIC);
1914}
1915
1917 ElementCount VF) {
1918 const Instruction *UI = R->getUnderlyingInstr();
1919 if (isa<LoadInst>(UI))
1920 return true;
1921 assert(isa<StoreInst>(UI) && "R must either be a load or store");
1922
1923 if (!NumPredStores) {
1924 // Count the number of predicated stores in the VPlan, caching the result.
1925 // Only stores where scatter is not legal are counted, matching the legacy
1926 // cost model behavior.
1927 const VPlan &Plan = *R->getParent()->getPlan();
1928 NumPredStores = 0;
1929 for (const VPRegionBlock *VPRB :
1932 assert(VPRB->isReplicator() && "must only contain replicate regions");
1933 for (const VPBasicBlock *VPBB :
1935 vp_depth_first_shallow(VPRB->getEntry()))) {
1936 for (const VPRecipeBase &Recipe : *VPBB) {
1937 auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
1938 if (!RepR)
1939 continue;
1940 if (!isa<StoreInst>(RepR->getUnderlyingInstr()))
1941 continue;
1942 // Check if scatter is legal for this store. If so, don't count it.
1943 Type *Ty = Types.inferScalarType(RepR->getOperand(0));
1944 auto *VTy = VectorType::get(Ty, VF);
1945 const Align Alignment =
1946 getLoadStoreAlignment(RepR->getUnderlyingInstr());
1947 if (!TTI.isLegalMaskedScatter(VTy, Alignment))
1948 ++(*NumPredStores);
1949 }
1950 }
1951 }
1952 }
1954}
1955
1957 return is_contained({Intrinsic::assume, Intrinsic::lifetime_end,
1958 Intrinsic::lifetime_start, Intrinsic::sideeffect,
1959 Intrinsic::pseudoprobe,
1960 Intrinsic::experimental_noalias_scope_decl},
1961 ID);
1962}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu next use AMDGPU Next Use Analysis Printer
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
The number of stores in a loop that are allowed to need predication.
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS(PASS,...)
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1703
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:169
static void printFinalVPlan(VPlan &)
To make RUN_VPLAN_PASS print final VPlan.
Definition VPlan.cpp:923
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:583
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:62
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1478
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:601
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:63
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1194
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:65
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:123
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
std::optional< ProfileCount > getEntryCount(bool AllowSynthetic=false) const
Get the entry count for this function.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
iterator begin() const
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1690
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1741
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1676
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1847
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void addIntLoopAttribute(StringRef Name, unsigned Value, ArrayRef< StringRef > RemovePrefixes={}) const
Add an integer metadata attribute to this loop's loop-ID node.
Definition LoopInfo.cpp:579
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:547
Metadata node.
Definition Metadata.h:1080
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:832
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4148
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4223
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4175
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:505
iterator end()
Definition VPlan.h:4185
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4183
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:539
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:756
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:763
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:244
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:405
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:593
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:560
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4163
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:546
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:673
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:651
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:639
const VPRecipeBase & back() const
Definition VPlan.h:4197
bool empty() const
Definition VPlan.h:4194
size_t size() const
Definition VPlan.h:4193
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:93
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:314
VPRegionBlock * getParent()
Definition VPlan.h:185
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:214
size_t getNumSuccessors() const
Definition VPlan.h:236
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:218
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:216
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:661
size_t getNumPredecessors() const
Definition VPlan.h:237
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:305
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:236
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:214
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:221
VPlan * getPlan()
Definition VPlan.cpp:189
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:208
const std::string & getName() const
Definition VPlan.h:176
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:232
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:256
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:162
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:228
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:194
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:226
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:210
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:193
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:241
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:259
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:295
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:279
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Clone the CFG for all nodes reachable from Entry, including cloning the blocks and their recipes.
Definition VPlan.cpp:688
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1661
VPBasicBlock * getInsertBlock() const
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:476
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4301
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:473
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4325
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:498
Class to record and manage LLVM IR flags.
Definition VPlan.h:685
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1208
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1251
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:88
Kind getKind() const
Returns the Kind of lane offset.
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
LLVM_ABI_FOR_TEST VPMultiDefValue(VPRecipeBase *Def, Value *UV=nullptr)
Definition VPlan.cpp:161
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:401
LLVM_ABI_FOR_TEST void dump() const
Dump the recipe to stderr (for debugging).
Definition VPlan.cpp:117
VPBasicBlock * getParent()
Definition VPlan.h:475
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const
Print the recipe, delegating to printRecipe().
virtual LLVM_ABI_FOR_TEST ~VPRecipeValue()=0
Definition VPlan.cpp:149
VPRecipeValue(unsigned char SC, Value *UV=nullptr)
Definition VPlanValue.h:323
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4358
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:738
const VPBlockBase * getEntry() const
Definition VPlan.h:4402
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:831
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4434
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
Definition VPlan.cpp:857
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:782
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:812
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4483
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:752
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4470
const VPBlockBase * getExiting() const
Definition VPlan.h:4414
friend class VPlan
Definition VPlan.h:4359
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:211
Type * getType() const
Returns the type of the VPRegionValue.
Definition VPlanValue.h:227
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
Definition VPlanValue.h:230
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3193
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:605
friend class VPSingleDefRecipe
Definition VPlanValue.h:337
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
Definition VPlan.cpp:155
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1633
An analysis for type-inference for VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:373
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1523
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1535
operand_range operands()
Definition VPlanValue.h:441
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:417
unsigned getNumOperands() const
Definition VPlanValue.h:411
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:412
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1486
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1531
void assertNotMaterialized() const
Assert that this VPValue has not been materialized, if it is a VPSymbolicValue.
Definition VPlanValue.h:548
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75
@ VPVSingleDefValueSC
A symbolic live-in VPValue without IR backing.
Definition VPlanValue.h:85
@ VPVMultiDefValueSC
A VPValue defined by a VPSingleDefRecipe.
Definition VPlanValue.h:86
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:109
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:102
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1489
unsigned getNumUsers() const
Definition VPlanValue.h:115
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1495
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1348
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4506
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1185
friend class VPSlotTracker
Definition VPlan.h:4508
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1161
VPBasicBlock * getEntry()
Definition VPlan.h:4602
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:4919
void setName(const Twine &newName)
Definition VPlan.h:4775
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:910
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:885
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:918
friend class VPlanPrinter
Definition VPlan.h:4507
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4705
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1320
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4834
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4655
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1076
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4902
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1058
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1091
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4757
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4591
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4857
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1326
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1191
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4645
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:928
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1144
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4651
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1100
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2312
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1860
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Or
Bitwise or logical OR of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:73
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Disable runtime unrolling by default for vectorized loops.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1871
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
Definition VPlan.cpp:1956
std::optional< unsigned > NumPredStores
Number of predicated stores in the VPlan, computed on demand.
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1878
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
bool useEmulatedMaskMemRefHack(const VPReplicateRecipe *R, ElementCount VF)
Returns true if an artificially high cost for emulated masked memrefs should be used.
Definition VPlan.cpp:1916
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:242
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:147
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:282
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:291
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:251
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:373
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:351
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.