LLVM 23.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/CFG.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Value.h"
44#include "llvm/Support/Debug.h"
50#include <cassert>
51#include <string>
52
53using namespace llvm;
54using namespace llvm::VPlanPatternMatch;
55
56namespace llvm {
58} // namespace llvm
59
60/// @{
61/// Metadata attribute names
62const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
64 "llvm.loop.vectorize.followup_vectorized";
66 "llvm.loop.vectorize.followup_epilogue";
67/// @}
68
70
72
74 "vplan-print-in-dot-format", cl::Hidden,
75 cl::desc("Use dot format instead of plain text when dumping VPlans"));
76
77#define DEBUG_TYPE "loop-vectorize"
78
79#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
81 const VPBasicBlock *Parent = R.getParent();
82 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
83 R.print(OS, "", SlotTracker);
84 return OS;
85}
86#endif
87
89 const ElementCount &VF) const {
90 switch (LaneKind) {
92 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
93 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
94 Builder.getInt32(VF.getKnownMinValue() - Lane));
96 return Builder.getInt64(Lane);
97 }
98 llvm_unreachable("Unknown lane kind");
99}
100
101#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
103 if (const VPRecipeBase *R = getDefiningRecipe())
104 R->print(OS, "", SlotTracker);
105 else
107}
108
109void VPValue::dump() const {
110 const VPRecipeBase *Instr = getDefiningRecipe();
112 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
114 dbgs() << "\n";
115}
116
117void VPRecipeBase::dump() const {
118 VPSlotTracker SlotTracker(getParent() ? getParent()->getPlan() : nullptr);
119 print(dbgs(), "", SlotTracker);
120 dbgs() << "\n";
121}
122#endif
123
124#if !defined(NDEBUG)
125bool VPRecipeValue::isDefinedBy(const VPDef *D) const {
126 return getDefiningRecipe() == D;
127}
128#endif
129
131 auto *RecipeValue = dyn_cast<VPRecipeValue>(this);
132 if (!RecipeValue)
133 return nullptr;
134 if (auto *MultiDef = dyn_cast<VPMultiDefValue>(RecipeValue))
135 return MultiDef->getDef();
136 return static_cast<VPSingleDefRecipe *>(RecipeValue);
137}
138
140 return const_cast<VPValue *>(this)->getDefiningRecipe();
141}
142
144 return cast<VPIRValue>(this)->getValue();
145}
146
148
150 switch (getVPValueID()) {
151 case VPVIRValueSC:
152 return cast<VPIRValue>(this)->getType();
153 case VPRegionValueSC:
154 return cast<VPRegionValue>(this)->getType();
155 case VPVSymbolicSC:
156 return cast<VPSymbolicValue>(this)->getType();
159 return cast<VPRecipeValue>(this)->getScalarType();
160 }
161 llvm_unreachable("Unhandled VPValue subclass");
162}
163
165 assert(Users.empty() &&
166 "trying to delete a VPRecipeValue with remaining users");
167}
168
171 assert(Def && "VPSingleDefValue requires a defining recipe");
172 Def->addDefinedValue(this);
173}
174
176 getDefiningRecipe()->removeDefinedValue(this);
177}
178
180 : VPRecipeValue(VPVMultiDefValueSC, UV, Ty), Def(Def) {
181 assert(Def && "VPMultiDefValue requires a defining recipe");
182 Def->addDefinedValue(this);
183}
184
186 getDefiningRecipe()->removeDefinedValue(this);
187}
188
189// Get the top-most entry block of \p Start. This is the entry block of the
190// containing VPlan. This function is templated to support both const and non-const blocks
191template <typename T> static T *getPlanEntry(T *Start) {
192 T *Next = Start;
193 T *Current = Start;
194 while ((Next = Next->getParent()))
195 Current = Next;
196
197 SmallSetVector<T *, 8> WorkList;
198 WorkList.insert(Current);
199
200 for (unsigned i = 0; i < WorkList.size(); i++) {
201 T *Current = WorkList[i];
202 if (!Current->hasPredecessors())
203 return Current;
204 auto &Predecessors = Current->getPredecessors();
205 WorkList.insert_range(Predecessors);
206 }
207
208 llvm_unreachable("VPlan without any entry node without predecessors");
209}
210
211VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
212
213const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
214
215/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
222
229
230void VPBlockBase::setPlan(VPlan *ParentPlan) {
231 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
232 Plan = ParentPlan;
233}
234
235/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
237 const VPBlockBase *Block = this;
239 Block = Region->getExiting();
241}
242
249
251 if (!Successors.empty() || !Parent)
252 return this;
253 assert(Parent->getExiting() == this &&
254 "Block w/o successors not the exiting block of its parent.");
255 return Parent->getEnclosingBlockWithSuccessors();
256}
257
259 if (!Predecessors.empty() || !Parent)
260 return this;
261 assert(Parent->getEntry() == this &&
262 "Block w/o predecessors not the entry of its parent.");
263 return Parent->getEnclosingBlockWithPredecessors();
264}
265
267 iterator It = begin();
268 while (It != end() && It->isPhi())
269 It++;
270 return It;
271}
272
280
281Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
283 return Def->getUnderlyingValue();
284
285 if (hasScalarValue(Def, Lane))
286 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
287
288 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
290 return Data.VPV2Scalars[Def][0];
291 }
292
293 // Look through BuildVector to avoid redundant extracts.
294 // TODO: Remove once replicate regions are unrolled explicitly.
295 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
296 auto *BuildVector = cast<VPInstruction>(Def);
297 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
298 }
299
301 auto *VecPart = Data.VPV2Vector[Def];
302 if (!VecPart->getType()->isVectorTy()) {
303 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
304 return VecPart;
305 }
306 // TODO: Cache created scalar values.
307 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
308 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
309 // set(Def, Extract, Instance);
310 return Extract;
311}
312
313Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
314 if (NeedsScalar) {
315 assert((VF.isScalar() || isa<VPIRValue, VPSymbolicValue>(Def) ||
317 (hasScalarValue(Def, VPLane(0)) &&
318 Data.VPV2Scalars[Def].size() == 1)) &&
319 "Trying to access a single scalar per part but has multiple scalars "
320 "per part.");
321 return get(Def, VPLane(0));
322 }
323
324 // If Values have been set for this Def return the one relevant for \p Part.
325 if (hasVectorValue(Def))
326 return Data.VPV2Vector[Def];
327
328 auto GetBroadcastInstrs = [this](Value *V) {
329 if (VF.isScalar())
330 return V;
331 // Broadcast the scalar into all locations in the vector.
332 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
333 return Shuf;
334 };
335
336 if (!hasScalarValue(Def, {0})) {
337 Value *IRV = Def->getLiveInIRValue();
338 Value *B = GetBroadcastInstrs(IRV);
339 set(Def, B);
340 return B;
341 }
342
343 Value *ScalarValue = get(Def, VPLane(0));
344 // If we aren't vectorizing, we can just copy the scalar map values over
345 // to the vector map.
346 if (VF.isScalar()) {
347 set(Def, ScalarValue);
348 return ScalarValue;
349 }
350
351 bool IsSingleScalar = vputils::isSingleScalar(Def);
352 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
353
354 // We need to construct the vector value for a single-scalar value by
355 // broadcasting the scalar to all lanes.
356 // TODO: Replace by introducing Broadcast VPInstructions.
357 assert(IsSingleScalar && "must be a single-scalar at this point");
358 // Set the insert point after the last scalarized instruction or after the
359 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
360 // will directly follow the scalar definitions.
361 auto OldIP = Builder.saveIP();
362 auto *LastInst = cast<Instruction>(get(Def, LastLane));
363 auto NewIP = isa<PHINode>(LastInst)
364 ? LastInst->getParent()->getFirstNonPHIIt()
365 : std::next(BasicBlock::iterator(LastInst));
366 Builder.SetInsertPoint(&*NewIP);
367 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
368 set(Def, VectorValue);
369 Builder.restoreIP(OldIP);
370 return VectorValue;
371}
372
374 const DILocation *DIL = DL;
375 // When a FSDiscriminator is enabled, we don't need to add the multiply
376 // factors to the discriminators.
377 if (DIL &&
378 Builder.GetInsertBlock()
379 ->getParent()
380 ->shouldEmitDebugInfoForProfiling() &&
382 // FIXME: For scalable vectors, assume vscale=1.
383 unsigned UF = Plan->getConcreteUF();
384 auto NewDIL =
386 if (NewDIL)
387 Builder.SetCurrentDebugLocation(*NewDIL);
388 else
389 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
390 << DIL->getFilename() << " Line: " << DIL->getLine());
391 } else
392 Builder.SetCurrentDebugLocation(DL);
393}
394
396 Value *WideValue,
397 const VPLane &Lane) {
398 Value *ScalarInst = get(Def, Lane);
399 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
400 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
401 // We must handle each element of a vectorized struct type.
402 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
403 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
404 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
405 VectorValue =
406 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
407 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
408 }
409 } else {
410 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
411 }
412 return WideValue;
413}
414
415BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
416 auto &CFG = State.CFG;
417 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
418 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
419 BasicBlock *PrevBB = CFG.PrevBB;
420 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
421 PrevBB->getParent(), CFG.ExitBB);
422 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
423
424 return NewBB;
425}
426
428 auto &CFG = State.CFG;
429 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
430
431 // Register NewBB in its loop. In innermost loops its the same for all
432 // BB's.
433 Loop *ParentLoop = State.CurrentParentLoop;
434 // If this block has a sole successor that is an exit block or is an exit
435 // block itself then it needs adding to the same parent loop as the exit
436 // block.
437 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
438 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
439 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
440 ParentLoop = State.LI->getLoopFor(
441 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
442 }
443
444 if (ParentLoop && !State.LI->getLoopFor(NewBB))
445 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
446
448 if (VPBlockUtils::isHeader(this, State.VPDT)) {
449 // There's no block for the latch yet, connect to the preheader only.
450 Preds = {getPredecessors()[0]};
451 } else {
452 Preds = to_vector(getPredecessors());
453 }
454
455 // Hook up the new basic block to its predecessors.
456 for (VPBlockBase *PredVPBlock : Preds) {
457 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
458 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
459 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
460 "Predecessor basic-block not found building successor.");
461 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
462 auto *PredBBTerminator = PredBB->getTerminator();
463 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
464
465 if (isa<UnreachableInst>(PredBBTerminator)) {
466 assert(PredVPSuccessors.size() == 1 &&
467 "Predecessor ending w/o branch must have single successor.");
468 DebugLoc DL = PredBBTerminator->getDebugLoc();
469 PredBBTerminator->eraseFromParent();
470 auto *Br = UncondBrInst::Create(NewBB, PredBB);
471 Br->setDebugLoc(DL);
472 } else if (auto *UBI = dyn_cast<UncondBrInst>(PredBBTerminator)) {
473 UBI->setSuccessor(NewBB);
474 } else {
475 // Set each forward successor here when it is created, excluding
476 // backedges. A backward successor is set when the branch is created.
477 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
478 // in the original IR, except when the predecessor is the entry block.
479 // This enables including SCEV and memory runtime check blocks in VPlan.
480 // TODO: Remove exception by modeling the terminator of entry block using
481 // BranchOnCond.
482 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
483 auto *TermBr = cast<CondBrInst>(PredBBTerminator);
484 assert((!TermBr->getSuccessor(idx) ||
485 (isa<VPIRBasicBlock>(this) &&
486 (TermBr->getSuccessor(idx) == NewBB ||
487 PredVPBlock == getPlan()->getEntry()))) &&
488 "Trying to reset an existing successor block.");
489 TermBr->setSuccessor(idx, NewBB);
490 }
491 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
492 }
493}
494
497 "VPIRBasicBlock can have at most two successors at the moment!");
498 // Move completely disconnected blocks to their final position.
499 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
500 IRBB->moveAfter(State->CFG.PrevBB);
501 State->Builder.SetInsertPoint(IRBB->getTerminator());
502 State->CFG.PrevBB = IRBB;
503 State->CFG.VPBB2IRBB[this] = IRBB;
504 executeRecipes(State, IRBB);
505 // Create a branch instruction to terminate IRBB if one was not created yet
506 // and is needed.
507 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
508 auto *Br = State->Builder.CreateBr(IRBB);
509 Br->setOperand(0, nullptr);
510 IRBB->getTerminator()->eraseFromParent();
511 } else {
512 assert((getNumSuccessors() == 0 ||
513 isa<UncondBrInst, CondBrInst>(IRBB->getTerminator())) &&
514 "other blocks must be terminated by a branch");
515 }
516
517 connectToPredecessors(*State);
518}
519
520VPIRBasicBlock *VPIRBasicBlock::clone() {
521 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
522 for (VPRecipeBase &R : Recipes)
523 NewBlock->appendRecipe(R.clone());
524 return NewBlock;
525}
526
528 if (VPBlockUtils::isHeader(this, State->VPDT)) {
529 // Create and register the new vector loop.
530 Loop *PrevParentLoop = State->CurrentParentLoop;
531 State->CurrentParentLoop = State->LI->AllocateLoop();
532
533 // Insert the new loop into the loop nest and register the new basic blocks
534 // before calling any utilities such as SCEV that require valid LoopInfo.
535 if (PrevParentLoop)
536 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
537 else
538 State->LI->addTopLevelLoop(State->CurrentParentLoop);
539 }
540
541 // 1. Create an IR basic block.
542 BasicBlock *NewBB = createEmptyBasicBlock(*State);
543
544 State->Builder.SetInsertPoint(NewBB);
545 // Temporarily terminate with unreachable until CFG is rewired.
546 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
547 State->Builder.SetInsertPoint(Terminator);
548
549 State->CFG.PrevBB = NewBB;
550 State->CFG.VPBB2IRBB[this] = NewBB;
551 connectToPredecessors(*State);
552
553 // 2. Fill the IR basic block with IR instructions.
554 executeRecipes(State, NewBB);
555
556 // If this block is a latch, update CurrentParentLoop.
557 if (VPBlockUtils::isLatch(this, State->VPDT))
558 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
559}
560
561VPBasicBlock *VPBasicBlock::clone() {
562 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
563 for (VPRecipeBase &R : *this)
564 NewBlock->appendRecipe(R.clone());
565 return NewBlock;
566}
567
569 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
570 << " in BB: " << BB->getName() << '\n');
571
572 State->CFG.PrevVPBB = this;
573
574 for (VPRecipeBase &Recipe : Recipes) {
575 State->setDebugLocFrom(Recipe.getDebugLoc());
576 Recipe.execute(*State);
577 }
578
579 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
580}
581
582VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
583 assert((SplitAt == end() || SplitAt->getParent() == this) &&
584 "can only split at a position in the same block");
585
586 // Create new empty block after the block to split.
587 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
589
590 // If this is the exiting block, make the split the new exiting block.
591 auto *ParentRegion = getParent();
592 if (ParentRegion && ParentRegion->getExiting() == this)
593 ParentRegion->setExiting(SplitBlock);
594
595 // Finally, move the recipes starting at SplitAt to new block.
596 for (VPRecipeBase &ToMove :
597 make_early_inc_range(make_range(SplitAt, this->end())))
598 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
599
600 return SplitBlock;
601}
602
603/// Return the enclosing loop region for region \p P. The templated version is
604/// used to support both const and non-const block arguments.
605template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
606 if (P && P->isReplicator()) {
607 P = P->getParent();
608 // Multiple loop regions can be nested, but replicate regions can only be
609 // nested inside a loop region or must be outside any other region.
610 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
611 }
612 return P;
613}
614
618
622
623static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
624 if (VPBB->empty()) {
625 assert(
626 VPBB->getNumSuccessors() < 2 &&
627 "block with multiple successors doesn't have a recipe as terminator");
628 return false;
629 }
630
631 const VPRecipeBase *R = &VPBB->back();
632 [[maybe_unused]] bool IsSwitch =
634 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
635 [[maybe_unused]] bool IsBranchOnTwoConds = match(R, m_BranchOnTwoConds());
636 [[maybe_unused]] bool IsCondBranch =
639 if (VPBB->getNumSuccessors() == 2 ||
640 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
641 assert((IsCondBranch || IsSwitch || IsBranchOnTwoConds) &&
642 "block with multiple successors not terminated by "
643 "conditional branch nor switch recipe");
644
645 return true;
646 }
647
648 if (VPBB->getNumSuccessors() > 2) {
649 assert((IsSwitch || IsBranchOnTwoConds) &&
650 "block with more than 2 successors not terminated by a switch or "
651 "branch-on-two-conds recipe");
652 return true;
653 }
654
655 assert(
656 !IsCondBranch && !IsBranchOnTwoConds &&
657 "block with 0 or 1 successors terminated by conditional branch recipe");
658 return false;
659}
660
662 if (hasConditionalTerminator(this))
663 return &back();
664 return nullptr;
665}
666
668 if (hasConditionalTerminator(this))
669 return &back();
670 return nullptr;
671}
672
674 return getParent() && getParent()->getExitingBasicBlock() == this;
675}
676
677#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
682
683void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
684 if (!hasSuccessors()) {
685 O << Indent << "No successors\n";
686 } else {
687 O << Indent << "Successor(s): ";
688 ListSeparator LS;
689 for (auto *Succ : getSuccessors())
690 O << LS << Succ->getName();
691 O << '\n';
692 }
693}
694
695void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
696 VPSlotTracker &SlotTracker) const {
697 O << Indent << getName() << ":\n";
698
699 auto RecipeIndent = Indent + " ";
700 for (const VPRecipeBase &Recipe : *this) {
701 Recipe.print(O, RecipeIndent, SlotTracker);
702 O << '\n';
703 }
704
705 printSuccessors(O, Indent);
706}
707#endif
708
709std::pair<VPBlockBase *, VPBlockBase *>
712 VPBlockBase *Exiting = nullptr;
713 bool InRegion = Entry->getParent();
714 // First, clone blocks reachable from Entry.
715 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
716 VPBlockBase *NewBB = BB->clone();
717 Old2NewVPBlocks[BB] = NewBB;
718 if (InRegion && BB->getNumSuccessors() == 0) {
719 assert(!Exiting && "Multiple exiting blocks?");
720 Exiting = BB;
721 }
722 }
723 assert((!InRegion || Exiting) && "regions must have a single exiting block");
724
725 // Second, update the predecessors & successors of the cloned blocks.
726 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
727 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
729 for (VPBlockBase *Pred : BB->getPredecessors()) {
730 NewPreds.push_back(Old2NewVPBlocks[Pred]);
731 }
732 NewBB->setPredecessors(NewPreds);
734 for (VPBlockBase *Succ : BB->successors()) {
735 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
736 }
737 NewBB->setSuccessors(NewSuccs);
738 }
739
740#if !defined(NDEBUG)
741 // Verify that the order of predecessors and successors matches in the cloned
742 // version.
743 for (const auto &[OldBB, NewBB] :
745 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
746 for (const auto &[OldPred, NewPred] :
747 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
748 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
749
750 for (const auto &[OldSucc, NewSucc] :
751 zip(OldBB->successors(), NewBB->successors()))
752 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
753 }
754#endif
755
756 return std::make_pair(Old2NewVPBlocks[Entry],
757 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
758}
759
760VPRegionBlock *VPRegionBlock::clone() {
761 const auto &[NewEntry, NewExiting] = VPBlockUtils::cloneFrom(getEntry());
762 VPlan &Plan = *getPlan();
763 VPRegionValue *CanIV = getCanonicalIV();
764 VPRegionBlock *NewRegion =
765 CanIV ? Plan.createLoopRegion(CanIV->getType(), CanIV->getDebugLoc(),
766 getName(), NewEntry, NewExiting)
767 : Plan.createReplicateRegion(NewEntry, NewExiting, getName());
768
769 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
770 Block->setParent(NewRegion);
771 return NewRegion;
772}
773
775 llvm_unreachable("regions must get dissolved before ::execute");
776}
777
780 for (VPRecipeBase &R : Recipes)
781 Cost += R.cost(VF, Ctx);
782 return Cost;
783}
784
785const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
786 const VPBlockBase *Pred = nullptr;
787 if (hasPredecessors()) {
788 Pred = getPredecessors()[Idx];
789 } else {
790 auto *Region = getParent();
791 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
792 "must be in the entry block of a non-replicate region");
793 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
794 "loop region has a single predecessor (preheader), its entry block "
795 "has 2 incoming blocks");
796
797 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
798 // region itself whose exiting block feeds the phi across the backedge.
799 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
800 }
801 return Pred->getExitingBasicBlock();
802}
803
805 if (!isReplicator()) {
806 // Neglect the cost of canonical IV, matching the legacy cost model.
809 Cost += Block->cost(VF, Ctx);
810 InstructionCost BackedgeCost =
811 ForceTargetInstructionCost.getNumOccurrences()
813 : Ctx.TTI.getCFInstrCost(Instruction::UncondBr, Ctx.CostKind);
814 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
815 << ": vector loop backedge\n");
816 Cost += BackedgeCost;
817 return Cost;
818 }
819
820 // Compute the cost of a replicate region. Replicating isn't supported for
821 // scalable vectors, return an invalid cost for them.
822 // TODO: Discard scalable VPlans with replicate recipes earlier after
823 // construction.
824 if (VF.isScalable())
826
827 // Compute and return the cost of the conditionally executed recipes.
828 assert(VF.isVector() && "Can only compute vector cost at the moment.");
830 return Then->cost(VF, Ctx);
831}
832
833#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
835 VPSlotTracker &SlotTracker) const {
836 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
837 auto NewIndent = Indent + " ";
838 if (auto *CanIV = getCanonicalIV()) {
839 O << '\n';
840 CanIV->print(O, SlotTracker);
841 O << " = CANONICAL-IV\n";
842 }
843 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
844 O << '\n';
845 BlockBase->print(O, NewIndent, SlotTracker);
846 }
847 O << Indent << "}\n";
848
849 printSuccessors(O, Indent);
850}
851#endif
852
854 auto *Header = cast<VPBasicBlock>(getEntry());
855 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
856 auto *CanIV = getCanonicalIV();
857 if (CanIV->getNumUsers() > 0) {
858 VPlan &Plan = *getPlan();
859 auto *Zero = Plan.getZero(CanIV->getType());
860 DebugLoc DL = CanIV->getDebugLoc();
862 VPBuilder HeaderBuilder(Header, Header->begin());
863 auto *ScalarR =
864 HeaderBuilder.createScalarPhi({Zero, CanIVInc}, DL, "index");
865 CanIV->replaceAllUsesWith(ScalarR);
866 }
867
868 VPBlockBase *Preheader = getSinglePredecessor();
869 VPBlockUtils::disconnectBlocks(Preheader, this);
870
871 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
872 VPB->setParent(getParent());
873
874 VPBlockUtils::connectBlocks(Preheader, Header);
875 VPBlockUtils::transferSuccessors(this, ExitingLatch);
876 VPBlockUtils::connectBlocks(ExitingLatch, Header);
877}
878
880 // TODO: Represent the increment as VPRegionValue as well.
881 VPRegionValue *CanIV = getCanonicalIV();
882 assert(CanIV && "Expected a canonical IV");
883
884 if (auto *Inc = vputils::findCanonicalIVIncrement(*getPlan()))
885 return Inc;
886
887 assert(!getPlan()->getVFxUF().isMaterialized() &&
888 "VFxUF can be used only before it is materialized.");
889 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
890 return VPBuilder(ExitingLatch->getTerminator())
891 .createOverflowingOp(Instruction::Add, {CanIV, &getPlan()->getVFxUF()},
892 {hasCanonicalIVNUW(), /* HasNSW */ false},
893 CanIV->getDebugLoc(), "index.next");
894}
895
896VPlan::VPlan(Loop *L, Type *IdxTy)
897 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
898 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
899 ScalarHeader = createVPIRBasicBlock(L->getHeader());
900
901 SmallVector<BasicBlock *> IRExitBlocks;
902 L->getUniqueExitBlocks(IRExitBlocks);
903 for (BasicBlock *EB : IRExitBlocks)
904 ExitBlocks.push_back(createVPIRBasicBlock(EB));
905}
906
908 VPSymbolicValue DummyValue(nullptr);
909
910 for (auto *VPB : CreatedBlocks) {
911 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
912 // Replace all operands of recipes and all VPValues defined in VPBB with
913 // DummyValue so the block can be deleted.
914 for (VPRecipeBase &R : *VPBB) {
915 for (auto *Def : R.definedValues())
916 Def->replaceAllUsesWith(&DummyValue);
917
918 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
919 R.setOperand(I, &DummyValue);
920 }
921 } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
922 CanIV->replaceAllUsesWith(&DummyValue);
923 }
924
925 delete VPB;
926 }
927 for (VPValue *VPV : getLiveIns())
928 delete VPV;
929 delete BackedgeTakenCount;
930}
931
933 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
934 return VPIRBB->getIRBasicBlock() == IRBB;
935 });
936 assert(Iter != getExitBlocks().end() && "no exit block found");
937 return *Iter;
938}
939
941 return is_contained(ExitBlocks, VPBB);
942}
943
944/// To make RUN_VPLAN_PASS print final VPlan.
945static void printFinalVPlan(VPlan &) {}
946
947/// Generate the code inside the preheader and body of the vectorized loop.
948/// Assumes a single pre-header basic-block was created for this. Introduce
949/// additional basic-blocks as needed, and fill them all.
952 "all region blocks must be dissolved before ::execute");
953
954 // Initialize CFG state.
955 State->CFG.PrevVPBB = nullptr;
956 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
957
958 // Update VPDominatorTree since VPBasicBlock may be removed after State was
959 // constructed.
960 State->VPDT.recalculate(*this);
961
962 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
963 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
964 cast<UncondBrInst>(VectorPreHeader->getTerminator())->setSuccessor(nullptr);
965 State->CFG.DTU.applyUpdates(
966 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
967
968 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
969 << ", UF=" << getConcreteUF() << '\n');
970 setName("Final VPlan");
971 // TODO: RUN_VPLAN_PASS/VPlanTransforms::runPass should automatically dump
972 // VPlans after some specific stages when "-debug" is specified, but that
973 // hasn't been implemented yet. For now, just do both:
974 LLVM_DEBUG(dump());
976
977 BasicBlock *ScalarPh = State->CFG.ExitBB;
978 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
979 if (ScalarPhVPBB) {
980 // Disconnect scalar preheader and scalar header, as the dominator tree edge
981 // will be updated as part of VPlan execution. This allows keeping the DTU
982 // logic generic during VPlan execution.
983 State->CFG.DTU.applyUpdates(
984 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
985 }
987 Entry);
988 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
989 // successor blocks including the middle, exit and scalar preheader blocks.
990 for (VPBlockBase *Block : RPOT)
991 Block->execute(State);
992
993 if (hasEarlyExit()) {
994 // Fix up LoopInfo for extra dispatch blocks when vectorizing loops with
995 // early exits. For dispatch blocks, we need to find the smallest common
996 // loop of all successors that are in a loop. Note: we only need to update
997 // loop info for blocks after the middle block, but there is no easy way to
998 // get those at this point.
999 for (VPBlockBase *VPB : reverse(RPOT)) {
1000 auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
1001 if (!VPBB || isa<VPIRBasicBlock>(VPBB))
1002 continue;
1003 BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
1004 Loop *L = State->LI->getLoopFor(BB);
1005 if (!L || any_of(successors(BB),
1006 [L](BasicBlock *Succ) { return L->contains(Succ); }))
1007 continue;
1008 // Find the innermost loop containing all successors that are in a loop.
1009 // Successors not in any loop don't constrain the target loop.
1010 Loop *Target = nullptr;
1011 for (BasicBlock *Succ : successors(BB)) {
1012 Loop *SuccLoop = State->LI->getLoopFor(Succ);
1013 if (!SuccLoop)
1014 continue;
1015 if (!Target)
1016 Target = SuccLoop;
1017 else
1018 Target = State->LI->getSmallestCommonLoop(Target, SuccLoop);
1019 }
1020 State->LI->removeBlock(BB);
1021 if (Target)
1022 Target->addBasicBlockToLoop(BB, *State->LI);
1023 }
1024 }
1025
1026 // If the original loop is unreachable, delete it and all its blocks.
1027 if (!ScalarPhVPBB) {
1028 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
1029 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
1030 // IR instructions.
1031 for (VPIRBasicBlock *EB : getExitBlocks()) {
1032 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
1033 if (R.getNumOperands() == 1)
1034 R.eraseFromParent();
1035 }
1036 }
1037
1038 Loop *OrigLoop =
1039 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
1040 auto Blocks = OrigLoop->getBlocksVector();
1041 Blocks.push_back(ScalarPh);
1042 while (!OrigLoop->isInnermost())
1043 State->LI->erase(*OrigLoop->begin());
1044 State->LI->erase(OrigLoop);
1045 for (auto *BB : Blocks)
1046 State->LI->removeBlock(BB);
1047 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
1048 }
1049
1050 State->CFG.DTU.flush();
1051
1052 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
1053 if (!Header)
1054 return;
1055
1056 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
1057 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1058
1059 // Fix the latch value of canonical, reduction and first-order recurrences
1060 // phis in the vector loop.
1061 for (VPRecipeBase &R : Header->phis()) {
1062 // Skip phi-like recipes that generate their backedege values themselves.
1063 if (isa<VPWidenPHIRecipe>(&R))
1064 continue;
1065
1066 auto *PhiR = cast<VPSingleDefRecipe>(&R);
1067 // VPInstructions currently model scalar Phis only.
1068 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1070 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1071
1072 Value *Phi = State->get(PhiR, NeedsScalar);
1073 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1074 // not.
1075 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1076 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1077 }
1078}
1079
1081 // For now only return the cost of the vector loop region, ignoring any other
1082 // blocks, like the preheader or middle blocks, expect for checking them for
1083 // recipes with invalid costs.
1085
1086 // If the cost of the loop region is invalid or any recipe in the skeleton
1087 // outside loop regions are invalid return an invalid cost.
1090 [&VF, &Ctx](VPBasicBlock *VPBB) {
1091 return !VPBB->cost(VF, Ctx).isValid();
1092 }))
1094
1095 return Cost;
1096}
1097
1099 // TODO: Cache if possible.
1101 if (auto *R = dyn_cast<VPRegionBlock>(B))
1102 return R->isReplicator() ? nullptr : R;
1103 return nullptr;
1104}
1105
1108 if (auto *R = dyn_cast<VPRegionBlock>(B))
1109 return R->isReplicator() ? nullptr : R;
1110 return nullptr;
1111}
1112
1114 const VPRegionBlock *LoopRegion = getVectorLoopRegion();
1115 assert(LoopRegion && "expected a vector loop region");
1117 vp_depth_first_shallow(LoopRegion->getEntry())),
1118 [](const VPRegionBlock *R) { return !R->isReplicator(); });
1119}
1120
1121#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1124
1125 if (VF.getNumUsers() > 0) {
1126 O << "\nLive-in ";
1127 VF.printAsOperand(O, SlotTracker);
1128 O << " = VF";
1129 }
1130
1131 if (UF.getNumUsers() > 0) {
1132 O << "\nLive-in ";
1133 UF.printAsOperand(O, SlotTracker);
1134 O << " = UF";
1135 }
1136
1137 if (VFxUF.getNumUsers() > 0) {
1138 O << "\nLive-in ";
1139 VFxUF.printAsOperand(O, SlotTracker);
1140 O << " = VF * UF";
1141 }
1142
1143 if (VectorTripCount.getNumUsers() > 0) {
1144 O << "\nLive-in ";
1145 VectorTripCount.printAsOperand(O, SlotTracker);
1146 O << " = vector-trip-count";
1147 }
1148
1149 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1150 O << "\nLive-in ";
1151 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1152 O << " = backedge-taken count";
1153 }
1154
1155 O << "\n";
1156 if (TripCount) {
1157 if (isa<VPIRValue>(TripCount))
1158 O << "Live-in ";
1159 TripCount->printAsOperand(O, SlotTracker);
1160 O << " = original trip-count";
1161 O << "\n";
1162 }
1163}
1164
1168
1169 O << "VPlan '" << getName() << "' {";
1170
1171 printLiveIns(O);
1172
1174 RPOT(getEntry());
1175 for (const VPBlockBase *Block : RPOT) {
1176 O << '\n';
1177 Block->print(O, "", SlotTracker);
1178 }
1179
1180 O << "}\n";
1181}
1182
1183std::string VPlan::getName() const {
1184 std::string Out;
1185 raw_string_ostream RSO(Out);
1186 RSO << Name << " for ";
1187 if (!VFs.empty()) {
1188 RSO << "VF={" << VFs[0];
1189 for (ElementCount VF : drop_begin(VFs))
1190 RSO << "," << VF;
1191 RSO << "},";
1192 }
1193
1194 if (UFs.empty()) {
1195 RSO << "UF>=1";
1196 } else {
1197 RSO << "UF={" << UFs[0];
1198 for (unsigned UF : drop_begin(UFs))
1199 RSO << "," << UF;
1200 RSO << "}";
1201 }
1202
1203 return Out;
1204}
1205
1208 VPlanPrinter Printer(O, *this);
1209 Printer.dump();
1210}
1211
1213void VPlan::dump() const { print(dbgs()); }
1214#endif
1215
1216static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1217 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1218 // Update the operands of all cloned recipes starting at NewEntry. This
1219 // traverses all reachable blocks. This is done in two steps, to handle cycles
1220 // in PHI recipes.
1222 OldDeepRPOT(Entry);
1224 NewDeepRPOT(NewEntry);
1225 // First, collect all mappings from old to new VPValues defined by cloned
1226 // recipes.
1227 for (const auto &[OldBB, NewBB] :
1230 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1231 "blocks must have the same number of recipes");
1232 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1233 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1234 "recipes must have the same number of operands");
1235 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1236 "recipes must define the same number of operands");
1237 for (const auto &[OldV, NewV] :
1238 zip(OldR.definedValues(), NewR.definedValues()))
1239 Old2NewVPValues[OldV] = NewV;
1240 }
1241 }
1242
1243 // Update all operands to use cloned VPValues.
1244 for (VPBasicBlock *NewBB :
1246 for (VPRecipeBase &NewR : *NewBB)
1247 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1248 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1249 NewR.setOperand(I, NewOp);
1250 }
1251 }
1252}
1253
1255 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1256 // Clone blocks.
1257 const auto &[NewEntry, __] = VPBlockUtils::cloneFrom(Entry);
1258
1259 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1260 VPIRBasicBlock *NewScalarHeader = nullptr;
1261 if (getScalarHeader()->hasPredecessors()) {
1262 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1263 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1264 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1265 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1266 }));
1267 } else {
1268 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1269 }
1270 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1271 auto *NewPlan =
1272 new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader, getIndexType());
1273 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1274 for (VPIRValue *OldLiveIn : getLiveIns())
1275 Old2NewVPValues[OldLiveIn] = NewPlan->getOrAddLiveIn(OldLiveIn);
1276
1277 if (auto *TripCountIRV = dyn_cast_or_null<VPIRValue>(TripCount))
1278 Old2NewVPValues[TripCountIRV] = NewPlan->getOrAddLiveIn(TripCountIRV);
1279 // else NewTripCount will be created and inserted into Old2NewVPValues when
1280 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1281
1282 if (auto *LoopRegion = getVectorLoopRegion()) {
1283 auto *OldCanIV = LoopRegion->getCanonicalIV();
1284 auto *NewCanIV = NewPlan->getVectorLoopRegion()->getCanonicalIV();
1285 assert(OldCanIV && NewCanIV &&
1286 "Loop regions of both plans must have canonical IVs.");
1287 Old2NewVPValues[OldCanIV] = NewCanIV;
1288 }
1289
1290 assert(none_of(Old2NewVPValues.keys(), IsaPred<VPSymbolicValue>) &&
1291 "All VPSymbolicValues must be handled below");
1292
1293 if (BackedgeTakenCount)
1294 NewPlan->BackedgeTakenCount =
1295 new VPSymbolicValue(BackedgeTakenCount->getType());
1296
1297 // Map and propagate materialized state for symbolic values.
1298 for (auto [OldSV, NewSV] :
1299 {std::pair{&VectorTripCount, &NewPlan->VectorTripCount},
1300 {&VF, &NewPlan->VF},
1301 {&UF, &NewPlan->UF},
1302 {&VFxUF, &NewPlan->VFxUF},
1303 {BackedgeTakenCount, NewPlan->BackedgeTakenCount}}) {
1304 if (!OldSV)
1305 continue;
1306 Old2NewVPValues[OldSV] = NewSV;
1307 if (OldSV->isMaterialized())
1308 NewSV->markMaterialized();
1309 }
1310
1311 remapOperands(Entry, NewEntry, Old2NewVPValues);
1312
1313 // Initialize remaining fields of cloned VPlan.
1314 NewPlan->VFs = VFs;
1315 NewPlan->UFs = UFs;
1316 // TODO: Adjust names.
1317 NewPlan->Name = Name;
1318 if (TripCount) {
1319 assert(Old2NewVPValues.contains(TripCount) &&
1320 "TripCount must have been added to Old2NewVPValues");
1321 NewPlan->TripCount = Old2NewVPValues[TripCount];
1322 }
1323
1324 // Transfer all cloned blocks (the second half of all current blocks) from
1325 // current to new VPlan.
1326 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1327 for (unsigned I :
1328 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1329 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1330 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1331
1332 // Update ExitBlocks of the new plan.
1333 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1334 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1335 VPB != NewScalarHeader)
1336 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1337 }
1338
1339 return NewPlan;
1340}
1341
1343 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1344 CreatedBlocks.push_back(VPIRBB);
1345 return VPIRBB;
1346}
1347
1349 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1350 for (Instruction &I :
1351 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1352 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1353 return VPIRBB;
1354}
1355
1356#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1357
1358Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1359 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1360 Twine(getOrCreateBID(Block));
1361}
1362
1363Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1364 const std::string &Name = Block->getName();
1365 if (!Name.empty())
1366 return Name;
1367 return "VPB" + Twine(getOrCreateBID(Block));
1368}
1369
1371 Depth = 1;
1372 bumpIndent(0);
1373 OS << "digraph VPlan {\n";
1374 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1375 if (!Plan.getName().empty())
1376 OS << "\\n" << DOT::EscapeString(Plan.getName());
1377
1378 {
1379 // Print live-ins.
1380 std::string Str;
1381 raw_string_ostream SS(Str);
1382 Plan.printLiveIns(SS);
1384 StringRef(Str).rtrim('\n').split(Lines, "\n");
1385 for (auto Line : Lines)
1386 OS << DOT::EscapeString(Line.str()) << "\\n";
1387 }
1388
1389 OS << "\"]\n";
1390 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1391 OS << "edge [fontname=Courier, fontsize=30]\n";
1392 OS << "compound=true\n";
1393
1394 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1395 dumpBlock(Block);
1396
1397 OS << "}\n";
1398}
1399
1400void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1402 dumpBasicBlock(BasicBlock);
1404 dumpRegion(Region);
1405 else
1406 llvm_unreachable("Unsupported kind of VPBlock.");
1407}
1408
1409void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1410 bool Hidden, const Twine &Label) {
1411 // Due to "dot" we print an edge between two regions as an edge between the
1412 // exiting basic block and the entry basic of the respective regions.
1413 const VPBlockBase *Tail = From->getExitingBasicBlock();
1414 const VPBlockBase *Head = To->getEntryBasicBlock();
1415 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1416 OS << " [ label=\"" << Label << '\"';
1417 if (Tail != From)
1418 OS << " ltail=" << getUID(From);
1419 if (Head != To)
1420 OS << " lhead=" << getUID(To);
1421 if (Hidden)
1422 OS << "; splines=none";
1423 OS << "]\n";
1424}
1425
1426void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1427 auto &Successors = Block->getSuccessors();
1428 if (Successors.size() == 1)
1429 drawEdge(Block, Successors.front(), false, "");
1430 else if (Successors.size() == 2) {
1431 drawEdge(Block, Successors.front(), false, "T");
1432 drawEdge(Block, Successors.back(), false, "F");
1433 } else {
1434 unsigned SuccessorNumber = 0;
1435 for (auto *Successor : Successors)
1436 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1437 }
1438}
1439
1440void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1441 // Implement dot-formatted dump by performing plain-text dump into the
1442 // temporary storage followed by some post-processing.
1443 OS << Indent << getUID(BasicBlock) << " [label =\n";
1444 bumpIndent(1);
1445 std::string Str;
1446 raw_string_ostream SS(Str);
1447 // Use no indentation as we need to wrap the lines into quotes ourselves.
1448 BasicBlock->print(SS, "", SlotTracker);
1449
1450 // We need to process each line of the output separately, so split
1451 // single-string plain-text dump.
1453 StringRef(Str).rtrim('\n').split(Lines, "\n");
1454
1455 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1456 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1457 };
1458
1459 // Don't need the "+" after the last line.
1460 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1461 EmitLine(Line, " +\n");
1462 EmitLine(Lines.back(), "\n");
1463
1464 bumpIndent(-1);
1465 OS << Indent << "]\n";
1466
1467 dumpEdges(BasicBlock);
1468}
1469
1470void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1471 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1472 bumpIndent(1);
1473 OS << Indent << "fontname=Courier\n"
1474 << Indent << "label=\""
1475 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1476 << DOT::EscapeString(Region->getName()) << "\"\n";
1477
1478 if (auto *CanIV = Region->getCanonicalIV()) {
1479 OS << Indent << "\"";
1480 std::string Op;
1481 raw_string_ostream S(Op);
1482 CanIV->printAsOperand(S, SlotTracker);
1483 OS << DOT::EscapeString(Op);
1484 OS << " = CANONICAL-IV\"\n";
1485 }
1486
1487 // Dump the blocks of the region.
1488 assert(Region->getEntry() && "Region contains no inner blocks.");
1489 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1490 dumpBlock(Block);
1491 bumpIndent(-1);
1492 OS << Indent << "}\n";
1493 dumpEdges(Region);
1494}
1495
1496#endif
1497
1498/// Returns true if there is a vector loop region and \p VPV is defined in a
1499/// loop region.
1500static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1501 if (isa<VPRegionValue>(VPV))
1502 return true;
1503 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1504 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1506}
1507
1512 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1513 if (auto *SV = dyn_cast<VPSymbolicValue>(this))
1514 SV->markMaterialized();
1515}
1516
1518 VPValue *New,
1519 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1521 // Note that this early exit is required for correctness; the implementation
1522 // below relies on the number of users for this VPValue to decrease, which
1523 // isn't the case if this == New.
1524 if (this == New)
1525 return;
1526
1527 for (unsigned J = 0; J < getNumUsers();) {
1528 VPUser *User = Users[J];
1529 bool RemovedUser = false;
1530 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1531 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1532 continue;
1533
1534 RemovedUser = true;
1535 User->setOperand(I, New);
1536 }
1537 // If a user got removed after updating the current user, the next user to
1538 // update will be moved to the current position, so we only need to
1539 // increment the index if the number of users did not change.
1540 if (!RemovedUser)
1541 J++;
1542 }
1543}
1544
1546 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1547 if (getOperand(Idx) == From)
1548 setOperand(Idx, To);
1549 }
1550}
1551
1552#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1554 OS << Tracker.getOrCreateName(this);
1555}
1556
1559 Op->printAsOperand(O, SlotTracker);
1560 });
1561}
1562#endif
1563
1564void VPSlotTracker::assignName(const VPValue *V) {
1565 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1566 auto *UV = V->getUnderlyingValue();
1567 auto *VPI = dyn_cast_or_null<VPInstruction>(V);
1568 if (!UV && !(VPI && !VPI->getName().empty())) {
1569 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1570 NextSlot++;
1571 return;
1572 }
1573
1574 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1575 // appending ".Number" to the name if there are multiple uses.
1576 std::string Name;
1577 if (UV)
1578 Name = getName(UV);
1579 else
1580 Name = VPI->getName();
1581
1582 assert(!Name.empty() && "Name cannot be empty.");
1583 StringRef Prefix = UV ? "ir<" : "vp<%";
1584 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1585
1586 // First assign the base name for V.
1587 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1588 // Integer or FP constants with different types will result in the same string
1589 // due to stripping types.
1591 return;
1592
1593 // If it is already used by C > 0 other VPValues, increase the version counter
1594 // C and use it for V.
1595 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1596 if (!UseInserted) {
1597 C->second++;
1598 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1599 }
1600}
1601
1602void VPSlotTracker::assignNames(const VPlan &Plan) {
1603 if (Plan.VF.getNumUsers() > 0)
1604 assignName(&Plan.VF);
1605 if (Plan.UF.getNumUsers() > 0)
1606 assignName(&Plan.UF);
1607 if (Plan.VFxUF.getNumUsers() > 0)
1608 assignName(&Plan.VFxUF);
1609 assignName(&Plan.VectorTripCount);
1610 if (Plan.BackedgeTakenCount)
1611 assignName(Plan.BackedgeTakenCount);
1612 for (VPValue *LI : Plan.getLiveIns())
1613 assignName(LI);
1614
1615 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1616 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1617 for (const VPBlockBase *VPB : RPOT) {
1618 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB))
1619 assignNames(VPBB);
1620 else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
1621 assignName(CanIV);
1622 }
1623}
1624
1625void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1626 for (const VPRecipeBase &Recipe : *VPBB)
1627 for (VPValue *Def : Recipe.definedValues())
1628 assignName(Def);
1629}
1630
1631std::string VPSlotTracker::getName(const Value *V) {
1632 std::string Name;
1633 raw_string_ostream S(Name);
1634 if (V->hasName() || !isa<Instruction>(V)) {
1635 V->printAsOperand(S, false);
1636 return Name;
1637 }
1638
1639 if (!MST) {
1640 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1641 // instruction.
1642 auto *I = cast<Instruction>(V);
1643 // This check is required to support unit tests with incomplete IR.
1644 if (I->getParent()) {
1645 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1646 MST->incorporateFunction(*I->getFunction());
1647 } else {
1648 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1649 }
1650 }
1651 V->printAsOperand(S, false, *MST);
1652 return Name;
1653}
1654
1655std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1656 std::string Name = VPValue2Name.lookup(V);
1657 if (!Name.empty())
1658 return Name;
1659
1660 // If no name was assigned, no VPlan was provided when creating the slot
1661 // tracker or it is not reachable from the provided VPlan. This can happen,
1662 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1663 // in a debugger.
1664 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1665 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1666 // here.
1667 const VPRecipeBase *DefR = V->getDefiningRecipe();
1668 (void)DefR;
1669 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1670 "VPValue defined by a recipe in a VPlan?");
1671
1672 // Use the underlying value's name, if there is one.
1673 if (auto *UV = V->getUnderlyingValue()) {
1674 std::string Name;
1675 raw_string_ostream S(Name);
1676 UV->printAsOperand(S, false);
1677 return (Twine("ir<") + Name + ">").str();
1678 }
1679
1680 return "<badref>";
1681}
1682
1684 VPValue *TrueVal,
1685 VPValue *FalseVal, DebugLoc DL) {
1686 assert(VPTypeAnalysis(*getInsertBlock()->getPlan())
1687 .inferScalarType(ChainOp)
1688 ->isIntegerTy(1) &&
1689 "ChainOp must be i1 for AnyOf reduction");
1690 VPIRFlags Flags(RecurKind::Or, /*IsOrdered=*/false, /*IsInLoop=*/false,
1691 FastMathFlags());
1692 auto *OrReduce =
1694 auto *Freeze = createNaryOp(Instruction::Freeze, {OrReduce}, DL);
1695 return createSelect(Freeze, TrueVal, FalseVal, DL, "rdx.select");
1696}
1697
1699 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1700 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1701 bool PredicateAtRangeStart = Predicate(Range.Start);
1702
1703 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1704 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1705 Range.End = TmpVF;
1706 break;
1707 }
1708
1709 return PredicateAtRangeStart;
1710}
1711
1713 assert(count_if(VPlans,
1714 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1715 1 &&
1716 "Multiple VPlans for VF.");
1717
1718 for (const VPlanPtr &Plan : VPlans) {
1719 if (Plan->hasVF(VF))
1720 return *Plan.get();
1721 }
1722 llvm_unreachable("No plan found!");
1723}
1724
1727 // Reserve first location for self reference to the LoopID metadata node.
1728 MDs.push_back(nullptr);
1729 bool IsUnrollMetadata = false;
1730 MDNode *LoopID = L->getLoopID();
1731 if (LoopID) {
1732 // First find existing loop unrolling disable metadata.
1733 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1734 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1735 if (MD) {
1736 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1737 if (!S)
1738 continue;
1739 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1740 continue;
1741 IsUnrollMetadata =
1742 S->getString().starts_with("llvm.loop.unroll.disable");
1743 }
1744 MDs.push_back(LoopID->getOperand(I));
1745 }
1746 }
1747
1748 if (!IsUnrollMetadata) {
1749 // Add runtime unroll disable metadata.
1750 LLVMContext &Context = L->getHeader()->getContext();
1751 SmallVector<Metadata *, 1> DisableOperands;
1752 DisableOperands.push_back(
1753 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1754 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1755 MDs.push_back(DisableNode);
1756 MDNode *NewLoopID = MDNode::get(Context, MDs);
1757 // Set operand 0 to refer to the loop id itself.
1758 NewLoopID->replaceOperandWith(0, NewLoopID);
1759 L->setLoopID(NewLoopID);
1760 }
1761}
1762
1764 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1765 bool VectorizingEpilogue, MDNode *OrigLoopID,
1766 std::optional<unsigned> OrigAverageTripCount,
1767 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1768 bool DisableRuntimeUnroll) {
1769 // Update the metadata of the scalar loop. Skip the update when vectorizing
1770 // the epilogue loop to ensure it is updated only once. Also skip the update
1771 // when the scalar loop became unreachable.
1772 auto *ScalarPH = Plan.getScalarPreheader();
1773 if (ScalarPH && !VectorizingEpilogue) {
1774 std::optional<MDNode *> RemainderLoopID =
1777 if (RemainderLoopID) {
1778 OrigLoop->setLoopID(*RemainderLoopID);
1779 } else {
1780 if (DisableRuntimeUnroll)
1782
1783 LoopVectorizeHints Hints(OrigLoop, /*InterleaveOnlyWhenForced*/ false,
1784 *ORE);
1785 Hints.setAlreadyVectorized();
1786 }
1787 }
1788 // Tag the scalar remainder so downstream passes (e.g. the unroller and
1789 // WarnMissedTransforms) can produce more informative remarks. Only emit
1790 // when remarks are enabled.
1791 if (ORE->enabled() && ScalarPH && ScalarPH->hasPredecessors())
1792 OrigLoop->addIntLoopAttribute("llvm.loop.vectorize.epilogue", 1);
1793
1794 if (!VectorLoop)
1795 return;
1796
1797 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1798 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1800 VectorLoop->setLoopID(*VectorizedLoopID);
1801 } else {
1802 // Keep all loop hints from the original loop on the vector loop (we'll
1803 // replace the vectorizer-specific hints below).
1804 if (OrigLoopID)
1805 VectorLoop->setLoopID(OrigLoopID);
1806
1807 if (!VectorizingEpilogue) {
1808 LoopVectorizeHints Hints(VectorLoop, /*InterleaveOnlyWhenForced*/ false,
1809 *ORE);
1810 Hints.setAlreadyVectorized();
1811 }
1812 }
1813 // Tag the vector loop body so downstream passes can identify it. Only
1814 // emit when remarks are enabled.
1815 if (ORE->enabled())
1816 VectorLoop->addIntLoopAttribute("llvm.loop.vectorize.body", 1);
1818 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1819 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1821
1822 // Set/update profile weights for the vector and remainder loops as original
1823 // loop iterations are now distributed among them. Note that original loop
1824 // becomes the scalar remainder loop after vectorization.
1825 //
1826 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1827 // end up getting slightly roughened result but that should be OK since
1828 // profile is not inherently precise anyway. Note also possible bypass of
1829 // vector code caused by legality checks is ignored, assigning all the weight
1830 // to the vector loop, optimistically.
1831 //
1832 // For scalable vectorization we can't know at compile time how many
1833 // iterations of the loop are handled in one vector iteration, so instead
1834 // use the value of vscale used for tuning.
1835 unsigned AverageVectorTripCount = 0;
1836 unsigned RemainderAverageTripCount = 0;
1837 auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount();
1838 auto IsProfiled = EC && EC->getCount();
1839 if (!OrigAverageTripCount) {
1840 if (!IsProfiled)
1841 return;
1842 auto &SE = *PSE.getSE();
1843 AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
1844 if (ProfcheckDisableMetadataFixes || !AverageVectorTripCount)
1845 return;
1846 if (ScalarPH)
1847 RemainderAverageTripCount =
1848 SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
1849 // Setting to 1 should be sufficient to generate the correct branch weights.
1850 OrigLoopInvocationWeight = 1;
1851 } else {
1852 // Calculate number of iterations in unrolled loop.
1853 AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1854 // Calculate number of iterations for remainder loop.
1855 RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1856 }
1857 if (HeaderVPBB) {
1858 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1859 OrigLoopInvocationWeight);
1860 }
1861
1862 if (ScalarPH) {
1863 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1864 OrigLoopInvocationWeight);
1865 }
1866}
1867
1868#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1870 if (VPlans.empty()) {
1871 O << "LV: No VPlans built.\n";
1872 return;
1873 }
1874 for (const auto &Plan : VPlans)
1876 Plan->printDOT(O);
1877 else
1878 Plan->print(O);
1879}
1880#endif
1881
1882bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1884 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1885 unsigned WideSize = C->getBitWidth();
1886 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1887 ? TruncatedVal.sext(WideSize)
1888 : TruncatedVal.zext(WideSize);
1889 return ExtendedVal == *C;
1890}
1891
1894 if (auto *IRV = dyn_cast<VPIRValue>(V))
1895 return TTI::getOperandInfo(IRV->getValue());
1896
1897 return {};
1898}
1899
1901 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1902 TTI::VectorInstrContext VIC, bool AlwaysIncludeReplicatingR) {
1903 if (VF.isScalar())
1904 return 0;
1905
1906 assert(!VF.isScalable() &&
1907 "Scalarization overhead not supported for scalable vectors");
1908
1909 InstructionCost ScalarizationCost = 0;
1910 // Compute the cost of scalarizing the result if needed.
1911 if (!ResultTy->isVoidTy()) {
1912 for (Type *VectorTy :
1913 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1914 ScalarizationCost += TTI.getScalarizationOverhead(
1916 /*Insert=*/true, /*Extract=*/false, CostKind,
1917 /*ForPoisonSrc=*/true, {}, VIC);
1918 }
1919 }
1920 // Compute the cost of scalarizing the operands, skipping ones that do not
1921 // require extraction/scalarization and do not incur any overhead.
1922 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1924 for (auto *Op : Operands) {
1925 if (isa<VPIRValue>(Op) ||
1926 (!AlwaysIncludeReplicatingR &&
1929 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1930 !UniqueOperands.insert(Op).second)
1931 continue;
1932 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1933 }
1934 return ScalarizationCost +
1935 TTI.getOperandsScalarizationOverhead(Tys, CostKind, VIC);
1936}
1937
1939 ElementCount VF) {
1940 const Instruction *UI = R->getUnderlyingInstr();
1941 if (isa<LoadInst>(UI))
1942 return true;
1943 assert(isa<StoreInst>(UI) && "R must either be a load or store");
1944
1945 if (!NumPredStores) {
1946 // Count the number of predicated stores in the VPlan, caching the result.
1947 // Only stores where scatter is not legal are counted, matching the legacy
1948 // cost model behavior.
1949 const VPlan &Plan = *R->getParent()->getPlan();
1950 NumPredStores = 0;
1951 for (const VPRegionBlock *VPRB :
1954 assert(VPRB->isReplicator() && "must only contain replicate regions");
1955 for (const VPBasicBlock *VPBB :
1957 vp_depth_first_shallow(VPRB->getEntry()))) {
1958 for (const VPRecipeBase &Recipe : *VPBB) {
1959 auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
1960 if (!RepR)
1961 continue;
1962 if (!isa<StoreInst>(RepR->getUnderlyingInstr()))
1963 continue;
1964 // Check if scatter is legal for this store. If so, don't count it.
1965 Type *Ty = Types.inferScalarType(RepR->getOperand(0));
1966 auto *VTy = VectorType::get(Ty, VF);
1967 const Align Alignment =
1968 getLoadStoreAlignment(RepR->getUnderlyingInstr());
1969 if (!TTI.isLegalMaskedScatter(VTy, Alignment))
1970 ++(*NumPredStores);
1971 }
1972 }
1973 }
1974 }
1976}
1977
1979 return is_contained({Intrinsic::assume, Intrinsic::lifetime_end,
1980 Intrinsic::lifetime_start, Intrinsic::sideeffect,
1981 Intrinsic::pseudoprobe,
1982 Intrinsic::experimental_noalias_scope_decl},
1983 ID);
1984}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu next use AMDGPU Next Use Analysis Printer
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
The number of stores in a loop that are allowed to need predication.
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS(PASS,...)
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1725
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:191
static void printFinalVPlan(VPlan &)
To make RUN_VPLAN_PASS print final VPlan.
Definition VPlan.cpp:945
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:605
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:62
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1500
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:623
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:63
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1216
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:65
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:123
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
std::optional< ProfileCount > getEntryCount(bool AllowSynthetic=false) const
Get the entry count for this function.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
iterator begin() const
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1712
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1763
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1698
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1869
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void addIntLoopAttribute(StringRef Name, unsigned Value, ArrayRef< StringRef > RemovePrefixes={}) const
Add an integer metadata attribute to this loop's loop-ID node.
Definition LoopInfo.cpp:579
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:547
Metadata node.
Definition Metadata.h:1080
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:832
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4256
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4331
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4283
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:527
iterator end()
Definition VPlan.h:4293
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4291
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:561
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:778
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:785
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:427
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:615
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:582
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4271
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:568
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:695
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:673
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:661
const VPRecipeBase & back() const
Definition VPlan.h:4305
bool empty() const
Definition VPlan.h:4302
size_t size() const
Definition VPlan.h:4301
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:93
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:314
VPRegionBlock * getParent()
Definition VPlan.h:185
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:236
size_t getNumSuccessors() const
Definition VPlan.h:236
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:218
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:216
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:683
size_t getNumPredecessors() const
Definition VPlan.h:237
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:305
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:258
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:214
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:221
VPlan * getPlan()
Definition VPlan.cpp:211
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:230
const std::string & getName() const
Definition VPlan.h:176
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:232
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:256
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:162
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:250
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:216
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:226
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:210
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:193
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:241
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:259
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:295
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:279
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Clone the CFG for all nodes reachable from Entry, including cloning the blocks and their recipes.
Definition VPlan.cpp:710
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1683
VPBasicBlock * getInsertBlock() const
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:490
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4409
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:495
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4433
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:520
Class to record and manage LLVM IR flags.
Definition VPlan.h:696
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1227
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1270
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:88
Kind getKind() const
Returns the Kind of lane offset.
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
LLVM_ABI_FOR_TEST VPMultiDefValue(VPRecipeBase *Def, Value *UV, Type *Ty)
Definition VPlan.cpp:179
~VPMultiDefValue() override
Definition VPlan.cpp:185
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:401
LLVM_ABI_FOR_TEST void dump() const
Dump the recipe to stderr (for debugging).
Definition VPlan.cpp:117
VPBasicBlock * getParent()
Definition VPlan.h:476
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const
Print the recipe, delegating to printRecipe().
virtual LLVM_ABI_FOR_TEST ~VPRecipeValue()=0
Definition VPlan.cpp:164
VPRecipeValue(unsigned char SC, Value *UV, Type *Ty=nullptr)
Definition VPlanValue.h:330
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4466
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:760
const VPBlockBase * getEntry() const
Definition VPlan.h:4510
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:853
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4542
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
Definition VPlan.cpp:879
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:804
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:834
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4591
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:774
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4578
const VPBlockBase * getExiting() const
Definition VPlan.h:4522
friend class VPlan
Definition VPlan.h:4467
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:215
Type * getType() const
Returns the type of the VPRegionValue.
Definition VPlanValue.h:231
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
Definition VPlanValue.h:234
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3296
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:610
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr, Type *Ty=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
Definition VPlan.cpp:169
~VPSingleDefValue() override
Definition VPlan.cpp:175
friend class VPSingleDefRecipe
Definition VPlanValue.h:348
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1655
An analysis for type-inference for VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1545
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1557
operand_range operands()
Definition VPlanValue.h:455
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:428
unsigned getNumOperands() const
Definition VPlanValue.h:422
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:423
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Definition VPlan.cpp:149
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1508
unsigned getVPValueID() const
Definition VPlanValue.h:101
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1553
void assertNotMaterialized() const
Assert that this VPValue has not been materialized, if it is a VPSymbolicValue.
Definition VPlanValue.h:562
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75
@ VPVSingleDefValueSC
A symbolic live-in VPValue without IR backing.
Definition VPlanValue.h:85
@ VPVSymbolicSC
A live-in VPValue wrapping an IR Value.
Definition VPlanValue.h:84
@ VPRegionValueSC
A VPValue defined by a multi-def recipe.
Definition VPlanValue.h:87
@ VPVMultiDefValueSC
A VPValue defined by a VPSingleDefRecipe.
Definition VPlanValue.h:86
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:109
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:102
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1511
unsigned getNumUsers() const
Definition VPlanValue.h:115
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1517
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1370
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4614
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1207
friend class VPSlotTracker
Definition VPlan.h:4616
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1183
VPBasicBlock * getEntry()
Definition VPlan.h:4710
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:5027
void setName(const Twine &newName)
Definition VPlan.h:4883
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:932
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:907
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:940
friend class VPlanPrinter
Definition VPlan.h:4615
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4813
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1342
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4942
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4763
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1098
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:5010
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1080
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1113
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4865
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4699
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4965
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1348
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1213
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4753
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:950
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1166
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4759
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1122
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2312
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1882
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Or
Bitwise or logical OR of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:73
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Disable runtime unrolling by default for vectorized loops.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1893
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
Definition VPlan.cpp:1978
std::optional< unsigned > NumPredStores
Number of predicated stores in the VPlan, computed on demand.
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1900
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
bool useEmulatedMaskMemRefHack(const VPReplicateRecipe *R, ElementCount VF)
Returns true if an artificially high cost for emulated masked memrefs should be used.
Definition VPlan.cpp:1938
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:147
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:286
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:313
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:273
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:395
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:373
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.