LLVM 23.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/CFG.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Value.h"
44#include "llvm/Support/Debug.h"
50#include <cassert>
51#include <string>
52
53using namespace llvm;
54using namespace llvm::VPlanPatternMatch;
55
56namespace llvm {
58} // namespace llvm
59
60/// @{
61/// Metadata attribute names
62const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
64 "llvm.loop.vectorize.followup_vectorized";
66 "llvm.loop.vectorize.followup_epilogue";
67/// @}
68
70
72
74 "vplan-print-in-dot-format", cl::Hidden,
75 cl::desc("Use dot format instead of plain text when dumping VPlans"));
76
77#define DEBUG_TYPE "loop-vectorize"
78
79#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
81 const VPBasicBlock *Parent = R.getParent();
82 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
83 R.print(OS, "", SlotTracker);
84 return OS;
85}
86#endif
87
89 const ElementCount &VF) const {
90 switch (LaneKind) {
92 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
93 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
94 Builder.getInt32(VF.getKnownMinValue() - Lane));
96 return Builder.getInt64(Lane);
97 }
98 llvm_unreachable("Unknown lane kind");
99}
100
101#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
103 if (const VPRecipeBase *R = getDefiningRecipe())
104 R->print(OS, "", SlotTracker);
105 else
107}
108
109void VPValue::dump() const {
110 const VPRecipeBase *Instr = getDefiningRecipe();
112 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
114 dbgs() << "\n";
115}
116
117void VPRecipeBase::dump() const {
118 VPSlotTracker SlotTracker(getParent() ? getParent()->getPlan() : nullptr);
119 print(dbgs(), "", SlotTracker);
120 dbgs() << "\n";
121}
122#endif
123
124#if !defined(NDEBUG)
125bool VPRecipeValue::isDefinedBy(const VPDef *D) const { return Def == D; }
126#endif
127
129 auto *DefValue = dyn_cast<VPRecipeValue>(this);
130 return DefValue ? DefValue->Def : nullptr;
131}
132
134 auto *DefValue = dyn_cast<VPRecipeValue>(this);
135 return DefValue ? DefValue->Def : nullptr;
136}
137
139 return cast<VPIRValue>(this)->getValue();
140}
141
143
145 : VPValue(VPVRecipeValueSC, UV), Def(Def) {
146 assert(Def && "VPRecipeValue requires a defining recipe");
147 Def->addDefinedValue(this);
148}
149
151 assert(Users.empty() &&
152 "trying to delete a VPRecipeValue with remaining users");
153 Def->removeDefinedValue(this);
154}
155
156// Get the top-most entry block of \p Start. This is the entry block of the
157// containing VPlan. This function is templated to support both const and non-const blocks
158template <typename T> static T *getPlanEntry(T *Start) {
159 T *Next = Start;
160 T *Current = Start;
161 while ((Next = Next->getParent()))
162 Current = Next;
163
164 SmallSetVector<T *, 8> WorkList;
165 WorkList.insert(Current);
166
167 for (unsigned i = 0; i < WorkList.size(); i++) {
168 T *Current = WorkList[i];
169 if (!Current->hasPredecessors())
170 return Current;
171 auto &Predecessors = Current->getPredecessors();
172 WorkList.insert_range(Predecessors);
173 }
174
175 llvm_unreachable("VPlan without any entry node without predecessors");
176}
177
178VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
179
180const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
181
182/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
189
196
197void VPBlockBase::setPlan(VPlan *ParentPlan) {
198 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
199 Plan = ParentPlan;
200}
201
202/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
204 const VPBlockBase *Block = this;
206 Block = Region->getExiting();
208}
209
216
218 if (!Successors.empty() || !Parent)
219 return this;
220 assert(Parent->getExiting() == this &&
221 "Block w/o successors not the exiting block of its parent.");
222 return Parent->getEnclosingBlockWithSuccessors();
223}
224
226 if (!Predecessors.empty() || !Parent)
227 return this;
228 assert(Parent->getEntry() == this &&
229 "Block w/o predecessors not the entry of its parent.");
230 return Parent->getEnclosingBlockWithPredecessors();
231}
232
234 iterator It = begin();
235 while (It != end() && It->isPhi())
236 It++;
237 return It;
238}
239
247
250 return Def->getUnderlyingValue();
251
252 if (hasScalarValue(Def, Lane))
253 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
254
255 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
257 return Data.VPV2Scalars[Def][0];
258 }
259
260 // Look through BuildVector to avoid redundant extracts.
261 // TODO: Remove once replicate regions are unrolled explicitly.
262 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
263 auto *BuildVector = cast<VPInstruction>(Def);
264 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
265 }
266
268 auto *VecPart = Data.VPV2Vector[Def];
269 if (!VecPart->getType()->isVectorTy()) {
270 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
271 return VecPart;
272 }
273 // TODO: Cache created scalar values.
274 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
275 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
276 // set(Def, Extract, Instance);
277 return Extract;
278}
279
280Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
281 if (NeedsScalar) {
282 assert((VF.isScalar() || isa<VPIRValue, VPSymbolicValue>(Def) ||
284 (hasScalarValue(Def, VPLane(0)) &&
285 Data.VPV2Scalars[Def].size() == 1)) &&
286 "Trying to access a single scalar per part but has multiple scalars "
287 "per part.");
288 return get(Def, VPLane(0));
289 }
290
291 // If Values have been set for this Def return the one relevant for \p Part.
292 if (hasVectorValue(Def))
293 return Data.VPV2Vector[Def];
294
295 auto GetBroadcastInstrs = [this](Value *V) {
296 if (VF.isScalar())
297 return V;
298 // Broadcast the scalar into all locations in the vector.
299 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
300 return Shuf;
301 };
302
303 if (!hasScalarValue(Def, {0})) {
304 Value *IRV = Def->getLiveInIRValue();
305 Value *B = GetBroadcastInstrs(IRV);
306 set(Def, B);
307 return B;
308 }
309
310 Value *ScalarValue = get(Def, VPLane(0));
311 // If we aren't vectorizing, we can just copy the scalar map values over
312 // to the vector map.
313 if (VF.isScalar()) {
314 set(Def, ScalarValue);
315 return ScalarValue;
316 }
317
318 bool IsSingleScalar = vputils::isSingleScalar(Def);
319 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
320
321 // We need to construct the vector value for a single-scalar value by
322 // broadcasting the scalar to all lanes.
323 // TODO: Replace by introducing Broadcast VPInstructions.
324 assert(IsSingleScalar && "must be a single-scalar at this point");
325 // Set the insert point after the last scalarized instruction or after the
326 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
327 // will directly follow the scalar definitions.
328 auto OldIP = Builder.saveIP();
329 auto *LastInst = cast<Instruction>(get(Def, LastLane));
330 auto NewIP = isa<PHINode>(LastInst)
331 ? LastInst->getParent()->getFirstNonPHIIt()
332 : std::next(BasicBlock::iterator(LastInst));
333 Builder.SetInsertPoint(&*NewIP);
334 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
335 set(Def, VectorValue);
336 Builder.restoreIP(OldIP);
337 return VectorValue;
338}
339
341 const DILocation *DIL = DL;
342 // When a FSDiscriminator is enabled, we don't need to add the multiply
343 // factors to the discriminators.
344 if (DIL &&
345 Builder.GetInsertBlock()
346 ->getParent()
347 ->shouldEmitDebugInfoForProfiling() &&
349 // FIXME: For scalable vectors, assume vscale=1.
350 unsigned UF = Plan->getConcreteUF();
351 auto NewDIL =
352 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
353 if (NewDIL)
354 Builder.SetCurrentDebugLocation(*NewDIL);
355 else
356 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
357 << DIL->getFilename() << " Line: " << DIL->getLine());
358 } else
359 Builder.SetCurrentDebugLocation(DL);
360}
361
363 Value *WideValue,
364 const VPLane &Lane) {
365 Value *ScalarInst = get(Def, Lane);
366 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
367 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
368 // We must handle each element of a vectorized struct type.
369 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
370 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
371 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
372 VectorValue =
373 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
374 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
375 }
376 } else {
377 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
378 }
379 return WideValue;
380}
382BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
383 auto &CFG = State.CFG;
384 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
385 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
386 BasicBlock *PrevBB = CFG.PrevBB;
387 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
388 PrevBB->getParent(), CFG.ExitBB);
389 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
390
391 return NewBB;
392}
393
395 auto &CFG = State.CFG;
396 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
397
398 // Register NewBB in its loop. In innermost loops its the same for all
399 // BB's.
400 Loop *ParentLoop = State.CurrentParentLoop;
401 // If this block has a sole successor that is an exit block or is an exit
402 // block itself then it needs adding to the same parent loop as the exit
403 // block.
404 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
405 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
406 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
407 ParentLoop = State.LI->getLoopFor(
408 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
409 }
410
411 if (ParentLoop && !State.LI->getLoopFor(NewBB))
412 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
413
415 if (VPBlockUtils::isHeader(this, State.VPDT)) {
416 // There's no block for the latch yet, connect to the preheader only.
417 Preds = {getPredecessors()[0]};
418 } else {
419 Preds = to_vector(getPredecessors());
420 }
421
422 // Hook up the new basic block to its predecessors.
423 for (VPBlockBase *PredVPBlock : Preds) {
424 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
425 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
426 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
427 "Predecessor basic-block not found building successor.");
428 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
429 auto *PredBBTerminator = PredBB->getTerminator();
430 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
431
432 if (isa<UnreachableInst>(PredBBTerminator)) {
433 assert(PredVPSuccessors.size() == 1 &&
434 "Predecessor ending w/o branch must have single successor.");
435 DebugLoc DL = PredBBTerminator->getDebugLoc();
436 PredBBTerminator->eraseFromParent();
437 auto *Br = UncondBrInst::Create(NewBB, PredBB);
438 Br->setDebugLoc(DL);
439 } else if (auto *UBI = dyn_cast<UncondBrInst>(PredBBTerminator)) {
440 UBI->setSuccessor(NewBB);
441 } else {
442 // Set each forward successor here when it is created, excluding
443 // backedges. A backward successor is set when the branch is created.
444 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
445 // in the original IR, except when the predecessor is the entry block.
446 // This enables including SCEV and memory runtime check blocks in VPlan.
447 // TODO: Remove exception by modeling the terminator of entry block using
448 // BranchOnCond.
449 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
450 auto *TermBr = cast<CondBrInst>(PredBBTerminator);
451 assert((!TermBr->getSuccessor(idx) ||
452 (isa<VPIRBasicBlock>(this) &&
453 (TermBr->getSuccessor(idx) == NewBB ||
454 PredVPBlock == getPlan()->getEntry()))) &&
455 "Trying to reset an existing successor block.");
456 TermBr->setSuccessor(idx, NewBB);
457 }
458 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
459 }
460}
461
464 "VPIRBasicBlock can have at most two successors at the moment!");
465 // Move completely disconnected blocks to their final position.
466 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
467 IRBB->moveAfter(State->CFG.PrevBB);
468 State->Builder.SetInsertPoint(IRBB->getTerminator());
469 State->CFG.PrevBB = IRBB;
470 State->CFG.VPBB2IRBB[this] = IRBB;
471 executeRecipes(State, IRBB);
472 // Create a branch instruction to terminate IRBB if one was not created yet
473 // and is needed.
474 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
475 auto *Br = State->Builder.CreateBr(IRBB);
476 Br->setOperand(0, nullptr);
477 IRBB->getTerminator()->eraseFromParent();
478 } else {
479 assert((getNumSuccessors() == 0 ||
480 isa<UncondBrInst, CondBrInst>(IRBB->getTerminator())) &&
481 "other blocks must be terminated by a branch");
482 }
483
484 connectToPredecessors(*State);
485}
486
487VPIRBasicBlock *VPIRBasicBlock::clone() {
488 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
489 for (VPRecipeBase &R : Recipes)
490 NewBlock->appendRecipe(R.clone());
491 return NewBlock;
492}
493
495 bool Replica = bool(State->Lane);
496 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
497
498 if (VPBlockUtils::isHeader(this, State->VPDT)) {
499 // Create and register the new vector loop.
500 Loop *PrevParentLoop = State->CurrentParentLoop;
501 State->CurrentParentLoop = State->LI->AllocateLoop();
502
503 // Insert the new loop into the loop nest and register the new basic blocks
504 // before calling any utilities such as SCEV that require valid LoopInfo.
505 if (PrevParentLoop)
506 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
507 else
508 State->LI->addTopLevelLoop(State->CurrentParentLoop);
509 }
510
511 auto IsReplicateRegion = [](VPBlockBase *BB) {
513 assert((!R || R->isReplicator()) &&
514 "only replicate region blocks should remain");
515 return R;
516 };
517 // 1. Create an IR basic block.
518 if ((Replica && this == getParent()->getEntry()) ||
519 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
520 // Reuse the previous basic block if the current VPBB is either
521 // * the entry to a replicate region, or
522 // * the exit of a replicate region.
523 State->CFG.VPBB2IRBB[this] = NewBB;
524 } else {
525 NewBB = createEmptyBasicBlock(*State);
526
527 State->Builder.SetInsertPoint(NewBB);
528 // Temporarily terminate with unreachable until CFG is rewired.
529 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
530 State->Builder.SetInsertPoint(Terminator);
531
532 State->CFG.PrevBB = NewBB;
533 State->CFG.VPBB2IRBB[this] = NewBB;
534 connectToPredecessors(*State);
535 }
536
537 // 2. Fill the IR basic block with IR instructions.
538 executeRecipes(State, NewBB);
539
540 // If this block is a latch, update CurrentParentLoop.
541 if (VPBlockUtils::isLatch(this, State->VPDT))
542 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
543}
544
545VPBasicBlock *VPBasicBlock::clone() {
546 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
547 for (VPRecipeBase &R : *this)
548 NewBlock->appendRecipe(R.clone());
549 return NewBlock;
550}
551
553 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
554 << " in BB: " << BB->getName() << '\n');
555
556 State->CFG.PrevVPBB = this;
557
558 for (VPRecipeBase &Recipe : Recipes) {
559 State->setDebugLocFrom(Recipe.getDebugLoc());
560 Recipe.execute(*State);
561 }
562
563 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
564}
565
566VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
567 assert((SplitAt == end() || SplitAt->getParent() == this) &&
568 "can only split at a position in the same block");
569
570 // Create new empty block after the block to split.
571 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
573
574 // If this is the exiting block, make the split the new exiting block.
575 auto *ParentRegion = getParent();
576 if (ParentRegion && ParentRegion->getExiting() == this)
577 ParentRegion->setExiting(SplitBlock);
578
579 // Finally, move the recipes starting at SplitAt to new block.
580 for (VPRecipeBase &ToMove :
581 make_early_inc_range(make_range(SplitAt, this->end())))
582 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
583
584 return SplitBlock;
585}
586
587/// Return the enclosing loop region for region \p P. The templated version is
588/// used to support both const and non-const block arguments.
589template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
590 if (P && P->isReplicator()) {
591 P = P->getParent();
592 // Multiple loop regions can be nested, but replicate regions can only be
593 // nested inside a loop region or must be outside any other region.
594 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
595 }
596 return P;
597}
598
602
606
607static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
608 if (VPBB->empty()) {
609 assert(
610 VPBB->getNumSuccessors() < 2 &&
611 "block with multiple successors doesn't have a recipe as terminator");
612 return false;
613 }
614
615 const VPRecipeBase *R = &VPBB->back();
616 [[maybe_unused]] bool IsSwitch =
618 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
619 [[maybe_unused]] bool IsBranchOnTwoConds = match(R, m_BranchOnTwoConds());
620 [[maybe_unused]] bool IsCondBranch =
623 if (VPBB->getNumSuccessors() == 2 ||
624 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
625 assert((IsCondBranch || IsSwitch || IsBranchOnTwoConds) &&
626 "block with multiple successors not terminated by "
627 "conditional branch nor switch recipe");
628
629 return true;
630 }
631
632 if (VPBB->getNumSuccessors() > 2) {
633 assert((IsSwitch || IsBranchOnTwoConds) &&
634 "block with more than 2 successors not terminated by a switch or "
635 "branch-on-two-conds recipe");
636 return true;
637 }
638
639 assert(
640 !IsCondBranch && !IsBranchOnTwoConds &&
641 "block with 0 or 1 successors terminated by conditional branch recipe");
642 return false;
643}
644
646 if (hasConditionalTerminator(this))
647 return &back();
648 return nullptr;
649}
650
652 if (hasConditionalTerminator(this))
653 return &back();
654 return nullptr;
655}
656
658 return getParent() && getParent()->getExitingBasicBlock() == this;
659}
660
661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
666
667void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
668 if (!hasSuccessors()) {
669 O << Indent << "No successors\n";
670 } else {
671 O << Indent << "Successor(s): ";
672 ListSeparator LS;
673 for (auto *Succ : getSuccessors())
674 O << LS << Succ->getName();
675 O << '\n';
676 }
677}
678
679void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
680 VPSlotTracker &SlotTracker) const {
681 O << Indent << getName() << ":\n";
682
683 auto RecipeIndent = Indent + " ";
684 for (const VPRecipeBase &Recipe : *this) {
685 Recipe.print(O, RecipeIndent, SlotTracker);
686 O << '\n';
687 }
688
689 printSuccessors(O, Indent);
690}
691#endif
692
693std::pair<VPBlockBase *, VPBlockBase *>
696 VPBlockBase *Exiting = nullptr;
697 bool InRegion = Entry->getParent();
698 // First, clone blocks reachable from Entry.
699 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
700 VPBlockBase *NewBB = BB->clone();
701 Old2NewVPBlocks[BB] = NewBB;
702 if (InRegion && BB->getNumSuccessors() == 0) {
703 assert(!Exiting && "Multiple exiting blocks?");
704 Exiting = BB;
705 }
706 }
707 assert((!InRegion || Exiting) && "regions must have a single exiting block");
708
709 // Second, update the predecessors & successors of the cloned blocks.
710 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
711 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
713 for (VPBlockBase *Pred : BB->getPredecessors()) {
714 NewPreds.push_back(Old2NewVPBlocks[Pred]);
715 }
716 NewBB->setPredecessors(NewPreds);
718 for (VPBlockBase *Succ : BB->successors()) {
719 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
720 }
721 NewBB->setSuccessors(NewSuccs);
722 }
723
724#if !defined(NDEBUG)
725 // Verify that the order of predecessors and successors matches in the cloned
726 // version.
727 for (const auto &[OldBB, NewBB] :
729 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
730 for (const auto &[OldPred, NewPred] :
731 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
732 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
733
734 for (const auto &[OldSucc, NewSucc] :
735 zip(OldBB->successors(), NewBB->successors()))
736 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
737 }
738#endif
739
740 return std::make_pair(Old2NewVPBlocks[Entry],
741 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
742}
743
744VPRegionBlock *VPRegionBlock::clone() {
745 const auto &[NewEntry, NewExiting] = VPBlockUtils::cloneFrom(getEntry());
746 VPlan &Plan = *getPlan();
747 VPRegionValue *CanIV = getCanonicalIV();
748 VPRegionBlock *NewRegion =
749 CanIV ? Plan.createLoopRegion(CanIV->getType(), CanIV->getDebugLoc(),
750 getName(), NewEntry, NewExiting)
751 : Plan.createReplicateRegion(NewEntry, NewExiting, getName());
752
753 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
754 Block->setParent(NewRegion);
755 return NewRegion;
756}
757
760 "Loop regions should have been lowered to plain CFG");
761 assert(!State->Lane && "Replicating a Region with non-null instance.");
762 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
763
765 Entry);
766 State->Lane = VPLane(0);
767 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
768 State->Lane = VPLane(Lane, VPLane::Kind::First);
769 // Visit the VPBlocks connected to \p this, starting from it.
770 for (VPBlockBase *Block : RPOT) {
771 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
772 Block->execute(State);
773 }
774 }
775
776 // Exit replicating mode.
777 State->Lane.reset();
778}
779
782 for (VPRecipeBase &R : Recipes)
783 Cost += R.cost(VF, Ctx);
784 return Cost;
785}
786
787const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
788 const VPBlockBase *Pred = nullptr;
789 if (hasPredecessors()) {
790 Pred = getPredecessors()[Idx];
791 } else {
792 auto *Region = getParent();
793 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
794 "must be in the entry block of a non-replicate region");
795 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
796 "loop region has a single predecessor (preheader), its entry block "
797 "has 2 incoming blocks");
798
799 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
800 // region itself whose exiting block feeds the phi across the backedge.
801 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
802 }
803 return Pred->getExitingBasicBlock();
804}
805
807 if (!isReplicator()) {
808 // Neglect the cost of canonical IV, matching the legacy cost model.
811 Cost += Block->cost(VF, Ctx);
812 InstructionCost BackedgeCost =
813 ForceTargetInstructionCost.getNumOccurrences()
815 : Ctx.TTI.getCFInstrCost(Instruction::UncondBr, Ctx.CostKind);
816 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
817 << ": vector loop backedge\n");
818 Cost += BackedgeCost;
819 return Cost;
820 }
821
822 // Compute the cost of a replicate region. Replicating isn't supported for
823 // scalable vectors, return an invalid cost for them.
824 // TODO: Discard scalable VPlans with replicate recipes earlier after
825 // construction.
826 if (VF.isScalable())
828
829 // Compute and return the cost of the conditionally executed recipes.
830 assert(VF.isVector() && "Can only compute vector cost at the moment.");
832 return Then->cost(VF, Ctx);
833}
834
835#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
837 VPSlotTracker &SlotTracker) const {
838 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
839 auto NewIndent = Indent + " ";
840 if (auto *CanIV = getCanonicalIV()) {
841 O << '\n';
842 CanIV->print(O, SlotTracker);
843 O << " = CANONICAL-IV\n";
844 }
845 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
846 O << '\n';
847 BlockBase->print(O, NewIndent, SlotTracker);
848 }
849 O << Indent << "}\n";
850
851 printSuccessors(O, Indent);
852}
853#endif
854
856 auto *Header = cast<VPBasicBlock>(getEntry());
857 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
858 auto *CanIV = getCanonicalIV();
859 if (CanIV->getNumUsers() > 0) {
860 VPlan &Plan = *getPlan();
861 auto *Zero = Plan.getZero(CanIV->getType());
862 DebugLoc DL = CanIV->getDebugLoc();
864 VPBuilder HeaderBuilder(Header, Header->begin());
865 auto *ScalarR =
866 HeaderBuilder.createScalarPhi({Zero, CanIVInc}, DL, "index");
867 CanIV->replaceAllUsesWith(ScalarR);
868 }
869
870 VPBlockBase *Preheader = getSinglePredecessor();
871 VPBlockUtils::disconnectBlocks(Preheader, this);
872
873 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
874 VPB->setParent(getParent());
875
876 VPBlockUtils::connectBlocks(Preheader, Header);
877 VPBlockUtils::transferSuccessors(this, ExitingLatch);
878 VPBlockUtils::connectBlocks(ExitingLatch, Header);
879}
880
882 // TODO: Represent the increment as VPRegionValue as well.
883 VPRegionValue *CanIV = getCanonicalIV();
884 assert(CanIV && "Expected a canonical IV");
885
886 if (auto *Inc = vputils::findCanonicalIVIncrement(*getPlan()))
887 return Inc;
888
889 assert(!getPlan()->getVFxUF().isMaterialized() &&
890 "VFxUF can be used only before it is materialized.");
891 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
892 return VPBuilder(ExitingLatch->getTerminator())
893 .createOverflowingOp(Instruction::Add, {CanIV, &getPlan()->getVFxUF()},
894 {hasCanonicalIVNUW(), /* HasNSW */ false},
895 CanIV->getDebugLoc(), "index.next");
896}
897
898VPlan::VPlan(Loop *L) {
899 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
900 ScalarHeader = createVPIRBasicBlock(L->getHeader());
901
902 SmallVector<BasicBlock *> IRExitBlocks;
903 L->getUniqueExitBlocks(IRExitBlocks);
904 for (BasicBlock *EB : IRExitBlocks)
905 ExitBlocks.push_back(createVPIRBasicBlock(EB));
906}
907
909 VPSymbolicValue DummyValue;
910
911 for (auto *VPB : CreatedBlocks) {
912 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
913 // Replace all operands of recipes and all VPValues defined in VPBB with
914 // DummyValue so the block can be deleted.
915 for (VPRecipeBase &R : *VPBB) {
916 for (auto *Def : R.definedValues())
917 Def->replaceAllUsesWith(&DummyValue);
918
919 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
920 R.setOperand(I, &DummyValue);
921 }
922 } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
923 CanIV->replaceAllUsesWith(&DummyValue);
924 }
925
926 delete VPB;
927 }
928 for (VPValue *VPV : getLiveIns())
929 delete VPV;
930 delete BackedgeTakenCount;
931}
932
934 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
935 return VPIRBB->getIRBasicBlock() == IRBB;
936 });
937 assert(Iter != getExitBlocks().end() && "no exit block found");
938 return *Iter;
939}
940
942 return is_contained(ExitBlocks, VPBB);
943}
944
945/// To make RUN_VPLAN_PASS print final VPlan.
946static void printFinalVPlan(VPlan &) {}
947
948/// Generate the code inside the preheader and body of the vectorized loop.
949/// Assumes a single pre-header basic-block was created for this. Introduce
950/// additional basic-blocks as needed, and fill them all.
952 // Initialize CFG state.
953 State->CFG.PrevVPBB = nullptr;
954 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
955
956 // Update VPDominatorTree since VPBasicBlock may be removed after State was
957 // constructed.
958 State->VPDT.recalculate(*this);
959
960 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
961 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
962 cast<UncondBrInst>(VectorPreHeader->getTerminator())->setSuccessor(nullptr);
963 State->CFG.DTU.applyUpdates(
964 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
965
966 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
967 << ", UF=" << getConcreteUF() << '\n');
968 setName("Final VPlan");
969 // TODO: RUN_VPLAN_PASS/VPlanTransforms::runPass should automatically dump
970 // VPlans after some specific stages when "-debug" is specified, but that
971 // hasn't been implemented yet. For now, just do both:
972 LLVM_DEBUG(dump());
974
975 BasicBlock *ScalarPh = State->CFG.ExitBB;
976 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
977 if (ScalarPhVPBB) {
978 // Disconnect scalar preheader and scalar header, as the dominator tree edge
979 // will be updated as part of VPlan execution. This allows keeping the DTU
980 // logic generic during VPlan execution.
981 State->CFG.DTU.applyUpdates(
982 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
983 }
985 Entry);
986 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
987 // successor blocks including the middle, exit and scalar preheader blocks.
988 for (VPBlockBase *Block : RPOT)
989 Block->execute(State);
990
991 if (hasEarlyExit()) {
992 // Fix up LoopInfo for extra dispatch blocks when vectorizing loops with
993 // early exits. For dispatch blocks, we need to find the smallest common
994 // loop of all successors that are in a loop. Note: we only need to update
995 // loop info for blocks after the middle block, but there is no easy way to
996 // get those at this point.
997 for (VPBlockBase *VPB : reverse(RPOT)) {
998 auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
999 if (!VPBB || isa<VPIRBasicBlock>(VPBB))
1000 continue;
1001 BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
1002 Loop *L = State->LI->getLoopFor(BB);
1003 if (!L || any_of(successors(BB),
1004 [L](BasicBlock *Succ) { return L->contains(Succ); }))
1005 continue;
1006 // Find the innermost loop containing all successors that are in a loop.
1007 // Successors not in any loop don't constrain the target loop.
1008 Loop *Target = nullptr;
1009 for (BasicBlock *Succ : successors(BB)) {
1010 Loop *SuccLoop = State->LI->getLoopFor(Succ);
1011 if (!SuccLoop)
1012 continue;
1013 if (!Target)
1014 Target = SuccLoop;
1015 else
1016 Target = State->LI->getSmallestCommonLoop(Target, SuccLoop);
1017 }
1018 State->LI->removeBlock(BB);
1019 if (Target)
1020 Target->addBasicBlockToLoop(BB, *State->LI);
1021 }
1022 }
1023
1024 // If the original loop is unreachable, delete it and all its blocks.
1025 if (!ScalarPhVPBB) {
1026 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
1027 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
1028 // IR instructions.
1029 for (VPIRBasicBlock *EB : getExitBlocks()) {
1030 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
1031 if (R.getNumOperands() == 1)
1032 R.eraseFromParent();
1033 }
1034 }
1035
1036 Loop *OrigLoop =
1037 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
1038 auto Blocks = OrigLoop->getBlocksVector();
1039 Blocks.push_back(ScalarPh);
1040 while (!OrigLoop->isInnermost())
1041 State->LI->erase(*OrigLoop->begin());
1042 State->LI->erase(OrigLoop);
1043 for (auto *BB : Blocks)
1044 State->LI->removeBlock(BB);
1045 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
1046 }
1047
1048 State->CFG.DTU.flush();
1049
1050 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
1051 if (!Header)
1052 return;
1053
1054 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
1055 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1056
1057 // Fix the latch value of canonical, reduction and first-order recurrences
1058 // phis in the vector loop.
1059 for (VPRecipeBase &R : Header->phis()) {
1060 // Skip phi-like recipes that generate their backedege values themselves.
1061 if (isa<VPWidenPHIRecipe>(&R))
1062 continue;
1063
1064 auto *PhiR = cast<VPSingleDefRecipe>(&R);
1065 // VPInstructions currently model scalar Phis only.
1066 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1068 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1069
1070 Value *Phi = State->get(PhiR, NeedsScalar);
1071 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1072 // not.
1073 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1074 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1075 }
1076}
1077
1079 // For now only return the cost of the vector loop region, ignoring any other
1080 // blocks, like the preheader or middle blocks, expect for checking them for
1081 // recipes with invalid costs.
1083
1084 // If the cost of the loop region is invalid or any recipe in the skeleton
1085 // outside loop regions are invalid return an invalid cost.
1088 [&VF, &Ctx](VPBasicBlock *VPBB) {
1089 return !VPBB->cost(VF, Ctx).isValid();
1090 }))
1092
1093 return Cost;
1094}
1095
1097 // TODO: Cache if possible.
1099 if (auto *R = dyn_cast<VPRegionBlock>(B))
1100 return R->isReplicator() ? nullptr : R;
1101 return nullptr;
1102}
1103
1106 if (auto *R = dyn_cast<VPRegionBlock>(B))
1107 return R->isReplicator() ? nullptr : R;
1108 return nullptr;
1109}
1110
1111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1114
1115 if (VF.getNumUsers() > 0) {
1116 O << "\nLive-in ";
1117 VF.printAsOperand(O, SlotTracker);
1118 O << " = VF";
1119 }
1120
1121 if (UF.getNumUsers() > 0) {
1122 O << "\nLive-in ";
1123 UF.printAsOperand(O, SlotTracker);
1124 O << " = UF";
1125 }
1126
1127 if (VFxUF.getNumUsers() > 0) {
1128 O << "\nLive-in ";
1129 VFxUF.printAsOperand(O, SlotTracker);
1130 O << " = VF * UF";
1131 }
1132
1133 if (VectorTripCount.getNumUsers() > 0) {
1134 O << "\nLive-in ";
1135 VectorTripCount.printAsOperand(O, SlotTracker);
1136 O << " = vector-trip-count";
1137 }
1138
1139 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1140 O << "\nLive-in ";
1141 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1142 O << " = backedge-taken count";
1143 }
1144
1145 O << "\n";
1146 if (TripCount) {
1147 if (isa<VPIRValue>(TripCount))
1148 O << "Live-in ";
1149 TripCount->printAsOperand(O, SlotTracker);
1150 O << " = original trip-count";
1151 O << "\n";
1152 }
1153}
1154
1158
1159 O << "VPlan '" << getName() << "' {";
1160
1161 printLiveIns(O);
1162
1164 RPOT(getEntry());
1165 for (const VPBlockBase *Block : RPOT) {
1166 O << '\n';
1167 Block->print(O, "", SlotTracker);
1168 }
1169
1170 O << "}\n";
1171}
1172
1173std::string VPlan::getName() const {
1174 std::string Out;
1175 raw_string_ostream RSO(Out);
1176 RSO << Name << " for ";
1177 if (!VFs.empty()) {
1178 RSO << "VF={" << VFs[0];
1179 for (ElementCount VF : drop_begin(VFs))
1180 RSO << "," << VF;
1181 RSO << "},";
1182 }
1183
1184 if (UFs.empty()) {
1185 RSO << "UF>=1";
1186 } else {
1187 RSO << "UF={" << UFs[0];
1188 for (unsigned UF : drop_begin(UFs))
1189 RSO << "," << UF;
1190 RSO << "}";
1191 }
1192
1193 return Out;
1194}
1195
1198 VPlanPrinter Printer(O, *this);
1199 Printer.dump();
1200}
1201
1203void VPlan::dump() const { print(dbgs()); }
1204#endif
1205
1206static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1207 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1208 // Update the operands of all cloned recipes starting at NewEntry. This
1209 // traverses all reachable blocks. This is done in two steps, to handle cycles
1210 // in PHI recipes.
1212 OldDeepRPOT(Entry);
1214 NewDeepRPOT(NewEntry);
1215 // First, collect all mappings from old to new VPValues defined by cloned
1216 // recipes.
1217 for (const auto &[OldBB, NewBB] :
1220 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1221 "blocks must have the same number of recipes");
1222 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1223 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1224 "recipes must have the same number of operands");
1225 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1226 "recipes must define the same number of operands");
1227 for (const auto &[OldV, NewV] :
1228 zip(OldR.definedValues(), NewR.definedValues()))
1229 Old2NewVPValues[OldV] = NewV;
1230 }
1231 }
1232
1233 // Update all operands to use cloned VPValues.
1234 for (VPBasicBlock *NewBB :
1236 for (VPRecipeBase &NewR : *NewBB)
1237 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1238 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1239 NewR.setOperand(I, NewOp);
1240 }
1241 }
1242}
1243
1245 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1246 // Clone blocks.
1247 const auto &[NewEntry, __] = VPBlockUtils::cloneFrom(Entry);
1248
1249 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1250 VPIRBasicBlock *NewScalarHeader = nullptr;
1251 if (getScalarHeader()->hasPredecessors()) {
1252 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1253 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1254 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1255 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1256 }));
1257 } else {
1258 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1259 }
1260 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1261 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1262 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1263 for (VPIRValue *OldLiveIn : getLiveIns())
1264 Old2NewVPValues[OldLiveIn] = NewPlan->getOrAddLiveIn(OldLiveIn);
1265
1266 if (auto *TripCountIRV = dyn_cast_or_null<VPIRValue>(TripCount))
1267 Old2NewVPValues[TripCountIRV] = NewPlan->getOrAddLiveIn(TripCountIRV);
1268 // else NewTripCount will be created and inserted into Old2NewVPValues when
1269 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1270
1271 if (auto *LoopRegion = getVectorLoopRegion()) {
1272 auto *OldCanIV = LoopRegion->getCanonicalIV();
1273 auto *NewCanIV = NewPlan->getVectorLoopRegion()->getCanonicalIV();
1274 assert(OldCanIV && NewCanIV &&
1275 "Loop regions of both plans must have canonical IVs.");
1276 Old2NewVPValues[OldCanIV] = NewCanIV;
1277 }
1278
1279 assert(none_of(Old2NewVPValues.keys(), IsaPred<VPSymbolicValue>) &&
1280 "All VPSymbolicValues must be handled below");
1281
1282 if (BackedgeTakenCount)
1283 NewPlan->BackedgeTakenCount = new VPSymbolicValue();
1284
1285 // Map and propagate materialized state for symbolic values.
1286 for (auto [OldSV, NewSV] :
1287 {std::pair{&VectorTripCount, &NewPlan->VectorTripCount},
1288 {&VF, &NewPlan->VF},
1289 {&UF, &NewPlan->UF},
1290 {&VFxUF, &NewPlan->VFxUF},
1291 {BackedgeTakenCount, NewPlan->BackedgeTakenCount}}) {
1292 if (!OldSV)
1293 continue;
1294 Old2NewVPValues[OldSV] = NewSV;
1295 if (OldSV->isMaterialized())
1296 NewSV->markMaterialized();
1297 }
1298
1299 remapOperands(Entry, NewEntry, Old2NewVPValues);
1300
1301 // Initialize remaining fields of cloned VPlan.
1302 NewPlan->VFs = VFs;
1303 NewPlan->UFs = UFs;
1304 // TODO: Adjust names.
1305 NewPlan->Name = Name;
1306 if (TripCount) {
1307 assert(Old2NewVPValues.contains(TripCount) &&
1308 "TripCount must have been added to Old2NewVPValues");
1309 NewPlan->TripCount = Old2NewVPValues[TripCount];
1310 }
1311
1312 // Transfer all cloned blocks (the second half of all current blocks) from
1313 // current to new VPlan.
1314 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1315 for (unsigned I :
1316 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1317 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1318 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1319
1320 // Update ExitBlocks of the new plan.
1321 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1322 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1323 VPB != NewScalarHeader)
1324 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1325 }
1326
1327 return NewPlan;
1328}
1329
1331 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1332 CreatedBlocks.push_back(VPIRBB);
1333 return VPIRBB;
1334}
1335
1337 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1338 for (Instruction &I :
1339 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1340 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1341 return VPIRBB;
1342}
1343
1344#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1345
1346Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1347 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1348 Twine(getOrCreateBID(Block));
1349}
1350
1351Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1352 const std::string &Name = Block->getName();
1353 if (!Name.empty())
1354 return Name;
1355 return "VPB" + Twine(getOrCreateBID(Block));
1356}
1357
1359 Depth = 1;
1360 bumpIndent(0);
1361 OS << "digraph VPlan {\n";
1362 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1363 if (!Plan.getName().empty())
1364 OS << "\\n" << DOT::EscapeString(Plan.getName());
1365
1366 {
1367 // Print live-ins.
1368 std::string Str;
1369 raw_string_ostream SS(Str);
1370 Plan.printLiveIns(SS);
1372 StringRef(Str).rtrim('\n').split(Lines, "\n");
1373 for (auto Line : Lines)
1374 OS << DOT::EscapeString(Line.str()) << "\\n";
1375 }
1376
1377 OS << "\"]\n";
1378 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1379 OS << "edge [fontname=Courier, fontsize=30]\n";
1380 OS << "compound=true\n";
1381
1382 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1383 dumpBlock(Block);
1384
1385 OS << "}\n";
1386}
1387
1388void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1390 dumpBasicBlock(BasicBlock);
1392 dumpRegion(Region);
1393 else
1394 llvm_unreachable("Unsupported kind of VPBlock.");
1395}
1396
1397void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1398 bool Hidden, const Twine &Label) {
1399 // Due to "dot" we print an edge between two regions as an edge between the
1400 // exiting basic block and the entry basic of the respective regions.
1401 const VPBlockBase *Tail = From->getExitingBasicBlock();
1402 const VPBlockBase *Head = To->getEntryBasicBlock();
1403 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1404 OS << " [ label=\"" << Label << '\"';
1405 if (Tail != From)
1406 OS << " ltail=" << getUID(From);
1407 if (Head != To)
1408 OS << " lhead=" << getUID(To);
1409 if (Hidden)
1410 OS << "; splines=none";
1411 OS << "]\n";
1412}
1413
1414void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1415 auto &Successors = Block->getSuccessors();
1416 if (Successors.size() == 1)
1417 drawEdge(Block, Successors.front(), false, "");
1418 else if (Successors.size() == 2) {
1419 drawEdge(Block, Successors.front(), false, "T");
1420 drawEdge(Block, Successors.back(), false, "F");
1421 } else {
1422 unsigned SuccessorNumber = 0;
1423 for (auto *Successor : Successors)
1424 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1425 }
1426}
1427
1428void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1429 // Implement dot-formatted dump by performing plain-text dump into the
1430 // temporary storage followed by some post-processing.
1431 OS << Indent << getUID(BasicBlock) << " [label =\n";
1432 bumpIndent(1);
1433 std::string Str;
1434 raw_string_ostream SS(Str);
1435 // Use no indentation as we need to wrap the lines into quotes ourselves.
1436 BasicBlock->print(SS, "", SlotTracker);
1437
1438 // We need to process each line of the output separately, so split
1439 // single-string plain-text dump.
1441 StringRef(Str).rtrim('\n').split(Lines, "\n");
1442
1443 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1444 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1445 };
1446
1447 // Don't need the "+" after the last line.
1448 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1449 EmitLine(Line, " +\n");
1450 EmitLine(Lines.back(), "\n");
1451
1452 bumpIndent(-1);
1453 OS << Indent << "]\n";
1454
1455 dumpEdges(BasicBlock);
1456}
1457
1458void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1459 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1460 bumpIndent(1);
1461 OS << Indent << "fontname=Courier\n"
1462 << Indent << "label=\""
1463 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1464 << DOT::EscapeString(Region->getName()) << "\"\n";
1465
1466 if (auto *CanIV = Region->getCanonicalIV()) {
1467 OS << Indent << "\"";
1468 std::string Op;
1469 raw_string_ostream S(Op);
1470 CanIV->printAsOperand(S, SlotTracker);
1471 OS << DOT::EscapeString(Op);
1472 OS << " = CANONICAL-IV\"\n";
1473 }
1474
1475 // Dump the blocks of the region.
1476 assert(Region->getEntry() && "Region contains no inner blocks.");
1477 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1478 dumpBlock(Block);
1479 bumpIndent(-1);
1480 OS << Indent << "}\n";
1481 dumpEdges(Region);
1482}
1483
1484#endif
1485
1486/// Returns true if there is a vector loop region and \p VPV is defined in a
1487/// loop region.
1488static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1489 if (isa<VPRegionValue>(VPV))
1490 return true;
1491 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1492 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1494}
1495
1500 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1501 if (auto *SV = dyn_cast<VPSymbolicValue>(this))
1502 SV->markMaterialized();
1503}
1504
1506 VPValue *New,
1507 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1509 // Note that this early exit is required for correctness; the implementation
1510 // below relies on the number of users for this VPValue to decrease, which
1511 // isn't the case if this == New.
1512 if (this == New)
1513 return;
1514
1515 for (unsigned J = 0; J < getNumUsers();) {
1516 VPUser *User = Users[J];
1517 bool RemovedUser = false;
1518 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1519 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1520 continue;
1521
1522 RemovedUser = true;
1523 User->setOperand(I, New);
1524 }
1525 // If a user got removed after updating the current user, the next user to
1526 // update will be moved to the current position, so we only need to
1527 // increment the index if the number of users did not change.
1528 if (!RemovedUser)
1529 J++;
1530 }
1531}
1532
1534 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1535 if (getOperand(Idx) == From)
1536 setOperand(Idx, To);
1537 }
1538}
1539
1540#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1542 OS << Tracker.getOrCreateName(this);
1543}
1544
1547 Op->printAsOperand(O, SlotTracker);
1548 });
1549}
1550#endif
1551
1552void VPSlotTracker::assignName(const VPValue *V) {
1553 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1554 auto *UV = V->getUnderlyingValue();
1555 auto *VPI = dyn_cast_or_null<VPInstruction>(V);
1556 if (!UV && !(VPI && !VPI->getName().empty())) {
1557 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1558 NextSlot++;
1559 return;
1560 }
1561
1562 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1563 // appending ".Number" to the name if there are multiple uses.
1564 std::string Name;
1565 if (UV)
1566 Name = getName(UV);
1567 else
1568 Name = VPI->getName();
1569
1570 assert(!Name.empty() && "Name cannot be empty.");
1571 StringRef Prefix = UV ? "ir<" : "vp<%";
1572 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1573
1574 // First assign the base name for V.
1575 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1576 // Integer or FP constants with different types will result in the same string
1577 // due to stripping types.
1579 return;
1580
1581 // If it is already used by C > 0 other VPValues, increase the version counter
1582 // C and use it for V.
1583 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1584 if (!UseInserted) {
1585 C->second++;
1586 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1587 }
1588}
1589
1590void VPSlotTracker::assignNames(const VPlan &Plan) {
1591 if (Plan.VF.getNumUsers() > 0)
1592 assignName(&Plan.VF);
1593 if (Plan.UF.getNumUsers() > 0)
1594 assignName(&Plan.UF);
1595 if (Plan.VFxUF.getNumUsers() > 0)
1596 assignName(&Plan.VFxUF);
1597 assignName(&Plan.VectorTripCount);
1598 if (Plan.BackedgeTakenCount)
1599 assignName(Plan.BackedgeTakenCount);
1600 for (VPValue *LI : Plan.getLiveIns())
1601 assignName(LI);
1602
1603 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1604 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1605 for (const VPBlockBase *VPB : RPOT) {
1606 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB))
1607 assignNames(VPBB);
1608 else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
1609 assignName(CanIV);
1610 }
1611}
1612
1613void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1614 for (const VPRecipeBase &Recipe : *VPBB)
1615 for (VPValue *Def : Recipe.definedValues())
1616 assignName(Def);
1617}
1618
1619std::string VPSlotTracker::getName(const Value *V) {
1620 std::string Name;
1621 raw_string_ostream S(Name);
1622 if (V->hasName() || !isa<Instruction>(V)) {
1623 V->printAsOperand(S, false);
1624 return Name;
1625 }
1626
1627 if (!MST) {
1628 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1629 // instruction.
1630 auto *I = cast<Instruction>(V);
1631 // This check is required to support unit tests with incomplete IR.
1632 if (I->getParent()) {
1633 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1634 MST->incorporateFunction(*I->getFunction());
1635 } else {
1636 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1637 }
1638 }
1639 V->printAsOperand(S, false, *MST);
1640 return Name;
1641}
1642
1643std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1644 std::string Name = VPValue2Name.lookup(V);
1645 if (!Name.empty())
1646 return Name;
1647
1648 // If no name was assigned, no VPlan was provided when creating the slot
1649 // tracker or it is not reachable from the provided VPlan. This can happen,
1650 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1651 // in a debugger.
1652 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1653 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1654 // here.
1655 const VPRecipeBase *DefR = V->getDefiningRecipe();
1656 (void)DefR;
1657 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1658 "VPValue defined by a recipe in a VPlan?");
1659
1660 // Use the underlying value's name, if there is one.
1661 if (auto *UV = V->getUnderlyingValue()) {
1662 std::string Name;
1663 raw_string_ostream S(Name);
1664 UV->printAsOperand(S, false);
1665 return (Twine("ir<") + Name + ">").str();
1666 }
1667
1668 return "<badref>";
1669}
1670
1672 VPValue *TrueVal,
1673 VPValue *FalseVal, DebugLoc DL) {
1674 assert(VPTypeAnalysis(*getInsertBlock()->getPlan())
1675 .inferScalarType(ChainOp)
1676 ->isIntegerTy(1) &&
1677 "ChainOp must be i1 for AnyOf reduction");
1678 VPIRFlags Flags(RecurKind::Or, /*IsOrdered=*/false, /*IsInLoop=*/false,
1679 FastMathFlags());
1680 auto *OrReduce =
1682 auto *Freeze = createNaryOp(Instruction::Freeze, {OrReduce}, DL);
1683 return createSelect(Freeze, TrueVal, FalseVal, DL, "rdx.select");
1684}
1685
1687 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1688 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1689 bool PredicateAtRangeStart = Predicate(Range.Start);
1690
1691 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1692 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1693 Range.End = TmpVF;
1694 break;
1695 }
1696
1697 return PredicateAtRangeStart;
1698}
1699
1700/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1701/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1702/// of VF's starting at a given VF and extending it as much as possible. Each
1703/// vectorization decision can potentially shorten this sub-range during
1704/// buildVPlan().
1706 ElementCount MaxVF) {
1707 auto MaxVFTimes2 = MaxVF * 2;
1708 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1709 VFRange SubRange = {VF, MaxVFTimes2};
1710 if (auto Plan = tryToBuildVPlan(SubRange)) {
1712 // Update the name of the latch of the top-level vector loop region region
1713 // after optimizations which includes block folding.
1714 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1715 VPlans.push_back(std::move(Plan));
1716 }
1717 VF = SubRange.End;
1718 }
1719}
1720
1722 assert(count_if(VPlans,
1723 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1724 1 &&
1725 "Multiple VPlans for VF.");
1726
1727 for (const VPlanPtr &Plan : VPlans) {
1728 if (Plan->hasVF(VF))
1729 return *Plan.get();
1730 }
1731 llvm_unreachable("No plan found!");
1732}
1733
1736 // Reserve first location for self reference to the LoopID metadata node.
1737 MDs.push_back(nullptr);
1738 bool IsUnrollMetadata = false;
1739 MDNode *LoopID = L->getLoopID();
1740 if (LoopID) {
1741 // First find existing loop unrolling disable metadata.
1742 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1743 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1744 if (MD) {
1745 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1746 if (!S)
1747 continue;
1748 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1749 continue;
1750 IsUnrollMetadata =
1751 S->getString().starts_with("llvm.loop.unroll.disable");
1752 }
1753 MDs.push_back(LoopID->getOperand(I));
1754 }
1755 }
1756
1757 if (!IsUnrollMetadata) {
1758 // Add runtime unroll disable metadata.
1759 LLVMContext &Context = L->getHeader()->getContext();
1760 SmallVector<Metadata *, 1> DisableOperands;
1761 DisableOperands.push_back(
1762 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1763 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1764 MDs.push_back(DisableNode);
1765 MDNode *NewLoopID = MDNode::get(Context, MDs);
1766 // Set operand 0 to refer to the loop id itself.
1767 NewLoopID->replaceOperandWith(0, NewLoopID);
1768 L->setLoopID(NewLoopID);
1769 }
1770}
1771
1773 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1774 bool VectorizingEpilogue, MDNode *OrigLoopID,
1775 std::optional<unsigned> OrigAverageTripCount,
1776 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1777 bool DisableRuntimeUnroll) {
1778 // Update the metadata of the scalar loop. Skip the update when vectorizing
1779 // the epilogue loop to ensure it is updated only once. Also skip the update
1780 // when the scalar loop became unreachable.
1781 auto *ScalarPH = Plan.getScalarPreheader();
1782 if (ScalarPH && !VectorizingEpilogue) {
1783 std::optional<MDNode *> RemainderLoopID =
1786 if (RemainderLoopID) {
1787 OrigLoop->setLoopID(*RemainderLoopID);
1788 } else {
1789 if (DisableRuntimeUnroll)
1791
1792 LoopVectorizeHints Hints(OrigLoop, /*InterleaveOnlyWhenForced*/ false,
1793 *ORE);
1794 Hints.setAlreadyVectorized();
1795 }
1796 }
1797 // Tag the scalar remainder so downstream passes (e.g. the unroller and
1798 // WarnMissedTransforms) can produce more informative remarks. Only emit
1799 // when remarks are enabled.
1800 if (ORE->enabled() && ScalarPH && ScalarPH->hasPredecessors())
1801 OrigLoop->addIntLoopAttribute("llvm.loop.vectorize.epilogue", 1);
1802
1803 if (!VectorLoop)
1804 return;
1805
1806 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1807 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1809 VectorLoop->setLoopID(*VectorizedLoopID);
1810 } else {
1811 // Keep all loop hints from the original loop on the vector loop (we'll
1812 // replace the vectorizer-specific hints below).
1813 if (OrigLoopID)
1814 VectorLoop->setLoopID(OrigLoopID);
1815
1816 if (!VectorizingEpilogue) {
1817 LoopVectorizeHints Hints(VectorLoop, /*InterleaveOnlyWhenForced*/ false,
1818 *ORE);
1819 Hints.setAlreadyVectorized();
1820 }
1821 }
1822 // Tag the vector loop body so downstream passes can identify it. Only
1823 // emit when remarks are enabled.
1824 if (ORE->enabled())
1825 VectorLoop->addIntLoopAttribute("llvm.loop.vectorize.body", 1);
1827 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1828 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1830
1831 // Set/update profile weights for the vector and remainder loops as original
1832 // loop iterations are now distributed among them. Note that original loop
1833 // becomes the scalar remainder loop after vectorization.
1834 //
1835 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1836 // end up getting slightly roughened result but that should be OK since
1837 // profile is not inherently precise anyway. Note also possible bypass of
1838 // vector code caused by legality checks is ignored, assigning all the weight
1839 // to the vector loop, optimistically.
1840 //
1841 // For scalable vectorization we can't know at compile time how many
1842 // iterations of the loop are handled in one vector iteration, so instead
1843 // use the value of vscale used for tuning.
1844 unsigned AverageVectorTripCount = 0;
1845 unsigned RemainderAverageTripCount = 0;
1846 auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount();
1847 auto IsProfiled = EC && EC->getCount();
1848 if (!OrigAverageTripCount) {
1849 if (!IsProfiled)
1850 return;
1851 auto &SE = *PSE.getSE();
1852 AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
1853 if (ProfcheckDisableMetadataFixes || !AverageVectorTripCount)
1854 return;
1855 if (ScalarPH)
1856 RemainderAverageTripCount =
1857 SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
1858 // Setting to 1 should be sufficient to generate the correct branch weights.
1859 OrigLoopInvocationWeight = 1;
1860 } else {
1861 // Calculate number of iterations in unrolled loop.
1862 AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1863 // Calculate number of iterations for remainder loop.
1864 RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1865 }
1866 if (HeaderVPBB) {
1867 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1868 OrigLoopInvocationWeight);
1869 }
1870
1871 if (ScalarPH) {
1872 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1873 OrigLoopInvocationWeight);
1874 }
1875}
1876
1877#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1879 if (VPlans.empty()) {
1880 O << "LV: No VPlans built.\n";
1881 return;
1882 }
1883 for (const auto &Plan : VPlans)
1885 Plan->printDOT(O);
1886 else
1887 Plan->print(O);
1888}
1889#endif
1890
1891bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1893 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1894 unsigned WideSize = C->getBitWidth();
1895 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1896 ? TruncatedVal.sext(WideSize)
1897 : TruncatedVal.zext(WideSize);
1898 return ExtendedVal == *C;
1899}
1900
1903 if (auto *IRV = dyn_cast<VPIRValue>(V))
1904 return TTI::getOperandInfo(IRV->getValue());
1905
1906 return {};
1907}
1908
1910 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1911 TTI::VectorInstrContext VIC, bool AlwaysIncludeReplicatingR) {
1912 if (VF.isScalar())
1913 return 0;
1914
1915 assert(!VF.isScalable() &&
1916 "Scalarization overhead not supported for scalable vectors");
1917
1918 InstructionCost ScalarizationCost = 0;
1919 // Compute the cost of scalarizing the result if needed.
1920 if (!ResultTy->isVoidTy()) {
1921 for (Type *VectorTy :
1922 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1923 ScalarizationCost += TTI.getScalarizationOverhead(
1925 /*Insert=*/true, /*Extract=*/false, CostKind,
1926 /*ForPoisonSrc=*/true, {}, VIC);
1927 }
1928 }
1929 // Compute the cost of scalarizing the operands, skipping ones that do not
1930 // require extraction/scalarization and do not incur any overhead.
1931 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1933 for (auto *Op : Operands) {
1934 if (isa<VPIRValue>(Op) ||
1935 (!AlwaysIncludeReplicatingR &&
1938 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1939 !UniqueOperands.insert(Op).second)
1940 continue;
1941 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1942 }
1943 return ScalarizationCost +
1944 TTI.getOperandsScalarizationOverhead(Tys, CostKind, VIC);
1945}
1946
1948 ElementCount VF) {
1949 const Instruction *UI = R->getUnderlyingInstr();
1950 if (isa<LoadInst>(UI))
1951 return true;
1952 assert(isa<StoreInst>(UI) && "R must either be a load or store");
1953
1954 if (!NumPredStores) {
1955 // Count the number of predicated stores in the VPlan, caching the result.
1956 // Only stores where scatter is not legal are counted, matching the legacy
1957 // cost model behavior.
1958 const VPlan &Plan = *R->getParent()->getPlan();
1959 NumPredStores = 0;
1960 for (const VPRegionBlock *VPRB :
1963 assert(VPRB->isReplicator() && "must only contain replicate regions");
1964 for (const VPBasicBlock *VPBB :
1966 vp_depth_first_shallow(VPRB->getEntry()))) {
1967 for (const VPRecipeBase &Recipe : *VPBB) {
1968 auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
1969 if (!RepR)
1970 continue;
1971 if (!isa<StoreInst>(RepR->getUnderlyingInstr()))
1972 continue;
1973 // Check if scatter is legal for this store. If so, don't count it.
1974 Type *Ty = Types.inferScalarType(RepR->getOperand(0));
1975 auto *VTy = VectorType::get(Ty, VF);
1976 const Align Alignment =
1977 getLoadStoreAlignment(RepR->getUnderlyingInstr());
1978 if (!TTI.isLegalMaskedScatter(VTy, Alignment))
1979 ++(*NumPredStores);
1980 }
1981 }
1982 }
1983 }
1985}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu next use AMDGPU Next Use Analysis Printer
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
The number of stores in a loop that are allowed to need predication.
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS(PASS,...)
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1734
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:158
static void printFinalVPlan(VPlan &)
To make RUN_VPLAN_PASS print final VPlan.
Definition VPlan.cpp:946
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:589
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:62
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1488
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:607
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:63
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1206
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:65
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1027
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:123
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
std::optional< ProfileCount > getEntryCount(bool AllowSynthetic=false) const
Get the entry count for this function.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
iterator begin() const
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1721
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1772
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1705
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1686
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1878
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
LLVM_ABI void addIntLoopAttribute(StringRef Name, unsigned Value, ArrayRef< StringRef > RemovePrefixes={}) const
Add an integer metadata attribute to this loop's loop-ID node.
Definition LoopInfo.cpp:579
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:547
Metadata node.
Definition Metadata.h:1080
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:832
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4154
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4229
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4181
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:494
iterator end()
Definition VPlan.h:4191
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4189
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:545
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:780
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:787
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:233
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:394
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:599
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:566
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4169
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:552
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:679
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:657
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:645
const VPRecipeBase & back() const
Definition VPlan.h:4203
bool empty() const
Definition VPlan.h:4200
size_t size() const
Definition VPlan.h:4199
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:97
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:318
VPRegionBlock * getParent()
Definition VPlan.h:189
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:203
void setName(const Twine &newName)
Definition VPlan.h:182
size_t getNumSuccessors() const
Definition VPlan.h:240
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:222
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:220
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:667
size_t getNumPredecessors() const
Definition VPlan.h:241
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:309
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:225
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:218
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:225
virtual VPBlockBase * clone()=0
Clone the current block and it's recipes without updating the operands of the cloned recipes,...
VPlan * getPlan()
Definition VPlan.cpp:178
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:197
const std::string & getName() const
Definition VPlan.h:180
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:236
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:260
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:166
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:217
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:183
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:282
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:230
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:214
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:184
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:232
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:250
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:286
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:270
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Clone the CFG for all nodes reachable from Entry, including cloning the blocks and their recipes.
Definition VPlan.cpp:694
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1671
VPBasicBlock * getInsertBlock() const
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:432
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4307
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:462
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4331
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:487
Class to record and manage LLVM IR flags.
Definition VPlan.h:687
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1222
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1265
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:88
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:405
LLVM_ABI_FOR_TEST void dump() const
Dump the recipe to stderr (for debugging).
Definition VPlan.cpp:117
VPBasicBlock * getParent()
Definition VPlan.h:479
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const
Print the recipe, delegating to printRecipe().
virtual LLVM_ABI_FOR_TEST ~VPRecipeValue()
Definition VPlan.cpp:150
friend class VPValue
Definition VPlanValue.h:304
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4364
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:744
const VPBlockBase * getEntry() const
Definition VPlan.h:4408
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:855
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4440
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
Definition VPlan.cpp:881
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:806
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:836
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4489
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:758
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4476
const VPBlockBase * getExiting() const
Definition VPlan.h:4420
friend class VPlan
Definition VPlan.h:4365
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:209
Type * getType() const
Returns the type of the VPRegionValue.
Definition VPlanValue.h:225
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
Definition VPlanValue.h:228
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3190
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1643
An analysis for type-inference for VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:329
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1533
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1545
operand_range operands()
Definition VPlanValue.h:397
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:373
unsigned getNumOperands() const
Definition VPlanValue.h:367
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:368
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:138
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1496
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:128
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1541
friend class VPRecipeValue
Definition VPlanValue.h:52
void assertNotMaterialized() const
Assert that this VPValue has not been materialized, if it is a VPSymbolicValue.
Definition VPlanValue.h:501
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74
@ VPVRecipeValueSC
A symbolic live-in VPValue without IR backing.
Definition VPlanValue.h:84
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:109
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:102
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1499
unsigned getNumUsers() const
Definition VPlanValue.h:113
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1505
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1358
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4512
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1197
friend class VPSlotTracker
Definition VPlan.h:4514
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1173
VPBasicBlock * getEntry()
Definition VPlan.h:4604
void setName(const Twine &newName)
Definition VPlan.h:4772
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:933
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:908
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:941
friend class VPlanPrinter
Definition VPlan.h:4513
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4702
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1330
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4831
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4653
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1096
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4899
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1078
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4754
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4593
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4854
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1336
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1203
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4643
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:951
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1156
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4649
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1112
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1244
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2312
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1891
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Or
Bitwise or logical OR of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Disable runtime unrolling by default for vectorized loops.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
ElementCount End
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1902
std::optional< unsigned > NumPredStores
Number of predicated stores in the VPlan, computed on demand.
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1909
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
bool useEmulatedMaskMemRefHack(const VPReplicateRecipe *R, ElementCount VF)
Returns true if an artificially high cost for emulated masked memrefs should be used.
Definition VPlan.cpp:1947
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:142
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:280
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:280
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:240
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:362
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:340
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...