LLVM 17.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
20#include "VPlanCFG.h"
21#include "VPlanDominatorTree.h"
24#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/Twine.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/Value.h"
37#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <string>
46#include <vector>
47
48using namespace llvm;
49
50namespace llvm {
52}
53
54#define DEBUG_TYPE "vplan"
55
56#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
58 const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
60 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
61 V.print(OS, SlotTracker);
62 return OS;
63}
64#endif
65
67 const ElementCount &VF) const {
68 switch (LaneKind) {
70 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
71 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
72 Builder.getInt32(VF.getKnownMinValue() - Lane));
74 return Builder.getInt32(Lane);
75 }
76 llvm_unreachable("Unknown lane kind");
77}
78
79VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
80 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
81 if (Def)
82 Def->addDefinedValue(this);
83}
84
86 assert(Users.empty() && "trying to delete a VPValue with remaining users");
87 if (Def)
88 Def->removeDefinedValue(this);
89}
90
91#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
93 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
94 R->print(OS, "", SlotTracker);
95 else
97}
98
99void VPValue::dump() const {
100 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
102 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
104 dbgs() << "\n";
105}
106
107void VPDef::dump() const {
108 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
110 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
111 print(dbgs(), "", SlotTracker);
112 dbgs() << "\n";
113}
114#endif
115
117 return cast_or_null<VPRecipeBase>(Def);
118}
119
121 return cast_or_null<VPRecipeBase>(Def);
122}
123
124// Get the top-most entry block of \p Start. This is the entry block of the
125// containing VPlan. This function is templated to support both const and non-const blocks
126template <typename T> static T *getPlanEntry(T *Start) {
127 T *Next = Start;
128 T *Current = Start;
129 while ((Next = Next->getParent()))
130 Current = Next;
131
132 SmallSetVector<T *, 8> WorkList;
133 WorkList.insert(Current);
134
135 for (unsigned i = 0; i < WorkList.size(); i++) {
136 T *Current = WorkList[i];
137 if (Current->getNumPredecessors() == 0)
138 return Current;
139 auto &Predecessors = Current->getPredecessors();
140 WorkList.insert(Predecessors.begin(), Predecessors.end());
141 }
142
143 llvm_unreachable("VPlan without any entry node without predecessors");
144}
145
146VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
147
148const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
149
150/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
152 const VPBlockBase *Block = this;
153 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
154 Block = Region->getEntry();
155 return cast<VPBasicBlock>(Block);
156}
157
159 VPBlockBase *Block = this;
160 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
161 Block = Region->getEntry();
162 return cast<VPBasicBlock>(Block);
163}
164
165void VPBlockBase::setPlan(VPlan *ParentPlan) {
166 assert(
167 (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
168 "Can only set plan on its entry or preheader block.");
169 Plan = ParentPlan;
170}
171
172/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
174 const VPBlockBase *Block = this;
175 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
176 Block = Region->getExiting();
177 return cast<VPBasicBlock>(Block);
178}
179
181 VPBlockBase *Block = this;
182 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
183 Block = Region->getExiting();
184 return cast<VPBasicBlock>(Block);
185}
186
188 if (!Successors.empty() || !Parent)
189 return this;
190 assert(Parent->getExiting() == this &&
191 "Block w/o successors not the exiting block of its parent.");
192 return Parent->getEnclosingBlockWithSuccessors();
193}
194
196 if (!Predecessors.empty() || !Parent)
197 return this;
198 assert(Parent->getEntry() == this &&
199 "Block w/o predecessors not the entry of its parent.");
200 return Parent->getEnclosingBlockWithPredecessors();
201}
202
205 delete Block;
206}
207
209 iterator It = begin();
210 while (It != end() && It->isPhi())
211 It++;
212 return It;
213}
214
216 if (Def->isLiveIn())
217 return Def->getLiveInIRValue();
218
219 if (hasScalarValue(Def, Instance)) {
220 return Data
221 .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
222 }
223
224 assert(hasVectorValue(Def, Instance.Part));
225 auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
226 if (!VecPart->getType()->isVectorTy()) {
227 assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
228 return VecPart;
229 }
230 // TODO: Cache created scalar values.
231 Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
232 auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
233 // set(Def, Extract, Instance);
234 return Extract;
235}
237 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
238 return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
239}
240
242 const Instruction *Orig) {
243 // If the loop was versioned with memchecks, add the corresponding no-alias
244 // metadata.
245 if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
246 LVer->annotateInstWithNoAlias(To, Orig);
247}
248
250 // No source instruction to transfer metadata from?
251 if (!From)
252 return;
253
255 addNewMetadata(To, From);
256}
257
259 // No source instruction to transfer metadata from?
260 if (!From)
261 return;
262
263 for (Value *V : To) {
264 if (Instruction *I = dyn_cast<Instruction>(V))
266 }
267}
268
270 const Instruction *Inst = dyn_cast<Instruction>(V);
271 if (!Inst) {
273 return;
274 }
275
276 const DILocation *DIL = Inst->getDebugLoc();
277 // When a FSDiscriminator is enabled, we don't need to add the multiply
278 // factors to the discriminators.
279 if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() &&
281 // FIXME: For scalable vectors, assume vscale=1.
282 auto NewDIL =
284 if (NewDIL)
286 else
287 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
288 << DIL->getFilename() << " Line: " << DIL->getLine());
289 } else
291}
292
294VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
295 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
296 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
297 BasicBlock *PrevBB = CFG.PrevBB;
298 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
299 PrevBB->getParent(), CFG.ExitBB);
300 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
301
302 // Hook up the new basic block to its predecessors.
303 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
304 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
305 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
306 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
307
308 assert(PredBB && "Predecessor basic-block not found building successor.");
309 auto *PredBBTerminator = PredBB->getTerminator();
310 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
311
312 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
313 if (isa<UnreachableInst>(PredBBTerminator)) {
314 assert(PredVPSuccessors.size() == 1 &&
315 "Predecessor ending w/o branch must have single successor.");
316 DebugLoc DL = PredBBTerminator->getDebugLoc();
317 PredBBTerminator->eraseFromParent();
318 auto *Br = BranchInst::Create(NewBB, PredBB);
319 Br->setDebugLoc(DL);
320 } else if (TermBr && !TermBr->isConditional()) {
321 TermBr->setSuccessor(0, NewBB);
322 } else {
323 // Set each forward successor here when it is created, excluding
324 // backedges. A backward successor is set when the branch is created.
325 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
326 assert(!TermBr->getSuccessor(idx) &&
327 "Trying to reset an existing successor block.");
328 TermBr->setSuccessor(idx, NewBB);
329 }
330 }
331 return NewBB;
332}
333
335 bool Replica = State->Instance && !State->Instance->isFirstIteration();
336 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
337 VPBlockBase *SingleHPred = nullptr;
338 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
339
340 auto IsLoopRegion = [](VPBlockBase *BB) {
341 auto *R = dyn_cast<VPRegionBlock>(BB);
342 return R && !R->isReplicator();
343 };
344
345 // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
346 if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
347 // ExitBB can be re-used for the exit block of the Plan.
348 NewBB = State->CFG.ExitBB;
349 State->CFG.PrevBB = NewBB;
350
351 // Update the branch instruction in the predecessor to branch to ExitBB.
352 VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
353 VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
354 assert(PredVPB->getSingleSuccessor() == this &&
355 "predecessor must have the current block as only successor");
356 BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
357 // The Exit block of a loop is always set to be successor 0 of the Exiting
358 // block.
359 cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
360 } else if (PrevVPBB && /* A */
361 !((SingleHPred = getSingleHierarchicalPredecessor()) &&
362 SingleHPred->getExitingBasicBlock() == PrevVPBB &&
363 PrevVPBB->getSingleHierarchicalSuccessor() &&
364 (SingleHPred->getParent() == getEnclosingLoopRegion() &&
365 !IsLoopRegion(SingleHPred))) && /* B */
366 !(Replica && getPredecessors().empty())) { /* C */
367 // The last IR basic block is reused, as an optimization, in three cases:
368 // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
369 // B. when the current VPBB has a single (hierarchical) predecessor which
370 // is PrevVPBB and the latter has a single (hierarchical) successor which
371 // both are in the same non-replicator region; and
372 // C. when the current VPBB is an entry of a region replica - where PrevVPBB
373 // is the exiting VPBB of this region from a previous instance, or the
374 // predecessor of this region.
375
376 NewBB = createEmptyBasicBlock(State->CFG);
377 State->Builder.SetInsertPoint(NewBB);
378 // Temporarily terminate with unreachable until CFG is rewired.
379 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
380 // Register NewBB in its loop. In innermost loops its the same for all
381 // BB's.
382 if (State->CurrentVectorLoop)
383 State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
384 State->Builder.SetInsertPoint(Terminator);
385 State->CFG.PrevBB = NewBB;
386 }
387
388 // 2. Fill the IR basic block with IR instructions.
389 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
390 << " in BB:" << NewBB->getName() << '\n');
391
392 State->CFG.VPBB2IRBB[this] = NewBB;
393 State->CFG.PrevVPBB = this;
394
395 for (VPRecipeBase &Recipe : Recipes)
396 Recipe.execute(*State);
397
398 LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
399}
400
402 for (VPRecipeBase &R : Recipes) {
403 for (auto *Def : R.definedValues())
404 Def->replaceAllUsesWith(NewValue);
405
406 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
407 R.setOperand(I, NewValue);
408 }
409}
410
412 assert((SplitAt == end() || SplitAt->getParent() == this) &&
413 "can only split at a position in the same block");
414
416 // First, disconnect the current block from its successors.
417 for (VPBlockBase *Succ : Succs)
419
420 // Create new empty block after the block to split.
421 auto *SplitBlock = new VPBasicBlock(getName() + ".split");
423
424 // Add successors for block to split to new block.
425 for (VPBlockBase *Succ : Succs)
427
428 // Finally, move the recipes starting at SplitAt to new block.
429 for (VPRecipeBase &ToMove :
430 make_early_inc_range(make_range(SplitAt, this->end())))
431 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
432
433 return SplitBlock;
434}
435
438 if (P && P->isReplicator()) {
439 P = P->getParent();
440 assert(!cast<VPRegionBlock>(P)->isReplicator() &&
441 "unexpected nested replicate regions");
442 }
443 return P;
444}
445
446static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
447 if (VPBB->empty()) {
448 assert(
449 VPBB->getNumSuccessors() < 2 &&
450 "block with multiple successors doesn't have a recipe as terminator");
451 return false;
452 }
453
454 const VPRecipeBase *R = &VPBB->back();
455 auto *VPI = dyn_cast<VPInstruction>(R);
456 bool IsCondBranch =
457 isa<VPBranchOnMaskRecipe>(R) ||
458 (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
459 VPI->getOpcode() == VPInstruction::BranchOnCount));
460 (void)IsCondBranch;
461
462 if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
463 assert(IsCondBranch && "block with multiple successors not terminated by "
464 "conditional branch recipe");
465
466 return true;
467 }
468
469 assert(
470 !IsCondBranch &&
471 "block with 0 or 1 successors terminated by conditional branch recipe");
472 return false;
473}
474
476 if (hasConditionalTerminator(this))
477 return &back();
478 return nullptr;
479}
480
482 if (hasConditionalTerminator(this))
483 return &back();
484 return nullptr;
485}
486
488 return getParent()->getExitingBasicBlock() == this;
489}
490
491#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
492void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
493 if (getSuccessors().empty()) {
494 O << Indent << "No successors\n";
495 } else {
496 O << Indent << "Successor(s): ";
497 ListSeparator LS;
498 for (auto *Succ : getSuccessors())
499 O << LS << Succ->getName();
500 O << '\n';
501 }
502}
503
504void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
505 VPSlotTracker &SlotTracker) const {
506 O << Indent << getName() << ":\n";
507
508 auto RecipeIndent = Indent + " ";
509 for (const VPRecipeBase &Recipe : *this) {
510 Recipe.print(O, RecipeIndent, SlotTracker);
511 O << '\n';
512 }
513
514 printSuccessors(O, Indent);
515}
516#endif
517
520 // Drop all references in VPBasicBlocks and replace all uses with
521 // DummyValue.
522 Block->dropAllReferences(NewValue);
523}
524
527 RPOT(Entry);
528
529 if (!isReplicator()) {
530 // Create and register the new vector loop.
531 Loop *PrevLoop = State->CurrentVectorLoop;
532 State->CurrentVectorLoop = State->LI->AllocateLoop();
533 BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
534 Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
535
536 // Insert the new loop into the loop nest and register the new basic blocks
537 // before calling any utilities such as SCEV that require valid LoopInfo.
538 if (ParentLoop)
539 ParentLoop->addChildLoop(State->CurrentVectorLoop);
540 else
541 State->LI->addTopLevelLoop(State->CurrentVectorLoop);
542
543 // Visit the VPBlocks connected to "this", starting from it.
544 for (VPBlockBase *Block : RPOT) {
545 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
546 Block->execute(State);
547 }
548
549 State->CurrentVectorLoop = PrevLoop;
550 return;
551 }
552
553 assert(!State->Instance && "Replicating a Region with non-null instance.");
554
555 // Enter replicating mode.
556 State->Instance = VPIteration(0, 0);
557
558 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
559 State->Instance->Part = Part;
560 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
561 for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
562 ++Lane) {
563 State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
564 // Visit the VPBlocks connected to \p this, starting from it.
565 for (VPBlockBase *Block : RPOT) {
566 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
567 Block->execute(State);
568 }
569 }
570 }
571
572 // Exit replicating mode.
573 State->Instance.reset();
574}
575
576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
578 VPSlotTracker &SlotTracker) const {
579 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
580 auto NewIndent = Indent + " ";
581 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
582 O << '\n';
583 BlockBase->print(O, NewIndent, SlotTracker);
584 }
585 O << Indent << "}\n";
586
587 printSuccessors(O, Indent);
588}
589#endif
590
592 for (auto &KV : LiveOuts)
593 delete KV.second;
594 LiveOuts.clear();
595
596 if (Entry) {
597 VPValue DummyValue;
599 Block->dropAllReferences(&DummyValue);
600
602
603 Preheader->dropAllReferences(&DummyValue);
604 delete Preheader;
605 }
606 for (VPValue *VPV : VPLiveInsToFree)
607 delete VPV;
608 if (BackedgeTakenCount)
609 delete BackedgeTakenCount;
610}
611
613 VPBasicBlock *Preheader = new VPBasicBlock("ph");
614 VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
615 auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader);
616 Plan->TripCount =
618 return Plan;
619}
620
622 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
623 for (VPRecipeBase &R : Header->phis()) {
624 if (isa<VPActiveLaneMaskPHIRecipe>(&R))
625 return cast<VPActiveLaneMaskPHIRecipe>(&R);
626 }
627 return nullptr;
628}
629
630void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
631 Value *CanonicalIVStartValue,
632 VPTransformState &State,
633 bool IsEpilogueVectorization) {
634 // Check if the backedge taken count is needed, and if so build it.
635 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
637 auto *TCMO = Builder.CreateSub(TripCountV,
638 ConstantInt::get(TripCountV->getType(), 1),
639 "trip.count.minus.1");
640 auto VF = State.VF;
641 Value *VTCMO =
642 VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
643 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
644 State.set(BackedgeTakenCount, VTCMO, Part);
645 }
646
647 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
648 State.set(&VectorTripCount, VectorTripCountV, Part);
649
650 // When vectorizing the epilogue loop, the canonical induction start value
651 // needs to be changed from zero to the value after the main vector loop.
652 // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
653 if (CanonicalIVStartValue) {
654 VPValue *VPV = getVPValueOrAddLiveIn(CanonicalIVStartValue);
655 auto *IV = getCanonicalIV();
656 assert(all_of(IV->users(),
657 [](const VPUser *U) {
658 if (isa<VPScalarIVStepsRecipe>(U) ||
659 isa<VPDerivedIVRecipe>(U))
660 return true;
661 auto *VPI = cast<VPInstruction>(U);
662 return VPI->getOpcode() ==
663 VPInstruction::CanonicalIVIncrement ||
664 VPI->getOpcode() ==
665 VPInstruction::CanonicalIVIncrementNUW;
666 }) &&
667 "the canonical IV should only be used by its increments or "
668 "ScalarIVSteps when resetting the start value");
669 IV->setOperand(0, VPV);
670 }
671}
672
673/// Generate the code inside the preheader and body of the vectorized loop.
674/// Assumes a single pre-header basic-block was created for this. Introduce
675/// additional basic-blocks as needed, and fill them all.
677 // Set the reverse mapping from VPValues to Values for code generation.
678 for (auto &Entry : Value2VPValue)
679 State->VPValue2Value[Entry.second] = Entry.first;
680
681 // Initialize CFG state.
682 State->CFG.PrevVPBB = nullptr;
683 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
684 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
685 State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
686
687 // Generate code in the loop pre-header and body.
689 Block->execute(State);
690
691 VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
692 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
693
694 // Fix the latch value of canonical, reduction and first-order recurrences
695 // phis in the vector loop.
696 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
697 for (VPRecipeBase &R : Header->phis()) {
698 // Skip phi-like recipes that generate their backedege values themselves.
699 if (isa<VPWidenPHIRecipe>(&R))
700 continue;
701
702 if (isa<VPWidenPointerInductionRecipe>(&R) ||
703 isa<VPWidenIntOrFpInductionRecipe>(&R)) {
704 PHINode *Phi = nullptr;
705 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
706 Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
707 } else {
708 auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
709 // TODO: Split off the case that all users of a pointer phi are scalar
710 // from the VPWidenPointerInductionRecipe.
711 if (WidenPhi->onlyScalarsGenerated(State->VF))
712 continue;
713
714 auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
715 Phi = cast<PHINode>(GEP->getPointerOperand());
716 }
717
718 Phi->setIncomingBlock(1, VectorLatchBB);
719
720 // Move the last step to the end of the latch block. This ensures
721 // consistent placement of all induction updates.
722 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
723 Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
724 continue;
725 }
726
727 auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
728 // For canonical IV, first-order recurrences and in-order reduction phis,
729 // only a single part is generated, which provides the last part from the
730 // previous iteration. For non-ordered reductions all UF parts are
731 // generated.
732 bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
733 isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
734 (isa<VPReductionPHIRecipe>(PhiR) &&
735 cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
736 unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
737
738 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
739 Value *Phi = State->get(PhiR, Part);
740 Value *Val = State->get(PhiR->getBackedgeValue(),
741 SinglePartNeeded ? State->UF - 1 : Part);
742 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
743 }
744 }
745
746 // We do not attempt to preserve DT for outer loop vectorization currently.
748 BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
749 State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
750 updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
751 State->CFG.ExitBB);
752 }
753}
754
755#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
757void VPlan::print(raw_ostream &O) const {
759
760 O << "VPlan '" << getName() << "' {";
761
762 if (VectorTripCount.getNumUsers() > 0) {
763 O << "\nLive-in ";
764 VectorTripCount.printAsOperand(O, SlotTracker);
765 O << " = vector-trip-count";
766 }
767
768 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
769 O << "\nLive-in ";
770 BackedgeTakenCount->printAsOperand(O, SlotTracker);
771 O << " = backedge-taken count";
772 }
773
774 O << "\n";
775 if (TripCount->isLiveIn())
776 O << "Live-in ";
777 TripCount->printAsOperand(O, SlotTracker);
778 O << " = original trip-count";
779 O << "\n";
780
781 if (!getPreheader()->empty()) {
782 O << "\n";
783 getPreheader()->print(O, "", SlotTracker);
784 }
785
786 for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
787 O << '\n';
788 Block->print(O, "", SlotTracker);
789 }
790
791 if (!LiveOuts.empty())
792 O << "\n";
793 for (const auto &KV : LiveOuts) {
794 KV.second->print(O, SlotTracker);
795 }
796
797 O << "}\n";
798}
799
800std::string VPlan::getName() const {
801 std::string Out;
802 raw_string_ostream RSO(Out);
803 RSO << Name << " for ";
804 if (!VFs.empty()) {
805 RSO << "VF={" << VFs[0];
806 for (ElementCount VF : drop_begin(VFs))
807 RSO << "," << VF;
808 RSO << "},";
809 }
810
811 if (UFs.empty()) {
812 RSO << "UF>=1";
813 } else {
814 RSO << "UF={" << UFs[0];
815 for (unsigned UF : drop_begin(UFs))
816 RSO << "," << UF;
817 RSO << "}";
818 }
819
820 return Out;
821}
822
825 VPlanPrinter Printer(O, *this);
826 Printer.dump();
827}
828
830void VPlan::dump() const { print(dbgs()); }
831#endif
832
834 assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
835 LiveOuts.insert({PN, new VPLiveOut(PN, V)});
836}
837
838void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
839 BasicBlock *LoopLatchBB,
840 BasicBlock *LoopExitBB) {
841 // The vector body may be more than a single basic-block by this point.
842 // Update the dominator tree information inside the vector body by propagating
843 // it from header to latch, expecting only triangular control-flow, if any.
844 BasicBlock *PostDomSucc = nullptr;
845 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
846 // Get the list of successors of this block.
847 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
848 assert(Succs.size() <= 2 &&
849 "Basic block in vector loop has more than 2 successors.");
850 PostDomSucc = Succs[0];
851 if (Succs.size() == 1) {
852 assert(PostDomSucc->getSinglePredecessor() &&
853 "PostDom successor has more than one predecessor.");
854 DT->addNewBlock(PostDomSucc, BB);
855 continue;
856 }
857 BasicBlock *InterimSucc = Succs[1];
858 if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
859 PostDomSucc = Succs[1];
860 InterimSucc = Succs[0];
861 }
862 assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
863 "One successor of a basic block does not lead to the other.");
864 assert(InterimSucc->getSinglePredecessor() &&
865 "Interim successor has more than one predecessor.");
866 assert(PostDomSucc->hasNPredecessors(2) &&
867 "PostDom successor has more than two predecessors.");
868 DT->addNewBlock(InterimSucc, BB);
869 DT->addNewBlock(PostDomSucc, BB);
870 }
871 // Latch block is a new dominator for the loop exit.
872 DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
873 assert(DT->verify(DominatorTree::VerificationLevel::Fast));
874}
875
876#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
877
878Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
879 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
880 Twine(getOrCreateBID(Block));
881}
882
883Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
884 const std::string &Name = Block->getName();
885 if (!Name.empty())
886 return Name;
887 return "VPB" + Twine(getOrCreateBID(Block));
888}
889
891 Depth = 1;
892 bumpIndent(0);
893 OS << "digraph VPlan {\n";
894 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
895 if (!Plan.getName().empty())
896 OS << "\\n" << DOT::EscapeString(Plan.getName());
897 if (Plan.BackedgeTakenCount) {
898 OS << ", where:\\n";
899 Plan.BackedgeTakenCount->print(OS, SlotTracker);
900 OS << " := BackedgeTakenCount";
901 }
902 OS << "\"]\n";
903 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
904 OS << "edge [fontname=Courier, fontsize=30]\n";
905 OS << "compound=true\n";
906
907 dumpBlock(Plan.getPreheader());
908
910 dumpBlock(Block);
911
912 OS << "}\n";
913}
914
915void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
916 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
917 dumpBasicBlock(BasicBlock);
918 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
919 dumpRegion(Region);
920 else
921 llvm_unreachable("Unsupported kind of VPBlock.");
922}
923
924void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
925 bool Hidden, const Twine &Label) {
926 // Due to "dot" we print an edge between two regions as an edge between the
927 // exiting basic block and the entry basic of the respective regions.
928 const VPBlockBase *Tail = From->getExitingBasicBlock();
929 const VPBlockBase *Head = To->getEntryBasicBlock();
930 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
931 OS << " [ label=\"" << Label << '\"';
932 if (Tail != From)
933 OS << " ltail=" << getUID(From);
934 if (Head != To)
935 OS << " lhead=" << getUID(To);
936 if (Hidden)
937 OS << "; splines=none";
938 OS << "]\n";
939}
940
941void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
942 auto &Successors = Block->getSuccessors();
943 if (Successors.size() == 1)
944 drawEdge(Block, Successors.front(), false, "");
945 else if (Successors.size() == 2) {
946 drawEdge(Block, Successors.front(), false, "T");
947 drawEdge(Block, Successors.back(), false, "F");
948 } else {
949 unsigned SuccessorNumber = 0;
950 for (auto *Successor : Successors)
951 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
952 }
953}
954
955void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
956 // Implement dot-formatted dump by performing plain-text dump into the
957 // temporary storage followed by some post-processing.
958 OS << Indent << getUID(BasicBlock) << " [label =\n";
959 bumpIndent(1);
960 std::string Str;
962 // Use no indentation as we need to wrap the lines into quotes ourselves.
963 BasicBlock->print(SS, "", SlotTracker);
964
965 // We need to process each line of the output separately, so split
966 // single-string plain-text dump.
968 StringRef(Str).rtrim('\n').split(Lines, "\n");
969
970 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
971 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
972 };
973
974 // Don't need the "+" after the last line.
975 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
976 EmitLine(Line, " +\n");
977 EmitLine(Lines.back(), "\n");
978
979 bumpIndent(-1);
980 OS << Indent << "]\n";
981
983}
984
985void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
986 OS << Indent << "subgraph " << getUID(Region) << " {\n";
987 bumpIndent(1);
988 OS << Indent << "fontname=Courier\n"
989 << Indent << "label=\""
990 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
991 << DOT::EscapeString(Region->getName()) << "\"\n";
992 // Dump the blocks of the region.
993 assert(Region->getEntry() && "Region contains no inner blocks.");
995 dumpBlock(Block);
996 bumpIndent(-1);
997 OS << Indent << "}\n";
999}
1000
1002 if (auto *Inst = dyn_cast<Instruction>(V)) {
1003 if (!Inst->getType()->isVoidTy()) {
1004 Inst->printAsOperand(O, false);
1005 O << " = ";
1006 }
1007 O << Inst->getOpcodeName() << " ";
1008 unsigned E = Inst->getNumOperands();
1009 if (E > 0) {
1010 Inst->getOperand(0)->printAsOperand(O, false);
1011 for (unsigned I = 1; I < E; ++I)
1012 Inst->getOperand(I)->printAsOperand(O << ", ", false);
1013 }
1014 } else // !Inst
1015 V->printAsOperand(O, false);
1016}
1017
1018#endif
1019
1020template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1021
1023 for (unsigned J = 0; J < getNumUsers();) {
1024 VPUser *User = Users[J];
1025 unsigned NumUsers = getNumUsers();
1026 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1027 if (User->getOperand(I) == this)
1028 User->setOperand(I, New);
1029 // If a user got removed after updating the current user, the next user to
1030 // update will be moved to the current position, so we only need to
1031 // increment the index if the number of users did not change.
1032 if (NumUsers == getNumUsers())
1033 J++;
1034 }
1035}
1036
1037#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1039 if (const Value *UV = getUnderlyingValue()) {
1040 OS << "ir<";
1041 UV->printAsOperand(OS, false);
1042 OS << ">";
1043 return;
1044 }
1045
1046 unsigned Slot = Tracker.getSlot(this);
1047 if (Slot == unsigned(-1))
1048 OS << "<badref>";
1049 else
1050 OS << "vp<%" << Tracker.getSlot(this) << ">";
1051}
1052
1054 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1055 Op->printAsOperand(O, SlotTracker);
1056 });
1057}
1058#endif
1059
1060void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1061 Old2NewTy &Old2New,
1062 InterleavedAccessInfo &IAI) {
1064 RPOT(Region->getEntry());
1065 for (VPBlockBase *Base : RPOT) {
1066 visitBlock(Base, Old2New, IAI);
1067 }
1068}
1069
1070void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1071 InterleavedAccessInfo &IAI) {
1072 if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1073 for (VPRecipeBase &VPI : *VPBB) {
1074 if (isa<VPHeaderPHIRecipe>(&VPI))
1075 continue;
1076 assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1077 auto *VPInst = cast<VPInstruction>(&VPI);
1078
1079 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1080 if (!Inst)
1081 continue;
1082 auto *IG = IAI.getInterleaveGroup(Inst);
1083 if (!IG)
1084 continue;
1085
1086 auto NewIGIter = Old2New.find(IG);
1087 if (NewIGIter == Old2New.end())
1088 Old2New[IG] = new InterleaveGroup<VPInstruction>(
1089 IG->getFactor(), IG->isReverse(), IG->getAlign());
1090
1091 if (Inst == IG->getInsertPos())
1092 Old2New[IG]->setInsertPos(VPInst);
1093
1094 InterleaveGroupMap[VPInst] = Old2New[IG];
1095 InterleaveGroupMap[VPInst]->insertMember(
1096 VPInst, IG->getIndex(Inst),
1097 Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1098 : IG->getFactor()));
1099 }
1100 } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1101 visitRegion(Region, Old2New, IAI);
1102 else
1103 llvm_unreachable("Unsupported kind of VPBlock.");
1104}
1105
1107 InterleavedAccessInfo &IAI) {
1108 Old2NewTy Old2New;
1109 visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1110}
1111
1112void VPSlotTracker::assignSlot(const VPValue *V) {
1113 assert(!Slots.contains(V) && "VPValue already has a slot!");
1114 Slots[V] = NextSlot++;
1115}
1116
1117void VPSlotTracker::assignSlots(const VPlan &Plan) {
1118 assignSlot(&Plan.VectorTripCount);
1119 if (Plan.BackedgeTakenCount)
1120 assignSlot(Plan.BackedgeTakenCount);
1121 assignSlots(Plan.getPreheader());
1122
1125 for (const VPBasicBlock *VPBB :
1126 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1127 assignSlots(VPBB);
1128}
1129
1130void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) {
1131 for (const VPRecipeBase &Recipe : *VPBB)
1132 for (VPValue *Def : Recipe.definedValues())
1133 assignSlot(Def);
1134}
1135
1137 return all_of(Def->users(),
1138 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1139}
1140
1142 ScalarEvolution &SE) {
1143 if (auto *Expanded = Plan.getSCEVExpansion(Expr))
1144 return Expanded;
1145 VPValue *Expanded = nullptr;
1146 if (auto *E = dyn_cast<SCEVConstant>(Expr))
1147 Expanded = Plan.getVPValueOrAddLiveIn(E->getValue());
1148 else if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1149 Expanded = Plan.getVPValueOrAddLiveIn(E->getValue());
1150 else {
1151 Expanded = new VPExpandSCEVRecipe(Expr, SE);
1153 }
1154 Plan.addSCEVExpansion(Expr, Expanded);
1155 return Expanded;
1156}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
std::string Name
static void dumpEdges(CFGMST< Edge, BBInfo > &MST, GCOVFunction &GF)
Generic dominator tree construction - this file provides routines to construct immediate dominator in...
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define I(x, y, z)
Definition: MD5.cpp:58
Memory true print Memory SSA Printer
Definition: MemorySSA.cpp:78
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static T * getPlanEntry(T *Start)
Definition: VPlan.cpp:126
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition: VPlan.cpp:446
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
static const uint32_t IV[8]
Definition: blake3_impl.h:77
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:325
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the basic block to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4601
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:315
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:293
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:323
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
Debug location.
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition: DebugLoc.h:33
Core dominator tree base class.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:302
bool shouldEmitDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
Definition: Metadata.cpp:1729
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2355
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1192
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1257
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:212
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1273
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2564
bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:365
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:636
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:778
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VectorUtils.h:823
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void annotateInstWithNoAlias(Instruction *VersionedInst, const Instruction *OrigInst)
Add the noalias annotations to VersionedInst.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
void setIncomingBlock(unsigned i, BasicBlock *BB)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition: RegionInfo.h:322
This class represents an analyzed expression in the program.
The main scalar evolution driver.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:88
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:152
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:678
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:312
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:698
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition: StringRef.h:801
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:2056
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2192
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:2260
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:2213
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:334
iterator end()
Definition: VPlan.h:2223
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:2221
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:208
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:436
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:401
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:411
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:504
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition: VPlan.cpp:487
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:475
const VPRecipeBase & back() const
Definition: VPlan.h:2235
bool empty() const
Definition: VPlan.h:2232
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:418
VPRegionBlock * getParent()
Definition: VPlan.h:490
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:173
size_t getNumSuccessors() const
Definition: VPlan.h:535
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:492
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:195
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:203
VPlan * getPlan()
Definition: VPlan.cpp:146
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:165
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:561
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:555
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:187
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:151
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:525
Helper for GraphTraits specialization that traverses through VPRegionBlocks.
Definition: VPlanCFG.h:115
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:2752
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2799
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2788
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:306
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:107
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Recipe to expand a SCEV expression.
Definition: VPlan.h:1980
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:816
VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
Definition: VPlan.cpp:1106
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:141
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:66
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
A value that is used outside the VPlan.
Definition: VPlan.h:663
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:703
VPBasicBlock * getParent()
Definition: VPlan.h:720
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2316
const VPBlockBase * getEntry() const
Definition: VPlan.h:2355
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:518
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:577
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:525
const VPBlockBase * getExiting() const
Definition: VPlan.h:2367
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:2380
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:438
unsigned getSlot(const VPValue *V) const
Definition: VPlanValue.h:452
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:204
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1053
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:116
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1038
void dump() const
Dump the value to stderr (for debugging).
Definition: VPlan.cpp:99
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition: VPlan.cpp:79
virtual ~VPValue()
Definition: VPlan.cpp:85
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:92
void replaceAllUsesWith(VPValue *New)
Definition: VPlan.cpp:1022
VPDef * Def
Pointer to the VPDef that defines this VPValue.
Definition: VPlanValue.h:65
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:2672
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:890
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2413
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:824
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:800
VPBasicBlock * getEntry()
Definition: VPlan.h:2505
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:833
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State, bool IsEpilogueVectorization)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:630
VPBasicBlock * getPreheader()
Definition: VPlan.h:2658
VPValue * getVPValueOrAddLiveIn(Value *V)
Gets the VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:2574
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:2613
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE)
Create an initial VPlan with preheader and entry blocks.
Definition: VPlan.cpp:612
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:2652
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:830
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:676
VPActiveLaneMaskPHIRecipe * getActiveLaneMaskPhi()
Find and return the VPActiveLaneMaskPHIRecipe from the header - there be only one at most.
Definition: VPlan.cpp:621
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:757
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:2645
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:163
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
std::string EscapeString(const std::string &Label)
Definition: GraphWriter.cpp:55
@ SS
Definition: X86.h:209
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1141
bool onlyFirstLaneUsed(VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1136
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:413
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1819
auto successors(const MachineBasicBlock *BB)
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2213
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition: VPlanCFG.h:214
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
cl::opt< bool > EnableFSDiscriminator
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:51
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:132
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1303
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:217
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:358
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:364
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:372
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:360
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:368
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:236
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:259
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:256
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:234
VPValue2ValueTy VPValue2Value
Definition: VPlan.h:390
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:382
void addMetadata(Instruction *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:249
Value * get(VPValue *Def, unsigned Part)
Get the generated Value for a given VPValue and a given Part.
struct llvm::VPTransformState::DataState Data
void setDebugLocFromInst(const Value *V)
Set the debug location in the builder using the debug location in V.
Definition: VPlan.cpp:269
struct llvm::VPTransformState::CFGState CFG
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:409
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:241
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:248
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:388
DominatorTree * DT
Hold a pointer to Dominator Tree to register new basic blocks in the loop.
Definition: VPlan.h:385
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:282
VPlan * Plan
Pointer to the VPlan code is generated for.
Definition: VPlan.h:399
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:272
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:242
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:402
void set(VPValue *Def, Value *V, unsigned Part)
Set the generated Value for a given VPValue and a given Part.
Definition: VPlan.h:293
void print(raw_ostream &O) const
Definition: VPlan.cpp:1001