LLVM 17.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
20#include "VPlanCFG.h"
21#include "VPlanDominatorTree.h"
24#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/Twine.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/Value.h"
37#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <string>
46#include <vector>
47
48using namespace llvm;
50
51#define DEBUG_TYPE "vplan"
52
53#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
55 const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
57 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
58 V.print(OS, SlotTracker);
59 return OS;
60}
61#endif
62
64 const ElementCount &VF) const {
65 switch (LaneKind) {
67 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
68 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
69 Builder.getInt32(VF.getKnownMinValue() - Lane));
71 return Builder.getInt32(Lane);
72 }
73 llvm_unreachable("Unknown lane kind");
74}
75
76VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
77 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
78 if (Def)
79 Def->addDefinedValue(this);
80}
81
83 assert(Users.empty() && "trying to delete a VPValue with remaining users");
84 if (Def)
85 Def->removeDefinedValue(this);
86}
87
88#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
90 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
91 R->print(OS, "", SlotTracker);
92 else
94}
95
96void VPValue::dump() const {
97 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
99 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
101 dbgs() << "\n";
102}
103
104void VPDef::dump() const {
105 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
107 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
108 print(dbgs(), "", SlotTracker);
109 dbgs() << "\n";
110}
111#endif
112
114 return cast_or_null<VPRecipeBase>(Def);
115}
116
118 return cast_or_null<VPRecipeBase>(Def);
119}
120
121// Get the top-most entry block of \p Start. This is the entry block of the
122// containing VPlan. This function is templated to support both const and non-const blocks
123template <typename T> static T *getPlanEntry(T *Start) {
124 T *Next = Start;
125 T *Current = Start;
126 while ((Next = Next->getParent()))
127 Current = Next;
128
129 SmallSetVector<T *, 8> WorkList;
130 WorkList.insert(Current);
131
132 for (unsigned i = 0; i < WorkList.size(); i++) {
133 T *Current = WorkList[i];
134 if (Current->getNumPredecessors() == 0)
135 return Current;
136 auto &Predecessors = Current->getPredecessors();
137 WorkList.insert(Predecessors.begin(), Predecessors.end());
138 }
139
140 llvm_unreachable("VPlan without any entry node without predecessors");
141}
142
143VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
144
145const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
146
147/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
149 const VPBlockBase *Block = this;
150 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
151 Block = Region->getEntry();
152 return cast<VPBasicBlock>(Block);
153}
154
156 VPBlockBase *Block = this;
157 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
158 Block = Region->getEntry();
159 return cast<VPBasicBlock>(Block);
160}
161
162void VPBlockBase::setPlan(VPlan *ParentPlan) {
163 assert(ParentPlan->getEntry() == this &&
164 "Can only set plan on its entry block.");
165 Plan = ParentPlan;
166}
167
168/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
170 const VPBlockBase *Block = this;
171 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
172 Block = Region->getExiting();
173 return cast<VPBasicBlock>(Block);
174}
175
177 VPBlockBase *Block = this;
178 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
179 Block = Region->getExiting();
180 return cast<VPBasicBlock>(Block);
181}
182
184 if (!Successors.empty() || !Parent)
185 return this;
186 assert(Parent->getExiting() == this &&
187 "Block w/o successors not the exiting block of its parent.");
188 return Parent->getEnclosingBlockWithSuccessors();
189}
190
192 if (!Predecessors.empty() || !Parent)
193 return this;
194 assert(Parent->getEntry() == this &&
195 "Block w/o predecessors not the entry of its parent.");
196 return Parent->getEnclosingBlockWithPredecessors();
197}
198
201 delete Block;
202}
203
205 iterator It = begin();
206 while (It != end() && It->isPhi())
207 It++;
208 return It;
209}
210
212 if (!Def->hasDefiningRecipe())
213 return Def->getLiveInIRValue();
214
215 if (hasScalarValue(Def, Instance)) {
216 return Data
217 .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
218 }
219
220 assert(hasVectorValue(Def, Instance.Part));
221 auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
222 if (!VecPart->getType()->isVectorTy()) {
223 assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
224 return VecPart;
225 }
226 // TODO: Cache created scalar values.
227 Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
228 auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
229 // set(Def, Extract, Instance);
230 return Extract;
231}
233 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
234 return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
235}
236
238 const Instruction *Orig) {
239 // If the loop was versioned with memchecks, add the corresponding no-alias
240 // metadata.
241 if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
242 LVer->annotateInstWithNoAlias(To, Orig);
243}
244
247 addNewMetadata(To, From);
248}
249
251 for (Value *V : To) {
252 if (Instruction *I = dyn_cast<Instruction>(V))
254 }
255}
256
258 const Instruction *Inst = dyn_cast<Instruction>(V);
259 if (!Inst) {
261 return;
262 }
263
264 const DILocation *DIL = Inst->getDebugLoc();
265 // When a FSDiscriminator is enabled, we don't need to add the multiply
266 // factors to the discriminators.
267 if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() &&
268 !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
269 // FIXME: For scalable vectors, assume vscale=1.
270 auto NewDIL =
272 if (NewDIL)
274 else
275 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
276 << DIL->getFilename() << " Line: " << DIL->getLine());
277 } else
279}
280
282VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
283 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
284 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
285 BasicBlock *PrevBB = CFG.PrevBB;
286 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
287 PrevBB->getParent(), CFG.ExitBB);
288 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
289
290 // Hook up the new basic block to its predecessors.
291 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
292 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
293 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
294 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
295
296 assert(PredBB && "Predecessor basic-block not found building successor.");
297 auto *PredBBTerminator = PredBB->getTerminator();
298 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
299
300 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
301 if (isa<UnreachableInst>(PredBBTerminator)) {
302 assert(PredVPSuccessors.size() == 1 &&
303 "Predecessor ending w/o branch must have single successor.");
304 DebugLoc DL = PredBBTerminator->getDebugLoc();
305 PredBBTerminator->eraseFromParent();
306 auto *Br = BranchInst::Create(NewBB, PredBB);
307 Br->setDebugLoc(DL);
308 } else if (TermBr && !TermBr->isConditional()) {
309 TermBr->setSuccessor(0, NewBB);
310 } else {
311 // Set each forward successor here when it is created, excluding
312 // backedges. A backward successor is set when the branch is created.
313 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
314 assert(!TermBr->getSuccessor(idx) &&
315 "Trying to reset an existing successor block.");
316 TermBr->setSuccessor(idx, NewBB);
317 }
318 }
319 return NewBB;
320}
321
323 bool Replica = State->Instance && !State->Instance->isFirstIteration();
324 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
325 VPBlockBase *SingleHPred = nullptr;
326 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
327
328 auto IsLoopRegion = [](VPBlockBase *BB) {
329 auto *R = dyn_cast<VPRegionBlock>(BB);
330 return R && !R->isReplicator();
331 };
332
333 // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
334 if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
335 // ExitBB can be re-used for the exit block of the Plan.
336 NewBB = State->CFG.ExitBB;
337 State->CFG.PrevBB = NewBB;
338
339 // Update the branch instruction in the predecessor to branch to ExitBB.
340 VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
341 VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
342 assert(PredVPB->getSingleSuccessor() == this &&
343 "predecessor must have the current block as only successor");
344 BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
345 // The Exit block of a loop is always set to be successor 0 of the Exiting
346 // block.
347 cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
348 } else if (PrevVPBB && /* A */
349 !((SingleHPred = getSingleHierarchicalPredecessor()) &&
350 SingleHPred->getExitingBasicBlock() == PrevVPBB &&
351 PrevVPBB->getSingleHierarchicalSuccessor() &&
352 (SingleHPred->getParent() == getEnclosingLoopRegion() &&
353 !IsLoopRegion(SingleHPred))) && /* B */
354 !(Replica && getPredecessors().empty())) { /* C */
355 // The last IR basic block is reused, as an optimization, in three cases:
356 // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
357 // B. when the current VPBB has a single (hierarchical) predecessor which
358 // is PrevVPBB and the latter has a single (hierarchical) successor which
359 // both are in the same non-replicator region; and
360 // C. when the current VPBB is an entry of a region replica - where PrevVPBB
361 // is the exiting VPBB of this region from a previous instance, or the
362 // predecessor of this region.
363
364 NewBB = createEmptyBasicBlock(State->CFG);
365 State->Builder.SetInsertPoint(NewBB);
366 // Temporarily terminate with unreachable until CFG is rewired.
367 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
368 // Register NewBB in its loop. In innermost loops its the same for all
369 // BB's.
370 if (State->CurrentVectorLoop)
371 State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
372 State->Builder.SetInsertPoint(Terminator);
373 State->CFG.PrevBB = NewBB;
374 }
375
376 // 2. Fill the IR basic block with IR instructions.
377 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
378 << " in BB:" << NewBB->getName() << '\n');
379
380 State->CFG.VPBB2IRBB[this] = NewBB;
381 State->CFG.PrevVPBB = this;
382
383 for (VPRecipeBase &Recipe : Recipes)
384 Recipe.execute(*State);
385
386 LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
387}
388
390 for (VPRecipeBase &R : Recipes) {
391 for (auto *Def : R.definedValues())
392 Def->replaceAllUsesWith(NewValue);
393
394 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
395 R.setOperand(I, NewValue);
396 }
397}
398
400 assert((SplitAt == end() || SplitAt->getParent() == this) &&
401 "can only split at a position in the same block");
402
404 // First, disconnect the current block from its successors.
405 for (VPBlockBase *Succ : Succs)
407
408 // Create new empty block after the block to split.
409 auto *SplitBlock = new VPBasicBlock(getName() + ".split");
411
412 // Add successors for block to split to new block.
413 for (VPBlockBase *Succ : Succs)
415
416 // Finally, move the recipes starting at SplitAt to new block.
417 for (VPRecipeBase &ToMove :
418 make_early_inc_range(make_range(SplitAt, this->end())))
419 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
420
421 return SplitBlock;
422}
423
426 if (P && P->isReplicator()) {
427 P = P->getParent();
428 assert(!cast<VPRegionBlock>(P)->isReplicator() &&
429 "unexpected nested replicate regions");
430 }
431 return P;
432}
433
434static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
435 if (VPBB->empty()) {
436 assert(
437 VPBB->getNumSuccessors() < 2 &&
438 "block with multiple successors doesn't have a recipe as terminator");
439 return false;
440 }
441
442 const VPRecipeBase *R = &VPBB->back();
443 auto *VPI = dyn_cast<VPInstruction>(R);
444 bool IsCondBranch =
445 isa<VPBranchOnMaskRecipe>(R) ||
446 (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
447 VPI->getOpcode() == VPInstruction::BranchOnCount));
448 (void)IsCondBranch;
449
450 if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
451 assert(IsCondBranch && "block with multiple successors not terminated by "
452 "conditional branch recipe");
453
454 return true;
455 }
456
457 assert(
458 !IsCondBranch &&
459 "block with 0 or 1 successors terminated by conditional branch recipe");
460 return false;
461}
462
464 if (hasConditionalTerminator(this))
465 return &back();
466 return nullptr;
467}
468
470 if (hasConditionalTerminator(this))
471 return &back();
472 return nullptr;
473}
474
476 return getParent()->getExitingBasicBlock() == this;
477}
478
479#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
480void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
481 if (getSuccessors().empty()) {
482 O << Indent << "No successors\n";
483 } else {
484 O << Indent << "Successor(s): ";
485 ListSeparator LS;
486 for (auto *Succ : getSuccessors())
487 O << LS << Succ->getName();
488 O << '\n';
489 }
490}
491
492void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
493 VPSlotTracker &SlotTracker) const {
494 O << Indent << getName() << ":\n";
495
496 auto RecipeIndent = Indent + " ";
497 for (const VPRecipeBase &Recipe : *this) {
498 Recipe.print(O, RecipeIndent, SlotTracker);
499 O << '\n';
500 }
501
502 printSuccessors(O, Indent);
503}
504#endif
505
508 // Drop all references in VPBasicBlocks and replace all uses with
509 // DummyValue.
510 Block->dropAllReferences(NewValue);
511}
512
515 RPOT(Entry);
516
517 if (!isReplicator()) {
518 // Create and register the new vector loop.
519 Loop *PrevLoop = State->CurrentVectorLoop;
520 State->CurrentVectorLoop = State->LI->AllocateLoop();
521 BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
522 Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
523
524 // Insert the new loop into the loop nest and register the new basic blocks
525 // before calling any utilities such as SCEV that require valid LoopInfo.
526 if (ParentLoop)
527 ParentLoop->addChildLoop(State->CurrentVectorLoop);
528 else
529 State->LI->addTopLevelLoop(State->CurrentVectorLoop);
530
531 // Visit the VPBlocks connected to "this", starting from it.
532 for (VPBlockBase *Block : RPOT) {
533 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
534 Block->execute(State);
535 }
536
537 State->CurrentVectorLoop = PrevLoop;
538 return;
539 }
540
541 assert(!State->Instance && "Replicating a Region with non-null instance.");
542
543 // Enter replicating mode.
544 State->Instance = VPIteration(0, 0);
545
546 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
547 State->Instance->Part = Part;
548 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
549 for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
550 ++Lane) {
551 State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
552 // Visit the VPBlocks connected to \p this, starting from it.
553 for (VPBlockBase *Block : RPOT) {
554 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
555 Block->execute(State);
556 }
557 }
558 }
559
560 // Exit replicating mode.
561 State->Instance.reset();
562}
563
564#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
566 VPSlotTracker &SlotTracker) const {
567 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
568 auto NewIndent = Indent + " ";
569 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
570 O << '\n';
571 BlockBase->print(O, NewIndent, SlotTracker);
572 }
573 O << Indent << "}\n";
574
575 printSuccessors(O, Indent);
576}
577#endif
578
580 clearLiveOuts();
581
582 if (Entry) {
583 VPValue DummyValue;
585 Block->dropAllReferences(&DummyValue);
586
588 }
589 for (VPValue *VPV : VPValuesToFree)
590 delete VPV;
591 if (TripCount)
592 delete TripCount;
593 if (BackedgeTakenCount)
594 delete BackedgeTakenCount;
595 for (auto &P : VPExternalDefs)
596 delete P.second;
597}
598
600 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
601 for (VPRecipeBase &R : Header->phis()) {
602 if (isa<VPActiveLaneMaskPHIRecipe>(&R))
603 return cast<VPActiveLaneMaskPHIRecipe>(&R);
604 }
605 return nullptr;
606}
607
608void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
609 Value *CanonicalIVStartValue,
610 VPTransformState &State,
611 bool IsEpilogueVectorization) {
612
613 // Check if the trip count is needed, and if so build it.
614 if (TripCount && TripCount->getNumUsers()) {
615 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
616 State.set(TripCount, TripCountV, Part);
617 }
618
619 // Check if the backedge taken count is needed, and if so build it.
620 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
622 auto *TCMO = Builder.CreateSub(TripCountV,
623 ConstantInt::get(TripCountV->getType(), 1),
624 "trip.count.minus.1");
625 auto VF = State.VF;
626 Value *VTCMO =
627 VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
628 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
629 State.set(BackedgeTakenCount, VTCMO, Part);
630 }
631
632 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
633 State.set(&VectorTripCount, VectorTripCountV, Part);
634
635 // When vectorizing the epilogue loop, the canonical induction start value
636 // needs to be changed from zero to the value after the main vector loop.
637 // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
638 if (CanonicalIVStartValue) {
639 VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
640 auto *IV = getCanonicalIV();
641 assert(all_of(IV->users(),
642 [](const VPUser *U) {
643 if (isa<VPScalarIVStepsRecipe>(U) ||
644 isa<VPDerivedIVRecipe>(U))
645 return true;
646 auto *VPI = cast<VPInstruction>(U);
647 return VPI->getOpcode() ==
648 VPInstruction::CanonicalIVIncrement ||
649 VPI->getOpcode() ==
650 VPInstruction::CanonicalIVIncrementNUW;
651 }) &&
652 "the canonical IV should only be used by its increments or "
653 "ScalarIVSteps when "
654 "resetting the start value");
655 IV->setOperand(0, VPV);
656 }
657}
658
659/// Generate the code inside the preheader and body of the vectorized loop.
660/// Assumes a single pre-header basic-block was created for this. Introduce
661/// additional basic-blocks as needed, and fill them all.
663 // Set the reverse mapping from VPValues to Values for code generation.
664 for (auto &Entry : Value2VPValue)
665 State->VPValue2Value[Entry.second] = Entry.first;
666
667 // Initialize CFG state.
668 State->CFG.PrevVPBB = nullptr;
669 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
670 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
671 State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
672
673 // Generate code in the loop pre-header and body.
675 Block->execute(State);
676
677 VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
678 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
679
680 // Fix the latch value of canonical, reduction and first-order recurrences
681 // phis in the vector loop.
682 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
683 for (VPRecipeBase &R : Header->phis()) {
684 // Skip phi-like recipes that generate their backedege values themselves.
685 if (isa<VPWidenPHIRecipe>(&R))
686 continue;
687
688 if (isa<VPWidenPointerInductionRecipe>(&R) ||
689 isa<VPWidenIntOrFpInductionRecipe>(&R)) {
690 PHINode *Phi = nullptr;
691 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
692 Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
693 } else {
694 auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
695 // TODO: Split off the case that all users of a pointer phi are scalar
696 // from the VPWidenPointerInductionRecipe.
697 if (WidenPhi->onlyScalarsGenerated(State->VF))
698 continue;
699
700 auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
701 Phi = cast<PHINode>(GEP->getPointerOperand());
702 }
703
704 Phi->setIncomingBlock(1, VectorLatchBB);
705
706 // Move the last step to the end of the latch block. This ensures
707 // consistent placement of all induction updates.
708 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
709 Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
710 continue;
711 }
712
713 auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
714 // For canonical IV, first-order recurrences and in-order reduction phis,
715 // only a single part is generated, which provides the last part from the
716 // previous iteration. For non-ordered reductions all UF parts are
717 // generated.
718 bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
719 isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
720 (isa<VPReductionPHIRecipe>(PhiR) &&
721 cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
722 unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
723
724 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
725 Value *Phi = State->get(PhiR, Part);
726 Value *Val = State->get(PhiR->getBackedgeValue(),
727 SinglePartNeeded ? State->UF - 1 : Part);
728 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
729 }
730 }
731
732 // We do not attempt to preserve DT for outer loop vectorization currently.
734 BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
735 State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
736 updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
737 State->CFG.ExitBB);
738 }
739}
740
741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
743void VPlan::print(raw_ostream &O) const {
745
746 O << "VPlan '" << getName() << "' {";
747
748 if (VectorTripCount.getNumUsers() > 0) {
749 O << "\nLive-in ";
750 VectorTripCount.printAsOperand(O, SlotTracker);
751 O << " = vector-trip-count\n";
752 }
753
754 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
755 O << "\nLive-in ";
756 BackedgeTakenCount->printAsOperand(O, SlotTracker);
757 O << " = backedge-taken count\n";
758 }
759
760 for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
761 O << '\n';
762 Block->print(O, "", SlotTracker);
763 }
764
765 if (!LiveOuts.empty())
766 O << "\n";
767 for (const auto &KV : LiveOuts) {
768 O << "Live-out ";
769 KV.second->getPhi()->printAsOperand(O);
770 O << " = ";
771 KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
772 O << "\n";
773 }
774
775 O << "}\n";
776}
777
778std::string VPlan::getName() const {
779 std::string Out;
780 raw_string_ostream RSO(Out);
781 RSO << Name << " for ";
782 if (!VFs.empty()) {
783 RSO << "VF={" << VFs[0];
784 for (ElementCount VF : drop_begin(VFs))
785 RSO << "," << VF;
786 RSO << "},";
787 }
788
789 if (UFs.empty()) {
790 RSO << "UF>=1";
791 } else {
792 RSO << "UF={" << UFs[0];
793 for (unsigned UF : drop_begin(UFs))
794 RSO << "," << UF;
795 RSO << "}";
796 }
797
798 return Out;
799}
800
803 VPlanPrinter Printer(O, *this);
804 Printer.dump();
805}
806
808void VPlan::dump() const { print(dbgs()); }
809#endif
810
812 assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
813 LiveOuts.insert({PN, new VPLiveOut(PN, V)});
814}
815
816void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
817 BasicBlock *LoopLatchBB,
818 BasicBlock *LoopExitBB) {
819 // The vector body may be more than a single basic-block by this point.
820 // Update the dominator tree information inside the vector body by propagating
821 // it from header to latch, expecting only triangular control-flow, if any.
822 BasicBlock *PostDomSucc = nullptr;
823 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
824 // Get the list of successors of this block.
825 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
826 assert(Succs.size() <= 2 &&
827 "Basic block in vector loop has more than 2 successors.");
828 PostDomSucc = Succs[0];
829 if (Succs.size() == 1) {
830 assert(PostDomSucc->getSinglePredecessor() &&
831 "PostDom successor has more than one predecessor.");
832 DT->addNewBlock(PostDomSucc, BB);
833 continue;
834 }
835 BasicBlock *InterimSucc = Succs[1];
836 if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
837 PostDomSucc = Succs[1];
838 InterimSucc = Succs[0];
839 }
840 assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
841 "One successor of a basic block does not lead to the other.");
842 assert(InterimSucc->getSinglePredecessor() &&
843 "Interim successor has more than one predecessor.");
844 assert(PostDomSucc->hasNPredecessors(2) &&
845 "PostDom successor has more than two predecessors.");
846 DT->addNewBlock(InterimSucc, BB);
847 DT->addNewBlock(PostDomSucc, BB);
848 }
849 // Latch block is a new dominator for the loop exit.
850 DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
851 assert(DT->verify(DominatorTree::VerificationLevel::Fast));
852}
853
854#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
855
856Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
857 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
858 Twine(getOrCreateBID(Block));
859}
860
861Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
862 const std::string &Name = Block->getName();
863 if (!Name.empty())
864 return Name;
865 return "VPB" + Twine(getOrCreateBID(Block));
866}
867
869 Depth = 1;
870 bumpIndent(0);
871 OS << "digraph VPlan {\n";
872 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
873 if (!Plan.getName().empty())
874 OS << "\\n" << DOT::EscapeString(Plan.getName());
875 if (Plan.BackedgeTakenCount) {
876 OS << ", where:\\n";
877 Plan.BackedgeTakenCount->print(OS, SlotTracker);
878 OS << " := BackedgeTakenCount";
879 }
880 OS << "\"]\n";
881 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
882 OS << "edge [fontname=Courier, fontsize=30]\n";
883 OS << "compound=true\n";
884
886 dumpBlock(Block);
887
888 OS << "}\n";
889}
890
891void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
892 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
893 dumpBasicBlock(BasicBlock);
894 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
895 dumpRegion(Region);
896 else
897 llvm_unreachable("Unsupported kind of VPBlock.");
898}
899
900void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
901 bool Hidden, const Twine &Label) {
902 // Due to "dot" we print an edge between two regions as an edge between the
903 // exiting basic block and the entry basic of the respective regions.
904 const VPBlockBase *Tail = From->getExitingBasicBlock();
905 const VPBlockBase *Head = To->getEntryBasicBlock();
906 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
907 OS << " [ label=\"" << Label << '\"';
908 if (Tail != From)
909 OS << " ltail=" << getUID(From);
910 if (Head != To)
911 OS << " lhead=" << getUID(To);
912 if (Hidden)
913 OS << "; splines=none";
914 OS << "]\n";
915}
916
917void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
918 auto &Successors = Block->getSuccessors();
919 if (Successors.size() == 1)
920 drawEdge(Block, Successors.front(), false, "");
921 else if (Successors.size() == 2) {
922 drawEdge(Block, Successors.front(), false, "T");
923 drawEdge(Block, Successors.back(), false, "F");
924 } else {
925 unsigned SuccessorNumber = 0;
926 for (auto *Successor : Successors)
927 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
928 }
929}
930
931void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
932 // Implement dot-formatted dump by performing plain-text dump into the
933 // temporary storage followed by some post-processing.
934 OS << Indent << getUID(BasicBlock) << " [label =\n";
935 bumpIndent(1);
936 std::string Str;
938 // Use no indentation as we need to wrap the lines into quotes ourselves.
939 BasicBlock->print(SS, "", SlotTracker);
940
941 // We need to process each line of the output separately, so split
942 // single-string plain-text dump.
944 StringRef(Str).rtrim('\n').split(Lines, "\n");
945
946 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
947 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
948 };
949
950 // Don't need the "+" after the last line.
951 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
952 EmitLine(Line, " +\n");
953 EmitLine(Lines.back(), "\n");
954
955 bumpIndent(-1);
956 OS << Indent << "]\n";
957
959}
960
961void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
962 OS << Indent << "subgraph " << getUID(Region) << " {\n";
963 bumpIndent(1);
964 OS << Indent << "fontname=Courier\n"
965 << Indent << "label=\""
966 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
967 << DOT::EscapeString(Region->getName()) << "\"\n";
968 // Dump the blocks of the region.
969 assert(Region->getEntry() && "Region contains no inner blocks.");
971 dumpBlock(Block);
972 bumpIndent(-1);
973 OS << Indent << "}\n";
975}
976
978 if (auto *Inst = dyn_cast<Instruction>(V)) {
979 if (!Inst->getType()->isVoidTy()) {
980 Inst->printAsOperand(O, false);
981 O << " = ";
982 }
983 O << Inst->getOpcodeName() << " ";
984 unsigned E = Inst->getNumOperands();
985 if (E > 0) {
986 Inst->getOperand(0)->printAsOperand(O, false);
987 for (unsigned I = 1; I < E; ++I)
988 Inst->getOperand(I)->printAsOperand(O << ", ", false);
989 }
990 } else // !Inst
991 V->printAsOperand(O, false);
992}
993
994#endif
995
996template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
997
999 for (unsigned J = 0; J < getNumUsers();) {
1000 VPUser *User = Users[J];
1001 unsigned NumUsers = getNumUsers();
1002 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1003 if (User->getOperand(I) == this)
1004 User->setOperand(I, New);
1005 // If a user got removed after updating the current user, the next user to
1006 // update will be moved to the current position, so we only need to
1007 // increment the index if the number of users did not change.
1008 if (NumUsers == getNumUsers())
1009 J++;
1010 }
1011}
1012
1013#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1015 if (const Value *UV = getUnderlyingValue()) {
1016 OS << "ir<";
1017 UV->printAsOperand(OS, false);
1018 OS << ">";
1019 return;
1020 }
1021
1022 unsigned Slot = Tracker.getSlot(this);
1023 if (Slot == unsigned(-1))
1024 OS << "<badref>";
1025 else
1026 OS << "vp<%" << Tracker.getSlot(this) << ">";
1027}
1028
1030 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1031 Op->printAsOperand(O, SlotTracker);
1032 });
1033}
1034#endif
1035
1036void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1037 Old2NewTy &Old2New,
1038 InterleavedAccessInfo &IAI) {
1040 RPOT(Region->getEntry());
1041 for (VPBlockBase *Base : RPOT) {
1042 visitBlock(Base, Old2New, IAI);
1043 }
1044}
1045
1046void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1047 InterleavedAccessInfo &IAI) {
1048 if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1049 for (VPRecipeBase &VPI : *VPBB) {
1050 if (isa<VPHeaderPHIRecipe>(&VPI))
1051 continue;
1052 assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1053 auto *VPInst = cast<VPInstruction>(&VPI);
1054
1055 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1056 if (!Inst)
1057 continue;
1058 auto *IG = IAI.getInterleaveGroup(Inst);
1059 if (!IG)
1060 continue;
1061
1062 auto NewIGIter = Old2New.find(IG);
1063 if (NewIGIter == Old2New.end())
1064 Old2New[IG] = new InterleaveGroup<VPInstruction>(
1065 IG->getFactor(), IG->isReverse(), IG->getAlign());
1066
1067 if (Inst == IG->getInsertPos())
1068 Old2New[IG]->setInsertPos(VPInst);
1069
1070 InterleaveGroupMap[VPInst] = Old2New[IG];
1071 InterleaveGroupMap[VPInst]->insertMember(
1072 VPInst, IG->getIndex(Inst),
1073 Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1074 : IG->getFactor()));
1075 }
1076 } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1077 visitRegion(Region, Old2New, IAI);
1078 else
1079 llvm_unreachable("Unsupported kind of VPBlock.");
1080}
1081
1083 InterleavedAccessInfo &IAI) {
1084 Old2NewTy Old2New;
1085 visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1086}
1087
1088void VPSlotTracker::assignSlot(const VPValue *V) {
1089 assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1090 Slots[V] = NextSlot++;
1091}
1092
1093void VPSlotTracker::assignSlots(const VPlan &Plan) {
1094
1095 for (const auto &P : Plan.VPExternalDefs)
1096 assignSlot(P.second);
1097
1098 assignSlot(&Plan.VectorTripCount);
1099 if (Plan.BackedgeTakenCount)
1100 assignSlot(Plan.BackedgeTakenCount);
1101
1104 for (const VPBasicBlock *VPBB :
1105 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1106 for (const VPRecipeBase &Recipe : *VPBB)
1107 for (VPValue *Def : Recipe.definedValues())
1108 assignSlot(Def);
1109}
1110
1112 return all_of(Def->users(),
1113 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1114}
1115
1117 ScalarEvolution &SE) {
1118 if (auto *E = dyn_cast<SCEVConstant>(Expr))
1119 return Plan.getOrAddExternalDef(E->getValue());
1120 if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1121 return Plan.getOrAddExternalDef(E->getValue());
1122
1123 VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
1124 VPExpandSCEVRecipe *Step = new VPExpandSCEVRecipe(Expr, SE);
1125 Preheader->appendRecipe(Step);
1126 return Step;
1127}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
std::string Name
static void dumpEdges(CFGMST< Edge, BBInfo > &MST, GCOVFunction &GF)
Generic dominator tree construction - this file provides routines to construct immediate dominator in...
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::init(false), cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
#define I(x, y, z)
Definition: MD5.cpp:58
Memory true print Memory SSA Printer
Definition: MemorySSA.cpp:78
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static T * getPlanEntry(T *Start)
Definition: VPlan.cpp:123
cl::opt< bool > EnableVPlanNativePath
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition: VPlan.cpp:434
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
static const uint32_t IV[8]
Definition: blake3_impl.h:85
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the basic block to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4601
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:306
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:284
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:314
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:887
Debug location.
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition: DebugLoc.h:33
Core dominator tree base class.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:302
bool shouldEmitDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
Definition: Metadata.cpp:1629
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2341
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1178
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1249
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:212
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1259
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2550
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:358
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:623
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:765
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VectorUtils.h:810
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:412
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:956
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1049
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
void setIncomingBlock(unsigned i, BasicBlock *BB)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition: RegionInfo.h:322
This class represents an analyzed expression in the program.
The main scalar evolution driver.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:674
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:301
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:688
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition: StringRef.h:791
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:1803
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:1939
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:2007
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:1960
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:322
iterator end()
Definition: VPlan.h:1970
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:1968
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:204
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:424
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:389
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:399
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:492
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition: VPlan.cpp:475
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:463
const VPRecipeBase & back() const
Definition: VPlan.h:1982
bool empty() const
Definition: VPlan.h:1979
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:390
VPRegionBlock * getParent()
Definition: VPlan.h:462
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:169
size_t getNumSuccessors() const
Definition: VPlan.h:507
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:480
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:191
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:199
VPlan * getPlan()
Definition: VPlan.cpp:143
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:162
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:533
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:527
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:183
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:148
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:497
Helper for GraphTraits specialization that traverses through VPRegionBlocks.
Definition: VPlanCFG.h:114
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:2482
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2529
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2518
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:303
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:104
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Recipe to expand a SCEV expression.
Definition: VPlan.h:1729
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:779
VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
Definition: VPlan.cpp:1082
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:113
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:63
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
A value that is used outside the VPlan.
Definition: VPlan.h:635
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:666
VPBasicBlock * getParent()
Definition: VPlan.h:683
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2063
const VPBlockBase * getEntry() const
Definition: VPlan.h:2102
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:506
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:565
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:513
const VPBlockBase * getExiting() const
Definition: VPlan.h:2114
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:2127
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:431
unsigned getSlot(const VPValue *V) const
Definition: VPlanValue.h:444
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:201
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1029
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:113
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1014
void dump() const
Dump the value to stderr (for debugging).
Definition: VPlan.cpp:96
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition: VPlan.cpp:76
virtual ~VPValue()
Definition: VPlan.cpp:82
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:89
void replaceAllUsesWith(VPValue *New)
Definition: VPlan.cpp:998
VPDef * Def
Pointer to the VPDef that defines this VPValue.
Definition: VPlanValue.h:65
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:2402
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:868
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2160
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:802
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:778
VPBlockBase * getEntry()
Definition: VPlan.h:2223
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:811
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State, bool IsEpilogueVectorization)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:608
VPValue * getOrAddExternalDef(Value *V)
Get the existing or add a new external definition for V.
Definition: VPlan.h:2279
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:2353
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:808
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:662
VPActiveLaneMaskPHIRecipe * getActiveLaneMaskPhi()
Find and return the VPActiveLaneMaskPHIRecipe from the header - there be only one at most.
Definition: VPlan.cpp:599
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:743
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:163
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
std::string EscapeString(const std::string &Label)
Definition: GraphWriter.cpp:55
@ SS
Definition: X86.h:202
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1116
bool onlyFirstLaneUsed(VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1111
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:386
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
auto successors(const MachineBasicBlock *BB)
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2098
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition: VPlanCFG.h:213
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
cl::opt< bool > EnableFSDiscriminator
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1298
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:189
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:330
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:336
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:344
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:332
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:340
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:232
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:231
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:228
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:206
VPValue2ValueTy VPValue2Value
Definition: VPlan.h:362
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:354
void addMetadata(Instruction *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:245
Value * get(VPValue *Def, unsigned Part)
Get the generated Value for a given VPValue and a given Part.
struct llvm::VPTransformState::DataState Data
void setDebugLocFromInst(const Value *V)
Set the debug location in the builder using the debug location in V.
Definition: VPlan.cpp:257
struct llvm::VPTransformState::CFGState CFG
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:237
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:220
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:360
DominatorTree * DT
Hold a pointer to Dominator Tree to register new basic blocks in the loop.
Definition: VPlan.h:357
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:254
VPlan * Plan
Pointer to the VPlan code is generated for.
Definition: VPlan.h:371
std::unique_ptr< LoopVersioning > LVer
LoopVersioning.
Definition: VPlan.h:385
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:244
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:214
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, unsigned Part)
Set the generated Value for a given VPValue and a given Part.
Definition: VPlan.h:265
void print(raw_ostream &O) const
Definition: VPlan.cpp:977