LLVM 19.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
33#include <cassert>
34
35using namespace llvm;
36
38
39namespace llvm {
41}
42
43#define LV_NAME "loop-vectorize"
44#define DEBUG_TYPE LV_NAME
45
47 switch (getVPDefID()) {
48 case VPInterleaveSC:
49 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
50 case VPWidenMemoryInstructionSC: {
51 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
52 }
53 case VPReplicateSC:
54 case VPWidenCallSC:
55 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
56 ->mayWriteToMemory();
57 case VPBranchOnMaskSC:
58 case VPScalarIVStepsSC:
59 case VPPredInstPHISC:
60 return false;
61 case VPBlendSC:
62 case VPReductionSC:
63 case VPWidenCanonicalIVSC:
64 case VPWidenCastSC:
65 case VPWidenGEPSC:
66 case VPWidenIntOrFpInductionSC:
67 case VPWidenPHISC:
68 case VPWidenSC:
69 case VPWidenSelectSC: {
70 const Instruction *I =
71 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
72 (void)I;
73 assert((!I || !I->mayWriteToMemory()) &&
74 "underlying instruction may write to memory");
75 return false;
76 }
77 default:
78 return true;
79 }
80}
81
83 switch (getVPDefID()) {
84 case VPWidenMemoryInstructionSC: {
85 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
86 }
87 case VPReplicateSC:
88 case VPWidenCallSC:
89 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
90 ->mayReadFromMemory();
91 case VPBranchOnMaskSC:
92 case VPScalarIVStepsSC:
93 case VPPredInstPHISC:
94 return false;
95 case VPBlendSC:
96 case VPReductionSC:
97 case VPWidenCanonicalIVSC:
98 case VPWidenCastSC:
99 case VPWidenGEPSC:
100 case VPWidenIntOrFpInductionSC:
101 case VPWidenPHISC:
102 case VPWidenSC:
103 case VPWidenSelectSC: {
104 const Instruction *I =
105 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
106 (void)I;
107 assert((!I || !I->mayReadFromMemory()) &&
108 "underlying instruction may read from memory");
109 return false;
110 }
111 default:
112 return true;
113 }
114}
115
117 switch (getVPDefID()) {
118 case VPDerivedIVSC:
119 case VPPredInstPHISC:
120 case VPScalarCastSC:
121 return false;
122 case VPInstructionSC:
123 switch (cast<VPInstruction>(this)->getOpcode()) {
124 case Instruction::Or:
125 case Instruction::ICmp:
126 case Instruction::Select:
131 return false;
132 default:
133 return true;
134 }
135 case VPWidenCallSC:
136 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
137 ->mayHaveSideEffects();
138 case VPBlendSC:
139 case VPReductionSC:
140 case VPScalarIVStepsSC:
141 case VPWidenCanonicalIVSC:
142 case VPWidenCastSC:
143 case VPWidenGEPSC:
144 case VPWidenIntOrFpInductionSC:
145 case VPWidenPHISC:
146 case VPWidenPointerInductionSC:
147 case VPWidenSC:
148 case VPWidenSelectSC: {
149 const Instruction *I =
150 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
151 (void)I;
152 assert((!I || !I->mayHaveSideEffects()) &&
153 "underlying instruction has side-effects");
154 return false;
155 }
156 case VPInterleaveSC:
157 return mayWriteToMemory();
158 case VPWidenMemoryInstructionSC:
159 assert(cast<VPWidenMemoryInstructionRecipe>(this)
160 ->getIngredient()
162 "mayHaveSideffects result for ingredient differs from this "
163 "implementation");
164 return mayWriteToMemory();
165 case VPReplicateSC: {
166 auto *R = cast<VPReplicateRecipe>(this);
167 return R->getUnderlyingInstr()->mayHaveSideEffects();
168 }
169 default:
170 return true;
171 }
172}
173
175 auto Lane = VPLane::getLastLaneForVF(State.VF);
176 VPValue *ExitValue = getOperand(0);
178 Lane = VPLane::getFirstLane();
179 VPBasicBlock *MiddleVPBB =
180 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
181 assert(MiddleVPBB->getNumSuccessors() == 0 &&
182 "the middle block must not have any successors");
183 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
184 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
185 MiddleBB);
186}
187
188#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
190 O << "Live-out ";
192 O << " = ";
194 O << "\n";
195}
196#endif
197
199 assert(!Parent && "Recipe already in some VPBasicBlock");
200 assert(InsertPos->getParent() &&
201 "Insertion position not in any VPBasicBlock");
202 InsertPos->getParent()->insert(this, InsertPos->getIterator());
203}
204
207 assert(!Parent && "Recipe already in some VPBasicBlock");
208 assert(I == BB.end() || I->getParent() == &BB);
209 BB.insert(this, I);
210}
211
213 assert(!Parent && "Recipe already in some VPBasicBlock");
214 assert(InsertPos->getParent() &&
215 "Insertion position not in any VPBasicBlock");
216 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
217}
218
220 assert(getParent() && "Recipe not in any VPBasicBlock");
222 Parent = nullptr;
223}
224
226 assert(getParent() && "Recipe not in any VPBasicBlock");
228}
229
232 insertAfter(InsertPos);
233}
234
238 insertBefore(BB, I);
239}
240
242 assert(OpType == OperationType::FPMathOp &&
243 "recipe doesn't have fast math flags");
244 FastMathFlags Res;
245 Res.setAllowReassoc(FMFs.AllowReassoc);
246 Res.setNoNaNs(FMFs.NoNaNs);
247 Res.setNoInfs(FMFs.NoInfs);
248 Res.setNoSignedZeros(FMFs.NoSignedZeros);
249 Res.setAllowReciprocal(FMFs.AllowReciprocal);
250 Res.setAllowContract(FMFs.AllowContract);
251 Res.setApproxFunc(FMFs.ApproxFunc);
252 return Res;
253}
254
257 const Twine &Name)
258 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
259 Pred, DL),
260 Opcode(Opcode), Name(Name.str()) {
261 assert(Opcode == Instruction::ICmp &&
262 "only ICmp predicates supported at the moment");
263}
264
266 std::initializer_list<VPValue *> Operands,
267 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
268 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
269 Opcode(Opcode), Name(Name.str()) {
270 // Make sure the VPInstruction is a floating-point operation.
271 assert(isFPMathOp() && "this op can't take fast-math flags");
272}
273
/// Returns true when this VPInstruction must be materialized once per lane.
/// That is the case only for the PtrAdd opcode, and only when
/// vputils::onlyFirstLaneUsed(this) reports that more than the first lane of
/// the result is used. For every other opcode the answer is false.
274bool VPInstruction::doesGeneratePerAllLanes() const {
  // The opcode test comes first, so the use query only runs for PtrAdd.
275 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
276}
277
278bool VPInstruction::canGenerateScalarForFirstLane() const {
280 return true;
281
282 switch (Opcode) {
290 return true;
291 default:
292 return false;
293 }
294}
295
296Value *VPInstruction::generatePerLane(VPTransformState &State,
297 const VPIteration &Lane) {
298 IRBuilderBase &Builder = State.Builder;
299
301 "only PtrAdd opcodes are supported for now");
302 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
303 State.get(getOperand(1), Lane), Name);
304}
305
306Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
307 IRBuilderBase &Builder = State.Builder;
308
310 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
311 if (Part != 0 && vputils::onlyFirstPartUsed(this))
312 return State.get(this, 0, OnlyFirstLaneUsed);
313
314 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
315 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
316 auto *Res =
317 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
318 if (auto *I = dyn_cast<Instruction>(Res))
319 setFlags(I);
320 return Res;
321 }
322
323 switch (getOpcode()) {
324 case VPInstruction::Not: {
325 Value *A = State.get(getOperand(0), Part);
326 return Builder.CreateNot(A, Name);
327 }
328 case Instruction::ICmp: {
329 Value *A = State.get(getOperand(0), Part);
330 Value *B = State.get(getOperand(1), Part);
331 return Builder.CreateCmp(getPredicate(), A, B, Name);
332 }
333 case Instruction::Select: {
334 Value *Cond = State.get(getOperand(0), Part);
335 Value *Op1 = State.get(getOperand(1), Part);
336 Value *Op2 = State.get(getOperand(2), Part);
337 return Builder.CreateSelect(Cond, Op1, Op2, Name);
338 }
340 // Get first lane of vector induction variable.
341 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
342 // Get the original loop tripcount.
343 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
344
345 // If this part of the active lane mask is scalar, generate the CMP directly
346 // to avoid unnecessary extracts.
347 if (State.VF.isScalar())
348 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
349 Name);
350
351 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
352 auto *PredTy = VectorType::get(Int1Ty, State.VF);
353 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
354 {PredTy, ScalarTC->getType()},
355 {VIVElem0, ScalarTC}, nullptr, Name);
356 }
358 // Generate code to combine the previous and current values in vector v3.
359 //
360 // vector.ph:
361 // v_init = vector(..., ..., ..., a[-1])
362 // br vector.body
363 //
364 // vector.body
365 // i = phi [0, vector.ph], [i+4, vector.body]
366 // v1 = phi [v_init, vector.ph], [v2, vector.body]
367 // v2 = a[i, i+1, i+2, i+3];
368 // v3 = vector(v1(3), v2(0, 1, 2))
369
370 // For the first part, use the recurrence phi (v1), otherwise v2.
371 auto *V1 = State.get(getOperand(0), 0);
372 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
373 if (!PartMinus1->getType()->isVectorTy())
374 return PartMinus1;
375 Value *V2 = State.get(getOperand(1), Part);
376 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
377 }
379 if (Part != 0)
380 return State.get(this, 0, /*IsScalar*/ true);
381
382 Value *ScalarTC = State.get(getOperand(0), {0, 0});
383 Value *Step =
384 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
385 Value *Sub = Builder.CreateSub(ScalarTC, Step);
386 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
387 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
388 return Builder.CreateSelect(Cmp, Sub, Zero);
389 }
391 // Compute EVL
392 auto GetEVL = [=](VPTransformState &State, Value *AVL) {
393 assert(AVL->getType()->isIntegerTy() &&
394 "Requested vector length should be an integer.");
395
396 // TODO: Add support for MaxSafeDist for correct loop emission.
397 assert(State.VF.isScalable() && "Expected scalable vector factor.");
398 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
399
400 Value *EVL = State.Builder.CreateIntrinsic(
401 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
402 {AVL, VFArg, State.Builder.getTrue()});
403 return EVL;
404 };
405 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
406 // be outside of the main loop.
407 assert(Part == 0 && "No unrolling expected for predicated vectorization.");
408 // Compute VTC - IV as the AVL (requested vector length).
409 Value *Index = State.get(getOperand(0), VPIteration(0, 0));
410 Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
411 Value *AVL = State.Builder.CreateSub(TripCount, Index);
412 Value *EVL = GetEVL(State, AVL);
413 assert(!State.EVL && "multiple EVL recipes");
414 State.EVL = this;
415 return EVL;
416 }
418 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
419 if (Part == 0)
420 return IV;
421
422 // The canonical IV is incremented by the vectorization factor (num of SIMD
423 // elements) times the unroll part.
424 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
425 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
427 }
429 if (Part != 0)
430 return nullptr;
431
432 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
433 VPRegionBlock *ParentRegion = getParent()->getParent();
434 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
435
436 // Replace the temporary unreachable terminator with a new conditional
437 // branch, hooking it up to backward destination for exiting blocks now and
438 // to forward destination(s) later when they are created.
439 BranchInst *CondBr =
440 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
441
442 if (getParent()->isExiting())
443 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
444
445 CondBr->setSuccessor(0, nullptr);
447 return CondBr;
448 }
450 if (Part != 0)
451 return nullptr;
452 // First create the compare.
453 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
454 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
455 Value *Cond = Builder.CreateICmpEQ(IV, TC);
456
457 // Now create the branch.
458 auto *Plan = getParent()->getPlan();
459 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
460 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
461
462 // Replace the temporary unreachable terminator with a new conditional
463 // branch, hooking it up to backward destination (the header) now and to the
464 // forward destination (the exit/middle block) later when it is created.
465 // Note that CreateCondBr expects a valid BB as first argument, so we need
466 // to set it to nullptr later.
467 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
468 State.CFG.VPBB2IRBB[Header]);
469 CondBr->setSuccessor(0, nullptr);
471 return CondBr;
472 }
474 if (Part != 0)
475 return State.get(this, 0, /*IsScalar*/ true);
476
477 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
478 // and will be removed by breaking up the recipe further.
479 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
480 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
481 // Get its reduction variable descriptor.
482 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
483
484 RecurKind RK = RdxDesc.getRecurrenceKind();
485
486 VPValue *LoopExitingDef = getOperand(1);
487 Type *PhiTy = OrigPhi->getType();
488 VectorParts RdxParts(State.UF);
489 for (unsigned Part = 0; Part < State.UF; ++Part)
490 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
491
492 // If the vector reduction can be performed in a smaller type, we truncate
493 // then extend the loop exit value to enable InstCombine to evaluate the
494 // entire expression in the smaller type.
495 // TODO: Handle this in truncateToMinBW.
496 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
497 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
498 for (unsigned Part = 0; Part < State.UF; ++Part)
499 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
500 }
501 // Reduce all of the unrolled parts into a single vector.
502 Value *ReducedPartRdx = RdxParts[0];
503 unsigned Op = RecurrenceDescriptor::getOpcode(RK);
505 Op = Instruction::Or;
506
507 if (PhiR->isOrdered()) {
508 ReducedPartRdx = RdxParts[State.UF - 1];
509 } else {
510 // Floating-point operations should have some FMF to enable the reduction.
512 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
513 for (unsigned Part = 1; Part < State.UF; ++Part) {
514 Value *RdxPart = RdxParts[Part];
515 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
516 ReducedPartRdx = Builder.CreateBinOp(
517 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
518 else
519 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
520 }
521 }
522
523 // Create the reduction after the loop. Note that inloop reductions create
524 // the target reduction in the loop using a Reduction recipe.
525 if ((State.VF.isVector() ||
527 !PhiR->isInLoop()) {
528 ReducedPartRdx =
529 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
530 // If the reduction can be performed in a smaller type, we need to extend
531 // the reduction to the wider type before we branch to the original loop.
532 if (PhiTy != RdxDesc.getRecurrenceType())
533 ReducedPartRdx = RdxDesc.isSigned()
534 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
535 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
536 }
537
538 // If there were stores of the reduction value to a uniform memory address
539 // inside the loop, create the final store here.
540 if (StoreInst *SI = RdxDesc.IntermediateStore) {
541 auto *NewSI = Builder.CreateAlignedStore(
542 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
543 propagateMetadata(NewSI, SI);
544 }
545
546 return ReducedPartRdx;
547 }
550 "can only generate first lane for PtrAdd");
551 Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
552 Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
553 return Builder.CreatePtrAdd(Ptr, Addend, Name);
554 }
555 default:
556 llvm_unreachable("Unsupported opcode for instruction");
557 }
558}
559
560#if !defined(NDEBUG)
/// Returns true if this VPInstruction's opcode is one that may carry
/// fast-math flags: FAdd, FMul, FNeg, FSub, FDiv, FRem, FCmp or Select.
/// The definition sits inside an `#if !defined(NDEBUG)` region, so it is only
/// available to assertions in builds with assertions enabled.
561bool VPInstruction::isFPMathOp() const {
562 // Inspired by FPMathOperator::classof. Unlike that predicate, Call and PHI
563 // opcodes are not supported here yet; Select is accepted (see below).
564 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
565 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
566 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
567 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
568}
569#endif
570
572 assert(!State.Instance && "VPInstruction executing an Instance");
574 assert((hasFastMathFlags() == isFPMathOp() ||
575 getOpcode() == Instruction::Select) &&
576 "Recipe not a FPMathOp but has fast-math flags?");
577 if (hasFastMathFlags())
580 bool GeneratesPerFirstLaneOnly =
581 canGenerateScalarForFirstLane() &&
584 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
585 for (unsigned Part = 0; Part < State.UF; ++Part) {
586 if (GeneratesPerAllLanes) {
587 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
588 Lane != NumLanes; ++Lane) {
589 Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
590 assert(GeneratedValue && "generatePerLane must produce a value");
591 State.set(this, GeneratedValue, VPIteration(Part, Lane));
592 }
593 continue;
594 }
595
596 Value *GeneratedValue = generatePerPart(State, Part);
597 if (!hasResult())
598 continue;
599 assert(GeneratedValue && "generatePerPart must produce a value");
600 assert((GeneratedValue->getType()->isVectorTy() ==
601 !GeneratesPerFirstLaneOnly ||
602 State.VF.isScalar()) &&
603 "scalar value but not only first lane defined");
604 State.set(this, GeneratedValue, Part,
605 /*IsScalar*/ GeneratesPerFirstLaneOnly);
606 }
607}
608
610 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
612 return vputils::onlyFirstLaneUsed(this);
613
614 switch (getOpcode()) {
615 default:
616 return false;
617 case Instruction::ICmp:
619 // TODO: Cover additional opcodes.
620 return vputils::onlyFirstLaneUsed(this);
626 return true;
627 };
628 llvm_unreachable("switch should return");
629}
630
631#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
633 VPSlotTracker SlotTracker(getParent()->getPlan());
634 print(dbgs(), "", SlotTracker);
635}
636
638 VPSlotTracker &SlotTracker) const {
639 O << Indent << "EMIT ";
640
641 if (hasResult()) {
643 O << " = ";
644 }
645
646 switch (getOpcode()) {
648 O << "not";
649 break;
651 O << "combined load";
652 break;
654 O << "combined store";
655 break;
657 O << "active lane mask";
658 break;
660 O << "EXPLICIT-VECTOR-LENGTH";
661 break;
663 O << "first-order splice";
664 break;
666 O << "branch-on-cond";
667 break;
669 O << "TC > VF ? TC - VF : 0";
670 break;
672 O << "VF * Part +";
673 break;
675 O << "branch-on-count";
676 break;
678 O << "compute-reduction-result";
679 break;
681 O << "ptradd";
682 break;
683 default:
685 }
686
687 printFlags(O);
689
690 if (auto DL = getDebugLoc()) {
691 O << ", !dbg ";
692 DL.print(O);
693 }
694}
695#endif
696
698 assert(State.VF.isVector() && "not widening");
699 auto &CI = *cast<CallInst>(getUnderlyingInstr());
700 assert(!isa<DbgInfoIntrinsic>(CI) &&
701 "DbgInfoIntrinsic should have been dropped during VPlan construction");
703
704 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
705 FunctionType *VFTy = nullptr;
706 if (Variant)
707 VFTy = Variant->getFunctionType();
708 for (unsigned Part = 0; Part < State.UF; ++Part) {
709 SmallVector<Type *, 2> TysForDecl;
710 // Add return type if intrinsic is overloaded on it.
711 if (UseIntrinsic &&
712 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
713 TysForDecl.push_back(
714 VectorType::get(CI.getType()->getScalarType(), State.VF));
716 for (const auto &I : enumerate(operands())) {
717 // Some intrinsics have a scalar argument - don't replace it with a
718 // vector.
719 Value *Arg;
720 if (UseIntrinsic &&
721 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
722 Arg = State.get(I.value(), VPIteration(0, 0));
723 // Some vectorized function variants may also take a scalar argument,
724 // e.g. linear parameters for pointers. This needs to be the scalar value
725 // from the start of the respective part when interleaving.
726 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
727 Arg = State.get(I.value(), VPIteration(Part, 0));
728 else
729 Arg = State.get(I.value(), Part);
730 if (UseIntrinsic &&
731 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
732 TysForDecl.push_back(Arg->getType());
733 Args.push_back(Arg);
734 }
735
736 Function *VectorF;
737 if (UseIntrinsic) {
738 // Use vector version of the intrinsic.
739 Module *M = State.Builder.GetInsertBlock()->getModule();
740 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
741 assert(VectorF && "Can't retrieve vector intrinsic.");
742 } else {
743#ifndef NDEBUG
744 assert(Variant != nullptr && "Can't create vector function.");
745#endif
746 VectorF = Variant;
747 }
748
750 CI.getOperandBundlesAsDefs(OpBundles);
751 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
752
753 if (isa<FPMathOperator>(V))
754 V->copyFastMathFlags(&CI);
755
756 if (!V->getType()->isVoidTy())
757 State.set(this, V, Part);
758 State.addMetadata(V, &CI);
759 }
760}
761
762#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
764 VPSlotTracker &SlotTracker) const {
765 O << Indent << "WIDEN-CALL ";
766
767 auto *CI = cast<CallInst>(getUnderlyingInstr());
768 if (CI->getType()->isVoidTy())
769 O << "void ";
770 else {
772 O << " = ";
773 }
774
775 O << "call @" << CI->getCalledFunction()->getName() << "(";
777 O << ")";
778
779 if (VectorIntrinsicID)
780 O << " (using vector intrinsic)";
781 else {
782 O << " (using library function";
783 if (Variant->hasName())
784 O << ": " << Variant->getName();
785 O << ")";
786 }
787}
788
790 VPSlotTracker &SlotTracker) const {
791 O << Indent << "WIDEN-SELECT ";
793 O << " = select ";
795 O << ", ";
797 O << ", ";
799 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
800}
801#endif
802
805
806 // The condition can be loop invariant but still defined inside the
807 // loop. This means that we can't just use the original 'cond' value.
808 // We have to take the 'vectorized' value and pick the first lane.
809 // Instcombine will make this a no-op.
810 auto *InvarCond =
811 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
812
813 for (unsigned Part = 0; Part < State.UF; ++Part) {
814 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
815 Value *Op0 = State.get(getOperand(1), Part);
816 Value *Op1 = State.get(getOperand(2), Part);
817 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
818 State.set(this, Sel, Part);
819 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
820 }
821}
822
/// Builds the recipe-side record of fast-math flags from an IR FastMathFlags
/// value.
///
/// \param FMF the IR flag set to copy from. Each of the seven flags is read
/// through its accessor and stored in the member of the same name; nothing
/// else is derived or combined.
823VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
824 const FastMathFlags &FMF) {
825 AllowReassoc = FMF.allowReassoc();
826 NoNaNs = FMF.noNaNs();
827 NoInfs = FMF.noInfs();
828 NoSignedZeros = FMF.noSignedZeros();
829 AllowReciprocal = FMF.allowReciprocal();
830 AllowContract = FMF.allowContract();
831 ApproxFunc = FMF.approxFunc();
832}
833
834#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
836 switch (OpType) {
837 case OperationType::Cmp:
839 break;
840 case OperationType::DisjointOp:
842 O << " disjoint";
843 break;
844 case OperationType::PossiblyExactOp:
845 if (ExactFlags.IsExact)
846 O << " exact";
847 break;
848 case OperationType::OverflowingBinOp:
849 if (WrapFlags.HasNUW)
850 O << " nuw";
851 if (WrapFlags.HasNSW)
852 O << " nsw";
853 break;
854 case OperationType::FPMathOp:
856 break;
857 case OperationType::GEPOp:
859 O << " inbounds";
860 break;
861 case OperationType::NonNegOp:
862 if (NonNegFlags.NonNeg)
863 O << " nneg";
864 break;
865 case OperationType::Other:
866 break;
867 }
868 if (getNumOperands() > 0)
869 O << " ";
870}
871#endif
872
875 auto &Builder = State.Builder;
876 switch (Opcode) {
877 case Instruction::Call:
878 case Instruction::Br:
879 case Instruction::PHI:
880 case Instruction::GetElementPtr:
881 case Instruction::Select:
882 llvm_unreachable("This instruction is handled by a different recipe.");
883 case Instruction::UDiv:
884 case Instruction::SDiv:
885 case Instruction::SRem:
886 case Instruction::URem:
887 case Instruction::Add:
888 case Instruction::FAdd:
889 case Instruction::Sub:
890 case Instruction::FSub:
891 case Instruction::FNeg:
892 case Instruction::Mul:
893 case Instruction::FMul:
894 case Instruction::FDiv:
895 case Instruction::FRem:
896 case Instruction::Shl:
897 case Instruction::LShr:
898 case Instruction::AShr:
899 case Instruction::And:
900 case Instruction::Or:
901 case Instruction::Xor: {
902 // Just widen unops and binops.
903 for (unsigned Part = 0; Part < State.UF; ++Part) {
905 for (VPValue *VPOp : operands())
906 Ops.push_back(State.get(VPOp, Part));
907
908 Value *V = Builder.CreateNAryOp(Opcode, Ops);
909
910 if (auto *VecOp = dyn_cast<Instruction>(V))
911 setFlags(VecOp);
912
913 // Use this vector value for all users of the original instruction.
914 State.set(this, V, Part);
915 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
916 }
917
918 break;
919 }
920 case Instruction::Freeze: {
921 for (unsigned Part = 0; Part < State.UF; ++Part) {
922 Value *Op = State.get(getOperand(0), Part);
923
924 Value *Freeze = Builder.CreateFreeze(Op);
925 State.set(this, Freeze, Part);
926 }
927 break;
928 }
929 case Instruction::ICmp:
930 case Instruction::FCmp: {
931 // Widen compares. Generate vector compares.
932 bool FCmp = Opcode == Instruction::FCmp;
933 for (unsigned Part = 0; Part < State.UF; ++Part) {
934 Value *A = State.get(getOperand(0), Part);
935 Value *B = State.get(getOperand(1), Part);
936 Value *C = nullptr;
937 if (FCmp) {
938 // Propagate fast math flags.
939 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
940 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
941 Builder.setFastMathFlags(I->getFastMathFlags());
942 C = Builder.CreateFCmp(getPredicate(), A, B);
943 } else {
944 C = Builder.CreateICmp(getPredicate(), A, B);
945 }
946 State.set(this, C, Part);
947 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
948 }
949
950 break;
951 }
952 default:
953 // This instruction is not vectorized by simple widening.
954 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
955 << Instruction::getOpcodeName(Opcode));
956 llvm_unreachable("Unhandled instruction!");
957 } // end of switch.
958
959#if !defined(NDEBUG)
960 // Verify that VPlan type inference results agree with the type of the
961 // generated values.
962 for (unsigned Part = 0; Part < State.UF; ++Part) {
964 State.VF) == State.get(this, Part)->getType() &&
965 "inferred type and type from generated instructions do not match");
966 }
967#endif
968}
969
970#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
972 VPSlotTracker &SlotTracker) const {
973 O << Indent << "WIDEN ";
975 O << " = " << Instruction::getOpcodeName(Opcode);
976 printFlags(O);
978}
979#endif
980
983 auto &Builder = State.Builder;
984 /// Vectorize casts.
985 assert(State.VF.isVector() && "Not vectorizing?");
986 Type *DestTy = VectorType::get(getResultType(), State.VF);
987 VPValue *Op = getOperand(0);
988 for (unsigned Part = 0; Part < State.UF; ++Part) {
989 if (Part > 0 && Op->isLiveIn()) {
990 // FIXME: Remove once explicit unrolling is implemented using VPlan.
991 State.set(this, State.get(this, 0), Part);
992 continue;
993 }
994 Value *A = State.get(Op, Part);
995 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
996 State.set(this, Cast, Part);
997 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
998 }
999}
1000
1001#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1003 VPSlotTracker &SlotTracker) const {
1004 O << Indent << "WIDEN-CAST ";
1006 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1007 printFlags(O);
1009 O << " to " << *getResultType();
1010}
1011#endif
1012
1013/// This function adds
1014/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
1015/// to each vector element of \p Val. The sequence starts at \p StartIdx.
1016/// \p BinOp is relevant only for floating-point induction variables.
1017static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
1019 IRBuilderBase &Builder) {
1020 assert(VF.isVector() && "only vector VFs are supported");
1021
1022 // Create and check the types.
1023 auto *ValVTy = cast<VectorType>(Val->getType());
1024 ElementCount VLen = ValVTy->getElementCount();
1025
1026 Type *STy = Val->getType()->getScalarType();
1027 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1028 "Induction Step must be an integer or FP");
1029 assert(Step->getType() == STy && "Step has wrong type");
1030
1032
1033 // Create a vector of consecutive numbers from zero to VF.
1034 VectorType *InitVecValVTy = ValVTy;
1035 if (STy->isFloatingPointTy()) {
1036 Type *InitVecValSTy =
1038 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1039 }
1040 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1041
1042 // Splat the StartIdx
1043 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
1044
1045 if (STy->isIntegerTy()) {
1046 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
1047 Step = Builder.CreateVectorSplat(VLen, Step);
1048 assert(Step->getType() == Val->getType() && "Invalid step vec");
1049 // FIXME: The newly created binary instructions should contain nsw/nuw
1050 // flags, which can be found from the original scalar operations.
1051 Step = Builder.CreateMul(InitVec, Step);
1052 return Builder.CreateAdd(Val, Step, "induction");
1053 }
1054
1055 // Floating point induction.
1056 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1057 "Binary Opcode should be specified for FP induction");
1058 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1059 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
1060
1061 Step = Builder.CreateVectorSplat(VLen, Step);
1062 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1063 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1064}
1065
1066/// A helper function that returns an integer or floating-point constant with
1067/// value C.
1069 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1070 : ConstantFP::get(Ty, C);
1071}
1072
1074 ElementCount VF) {
1075 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
1076 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
1077 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
1078 return B.CreateUIToFP(RuntimeVF, FTy);
1079}
1080
1082 assert(!State.Instance && "Int or FP induction being replicated.");
1083
1084 Value *Start = getStartValue()->getLiveInIRValue();
1086 TruncInst *Trunc = getTruncInst();
1087 IRBuilderBase &Builder = State.Builder;
1088 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
1089 assert(State.VF.isVector() && "must have vector VF");
1090
1091 // The value from the original loop to which we are mapping the new induction
1092 // variable.
1093 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
1094
1095 // Fast-math-flags propagate from the original induction instruction.
1096 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1097 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1098 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1099
1100 // Now do the actual transformations, and start with fetching the step value.
1101 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1102
1103 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1104 "Expected either an induction phi-node or a truncate of it!");
1105
1106 // Construct the initial value of the vector IV in the vector loop preheader
1107 auto CurrIP = Builder.saveIP();
1108 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1109 Builder.SetInsertPoint(VectorPH->getTerminator());
1110 if (isa<TruncInst>(EntryVal)) {
1111 assert(Start->getType()->isIntegerTy() &&
1112 "Truncation requires an integer type");
1113 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1114 Step = Builder.CreateTrunc(Step, TruncType);
1115 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1116 }
1117
1118 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
1119 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1120 Value *SteppedStart = getStepVector(
1121 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
1122
1123 // We create vector phi nodes for both integer and floating-point induction
1124 // variables. Here, we determine the kind of arithmetic we will perform.
1127 if (Step->getType()->isIntegerTy()) {
1128 AddOp = Instruction::Add;
1129 MulOp = Instruction::Mul;
1130 } else {
1131 AddOp = ID.getInductionOpcode();
1132 MulOp = Instruction::FMul;
1133 }
1134
1135 // Multiply the vectorization factor by the step using integer or
1136 // floating-point arithmetic as appropriate.
1137 Type *StepType = Step->getType();
1138 Value *RuntimeVF;
1139 if (Step->getType()->isFloatingPointTy())
1140 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
1141 else
1142 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
1143 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1144
1145 // Create a vector splat to use in the induction update.
1146 //
1147 // FIXME: If the step is non-constant, we create the vector splat with
1148 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
1149 // handle a constant vector splat.
1150 Value *SplatVF = isa<Constant>(Mul)
1151 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
1152 : Builder.CreateVectorSplat(State.VF, Mul);
1153 Builder.restoreIP(CurrIP);
1154
1155 // We may need to add the step a number of times, depending on the unroll
1156 // factor. The last of those goes into the PHI.
1157 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1158 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1159 VecInd->setDebugLoc(EntryVal->getDebugLoc());
1160 Instruction *LastInduction = VecInd;
1161 for (unsigned Part = 0; Part < State.UF; ++Part) {
1162 State.set(this, LastInduction, Part);
1163
1164 if (isa<TruncInst>(EntryVal))
1165 State.addMetadata(LastInduction, EntryVal);
1166
1167 LastInduction = cast<Instruction>(
1168 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
1169 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
1170 }
1171
1172 LastInduction->setName("vec.ind.next");
1173 VecInd->addIncoming(SteppedStart, VectorPH);
1174 // Add induction update using an incorrect block temporarily. The phi node
1175 // will be fixed after VPlan execution. Note that at this point the latch
1176 // block cannot be used, as it does not exist yet.
1177 // TODO: Model increment value in VPlan, by turning the recipe into a
1178 // multi-def and a subclass of VPHeaderPHIRecipe.
1179 VecInd->addIncoming(LastInduction, VectorPH);
1180}
1181
1182#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1184 VPSlotTracker &SlotTracker) const {
1185 O << Indent << "WIDEN-INDUCTION";
1186 if (getTruncInst()) {
1187 O << "\\l\"";
1188 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
1189 O << " +\n" << Indent << "\" ";
1191 } else
1192 O << " " << VPlanIngredient(IV);
1193
1194 O << ", ";
1196}
1197#endif
1198
1200 // The step may be defined by a recipe in the preheader (e.g. if it requires
1201 // SCEV expansion), but for the canonical induction the step is required to be
1202 // 1, which is represented as live-in.
1204 return false;
1205 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1206 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1207 return StartC && StartC->isZero() && StepC && StepC->isOne();
1208}
1209
1210#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1212 VPSlotTracker &SlotTracker) const {
1213 O << Indent;
1215 O << Indent << "= DERIVED-IV ";
1217 O << " + ";
1219 O << " * ";
1221}
1222#endif
1223
1225 // Fast-math-flags propagate from the original induction instruction.
1227 if (hasFastMathFlags())
1229
1230 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1231 /// variable on which to base the steps, \p Step is the size of the step.
1232
1233 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
1234 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1235 IRBuilderBase &Builder = State.Builder;
1236
1237 // Ensure step has the same type as that of scalar IV.
1238 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1239 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1240
1241 // We build scalar steps for both integer and floating-point induction
1242 // variables. Here, we determine the kind of arithmetic we will perform.
1245 if (BaseIVTy->isIntegerTy()) {
1246 AddOp = Instruction::Add;
1247 MulOp = Instruction::Mul;
1248 } else {
1249 AddOp = InductionOpcode;
1250 MulOp = Instruction::FMul;
1251 }
1252
1253 // Determine the number of scalars we need to generate for each unroll
1254 // iteration.
1255 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1256 // Compute the scalar steps and save the results in State.
1257 Type *IntStepTy =
1258 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1259 Type *VecIVTy = nullptr;
1260 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1261 if (!FirstLaneOnly && State.VF.isScalable()) {
1262 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1263 UnitStepVec =
1264 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1265 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1266 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1267 }
1268
1269 unsigned StartPart = 0;
1270 unsigned EndPart = State.UF;
1271 unsigned StartLane = 0;
1272 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1273 if (State.Instance) {
1274 StartPart = State.Instance->Part;
1275 EndPart = StartPart + 1;
1276 StartLane = State.Instance->Lane.getKnownLane();
1277 EndLane = StartLane + 1;
1278 }
1279 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1280 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1281
1282 if (!FirstLaneOnly && State.VF.isScalable()) {
1283 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1284 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1285 if (BaseIVTy->isFloatingPointTy())
1286 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1287 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1288 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1289 State.set(this, Add, Part);
1290 // It's useful to record the lane values too for the known minimum number
1291 // of elements so we do those below. This improves the code quality when
1292 // trying to extract the first element, for example.
1293 }
1294
1295 if (BaseIVTy->isFloatingPointTy())
1296 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1297
1298 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1299 Value *StartIdx = Builder.CreateBinOp(
1300 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1301 // The step returned by `createStepForVF` is a runtime-evaluated value
1302 // when VF is scalable. Otherwise, it should be folded into a Constant.
1303 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1304 "Expected StartIdx to be folded to a constant when VF is not "
1305 "scalable");
1306 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1307 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1308 State.set(this, Add, VPIteration(Part, Lane));
1309 }
1310 }
1311}
1312
1313#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1315 VPSlotTracker &SlotTracker) const {
1316 O << Indent;
1318 O << " = SCALAR-STEPS ";
1320}
1321#endif
1322
1324 assert(State.VF.isVector() && "not widening");
1325 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1326 // Construct a vector GEP by widening the operands of the scalar GEP as
1327 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1328 // results in a vector of pointers when at least one operand of the GEP
1329 // is vector-typed. Thus, to keep the representation compact, we only use
1330 // vector-typed operands for loop-varying values.
1331
1332 if (areAllOperandsInvariant()) {
1333 // If we are vectorizing, but the GEP has only loop-invariant operands,
1334 // the GEP we build (by only using vector-typed operands for
1335 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1336 // produce a vector of pointers, we need to either arbitrarily pick an
1337 // operand to broadcast, or broadcast a clone of the original GEP.
1338 // Here, we broadcast a clone of the original.
1339 //
1340 // TODO: If at some point we decide to scalarize instructions having
1341 // loop-invariant operands, this special case will no longer be
1342 // required. We would add the scalarization decision to
1343 // collectLoopScalars() and teach getVectorValue() to broadcast
1344 // the lane-zero scalar value.
1346 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1347 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1348
1349 auto *NewGEP =
1350 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1351 ArrayRef(Ops).drop_front(), "", isInBounds());
1352 for (unsigned Part = 0; Part < State.UF; ++Part) {
1353 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1354 State.set(this, EntryPart, Part);
1355 State.addMetadata(EntryPart, GEP);
1356 }
1357 } else {
1358 // If the GEP has at least one loop-varying operand, we are sure to
1359 // produce a vector of pointers. But if we are only unrolling, we want
1360 // to produce a scalar GEP for each unroll part. Thus, the GEP we
1361 // produce with the code below will be scalar (if VF == 1) or vector
1362 // (otherwise). Note that for the unroll-only case, we still maintain
1363 // values in the vector mapping with initVector, as we do for other
1364 // instructions.
1365 for (unsigned Part = 0; Part < State.UF; ++Part) {
1366 // The pointer operand of the new GEP. If it's loop-invariant, we
1367 // won't broadcast it.
1368 auto *Ptr = isPointerLoopInvariant()
1369 ? State.get(getOperand(0), VPIteration(0, 0))
1370 : State.get(getOperand(0), Part);
1371
1372 // Collect all the indices for the new GEP. If any index is
1373 // loop-invariant, we won't broadcast it.
1375 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1376 VPValue *Operand = getOperand(I);
1377 if (isIndexLoopInvariant(I - 1))
1378 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1379 else
1380 Indices.push_back(State.get(Operand, Part));
1381 }
1382
1383 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1384 // but it should be a vector, otherwise.
1385 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1386 Indices, "", isInBounds());
1387 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1388 "NewGEP is not a pointer vector");
1389 State.set(this, NewGEP, Part);
1390 State.addMetadata(NewGEP, GEP);
1391 }
1392 }
1393}
1394
1395#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1397 VPSlotTracker &SlotTracker) const {
1398 O << Indent << "WIDEN-GEP ";
1399 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1400 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1401 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1402
1403 O << " ";
1405 O << " = getelementptr";
1406 printFlags(O);
1408}
1409#endif
1410
1411void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1412 auto &Builder = State.Builder;
1414 for (unsigned Part = 0; Part < State.UF; ++Part) {
1415 // Calculate the pointer for the specific unroll-part.
1416 Value *PartPtr = nullptr;
1417 // Use i32 for the gep index type when the value is constant,
1418 // or query DataLayout for a more suitable index type otherwise.
1419 const DataLayout &DL =
1420 Builder.GetInsertBlock()->getModule()->getDataLayout();
1421 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1422 ? DL.getIndexType(IndexedTy->getPointerTo())
1423 : Builder.getInt32Ty();
1424 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1425 bool InBounds = isInBounds();
1426 if (IsReverse) {
1427 // If the address is consecutive but reversed, then the
1428 // wide store needs to start at the last vector element.
1429 // RunTimeVF = VScale * VF.getKnownMinValue()
1430 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1431 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1432 // NumElt = -Part * RunTimeVF
1433 Value *NumElt = Builder.CreateMul(
1434 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1435 // LastLane = 1 - RunTimeVF
1436 Value *LastLane =
1437 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1438 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1439 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1440 } else {
1441 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1442 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1443 }
1444
1445 State.set(this, PartPtr, Part, /*IsScalar*/ true);
1446 }
1447}
1448
1449#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1451 VPSlotTracker &SlotTracker) const {
1452 O << Indent;
1454 O << " = vector-pointer ";
1455 if (IsReverse)
1456 O << "(reverse) ";
1457
1459}
1460#endif
1461
1464 // We know that all PHIs in non-header blocks are converted into
1465 // selects, so we don't have to worry about the insertion order and we
1466 // can just use the builder.
1467 // At this point we generate the predication tree. There may be
1468 // duplications since this is a simple recursive scan, but future
1469 // optimizations will clean it up.
1470
1471 unsigned NumIncoming = getNumIncomingValues();
1472
1473 // Generate a sequence of selects of the form:
1474 // SELECT(Mask3, In3,
1475 // SELECT(Mask2, In2,
1476 // SELECT(Mask1, In1,
1477 // In0)))
1478 // Note that Mask0 is never used: lanes for which no path reaches this phi and
1479 // are essentially undef are taken from In0.
1480 VectorParts Entry(State.UF);
1481 for (unsigned In = 0; In < NumIncoming; ++In) {
1482 for (unsigned Part = 0; Part < State.UF; ++Part) {
1483 // We might have single edge PHIs (blocks) - use an identity
1484 // 'select' for the first PHI operand.
1485 Value *In0 = State.get(getIncomingValue(In), Part);
1486 if (In == 0)
1487 Entry[Part] = In0; // Initialize with the first incoming value.
1488 else {
1489 // Select between the current value and the previous incoming edge
1490 // based on the incoming mask.
1491 Value *Cond = State.get(getMask(In), Part);
1492 Entry[Part] =
1493 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1494 }
1495 }
1496 }
1497 for (unsigned Part = 0; Part < State.UF; ++Part)
1498 State.set(this, Entry[Part], Part);
1499}
1500
1501#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1503 VPSlotTracker &SlotTracker) const {
1504 O << Indent << "BLEND ";
1506 O << " =";
1507 if (getNumIncomingValues() == 1) {
1508 // Not a User of any mask: not really blending, this is a
1509 // single-predecessor phi.
1510 O << " ";
1512 } else {
1513 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1514 O << " ";
1516 if (I == 0)
1517 continue;
1518 O << "/";
1520 }
1521 }
1522}
1523#endif
1524
1526 assert(!State.Instance && "Reduction being replicated.");
1527 Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1528 RecurKind Kind = RdxDesc.getRecurrenceKind();
1529 // Propagate the fast-math flags carried by the underlying instruction.
1531 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1532 for (unsigned Part = 0; Part < State.UF; ++Part) {
1533 Value *NewVecOp = State.get(getVecOp(), Part);
1534 if (VPValue *Cond = getCondOp()) {
1535 Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1536 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1537 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1538 Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1539 RdxDesc.getFastMathFlags());
1540 if (State.VF.isVector()) {
1541 Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1542 }
1543
1544 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1545 NewVecOp = Select;
1546 }
1547 Value *NewRed;
1548 Value *NextInChain;
1549 if (IsOrdered) {
1550 if (State.VF.isVector())
1551 NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1552 PrevInChain);
1553 else
1554 NewRed = State.Builder.CreateBinOp(
1555 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1556 NewVecOp);
1557 PrevInChain = NewRed;
1558 } else {
1559 PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1560 NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1561 }
1563 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1564 NewRed, PrevInChain);
1565 } else if (IsOrdered)
1566 NextInChain = NewRed;
1567 else
1568 NextInChain = State.Builder.CreateBinOp(
1569 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1570 State.set(this, NextInChain, Part, /*IsScalar*/ true);
1571 }
1572}
1573
1574#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1576 VPSlotTracker &SlotTracker) const {
1577 O << Indent << "REDUCE ";
1579 O << " = ";
1581 O << " +";
1582 if (isa<FPMathOperator>(getUnderlyingInstr()))
1584 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1586 if (getCondOp()) {
1587 O << ", ";
1589 }
1590 O << ")";
1591 if (RdxDesc.IntermediateStore)
1592 O << " (with final reduction value stored in invariant address sank "
1593 "outside of loop)";
1594}
1595#endif
1596
1598 // Find if the recipe is used by a widened recipe via an intervening
1599 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1600 return any_of(users(), [](const VPUser *U) {
1601 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1602 return any_of(PredR->users(), [PredR](const VPUser *U) {
1603 return !U->usesScalars(PredR);
1604 });
1605 return false;
1606 });
1607}
1608
1609#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1611 VPSlotTracker &SlotTracker) const {
1612 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1613
1614 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1616 O << " = ";
1617 }
1618 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1619 O << "call";
1620 printFlags(O);
1621 O << "@" << CB->getCalledFunction()->getName() << "(";
1623 O, [&O, &SlotTracker](VPValue *Op) {
1624 Op->printAsOperand(O, SlotTracker);
1625 });
1626 O << ")";
1627 } else {
1629 printFlags(O);
1631 }
1632
1633 if (shouldPack())
1634 O << " (S->V)";
1635}
1636#endif
1637
1638/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1639/// if it is either defined outside the vector region or its operand is known to
1640/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1641/// TODO: Uniformity should be associated with a VPValue and there should be a
1642/// generic way to check.
1644 return C->isDefinedOutsideVectorRegions() ||
1645 isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1646 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1647}
1648
1649Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
1651 "Codegen only implemented for first lane.");
1652 switch (Opcode) {
1653 case Instruction::SExt:
1654 case Instruction::ZExt:
1655 case Instruction::Trunc: {
1656 // Note: SExt/ZExt not used yet.
1657 Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1658 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1659 }
1660 default:
1661 llvm_unreachable("opcode not implemented yet");
1662 }
1663}
1664
1665void VPScalarCastRecipe ::execute(VPTransformState &State) {
1666 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1667 for (unsigned Part = 0; Part != State.UF; ++Part) {
1668 Value *Res;
1669 // Only generate a single instance, if the recipe is uniform across UFs and
1670 // VFs.
1671 if (Part > 0 && IsUniformAcrossVFsAndUFs)
1672 Res = State.get(this, VPIteration(0, 0));
1673 else
1674 Res = generate(State, Part);
1675 State.set(this, Res, VPIteration(Part, 0));
1676 }
1677}
1678
1679#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1680void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1681 VPSlotTracker &SlotTracker) const {
1682 O << Indent << "SCALAR-CAST ";
1683 printAsOperand(O, SlotTracker);
1684 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1685 printOperands(O, SlotTracker);
1686 O << " to " << *ResultTy;
1687}
1688#endif
1689
1691 assert(State.Instance && "Branch on Mask works only on single instance.");
1692
1693 unsigned Part = State.Instance->Part;
1694 unsigned Lane = State.Instance->Lane.getKnownLane();
1695
1696 Value *ConditionBit = nullptr;
1697 VPValue *BlockInMask = getMask();
1698 if (BlockInMask) {
1699 ConditionBit = State.get(BlockInMask, Part);
1700 if (ConditionBit->getType()->isVectorTy())
1701 ConditionBit = State.Builder.CreateExtractElement(
1702 ConditionBit, State.Builder.getInt32(Lane));
1703 } else // Block in mask is all-one.
1704 ConditionBit = State.Builder.getTrue();
1705
1706 // Replace the temporary unreachable terminator with a new conditional branch,
1707 // whose two destinations will be set later when they are created.
1708 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1709 assert(isa<UnreachableInst>(CurrentTerminator) &&
1710 "Expected to replace unreachable terminator with conditional branch.");
1711 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1712 CondBr->setSuccessor(0, nullptr);
1713 ReplaceInstWithInst(CurrentTerminator, CondBr);
1714}
1715
1717 assert(State.Instance && "Predicated instruction PHI works per instance.");
1718 Instruction *ScalarPredInst =
1719 cast<Instruction>(State.get(getOperand(0), *State.Instance));
1720 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1721 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1722 assert(PredicatingBB && "Predicated block has no single predecessor.");
1723 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1724 "operand must be VPReplicateRecipe");
1725
1726 // By current pack/unpack logic we need to generate only a single phi node: if
1727 // a vector value for the predicated instruction exists at this point it means
1728 // the instruction has vector users only, and a phi for the vector value is
1729 // needed. In this case the recipe of the predicated instruction is marked to
1730 // also do that packing, thereby "hoisting" the insert-element sequence.
1731 // Otherwise, a phi node for the scalar value is needed.
1732 unsigned Part = State.Instance->Part;
1733 if (State.hasVectorValue(getOperand(0), Part)) {
1734 Value *VectorValue = State.get(getOperand(0), Part);
1735 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1736 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1737 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1738 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1739 if (State.hasVectorValue(this, Part))
1740 State.reset(this, VPhi, Part);
1741 else
1742 State.set(this, VPhi, Part);
1743 // NOTE: Currently we need to update the value of the operand, so the next
1744 // predicated iteration inserts its generated value in the correct vector.
1745 State.reset(getOperand(0), VPhi, Part);
1746 } else {
1747 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1748 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1749 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1750 PredicatingBB);
1751 Phi->addIncoming(ScalarPredInst, PredicatedBB);
1752 if (State.hasScalarValue(this, *State.Instance))
1753 State.reset(this, Phi, *State.Instance);
1754 else
1755 State.set(this, Phi, *State.Instance);
1756 // NOTE: Currently we need to update the value of the operand, so the next
1757 // predicated iteration inserts its generated value in the correct vector.
1758 State.reset(getOperand(0), Phi, *State.Instance);
1759 }
1760}
1761
1762#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1764 VPSlotTracker &SlotTracker) const {
1765 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1767 O << " = ";
1769}
1770
1772 VPSlotTracker &SlotTracker) const {
1773 O << Indent << "WIDEN ";
1774
1775 if (!isStore()) {
1777 O << " = ";
1778 }
1779 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1780
1782}
1783#endif
1784
1786 Value *Start = getStartValue()->getLiveInIRValue();
1787 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1788 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1789
1790 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1791 EntryPart->addIncoming(Start, VectorPH);
1792 EntryPart->setDebugLoc(getDebugLoc());
1793 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1794 State.set(this, EntryPart, Part, /*IsScalar*/ true);
1795}
1796
1797#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1799 VPSlotTracker &SlotTracker) const {
1800 O << Indent << "EMIT ";
1802 O << " = CANONICAL-INDUCTION ";
1804}
1805#endif
1806
1809 VPValue *Step) const {
1810 // Must be an integer induction.
1812 return false;
1813 // Start must match the start value of this canonical induction.
1814 if (Start != getStartValue())
1815 return false;
1816
1817 // If the step is defined by a recipe, it is not a ConstantInt.
1818 if (Step->getDefiningRecipe())
1819 return false;
1820
1821 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1822 return StepC && StepC->isOne();
1823}
1824
1826 return IsScalarAfterVectorization &&
1827 (!IsScalable || vputils::onlyFirstLaneUsed(this));
1828}
1829
1830#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1832 VPSlotTracker &SlotTracker) const {
1833 O << Indent << "EMIT ";
1835 O << " = WIDEN-POINTER-INDUCTION ";
1837 O << ", " << *IndDesc.getStep();
1838}
1839#endif
1840
1842 assert(!State.Instance && "cannot be used in per-lane");
1843 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1844 SCEVExpander Exp(SE, DL, "induction");
1845
1846 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1847 &*State.Builder.GetInsertPoint());
1848 assert(!State.ExpandedSCEVs.contains(Expr) &&
1849 "Same SCEV expanded multiple times");
1850 State.ExpandedSCEVs[Expr] = Res;
1851 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1852 State.set(this, Res, {Part, 0});
1853}
1854
1855#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1857 VPSlotTracker &SlotTracker) const {
1858 O << Indent << "EMIT ";
1860 O << " = EXPAND SCEV " << *Expr;
1861}
1862#endif
1863
1865 Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1866 Type *STy = CanonicalIV->getType();
1867 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1868 ElementCount VF = State.VF;
1869 Value *VStart = VF.isScalar()
1870 ? CanonicalIV
1871 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1872 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1873 Value *VStep = createStepForVF(Builder, STy, VF, Part);
1874 if (VF.isVector()) {
1875 VStep = Builder.CreateVectorSplat(VF, VStep);
1876 VStep =
1877 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1878 }
1879 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1880 State.set(this, CanonicalVectorIV, Part);
1881 }
1882}
1883
1884#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1886 VPSlotTracker &SlotTracker) const {
1887 O << Indent << "EMIT ";
1889 O << " = WIDEN-CANONICAL-INDUCTION ";
1891}
1892#endif
1893
1895 auto &Builder = State.Builder;
1896 // Create a vector from the initial value.
1897 auto *VectorInit = getStartValue()->getLiveInIRValue();
1898
1899 Type *VecTy = State.VF.isScalar()
1900 ? VectorInit->getType()
1901 : VectorType::get(VectorInit->getType(), State.VF);
1902
1903 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1904 if (State.VF.isVector()) {
1905 auto *IdxTy = Builder.getInt32Ty();
1906 auto *One = ConstantInt::get(IdxTy, 1);
1907 IRBuilder<>::InsertPointGuard Guard(Builder);
1908 Builder.SetInsertPoint(VectorPH->getTerminator());
1909 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1910 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1911 VectorInit = Builder.CreateInsertElement(
1912 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1913 }
1914
1915 // Create a phi node for the new recurrence.
1916 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1917 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1918 EntryPart->addIncoming(VectorInit, VectorPH);
1919 State.set(this, EntryPart, 0);
1920}
1921
1922#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1924 VPSlotTracker &SlotTracker) const {
1925 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1927 O << " = phi ";
1929}
1930#endif
1931
1933 auto &Builder = State.Builder;
1934
1935 // Reductions do not have to start at zero. They can start with
1936 // any loop invariant values.
1937 VPValue *StartVPV = getStartValue();
1938 Value *StartV = StartVPV->getLiveInIRValue();
1939
1940 // In order to support recurrences we need to be able to vectorize Phi nodes.
1941 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1942 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1943 // this value when we vectorize all of the instructions that use the PHI.
1944 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1945 Type *VecTy = ScalarPHI ? StartV->getType()
1946 : VectorType::get(StartV->getType(), State.VF);
1947
1948 BasicBlock *HeaderBB = State.CFG.PrevBB;
1949 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1950 "recipe must be in the vector loop header");
1951 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1952 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1953 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
1954 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
1955 State.set(this, EntryPart, Part, IsInLoop);
1956 }
1957
1958 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1959
1960 Value *Iden = nullptr;
1961 RecurKind RK = RdxDesc.getRecurrenceKind();
1964 // MinMax and AnyOf reductions have the start value as their identity.
1965 if (ScalarPHI) {
1966 Iden = StartV;
1967 } else {
1968 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1969 Builder.SetInsertPoint(VectorPH->getTerminator());
1970 StartV = Iden =
1971 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1972 }
1973 } else {
1974 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1975 RdxDesc.getFastMathFlags());
1976
1977 if (!ScalarPHI) {
1978 Iden = Builder.CreateVectorSplat(State.VF, Iden);
1979 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1980 Builder.SetInsertPoint(VectorPH->getTerminator());
1981 Constant *Zero = Builder.getInt32(0);
1982 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
1983 }
1984 }
1985
1986 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1987 Value *EntryPart = State.get(this, Part, IsInLoop);
1988 // Make sure to add the reduction start value only to the
1989 // first unroll part.
1990 Value *StartVal = (Part == 0) ? StartV : Iden;
1991 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
1992 }
1993}
1994
1995#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1997 VPSlotTracker &SlotTracker) const {
1998 O << Indent << "WIDEN-REDUCTION-PHI ";
1999
2001 O << " = phi ";
2003}
2004#endif
2005
2008 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2009
2010 Value *Op0 = State.get(getOperand(0), 0);
2011 Type *VecTy = Op0->getType();
2012 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2013 State.set(this, VecPhi, 0);
2014}
2015
2016#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2018 VPSlotTracker &SlotTracker) const {
2019 O << Indent << "WIDEN-PHI ";
2020
2021 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2022 // Unless all incoming values are modeled in VPlan print the original PHI
2023 // directly.
2024 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2025 // values as VPValues.
2026 if (getNumOperands() != OriginalPhi->getNumOperands()) {
2027 O << VPlanIngredient(OriginalPhi);
2028 return;
2029 }
2030
2032 O << " = phi ";
2034}
2035#endif
2036
2037// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2038// remove VPActiveLaneMaskPHIRecipe.
2040 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2041 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2042 Value *StartMask = State.get(getOperand(0), Part);
2043 PHINode *EntryPart =
2044 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2045 EntryPart->addIncoming(StartMask, VectorPH);
2046 EntryPart->setDebugLoc(getDebugLoc());
2047 State.set(this, EntryPart, Part);
2048 }
2049}
2050
2051#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2053 VPSlotTracker &SlotTracker) const {
2054 O << Indent << "ACTIVE-LANE-MASK-PHI ";
2055
2057 O << " = phi ";
2059}
2060#endif
2061
2063 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2064 assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2065 Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2066 PHINode *EntryPart =
2067 State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2068 EntryPart->addIncoming(Start, VectorPH);
2069 EntryPart->setDebugLoc(getDebugLoc());
2070 State.set(this, EntryPart, 0, /*IsScalar=*/true);
2071}
2072
2073#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2075 VPSlotTracker &SlotTracker) const {
2076 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2077
2079 O << " = phi ";
2081}
2082#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
Hexagon Common GEP
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C)
Checks if C is uniform across all VFs and UFs.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:966
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:989
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:991
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:211
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:123
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:307
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:258
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:201
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2007
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2351
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2462
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2084
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2450
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1533
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1186
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1214
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:466
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2023
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2525
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2071
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:277
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2356
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2387
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2231
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:1005
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2011
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2151
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:289
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2402
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2341
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1587
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:109
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2656
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
bool isBinaryOp() const
Definition: Instruction.h:257
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:254
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
Type * getRecurrenceType() const
Returns the type of the recurrence.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyzed scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:693
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2693
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2740
iterator end()
Definition: VPlan.h:2724
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2752
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1957
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1962
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:1954
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:498
size_t getNumSuccessors() const
Definition: VPlan.h:543
VPlan * getPlan()
Definition: VPlan.cpp:148
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:153
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:533
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2233
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:314
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:399
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:411
unsigned getVPDefID() const
Definition: VPlanValue.h:431
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2631
VPValue * getStartValue() const
Definition: VPlan.h:2630
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1654
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1166
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1176
@ CalculateTripCountMinusVF
Definition: VPlan.h:1174
bool hasResult() const
Definition: VPlan.h:1284
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1260
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:169
static VPLane getFirstLane()
Definition: VPlan.h:167
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:703
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:718
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:743
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:809
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:903
ExactFlagsTy ExactFlags
Definition: VPlan.h:959
FastMathFlagsTy FMFs
Definition: VPlan.h:962
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:961
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1083
bool isInBounds() const
Definition: VPlan.h:1122
GEPFlagsTy GEPFlags
Definition: VPlan.h:960
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1129
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:958
WrapFlagsTy WrapFlags
Definition: VPlan.h:957
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1133
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1116
bool hasNoSignedWrap() const
Definition: VPlan.h:1139
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1926
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2117
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2119
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2115
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2826
const VPBlockBase * getEntry() const
Definition: VPlan.h:2865
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:2197
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1411
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2680
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:894
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:449
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:204
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1317
operand_range operands()
Definition: VPlanValue.h:279
unsigned getNumOperands() const
Definition: VPlanValue.h:253
operand_iterator op_begin()
Definition: VPlanValue.h:275
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:254
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:78
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:118
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1302
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:174
user_range users()
Definition: VPlanValue.h:134
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1407
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1738
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1733
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1744
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isStore() const
Returns true if this recipe is a store.
Definition: VPlan.h:2354
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:2927
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3111
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5079
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:676
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1469
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3471
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1409
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1404
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2165
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1037
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1211
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc. The target is queried to determi...
Definition: LoopUtils.cpp:1195
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:219
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:374
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:382
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:247
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:418
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:421
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:312
struct llvm::VPTransformState::CFGState CFG
VPValue * EVL
If EVL (Explicit Vector Length) is not nullptr, then EVL must be a valid value set during plan transf...
Definition: VPlan.h:252
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:297
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:257
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:398
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:285
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:279
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:242
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:407
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
bool isInvariantCond() const
Definition: VPlan.h:1517
VPValue * getCond() const
Definition: VPlan.h:1513
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.