LLVM 19.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
33#include <cassert>
34
35using namespace llvm;
36
38
39namespace llvm {
41}
42
43#define LV_NAME "loop-vectorize"
44#define DEBUG_TYPE LV_NAME
45
47 switch (getVPDefID()) {
48 case VPInterleaveSC:
49 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
50 case VPWidenStoreEVLSC:
51 case VPWidenStoreSC:
52 return true;
53 case VPReplicateSC:
54 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
55 ->mayWriteToMemory();
56 case VPWidenCallSC:
57 return !cast<VPWidenCallRecipe>(this)
58 ->getCalledScalarFunction()
59 ->onlyReadsMemory();
60 case VPBranchOnMaskSC:
61 case VPScalarIVStepsSC:
62 case VPPredInstPHISC:
63 return false;
64 case VPBlendSC:
65 case VPReductionSC:
66 case VPWidenCanonicalIVSC:
67 case VPWidenCastSC:
68 case VPWidenGEPSC:
69 case VPWidenIntOrFpInductionSC:
70 case VPWidenLoadEVLSC:
71 case VPWidenLoadSC:
72 case VPWidenPHISC:
73 case VPWidenSC:
74 case VPWidenSelectSC: {
75 const Instruction *I =
76 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
77 (void)I;
78 assert((!I || !I->mayWriteToMemory()) &&
79 "underlying instruction may write to memory");
80 return false;
81 }
82 default:
83 return true;
84 }
85}
86
88 switch (getVPDefID()) {
89 case VPWidenLoadEVLSC:
90 case VPWidenLoadSC:
91 return true;
92 case VPReplicateSC:
93 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
94 ->mayReadFromMemory();
95 case VPWidenCallSC:
96 return !cast<VPWidenCallRecipe>(this)
97 ->getCalledScalarFunction()
98 ->onlyWritesMemory();
99 case VPBranchOnMaskSC:
100 case VPPredInstPHISC:
101 case VPScalarIVStepsSC:
102 case VPWidenStoreEVLSC:
103 case VPWidenStoreSC:
104 return false;
105 case VPBlendSC:
106 case VPReductionSC:
107 case VPWidenCanonicalIVSC:
108 case VPWidenCastSC:
109 case VPWidenGEPSC:
110 case VPWidenIntOrFpInductionSC:
111 case VPWidenPHISC:
112 case VPWidenSC:
113 case VPWidenSelectSC: {
114 const Instruction *I =
115 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
116 (void)I;
117 assert((!I || !I->mayReadFromMemory()) &&
118 "underlying instruction may read from memory");
119 return false;
120 }
121 default:
122 return true;
123 }
124}
125
127 switch (getVPDefID()) {
128 case VPDerivedIVSC:
129 case VPPredInstPHISC:
130 case VPScalarCastSC:
131 return false;
132 case VPInstructionSC:
133 switch (cast<VPInstruction>(this)->getOpcode()) {
134 case Instruction::Or:
135 case Instruction::ICmp:
136 case Instruction::Select:
144 return false;
145 default:
146 return true;
147 }
148 case VPWidenCallSC: {
149 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
150 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
151 }
152 case VPBlendSC:
153 case VPReductionSC:
154 case VPScalarIVStepsSC:
155 case VPWidenCanonicalIVSC:
156 case VPWidenCastSC:
157 case VPWidenGEPSC:
158 case VPWidenIntOrFpInductionSC:
159 case VPWidenPHISC:
160 case VPWidenPointerInductionSC:
161 case VPWidenSC:
162 case VPWidenSelectSC: {
163 const Instruction *I =
164 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
165 (void)I;
166 assert((!I || !I->mayHaveSideEffects()) &&
167 "underlying instruction has side-effects");
168 return false;
169 }
170 case VPInterleaveSC:
171 return mayWriteToMemory();
172 case VPWidenLoadEVLSC:
173 case VPWidenLoadSC:
174 case VPWidenStoreEVLSC:
175 case VPWidenStoreSC:
176 assert(
177 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
179 "mayHaveSideffects result for ingredient differs from this "
180 "implementation");
181 return mayWriteToMemory();
182 case VPReplicateSC: {
183 auto *R = cast<VPReplicateRecipe>(this);
184 return R->getUnderlyingInstr()->mayHaveSideEffects();
185 }
186 default:
187 return true;
188 }
189}
190
192 auto Lane = VPLane::getLastLaneForVF(State.VF);
193 VPValue *ExitValue = getOperand(0);
195 Lane = VPLane::getFirstLane();
196 VPBasicBlock *MiddleVPBB =
197 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
198 assert(MiddleVPBB->getNumSuccessors() == 0 &&
199 "the middle block must not have any successors");
200 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
201 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
202 MiddleBB);
203}
204
205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
207 O << "Live-out ";
209 O << " = ";
211 O << "\n";
212}
213#endif
214
216 assert(!Parent && "Recipe already in some VPBasicBlock");
217 assert(InsertPos->getParent() &&
218 "Insertion position not in any VPBasicBlock");
219 InsertPos->getParent()->insert(this, InsertPos->getIterator());
220}
221
224 assert(!Parent && "Recipe already in some VPBasicBlock");
225 assert(I == BB.end() || I->getParent() == &BB);
226 BB.insert(this, I);
227}
228
230 assert(!Parent && "Recipe already in some VPBasicBlock");
231 assert(InsertPos->getParent() &&
232 "Insertion position not in any VPBasicBlock");
233 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
234}
235
237 assert(getParent() && "Recipe not in any VPBasicBlock");
239 Parent = nullptr;
240}
241
243 assert(getParent() && "Recipe not in any VPBasicBlock");
245}
246
249 insertAfter(InsertPos);
250}
251
255 insertBefore(BB, I);
256}
257
259 assert(OpType == OperationType::FPMathOp &&
260 "recipe doesn't have fast math flags");
261 FastMathFlags Res;
262 Res.setAllowReassoc(FMFs.AllowReassoc);
263 Res.setNoNaNs(FMFs.NoNaNs);
264 Res.setNoInfs(FMFs.NoInfs);
265 Res.setNoSignedZeros(FMFs.NoSignedZeros);
266 Res.setAllowReciprocal(FMFs.AllowReciprocal);
267 Res.setAllowContract(FMFs.AllowContract);
268 Res.setApproxFunc(FMFs.ApproxFunc);
269 return Res;
270}
271
274 const Twine &Name)
275 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
276 Pred, DL),
277 Opcode(Opcode), Name(Name.str()) {
278 assert(Opcode == Instruction::ICmp &&
279 "only ICmp predicates supported at the moment");
280}
281
283 std::initializer_list<VPValue *> Operands,
284 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
285 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
286 Opcode(Opcode), Name(Name.str()) {
287 // Make sure the VPInstruction is a floating-point operation.
288 assert(isFPMathOp() && "this op can't take fast-math flags");
289}
290
/// Returns true exactly when this VPInstruction's opcode is
/// VPInstruction::PtrAdd and vputils::onlyFirstLaneUsed(this) is false.
291bool VPInstruction::doesGeneratePerAllLanes() const {
292 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
293}
294
/// Reports whether this VPInstruction can be generated as a scalar for the
/// first lane (reading of the name; confirm against the declaration).
/// From the visible code: returns true when isVectorToScalar() holds and for
/// the opcodes listed in the switch, and false for every other opcode.
/// NOTE(review): this listing drops original lines 296 and 301-306, i.e. the
/// condition guarding the first `return true` and the switch's case labels, so
/// the exact set of accepted opcodes cannot be read from here.
295bool VPInstruction::canGenerateScalarForFirstLane() const {
 // NOTE(review): the `if` guarding this return (original line 296) is missing
 // from the listing; as shown, everything below would be unreachable.
297 return true;
298 if (isVectorToScalar())
299 return true;
300 switch (Opcode) {
 // NOTE(review): the case labels (original lines 301-306) are missing here.
307 return true;
308 default:
309 return false;
310 }
311}
312
/// Generates the value of this VPInstruction for the single iteration \p Lane.
/// The visible code fetches operands 0 and 1 for \p Lane from \p State and
/// returns the result of Builder.CreatePtrAdd on them, named with Name.
313Value *VPInstruction::generatePerLane(VPTransformState &State,
314 const VPIteration &Lane) {
315 IRBuilderBase &Builder = State.Builder;
316
 // NOTE(review): original line 317 is missing from this listing; the string
 // below is presumably the message of an assert on the opcode - confirm.
318 "only PtrAdd opcodes are supported for now");
319 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
320 State.get(getOperand(1), Lane), Name);
321}
322
/// Generates the IR value of this VPInstruction for unroll part \p Part and
/// returns it.
///
/// Visible structure: a first scope builds a binary operation from operands 0
/// and 1 with Builder.CreateBinOp and applies the recipe's flags; otherwise a
/// switch on getOpcode() handles each opcode in its own scope. Two scopes
/// return nullptr when \p Part is not 0, three return the value already
/// recorded for part 0, and an unhandled opcode hits llvm_unreachable.
///
/// NOTE(review): this listing drops a number of original lines (326, 353, 371,
/// 392, 404, 429, 438, 440, 451, 461, 482, 485, 516, 523, 538, 560, 584, 589
/// and 590). Most of them appear to be the `case` labels of the switch; each
/// gap is marked below. Do not rely on this text as compilable source.
323Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
324 IRBuilderBase &Builder = State.Builder;
325
 // NOTE(review): original line 326 (the condition opening this scope) is
 // missing; presumably it selects binary-operator opcodes - confirm.
327 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
328 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
329 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
330 auto *Res =
331 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
 // CreateBinOp's result is only given flags when it is an Instruction.
332 if (auto *I = dyn_cast<Instruction>(Res))
333 setFlags(I);
334 return Res;
335 }
336
337 switch (getOpcode()) {
338 case VPInstruction::Not: {
339 Value *A = State.get(getOperand(0), Part);
340 return Builder.CreateNot(A, Name);
341 }
342 case Instruction::ICmp: {
343 Value *A = State.get(getOperand(0), Part);
344 Value *B = State.get(getOperand(1), Part);
345 return Builder.CreateCmp(getPredicate(), A, B, Name);
346 }
347 case Instruction::Select: {
348 Value *Cond = State.get(getOperand(0), Part);
349 Value *Op1 = State.get(getOperand(1), Part);
350 Value *Op2 = State.get(getOperand(2), Part);
351 return Builder.CreateSelect(Cond, Op1, Op2, Name);
352 }
 // NOTE(review): original line 353 (presumably the `case` label for the
 // active-lane-mask opcode) is missing from this listing.
354 // Get first lane of vector induction variable.
355 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
356 // Get the original loop tripcount.
357 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
358
359 // If this part of the active lane mask is scalar, generate the CMP directly
360 // to avoid unnecessary extracts.
361 if (State.VF.isScalar())
362 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
363 Name);
364
365 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
366 auto *PredTy = VectorType::get(Int1Ty, State.VF);
367 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
368 {PredTy, ScalarTC->getType()},
369 {VIVElem0, ScalarTC}, nullptr, Name);
370 }
 // NOTE(review): original line 371 (presumably the `case` label for the
 // first-order recurrence splice) is missing from this listing.
372 // Generate code to combine the previous and current values in vector v3.
373 //
374 // vector.ph:
375 // v_init = vector(..., ..., ..., a[-1])
376 // br vector.body
377 //
378 // vector.body
379 // i = phi [0, vector.ph], [i+4, vector.body]
380 // v1 = phi [v_init, vector.ph], [v2, vector.body]
381 // v2 = a[i, i+1, i+2, i+3];
382 // v3 = vector(v1(3), v2(0, 1, 2))
383
384 // For the first part, use the recurrence phi (v1), otherwise v2.
385 auto *V1 = State.get(getOperand(0), 0);
386 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
 // A non-vector value is returned as-is; no splice is created for it.
387 if (!PartMinus1->getType()->isVectorTy())
388 return PartMinus1;
389 Value *V2 = State.get(getOperand(1), Part);
390 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
391 }
 // NOTE(review): original line 392 (a `case` label) is missing. The code
 // below computes select(TC > Step, TC - Step, 0) once, for part 0.
393 if (Part != 0)
394 return State.get(this, 0, /*IsScalar*/ true);
395
396 Value *ScalarTC = State.get(getOperand(0), {0, 0});
397 Value *Step =
398 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
399 Value *Sub = Builder.CreateSub(ScalarTC, Step);
400 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
401 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
402 return Builder.CreateSelect(Cmp, Sub, Zero);
403 }
 // NOTE(review): original line 404 (presumably the `case` label for the
 // explicit-vector-length opcode) is missing from this listing.
405 // Compute EVL
406 auto GetEVL = [=](VPTransformState &State, Value *AVL) {
407 assert(AVL->getType()->isIntegerTy() &&
408 "Requested vector length should be an integer.");
409
410 // TODO: Add support for MaxSafeDist for correct loop emission.
411 assert(State.VF.isScalable() && "Expected scalable vector factor.");
412 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
413
414 Value *EVL = State.Builder.CreateIntrinsic(
415 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
416 {AVL, VFArg, State.Builder.getTrue()});
417 return EVL;
418 };
419 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
420 // be outside of the main loop.
421 assert(Part == 0 && "No unrolling expected for predicated vectorization.");
422 // Compute VTC - IV as the AVL (requested vector length).
423 Value *Index = State.get(getOperand(0), VPIteration(0, 0));
424 Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
425 Value *AVL = State.Builder.CreateSub(TripCount, Index);
426 Value *EVL = GetEVL(State, AVL);
427 return EVL;
428 }
 // NOTE(review): original line 429 (a `case` label) is missing from this
 // listing.
430 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
431 if (Part == 0)
432 return IV;
433
434 // The canonical IV is incremented by the vectorization factor (num of SIMD
435 // elements) times the unroll part.
436 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
437 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
 // NOTE(review): original line 438 (the final argument and the end of this
 // call) is missing from this listing.
439 }
 // NOTE(review): original line 440 (presumably the `case` label for the
 // branch-on-cond opcode) is missing from this listing.
441 if (Part != 0)
442 return nullptr;
443
444 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
445 // Replace the temporary unreachable terminator with a new conditional
446 // branch, hooking it up to backward destination for exiting blocks now and
447 // to forward destination(s) later when they are created.
448 BranchInst *CondBr =
449 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
450 CondBr->setSuccessor(0, nullptr);
 // NOTE(review): original line 451 is missing from this listing.
452
453 if (!getParent()->isExiting())
454 return CondBr;
455
456 VPRegionBlock *ParentRegion = getParent()->getParent();
457 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
458 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
459 return CondBr;
460 }
 // NOTE(review): original line 461 (presumably the `case` label for the
 // branch-on-count opcode) is missing from this listing.
462 if (Part != 0)
463 return nullptr;
464 // First create the compare.
465 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
466 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
467 Value *Cond = Builder.CreateICmpEQ(IV, TC);
468
469 // Now create the branch.
470 auto *Plan = getParent()->getPlan();
471 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
472 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
473
474 // Replace the temporary unreachable terminator with a new conditional
475 // branch, hooking it up to backward destination (the header) now and to the
476 // forward destination (the exit/middle block) later when it is created.
477 // Note that CreateCondBr expects a valid BB as first argument, so we need
478 // to set it to nullptr later.
479 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
480 State.CFG.VPBB2IRBB[Header]);
481 CondBr->setSuccessor(0, nullptr);
 // NOTE(review): original line 482 is missing from this listing.
483 return CondBr;
484 }
 // NOTE(review): original line 485 (presumably the `case` label for the
 // compute-reduction-result opcode) is missing from this listing.
486 if (Part != 0)
487 return State.get(this, 0, /*IsScalar*/ true);
488
489 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
490 // and will be removed by breaking up the recipe further.
491 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
492 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
493 // Get its reduction variable descriptor.
494 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
495
496 RecurKind RK = RdxDesc.getRecurrenceKind();
497
498 VPValue *LoopExitingDef = getOperand(1);
499 Type *PhiTy = OrigPhi->getType();
500 VectorParts RdxParts(State.UF);
 // Note: the loop variable below shadows the function parameter Part.
501 for (unsigned Part = 0; Part < State.UF; ++Part)
502 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
503
504 // If the vector reduction can be performed in a smaller type, we truncate
505 // then extend the loop exit value to enable InstCombine to evaluate the
506 // entire expression in the smaller type.
507 // TODO: Handle this in truncateToMinBW.
508 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
509 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
510 for (unsigned Part = 0; Part < State.UF; ++Part)
511 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
512 }
513 // Reduce all of the unrolled parts into a single vector.
514 Value *ReducedPartRdx = RdxParts[0];
515 unsigned Op = RecurrenceDescriptor::getOpcode(RK);
 // NOTE(review): original line 516 (the condition guarding the assignment
 // below) is missing from this listing.
517 Op = Instruction::Or;
518
519 if (PhiR->isOrdered()) {
520 ReducedPartRdx = RdxParts[State.UF - 1];
521 } else {
522 // Floating-point operations should have some FMF to enable the reduction.
 // NOTE(review): original line 523 is missing from this listing.
524 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
525 for (unsigned Part = 1; Part < State.UF; ++Part) {
526 Value *RdxPart = RdxParts[Part];
527 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
528 ReducedPartRdx = Builder.CreateBinOp(
529 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
530 else
531 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
532 }
533 }
534
535 // Create the reduction after the loop. Note that inloop reductions create
536 // the target reduction in the loop using a Reduction recipe.
537 if ((State.VF.isVector() ||
 // NOTE(review): original line 538 (the second operand of this `||`) is
 // missing from this listing.
539 !PhiR->isInLoop()) {
540 ReducedPartRdx =
541 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
542 // If the reduction can be performed in a smaller type, we need to extend
543 // the reduction to the wider type before we branch to the original loop.
544 if (PhiTy != RdxDesc.getRecurrenceType())
545 ReducedPartRdx = RdxDesc.isSigned()
546 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
547 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
548 }
549
550 // If there were stores of the reduction value to a uniform memory address
551 // inside the loop, create the final store here.
552 if (StoreInst *SI = RdxDesc.IntermediateStore) {
553 auto *NewSI = Builder.CreateAlignedStore(
554 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
555 propagateMetadata(NewSI, SI);
556 }
557
558 return ReducedPartRdx;
559 }
 // NOTE(review): original line 560 (presumably the `case` label for the
 // extract-from-end opcode) is missing from this listing.
561 if (Part != 0)
562 return State.get(this, 0, /*IsScalar*/ true);
563
 // Operand 1 must be a live-in ConstantInt; cast<> asserts otherwise.
564 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
565 unsigned Offset = CI->getZExtValue();
566 assert(Offset > 0 && "Offset from end must be positive");
567 Value *Res;
568 if (State.VF.isVector()) {
569 assert(Offset <= State.VF.getKnownMinValue() &&
570 "invalid offset to extract from");
571 // Extract lane VF - Offset from the operand.
572 Res = State.get(
573 getOperand(0),
574 VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset)));
575 } else {
576 assert(Offset <= State.UF && "invalid offset to extract from");
577 // When loop is unrolled without vectorizing, retrieve UF - Offset.
578 Res = State.get(getOperand(0), State.UF - Offset);
579 }
 // Only a freshly extracted element is renamed; other values keep their name.
580 if (isa<ExtractElementInst>(Res))
581 Res->setName(Name);
582 return Res;
583 }
 // NOTE(review): original line 584 (presumably the `case` label for the
 // logical-and opcode) is missing from this listing.
585 Value *A = State.get(getOperand(0), Part);
586 Value *B = State.get(getOperand(1), Part);
587 return Builder.CreateLogicalAnd(A, B, Name);
588 }
 // NOTE(review): original lines 589-590 are missing; presumably the `case`
 // label for PtrAdd and the start of the assert whose message follows.
591 "can only generate first lane for PtrAdd");
592 Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
593 Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
594 return Builder.CreatePtrAdd(Ptr, Addend, Name);
595 }
596 default:
597 llvm_unreachable("Unsupported opcode for instruction");
598 }
599}
600
604}
605
606#if !defined(NDEBUG)
/// Returns true if Opcode is one of FAdd, FMul, FNeg, FSub, FDiv, FRem, FCmp
/// or Select, i.e. an opcode this recipe treats as able to carry fast-math
/// flags.
607bool VPInstruction::isFPMathOp() const {
608 // Inspired by FPMathOperator::classof. Notable difference: Call and PHI
609 // opcodes are not accepted here yet, whereas Select is (last term below).
610 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
611 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
612 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
613 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
614}
615#endif
616
618 assert(!State.Instance && "VPInstruction executing an Instance");
620 assert((hasFastMathFlags() == isFPMathOp() ||
621 getOpcode() == Instruction::Select) &&
622 "Recipe not a FPMathOp but has fast-math flags?");
623 if (hasFastMathFlags())
626 bool GeneratesPerFirstLaneOnly =
627 canGenerateScalarForFirstLane() &&
629 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
630 bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this);
631 for (unsigned Part = 0; Part < State.UF; ++Part) {
632 if (GeneratesPerAllLanes) {
633 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
634 Lane != NumLanes; ++Lane) {
635 Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
636 assert(GeneratedValue && "generatePerLane must produce a value");
637 State.set(this, GeneratedValue, VPIteration(Part, Lane));
638 }
639 continue;
640 }
641
642 if (Part != 0 && OnlyFirstPartUsed && hasResult()) {
643 Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly);
644 State.set(this, Part0, Part,
645 /*IsScalar*/ GeneratesPerFirstLaneOnly);
646 continue;
647 }
648
649 Value *GeneratedValue = generatePerPart(State, Part);
650 if (!hasResult())
651 continue;
652 assert(GeneratedValue && "generatePerPart must produce a value");
653 assert((GeneratedValue->getType()->isVectorTy() ==
654 !GeneratesPerFirstLaneOnly ||
655 State.VF.isScalar()) &&
656 "scalar value but not only first lane defined");
657 State.set(this, GeneratedValue, Part,
658 /*IsScalar*/ GeneratesPerFirstLaneOnly);
659 }
660}
661
663 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
665 return vputils::onlyFirstLaneUsed(this);
666
667 switch (getOpcode()) {
668 default:
669 return false;
670 case Instruction::ICmp:
672 // TODO: Cover additional opcodes.
673 return vputils::onlyFirstLaneUsed(this);
679 return true;
680 };
681 llvm_unreachable("switch should return");
682}
683
685 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
687 return vputils::onlyFirstPartUsed(this);
688
689 switch (getOpcode()) {
690 default:
691 return false;
692 case Instruction::ICmp:
693 case Instruction::Select:
694 return vputils::onlyFirstPartUsed(this);
698 return true;
699 };
700 llvm_unreachable("switch should return");
701}
702
703#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
705 VPSlotTracker SlotTracker(getParent()->getPlan());
706 print(dbgs(), "", SlotTracker);
707}
708
710 VPSlotTracker &SlotTracker) const {
711 O << Indent << "EMIT ";
712
713 if (hasResult()) {
715 O << " = ";
716 }
717
718 switch (getOpcode()) {
720 O << "not";
721 break;
723 O << "combined load";
724 break;
726 O << "combined store";
727 break;
729 O << "active lane mask";
730 break;
732 O << "EXPLICIT-VECTOR-LENGTH";
733 break;
735 O << "first-order splice";
736 break;
738 O << "branch-on-cond";
739 break;
741 O << "TC > VF ? TC - VF : 0";
742 break;
744 O << "VF * Part +";
745 break;
747 O << "branch-on-count";
748 break;
750 O << "extract-from-end";
751 break;
753 O << "compute-reduction-result";
754 break;
756 O << "logical-and";
757 break;
759 O << "ptradd";
760 break;
761 default:
763 }
764
765 printFlags(O);
767
768 if (auto DL = getDebugLoc()) {
769 O << ", !dbg ";
770 DL.print(O);
771 }
772}
773#endif
774
776 assert(State.VF.isVector() && "not widening");
777 Function *CalledScalarFn = getCalledScalarFunction();
778 assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
779 "DbgInfoIntrinsic should have been dropped during VPlan construction");
781
782 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
783 FunctionType *VFTy = nullptr;
784 if (Variant)
785 VFTy = Variant->getFunctionType();
786 for (unsigned Part = 0; Part < State.UF; ++Part) {
787 SmallVector<Type *, 2> TysForDecl;
788 // Add return type if intrinsic is overloaded on it.
789 if (UseIntrinsic &&
790 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
791 TysForDecl.push_back(VectorType::get(
792 CalledScalarFn->getReturnType()->getScalarType(), State.VF));
794 for (const auto &I : enumerate(arg_operands())) {
795 // Some intrinsics have a scalar argument - don't replace it with a
796 // vector.
797 Value *Arg;
798 if (UseIntrinsic &&
799 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
800 Arg = State.get(I.value(), VPIteration(0, 0));
801 // Some vectorized function variants may also take a scalar argument,
802 // e.g. linear parameters for pointers. This needs to be the scalar value
803 // from the start of the respective part when interleaving.
804 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
805 Arg = State.get(I.value(), VPIteration(Part, 0));
806 else
807 Arg = State.get(I.value(), Part);
808 if (UseIntrinsic &&
809 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
810 TysForDecl.push_back(Arg->getType());
811 Args.push_back(Arg);
812 }
813
814 Function *VectorF;
815 if (UseIntrinsic) {
816 // Use vector version of the intrinsic.
817 Module *M = State.Builder.GetInsertBlock()->getModule();
818 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
819 assert(VectorF && "Can't retrieve vector intrinsic.");
820 } else {
821#ifndef NDEBUG
822 assert(Variant != nullptr && "Can't create vector function.");
823#endif
824 VectorF = Variant;
825 }
826
827 auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
829 if (CI)
830 CI->getOperandBundlesAsDefs(OpBundles);
831
832 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
833
834 if (isa<FPMathOperator>(V))
835 V->copyFastMathFlags(CI);
836
837 if (!V->getType()->isVoidTy())
838 State.set(this, V, Part);
839 State.addMetadata(V, CI);
840 }
841}
842
843#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
845 VPSlotTracker &SlotTracker) const {
846 O << Indent << "WIDEN-CALL ";
847
848 Function *CalledFn = getCalledScalarFunction();
849 if (CalledFn->getReturnType()->isVoidTy())
850 O << "void ";
851 else {
853 O << " = ";
854 }
855
856 O << "call @" << CalledFn->getName() << "(";
858 Op->printAsOperand(O, SlotTracker);
859 });
860 O << ")";
861
862 if (VectorIntrinsicID)
863 O << " (using vector intrinsic)";
864 else {
865 O << " (using library function";
866 if (Variant->hasName())
867 O << ": " << Variant->getName();
868 O << ")";
869 }
870}
871
873 VPSlotTracker &SlotTracker) const {
874 O << Indent << "WIDEN-SELECT ";
876 O << " = select ";
878 O << ", ";
880 O << ", ";
882 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
883}
884#endif
885
888
889 // The condition can be loop invariant but still defined inside the
890 // loop. This means that we can't just use the original 'cond' value.
891 // We have to take the 'vectorized' value and pick the first lane.
892 // Instcombine will make this a no-op.
893 auto *InvarCond =
894 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
895
896 for (unsigned Part = 0; Part < State.UF; ++Part) {
897 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
898 Value *Op0 = State.get(getOperand(1), Part);
899 Value *Op1 = State.get(getOperand(2), Part);
900 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
901 State.set(this, Sel, Part);
902 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
903 }
904}
905
/// Builds the recipe-side flag set from an IR FastMathFlags value by copying
/// each of the seven individual flags queried from \p FMF into the field of
/// the same name.
906VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
907 const FastMathFlags &FMF) {
908 AllowReassoc = FMF.allowReassoc();
909 NoNaNs = FMF.noNaNs();
910 NoInfs = FMF.noInfs();
911 NoSignedZeros = FMF.noSignedZeros();
912 AllowReciprocal = FMF.allowReciprocal();
913 AllowContract = FMF.allowContract();
914 ApproxFunc = FMF.approxFunc();
915}
916
917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
919 switch (OpType) {
920 case OperationType::Cmp:
922 break;
923 case OperationType::DisjointOp:
925 O << " disjoint";
926 break;
927 case OperationType::PossiblyExactOp:
928 if (ExactFlags.IsExact)
929 O << " exact";
930 break;
931 case OperationType::OverflowingBinOp:
932 if (WrapFlags.HasNUW)
933 O << " nuw";
934 if (WrapFlags.HasNSW)
935 O << " nsw";
936 break;
937 case OperationType::FPMathOp:
939 break;
940 case OperationType::GEPOp:
942 O << " inbounds";
943 break;
944 case OperationType::NonNegOp:
945 if (NonNegFlags.NonNeg)
946 O << " nneg";
947 break;
948 case OperationType::Other:
949 break;
950 }
951 if (getNumOperands() > 0)
952 O << " ";
953}
954#endif
955
958 auto &Builder = State.Builder;
959 switch (Opcode) {
960 case Instruction::Call:
961 case Instruction::Br:
962 case Instruction::PHI:
963 case Instruction::GetElementPtr:
964 case Instruction::Select:
965 llvm_unreachable("This instruction is handled by a different recipe.");
966 case Instruction::UDiv:
967 case Instruction::SDiv:
968 case Instruction::SRem:
969 case Instruction::URem:
970 case Instruction::Add:
971 case Instruction::FAdd:
972 case Instruction::Sub:
973 case Instruction::FSub:
974 case Instruction::FNeg:
975 case Instruction::Mul:
976 case Instruction::FMul:
977 case Instruction::FDiv:
978 case Instruction::FRem:
979 case Instruction::Shl:
980 case Instruction::LShr:
981 case Instruction::AShr:
982 case Instruction::And:
983 case Instruction::Or:
984 case Instruction::Xor: {
985 // Just widen unops and binops.
986 for (unsigned Part = 0; Part < State.UF; ++Part) {
988 for (VPValue *VPOp : operands())
989 Ops.push_back(State.get(VPOp, Part));
990
991 Value *V = Builder.CreateNAryOp(Opcode, Ops);
992
993 if (auto *VecOp = dyn_cast<Instruction>(V))
994 setFlags(VecOp);
995
996 // Use this vector value for all users of the original instruction.
997 State.set(this, V, Part);
998 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
999 }
1000
1001 break;
1002 }
1003 case Instruction::Freeze: {
1004 for (unsigned Part = 0; Part < State.UF; ++Part) {
1005 Value *Op = State.get(getOperand(0), Part);
1006
1007 Value *Freeze = Builder.CreateFreeze(Op);
1008 State.set(this, Freeze, Part);
1009 }
1010 break;
1011 }
1012 case Instruction::ICmp:
1013 case Instruction::FCmp: {
1014 // Widen compares. Generate vector compares.
1015 bool FCmp = Opcode == Instruction::FCmp;
1016 for (unsigned Part = 0; Part < State.UF; ++Part) {
1017 Value *A = State.get(getOperand(0), Part);
1018 Value *B = State.get(getOperand(1), Part);
1019 Value *C = nullptr;
1020 if (FCmp) {
1021 // Propagate fast math flags.
1022 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1023 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
1024 Builder.setFastMathFlags(I->getFastMathFlags());
1025 C = Builder.CreateFCmp(getPredicate(), A, B);
1026 } else {
1027 C = Builder.CreateICmp(getPredicate(), A, B);
1028 }
1029 State.set(this, C, Part);
1030 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1031 }
1032
1033 break;
1034 }
1035 default:
1036 // This instruction is not vectorized by simple widening.
1037 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1038 << Instruction::getOpcodeName(Opcode));
1039 llvm_unreachable("Unhandled instruction!");
1040 } // end of switch.
1041
1042#if !defined(NDEBUG)
1043 // Verify that VPlan type inference results agree with the type of the
1044 // generated values.
1045 for (unsigned Part = 0; Part < State.UF; ++Part) {
1047 State.VF) == State.get(this, Part)->getType() &&
1048 "inferred type and type from generated instructions do not match");
1049 }
1050#endif
1051}
1052
1053#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1055 VPSlotTracker &SlotTracker) const {
1056 O << Indent << "WIDEN ";
1058 O << " = " << Instruction::getOpcodeName(Opcode);
1059 printFlags(O);
1061}
1062#endif
1063
1066 auto &Builder = State.Builder;
1067 /// Vectorize casts.
1068 assert(State.VF.isVector() && "Not vectorizing?");
1069 Type *DestTy = VectorType::get(getResultType(), State.VF);
1070 VPValue *Op = getOperand(0);
1071 for (unsigned Part = 0; Part < State.UF; ++Part) {
1072 if (Part > 0 && Op->isLiveIn()) {
1073 // FIXME: Remove once explicit unrolling is implemented using VPlan.
1074 State.set(this, State.get(this, 0), Part);
1075 continue;
1076 }
1077 Value *A = State.get(Op, Part);
1078 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1079 State.set(this, Cast, Part);
1080 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1081 }
1082}
1083
1084#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1086 VPSlotTracker &SlotTracker) const {
1087 O << Indent << "WIDEN-CAST ";
1089 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1090 printFlags(O);
1092 O << " to " << *getResultType();
1093}
1094#endif
1095
1096/// This function adds
1097/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
1098/// to each vector element of Val. The sequence starts at StartIdx.
1099/// \p BinOp is relevant for FP induction variables only: it must be FAdd or
/// FSub and is the operation used to combine \p Val with the scaled steps.
/// For integer element types the result is Val + (stepvector + splat(StartIdx))
/// * splat(Step), built with plain add/mul.
/// NOTE(review): this listing drops original lines 1101 (the rest of the
/// parameter list; the body refers to BinOp and VF), 1114 and 1120.
1100static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
1102 IRBuilderBase &Builder) {
1103 assert(VF.isVector() && "only vector VFs are supported");
1104
1105 // Create and check the types.
1106 auto *ValVTy = cast<VectorType>(Val->getType());
1107 ElementCount VLen = ValVTy->getElementCount();
1108
1109 Type *STy = Val->getType()->getScalarType();
1110 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1111 "Induction Step must be an integer or FP");
1112 assert(Step->getType() == STy && "Step has wrong type");
1113
 // NOTE(review): original line 1114 is missing from this listing.
1115
1116 // Create a vector of consecutive numbers from zero to VF.
1117 VectorType *InitVecValVTy = ValVTy;
1118 if (STy->isFloatingPointTy()) {
1119 Type *InitVecValSTy =
 // NOTE(review): original line 1120 (the initializer of InitVecValSTy) is
 // missing from this listing.
1121 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1122 }
1123 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1124
1125 // Splat the StartIdx
1126 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
1127
1128 if (STy->isIntegerTy()) {
1129 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
1130 Step = Builder.CreateVectorSplat(VLen, Step);
1131 assert(Step->getType() == Val->getType() && "Invalid step vec");
1132 // FIXME: The newly created binary instructions should contain nsw/nuw
1133 // flags, which can be found from the original scalar operations.
1134 Step = Builder.CreateMul(InitVec, Step);
1135 return Builder.CreateAdd(Val, Step, "induction");
1136 }
1137
1138 // Floating point induction.
1139 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1140 "Binary Opcode should be specified for FP induction");
 // The step vector is converted to the FP vector type of Val before the
 // start index is added and the result is scaled by Step.
1141 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1142 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
1143
1144 Step = Builder.CreateVectorSplat(VLen, Step);
1145 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1146 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1147}
1148
1149/// A helper function that returns an integer or floating-point constant with
1150/// value C. If \p Ty is an integer type the constant is built with
/// ConstantInt::getSigned; for any other type it is built with
/// ConstantFP::get.
// NOTE(review): the signature (listing line 1151) is absent from this extract;
// the callers in this file pass a type followed by the value.
1152 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1153 : ConstantFP::get(Ty, C);
1154}
1155
1157 ElementCount VF) {
1158 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
1159 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
1160 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
1161 return B.CreateUIToFP(RuntimeVF, FTy);
1162}
1163
1165 assert(!State.Instance && "Int or FP induction being replicated.");
1166
1167 Value *Start = getStartValue()->getLiveInIRValue();
1169 TruncInst *Trunc = getTruncInst();
1170 IRBuilderBase &Builder = State.Builder;
1171 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
1172 assert(State.VF.isVector() && "must have vector VF");
1173
1174 // The value from the original loop to which we are mapping the new induction
1175 // variable.
1176 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
1177
1178 // Fast-math-flags propagate from the original induction instruction.
1179 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1180 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1181 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1182
1183 // Now do the actual transformations, and start with fetching the step value.
1184 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1185
1186 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1187 "Expected either an induction phi-node or a truncate of it!");
1188
1189 // Construct the initial value of the vector IV in the vector loop preheader
1190 auto CurrIP = Builder.saveIP();
1191 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1192 Builder.SetInsertPoint(VectorPH->getTerminator());
1193 if (isa<TruncInst>(EntryVal)) {
1194 assert(Start->getType()->isIntegerTy() &&
1195 "Truncation requires an integer type");
1196 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1197 Step = Builder.CreateTrunc(Step, TruncType);
1198 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1199 }
1200
1201 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
1202 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1203 Value *SteppedStart = getStepVector(
1204 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
1205
1206 // We create vector phi nodes for both integer and floating-point induction
1207 // variables. Here, we determine the kind of arithmetic we will perform.
1210 if (Step->getType()->isIntegerTy()) {
1211 AddOp = Instruction::Add;
1212 MulOp = Instruction::Mul;
1213 } else {
1214 AddOp = ID.getInductionOpcode();
1215 MulOp = Instruction::FMul;
1216 }
1217
1218 // Multiply the vectorization factor by the step using integer or
1219 // floating-point arithmetic as appropriate.
1220 Type *StepType = Step->getType();
1221 Value *RuntimeVF;
1222 if (Step->getType()->isFloatingPointTy())
1223 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
1224 else
1225 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
1226 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1227
1228 // Create a vector splat to use in the induction update.
1229 //
1230 // FIXME: If the step is non-constant, we create the vector splat with
1231 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
1232 // handle a constant vector splat.
1233 Value *SplatVF = isa<Constant>(Mul)
1234 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
1235 : Builder.CreateVectorSplat(State.VF, Mul);
1236 Builder.restoreIP(CurrIP);
1237
1238 // We may need to add the step a number of times, depending on the unroll
1239 // factor. The last of those goes into the PHI.
1240 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1241 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1242 VecInd->setDebugLoc(EntryVal->getDebugLoc());
1243 Instruction *LastInduction = VecInd;
1244 for (unsigned Part = 0; Part < State.UF; ++Part) {
1245 State.set(this, LastInduction, Part);
1246
1247 if (isa<TruncInst>(EntryVal))
1248 State.addMetadata(LastInduction, EntryVal);
1249
1250 LastInduction = cast<Instruction>(
1251 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
1252 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
1253 }
1254
1255 LastInduction->setName("vec.ind.next");
1256 VecInd->addIncoming(SteppedStart, VectorPH);
1257 // Add induction update using an incorrect block temporarily. The phi node
1258 // will be fixed after VPlan execution. Note that at this point the latch
1259 // block cannot be used, as it does not exist yet.
1260 // TODO: Model increment value in VPlan, by turning the recipe into a
1261 // multi-def and a subclass of VPHeaderPHIRecipe.
1262 VecInd->addIncoming(LastInduction, VectorPH);
1263}
1264
1265#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1267 VPSlotTracker &SlotTracker) const {
1268 O << Indent << "WIDEN-INDUCTION";
1269 if (getTruncInst()) {
1270 O << "\\l\"";
1271 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
1272 O << " +\n" << Indent << "\" ";
1274 } else
1275 O << " " << VPlanIngredient(IV);
1276
1277 O << ", ";
1279}
1280#endif
1281
1283 // The step may be defined by a recipe in the preheader (e.g. if it requires
1284 // SCEV expansion), but for the canonical induction the step is required to be
1285 // 1, which is represented as live-in.
1287 return false;
1288 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1289 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1290 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1291 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1292 getScalarType() == CanIV->getScalarType();
1293}
1294
1295#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1297 VPSlotTracker &SlotTracker) const {
1298 O << Indent;
1300 O << Indent << "= DERIVED-IV ";
1302 O << " + ";
1304 O << " * ";
1306}
1307#endif
1308
1310 // Fast-math-flags propagate from the original induction instruction.
1312 if (hasFastMathFlags())
1314
1315 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1316 /// variable on which to base the steps, \p Step is the size of the step.
1317
1318 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
1319 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1320 IRBuilderBase &Builder = State.Builder;
1321
1322 // Ensure step has the same type as that of scalar IV.
1323 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1324 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1325
1326 // We build scalar steps for both integer and floating-point induction
1327 // variables. Here, we determine the kind of arithmetic we will perform.
1330 if (BaseIVTy->isIntegerTy()) {
1331 AddOp = Instruction::Add;
1332 MulOp = Instruction::Mul;
1333 } else {
1334 AddOp = InductionOpcode;
1335 MulOp = Instruction::FMul;
1336 }
1337
1338 // Determine the number of scalars we need to generate for each unroll
1339 // iteration.
1340 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1341 // Compute the scalar steps and save the results in State.
1342 Type *IntStepTy =
1343 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1344 Type *VecIVTy = nullptr;
1345 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1346 if (!FirstLaneOnly && State.VF.isScalable()) {
1347 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1348 UnitStepVec =
1349 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1350 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1351 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1352 }
1353
1354 unsigned StartPart = 0;
1355 unsigned EndPart = State.UF;
1356 unsigned StartLane = 0;
1357 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1358 if (State.Instance) {
1359 StartPart = State.Instance->Part;
1360 EndPart = StartPart + 1;
1361 StartLane = State.Instance->Lane.getKnownLane();
1362 EndLane = StartLane + 1;
1363 }
1364 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1365 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1366
1367 if (!FirstLaneOnly && State.VF.isScalable()) {
1368 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1369 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1370 if (BaseIVTy->isFloatingPointTy())
1371 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1372 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1373 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1374 State.set(this, Add, Part);
1375 // It's useful to record the lane values too for the known minimum number
1376 // of elements so we do those below. This improves the code quality when
1377 // trying to extract the first element, for example.
1378 }
1379
1380 if (BaseIVTy->isFloatingPointTy())
1381 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1382
1383 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1384 Value *StartIdx = Builder.CreateBinOp(
1385 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1386 // The step returned by `createStepForVF` is a runtime-evaluated value
1387 // when VF is scalable. Otherwise, it should be folded into a Constant.
1388 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1389 "Expected StartIdx to be folded to a constant when VF is not "
1390 "scalable");
1391 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1392 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1393 State.set(this, Add, VPIteration(Part, Lane));
1394 }
1395 }
1396}
1397
1398#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1400 VPSlotTracker &SlotTracker) const {
1401 O << Indent;
1403 O << " = SCALAR-STEPS ";
1405}
1406#endif
1407
1409 assert(State.VF.isVector() && "not widening");
1410 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1411 // Construct a vector GEP by widening the operands of the scalar GEP as
1412 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1413 // results in a vector of pointers when at least one operand of the GEP
1414 // is vector-typed. Thus, to keep the representation compact, we only use
1415 // vector-typed operands for loop-varying values.
1416
1417 if (areAllOperandsInvariant()) {
1418 // If we are vectorizing, but the GEP has only loop-invariant operands,
1419 // the GEP we build (by only using vector-typed operands for
1420 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1421 // produce a vector of pointers, we need to either arbitrarily pick an
1422 // operand to broadcast, or broadcast a clone of the original GEP.
1423 // Here, we broadcast a clone of the original.
1424 //
1425 // TODO: If at some point we decide to scalarize instructions having
1426 // loop-invariant operands, this special case will no longer be
1427 // required. We would add the scalarization decision to
1428 // collectLoopScalars() and teach getVectorValue() to broadcast
1429 // the lane-zero scalar value.
1431 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1432 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1433
1434 auto *NewGEP =
1435 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1436 ArrayRef(Ops).drop_front(), "", isInBounds());
1437 for (unsigned Part = 0; Part < State.UF; ++Part) {
1438 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1439 State.set(this, EntryPart, Part);
1440 State.addMetadata(EntryPart, GEP);
1441 }
1442 } else {
1443 // If the GEP has at least one loop-varying operand, we are sure to
1444 // produce a vector of pointers. But if we are only unrolling, we want
1445 // to produce a scalar GEP for each unroll part. Thus, the GEP we
1446 // produce with the code below will be scalar (if VF == 1) or vector
1447 // (otherwise). Note that for the unroll-only case, we still maintain
1448 // values in the vector mapping with initVector, as we do for other
1449 // instructions.
1450 for (unsigned Part = 0; Part < State.UF; ++Part) {
1451 // The pointer operand of the new GEP. If it's loop-invariant, we
1452 // won't broadcast it.
1453 auto *Ptr = isPointerLoopInvariant()
1454 ? State.get(getOperand(0), VPIteration(0, 0))
1455 : State.get(getOperand(0), Part);
1456
1457 // Collect all the indices for the new GEP. If any index is
1458 // loop-invariant, we won't broadcast it.
1460 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1461 VPValue *Operand = getOperand(I);
1462 if (isIndexLoopInvariant(I - 1))
1463 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1464 else
1465 Indices.push_back(State.get(Operand, Part));
1466 }
1467
1468 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1469 // but it should be a vector, otherwise.
1470 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1471 Indices, "", isInBounds());
1472 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1473 "NewGEP is not a pointer vector");
1474 State.set(this, NewGEP, Part);
1475 State.addMetadata(NewGEP, GEP);
1476 }
1477 }
1478}
1479
1480#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1482 VPSlotTracker &SlotTracker) const {
1483 O << Indent << "WIDEN-GEP ";
1484 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1485 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1486 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1487
1488 O << " ";
1490 O << " = getelementptr";
1491 printFlags(O);
1493}
1494#endif
1495
/// Compute, for every unroll part, the pointer for that part and record it in
/// \p State as a scalar value. The base pointer is operand 0 taken at part 0,
/// lane 0; each part's pointer is derived from it with one GEP (forward case)
/// or two GEPs (reverse case) over IndexedTy.
// NOTE(review): listing line 1498 is absent from this extract -- confirm
// against the upstream file what statement sits between the Builder alias and
// the loop.
1496void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1497 auto &Builder = State.Builder;
1499 for (unsigned Part = 0; Part < State.UF; ++Part) {
1500 // Calculate the pointer for the specific unroll-part.
1501 Value *PartPtr = nullptr;
1502 // Use i32 for the gep index type when the value is constant,
1503 // or query DataLayout for a more suitable index type otherwise.
// Concretely: the DataLayout index type is used only for scalable VFs when
// reversing or for parts after the first; every other case uses i32.
1504 const DataLayout &DL =
1505 Builder.GetInsertBlock()->getDataLayout();
1506 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1507 ? DL.getIndexType(IndexedTy->getPointerTo())
1508 : Builder.getInt32Ty();
// The base pointer is always read from part 0, lane 0 of operand 0.
1509 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1510 bool InBounds = isInBounds();
1511 if (IsReverse) {
1512 // If the address is consecutive but reversed, then the
1513 // wide store needs to start at the last vector element.
1514 // RunTimeVF = VScale * VF.getKnownMinValue()
1515 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1516 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1517 // NumElt = -Part * RunTimeVF
1518 Value *NumElt = Builder.CreateMul(
1519 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1520 // LastLane = 1 - RunTimeVF
1521 Value *LastLane =
1522 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
// Two chained GEPs: first step back by whole parts, then to the last lane.
1523 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1524 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1525 } else {
// Forward case: a single GEP by the per-part step from createStepForVF.
1526 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1527 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1528 }
1529
// Record the result as a scalar (one pointer per part).
1530 State.set(this, PartPtr, Part, /*IsScalar*/ true);
1531 }
1532}
1533
1534#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1536 VPSlotTracker &SlotTracker) const {
1537 O << Indent;
1539 O << " = vector-pointer ";
1540 if (IsReverse)
1541 O << "(reverse) ";
1542
1544}
1545#endif
1546
1549 // We know that all PHIs in non-header blocks are converted into
1550 // selects, so we don't have to worry about the insertion order and we
1551 // can just use the builder.
1552 // At this point we generate the predication tree. There may be
1553 // duplications since this is a simple recursive scan, but future
1554 // optimizations will clean it up.
1555
1556 unsigned NumIncoming = getNumIncomingValues();
1557
1558 // Generate a sequence of selects of the form:
1559 // SELECT(Mask3, In3,
1560 // SELECT(Mask2, In2,
1561 // SELECT(Mask1, In1,
1562 // In0)))
1563 // Note that Mask0 is never used: lanes for which no path reaches this phi and
1564 // are essentially undef are taken from In0.
1565 VectorParts Entry(State.UF);
1566 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
1567 for (unsigned In = 0; In < NumIncoming; ++In) {
1568 for (unsigned Part = 0; Part < State.UF; ++Part) {
1569 // We might have single edge PHIs (blocks) - use an identity
1570 // 'select' for the first PHI operand.
1571 Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
1572 if (In == 0)
1573 Entry[Part] = In0; // Initialize with the first incoming value.
1574 else {
1575 // Select between the current value and the previous incoming edge
1576 // based on the incoming mask.
1577 Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
1578 Entry[Part] =
1579 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1580 }
1581 }
1582 }
1583 for (unsigned Part = 0; Part < State.UF; ++Part)
1584 State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
1585}
1586
1587#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1589 VPSlotTracker &SlotTracker) const {
1590 O << Indent << "BLEND ";
1592 O << " =";
1593 if (getNumIncomingValues() == 1) {
1594 // Not a User of any mask: not really blending, this is a
1595 // single-predecessor phi.
1596 O << " ";
1598 } else {
1599 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1600 O << " ";
1602 if (I == 0)
1603 continue;
1604 O << "/";
1606 }
1607 }
1608}
1609#endif
1610
1612 assert(!State.Instance && "Reduction being replicated.");
1613 Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1614 RecurKind Kind = RdxDesc.getRecurrenceKind();
1615 // Propagate the fast-math flags carried by the underlying instruction.
1617 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1618 for (unsigned Part = 0; Part < State.UF; ++Part) {
1619 Value *NewVecOp = State.get(getVecOp(), Part);
1620 if (VPValue *Cond = getCondOp()) {
1621 Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1622 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1623 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1624 Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1625 RdxDesc.getFastMathFlags());
1626 if (State.VF.isVector()) {
1627 Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1628 }
1629
1630 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1631 NewVecOp = Select;
1632 }
1633 Value *NewRed;
1634 Value *NextInChain;
1635 if (IsOrdered) {
1636 if (State.VF.isVector())
1637 NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1638 PrevInChain);
1639 else
1640 NewRed = State.Builder.CreateBinOp(
1641 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1642 NewVecOp);
1643 PrevInChain = NewRed;
1644 } else {
1645 PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1646 NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1647 }
1649 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1650 NewRed, PrevInChain);
1651 } else if (IsOrdered)
1652 NextInChain = NewRed;
1653 else
1654 NextInChain = State.Builder.CreateBinOp(
1655 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1656 State.set(this, NextInChain, Part, /*IsScalar*/ true);
1657 }
1658}
1659
1660#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1662 VPSlotTracker &SlotTracker) const {
1663 O << Indent << "REDUCE ";
1665 O << " = ";
1667 O << " +";
1668 if (isa<FPMathOperator>(getUnderlyingInstr()))
1670 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1672 if (getCondOp()) {
1673 O << ", ";
1675 }
1676 O << ")";
1677 if (RdxDesc.IntermediateStore)
1678 O << " (with final reduction value stored in invariant address sank "
1679 "outside of loop)";
1680}
1681#endif
1682
1684 // Find if the recipe is used by a widened recipe via an intervening
1685 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1686 return any_of(users(), [](const VPUser *U) {
1687 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1688 return any_of(PredR->users(), [PredR](const VPUser *U) {
1689 return !U->usesScalars(PredR);
1690 });
1691 return false;
1692 });
1693}
1694
1695#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1697 VPSlotTracker &SlotTracker) const {
1698 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1699
1700 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1702 O << " = ";
1703 }
1704 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1705 O << "call";
1706 printFlags(O);
1707 O << "@" << CB->getCalledFunction()->getName() << "(";
1709 O, [&O, &SlotTracker](VPValue *Op) {
1710 Op->printAsOperand(O, SlotTracker);
1711 });
1712 O << ")";
1713 } else {
1715 printFlags(O);
1717 }
1718
1719 if (shouldPack())
1720 O << " (S->V)";
1721}
1722#endif
1723
1724/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1725/// if it is either defined outside the vector region or its operand is known to
1726/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
/// Only operand 0 of \p C is inspected for the second condition.
1727/// TODO: Uniformity should be associated with a VPValue and there should be a
1728/// generic way to check.
// NOTE(review): the signature (listing line 1729) is absent from this extract;
// the caller in this file passes a VPScalarCastRecipe's `this` pointer.
1730 return C->isDefinedOutsideVectorRegions() ||
1731 isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1732 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1733}
1734
/// Emit the scalar cast for unroll part \p Part and return the created value.
/// The cast is applied to lane 0 of operand 0 for that part and produces a
/// value of type ResultTy. Only SExt, ZExt and Trunc are handled; any other
/// opcode hits llvm_unreachable.
1735Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
// NOTE(review): the opening line of this assert (listing line 1736) is absent
// from this extract; only its message string remains below.
1737 "Codegen only implemented for first lane.");
1738 switch (Opcode) {
1739 case Instruction::SExt:
1740 case Instruction::ZExt:
1741 case Instruction::Trunc: {
1742 // Note: SExt/ZExt not used yet.
// All three opcodes share one path: cast lane 0 of operand 0 to ResultTy.
1743 Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1744 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1745 }
1746 default:
1747 llvm_unreachable("opcode not implemented yet");
1748 }
1749}
1750
1751void VPScalarCastRecipe ::execute(VPTransformState &State) {
1752 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1753 for (unsigned Part = 0; Part != State.UF; ++Part) {
1754 Value *Res;
1755 // Only generate a single instance, if the recipe is uniform across UFs and
1756 // VFs.
1757 if (Part > 0 && IsUniformAcrossVFsAndUFs)
1758 Res = State.get(this, VPIteration(0, 0));
1759 else
1760 Res = generate(State, Part);
1761 State.set(this, Res, VPIteration(Part, 0));
1762 }
1763}
1764
1765#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1766void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1767 VPSlotTracker &SlotTracker) const {
1768 O << Indent << "SCALAR-CAST ";
1769 printAsOperand(O, SlotTracker);
1770 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1771 printOperands(O, SlotTracker);
1772 O << " to " << *ResultTy;
1773}
1774#endif
1775
1777 assert(State.Instance && "Branch on Mask works only on single instance.");
1778
1779 unsigned Part = State.Instance->Part;
1780 unsigned Lane = State.Instance->Lane.getKnownLane();
1781
1782 Value *ConditionBit = nullptr;
1783 VPValue *BlockInMask = getMask();
1784 if (BlockInMask) {
1785 ConditionBit = State.get(BlockInMask, Part);
1786 if (ConditionBit->getType()->isVectorTy())
1787 ConditionBit = State.Builder.CreateExtractElement(
1788 ConditionBit, State.Builder.getInt32(Lane));
1789 } else // Block in mask is all-one.
1790 ConditionBit = State.Builder.getTrue();
1791
1792 // Replace the temporary unreachable terminator with a new conditional branch,
1793 // whose two destinations will be set later when they are created.
1794 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1795 assert(isa<UnreachableInst>(CurrentTerminator) &&
1796 "Expected to replace unreachable terminator with conditional branch.");
1797 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1798 CondBr->setSuccessor(0, nullptr);
1799 ReplaceInstWithInst(CurrentTerminator, CondBr);
1800}
1801
1803 assert(State.Instance && "Predicated instruction PHI works per instance.");
1804 Instruction *ScalarPredInst =
1805 cast<Instruction>(State.get(getOperand(0), *State.Instance));
1806 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1807 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1808 assert(PredicatingBB && "Predicated block has no single predecessor.");
1809 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1810 "operand must be VPReplicateRecipe");
1811
1812 // By current pack/unpack logic we need to generate only a single phi node: if
1813 // a vector value for the predicated instruction exists at this point it means
1814 // the instruction has vector users only, and a phi for the vector value is
1815 // needed. In this case the recipe of the predicated instruction is marked to
1816 // also do that packing, thereby "hoisting" the insert-element sequence.
1817 // Otherwise, a phi node for the scalar value is needed.
1818 unsigned Part = State.Instance->Part;
1819 if (State.hasVectorValue(getOperand(0), Part)) {
1820 Value *VectorValue = State.get(getOperand(0), Part);
1821 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1822 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1823 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1824 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1825 if (State.hasVectorValue(this, Part))
1826 State.reset(this, VPhi, Part);
1827 else
1828 State.set(this, VPhi, Part);
1829 // NOTE: Currently we need to update the value of the operand, so the next
1830 // predicated iteration inserts its generated value in the correct vector.
1831 State.reset(getOperand(0), VPhi, Part);
1832 } else {
1833 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1834 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1835 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1836 PredicatingBB);
1837 Phi->addIncoming(ScalarPredInst, PredicatedBB);
1838 if (State.hasScalarValue(this, *State.Instance))
1839 State.reset(this, Phi, *State.Instance);
1840 else
1841 State.set(this, Phi, *State.Instance);
1842 // NOTE: Currently we need to update the value of the operand, so the next
1843 // predicated iteration inserts its generated value in the correct vector.
1844 State.reset(getOperand(0), Phi, *State.Instance);
1845 }
1846}
1847
1848#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1850 VPSlotTracker &SlotTracker) const {
1851 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1853 O << " = ";
1855}
1856
1858 VPSlotTracker &SlotTracker) const {
1859 O << Indent << "WIDEN ";
1861 O << " = load ";
1863}
1864
1866 VPSlotTracker &SlotTracker) const {
1867 O << Indent << "WIDEN ";
1869 O << " = vp.load ";
1871}
1872
1874 VPSlotTracker &SlotTracker) const {
1875 O << Indent << "WIDEN store ";
1877}
1878
1880 VPSlotTracker &SlotTracker) const {
1881 O << Indent << "WIDEN vp.store ";
1883}
1884#endif
1885
1887 Value *Start = getStartValue()->getLiveInIRValue();
1888 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1889 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1890
1891 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1892 EntryPart->addIncoming(Start, VectorPH);
1893 EntryPart->setDebugLoc(getDebugLoc());
1894 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1895 State.set(this, EntryPart, Part, /*IsScalar*/ true);
1896}
1897
1898#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1900 VPSlotTracker &SlotTracker) const {
1901 O << Indent << "EMIT ";
1903 O << " = CANONICAL-INDUCTION ";
1905}
1906#endif
1907
1910 VPValue *Step) const {
1911 // Must be an integer induction.
1913 return false;
1914 // Start must match the start value of this canonical induction.
1915 if (Start != getStartValue())
1916 return false;
1917
1918 // If the step is defined by a recipe, it is not a ConstantInt.
1919 if (Step->getDefiningRecipe())
1920 return false;
1921
1922 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1923 return StepC && StepC->isOne();
1924}
1925
1927 return IsScalarAfterVectorization &&
1928 (!IsScalable || vputils::onlyFirstLaneUsed(this));
1929}
1930
1931#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1933 VPSlotTracker &SlotTracker) const {
1934 O << Indent << "EMIT ";
1936 O << " = WIDEN-POINTER-INDUCTION ";
1938 O << ", " << *IndDesc.getStep();
1939}
1940#endif
1941
1943 assert(!State.Instance && "cannot be used in per-lane");
1944 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
1945 SCEVExpander Exp(SE, DL, "induction");
1946
1947 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1948 &*State.Builder.GetInsertPoint());
1949 assert(!State.ExpandedSCEVs.contains(Expr) &&
1950 "Same SCEV expanded multiple times");
1951 State.ExpandedSCEVs[Expr] = Res;
1952 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1953 State.set(this, Res, {Part, 0});
1954}
1955
1956#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1958 VPSlotTracker &SlotTracker) const {
1959 O << Indent << "EMIT ";
1961 O << " = EXPAND SCEV " << *Expr;
1962}
1963#endif
1964
1966 Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1967 Type *STy = CanonicalIV->getType();
1968 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1969 ElementCount VF = State.VF;
1970 Value *VStart = VF.isScalar()
1971 ? CanonicalIV
1972 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1973 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1974 Value *VStep = createStepForVF(Builder, STy, VF, Part);
1975 if (VF.isVector()) {
1976 VStep = Builder.CreateVectorSplat(VF, VStep);
1977 VStep =
1978 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1979 }
1980 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1981 State.set(this, CanonicalVectorIV, Part);
1982 }
1983}
1984
1985#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only printer: writes the indent, "EMIT " and
// " = WIDEN-CANONICAL-INDUCTION " to the stream O.
// NOTE(review): listing lines 1986, 1989 and 1991 (the signature start and two
// statements) are absent from this listing.
1987 VPSlotTracker &SlotTracker) const {
1988 O << Indent << "EMIT ";
1990 O << " = WIDEN-CANONICAL-INDUCTION ";
1992}
1993#endif
1994
// Creates the "vector.recur" phi for the recurrence and registers it as part 0
// of this recipe. Only the incoming value from the preheader is added here.
// NOTE(review): the signature line (listing line 1995) is absent from this
// listing.
1996 auto &Builder = State.Builder;
1997 // Create a vector from the initial value.
1998 auto *VectorInit = getStartValue()->getLiveInIRValue();
1999
// The phi has the start value's own type for scalar VF, otherwise a vector of
// that type with VF elements.
2000 Type *VecTy = State.VF.isScalar()
2001 ? VectorInit->getType()
2002 : VectorType::get(VectorInit->getType(), State.VF);
2003
2004 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2005 if (State.VF.isVector()) {
// In the preheader, place the scalar start value into the last lane
// (index RuntimeVF - 1) of an otherwise poison vector. The guard restores
// the builder's insert point when this scope ends.
2006 auto *IdxTy = Builder.getInt32Ty();
2007 auto *One = ConstantInt::get(IdxTy, 1);
2008 IRBuilder<>::InsertPointGuard Guard(Builder);
2009 Builder.SetInsertPoint(VectorPH->getTerminator());
2010 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
2011 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
2012 VectorInit = Builder.CreateInsertElement(
2013 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
2014 }
2015
2016 // Create a phi node for the new recurrence.
// The phi is inserted at the first insertion point of State.CFG.PrevBB.
2017 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
2018 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
2019 EntryPart->addIncoming(VectorInit, VectorPH);
2020 State.set(this, EntryPart, 0);
2021}
2022
2023#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only printer: writes the indent, "FIRST-ORDER-RECURRENCE-PHI " and
// " = phi " to the stream O.
// NOTE(review): listing lines 2024, 2027 and 2029 (the signature start and two
// statements) are absent from this listing.
2025 VPSlotTracker &SlotTracker) const {
2026 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
2028 O << " = phi ";
2030}
2031#endif
2032
// Creates the "vec.phi" reduction phis in the vector loop header and adds
// their incoming values from the preheader: the start value for part 0 and the
// identity value for every other part.
// NOTE(review): the signature line (listing line 2033) is absent from this
// listing.
2034 auto &Builder = State.Builder;
2035
2036 // Reductions do not have to start at zero. They can start with
2037 // any loop invariant values.
2038 VPValue *StartVPV = getStartValue();
2039 Value *StartV = StartVPV->getLiveInIRValue();
2040
2041 // In order to support recurrences we need to be able to vectorize Phi nodes.
2042 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
2043 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
2044 // this value when we vectorize all of the instructions that use the PHI.
// The phi stays scalar when VF is scalar or when IsInLoop is set.
2045 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
2046 Type *VecTy = ScalarPHI ? StartV->getType()
2047 : VectorType::get(StartV->getType(), State.VF);
2048
2049 BasicBlock *HeaderBB = State.CFG.PrevBB;
2050 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
2051 "recipe must be in the vector loop header");
// Ordered reductions get a single phi; otherwise one phi per unrolled part.
2052 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
2053 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2054 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
2055 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
2056 State.set(this, EntryPart, Part, IsInLoop);
2057 }
2058
2059 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2060
2061 Value *Iden = nullptr;
2062 RecurKind RK = RdxDesc.getRecurrenceKind();
// NOTE(review): listing lines 2063-2064 (the `if` header that opens the branch
// below) are absent from this listing; per the comment that follows, the
// branch covers MinMax and AnyOf reductions.
2065 // MinMax and AnyOf reductions have the start value as their identity.
2066 if (ScalarPHI) {
2067 Iden = StartV;
2068 } else {
// Splat the start value in the preheader; it serves as both the start and
// the identity vector. The guard restores the insert point afterwards.
2069 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2070 Builder.SetInsertPoint(VectorPH->getTerminator());
2071 StartV = Iden =
2072 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
2073 }
2074 } else {
// Other kinds take their identity from the recurrence descriptor.
2075 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
2076 RdxDesc.getFastMathFlags());
2077
2078 if (!ScalarPHI) {
// The identity splat is created at the builder's current insert point, i.e.
// before the guard below moves it to the preheader. The start vector is the
// identity splat with StartV inserted into lane 0.
2079 Iden = Builder.CreateVectorSplat(State.VF, Iden);
2080 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2081 Builder.SetInsertPoint(VectorPH->getTerminator());
2082 Constant *Zero = Builder.getInt32(0);
2083 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
2084 }
2085 }
2086
2087 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2088 Value *EntryPart = State.get(this, Part, IsInLoop);
2089 // Make sure to add the reduction start value only to the
2090 // first unroll part.
2091 Value *StartVal = (Part == 0) ? StartV : Iden;
2092 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
2093 }
2094}
2095
2096#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only printer: writes the indent, "WIDEN-REDUCTION-PHI " and " = phi "
// to the stream O.
// NOTE(review): listing lines 2097, 2101 and 2103 (the signature start and two
// statements) are absent from this listing.
2098 VPSlotTracker &SlotTracker) const {
2099 O << Indent << "WIDEN-REDUCTION-PHI ";
2100
2102 O << " = phi ";
2104}
2105#endif
2106
2109 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
// NOTE(review): listing lines 2107-2108 (the signature and the start of the
// assertion whose message appears above) are absent from this listing.
2110
// Create a "vec.phi" phi whose type matches the part-0 value of operand 0 and
// register it as part 0 of this recipe; no incoming values are added here.
2111 Value *Op0 = State.get(getOperand(0), 0);
2112 Type *VecTy = Op0->getType();
2113 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2114 State.set(this, VecPhi, 0);
2115}
2116
2117#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only printer: writes the indent and "WIDEN-PHI ", then either the
// original IR phi (when operand counts differ) or " = phi ".
// NOTE(review): listing lines 2118, 2132 and 2134 (the signature start and two
// statements) are absent from this listing.
2119 VPSlotTracker &SlotTracker) const {
2120 O << Indent << "WIDEN-PHI ";
2121
2122 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2123 // Unless all incoming values are modeled in VPlan print the original PHI
2124 // directly.
2125 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2126 // values as VPValues.
2127 if (getNumOperands() != OriginalPhi->getNumOperands()) {
2128 O << VPlanIngredient(OriginalPhi);
2129 return;
2130 }
2131
2133 O << " = phi ";
2135}
2136#endif
2137
2138// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2139// remove VPActiveLaneMaskPHIRecipe.
// For each unrolled part, creates an "active.lane.mask" phi with the type of
// that part's start mask (operand 0), adds the start mask as the incoming
// value from the preheader, sets the recipe's debug location on it and
// registers it as that part's value.
// NOTE(review): the signature line (listing line 2140) is absent from this
// listing.
2141 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2142 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2143 Value *StartMask = State.get(getOperand(0), Part);
2144 PHINode *EntryPart =
2145 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2146 EntryPart->addIncoming(StartMask, VectorPH);
2147 EntryPart->setDebugLoc(getDebugLoc());
2148 State.set(this, EntryPart, Part);
2149 }
2150}
2151
2152#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only printer: writes the indent, "ACTIVE-LANE-MASK-PHI " and " = phi "
// to the stream O.
// NOTE(review): listing lines 2153, 2157 and 2159 (the signature start and two
// statements) are absent from this listing.
2154 VPSlotTracker &SlotTracker) const {
2155 O << Indent << "ACTIVE-LANE-MASK-PHI ";
2156
2158 O << " = phi ";
2160}
2161#endif
2162
// Creates the scalar "evl.based.iv" phi, adds the start value (operand 0,
// part 0 / lane 0) as the incoming value from the preheader, sets the recipe's
// debug location on it and registers it as the scalar value for part 0.
// NOTE(review): the signature line (listing line 2163) is absent from this
// listing.
2164 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
// Only a single unrolled part is supported, so just part 0 is generated.
2165 assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2166 Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2167 PHINode *EntryPart =
2168 State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2169 EntryPart->addIncoming(Start, VectorPH);
2170 EntryPart->setDebugLoc(getDebugLoc());
2171 State.set(this, EntryPart, 0, /*IsScalar=*/true);
2172}
2173
2174#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only printer: writes the indent,
// "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI " and " = phi " to the stream O.
// NOTE(review): listing lines 2175, 2179 and 2181 (the signature start and two
// statements) are absent from this listing.
2176 VPSlotTracker &SlotTracker) const {
2177 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2178
2180 O << " = phi ";
2182}
2183#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
Hexagon Common GEP
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C)
Checks if C is uniform across all VFs and UFs.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:457
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:294
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:290
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:124
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1450
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:323
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:260
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:207
bool willReturn() const
Determine if the function will return.
Definition: Function.h:653
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:242
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:586
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:92
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2359
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2470
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2092
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2458
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1531
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1165
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1192
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:464
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1090
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:173
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2031
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2533
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:524
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1974
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2079
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:172
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:309
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1864
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:275
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:484
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2364
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2395
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1747
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2239
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1342
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1118
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:1005
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2019
LLVMContext & getContext() const
Definition: IRBuilder.h:174
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1325
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2005
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1664
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1674
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2159
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:287
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:178
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1824
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2410
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2349
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1585
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:109
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1359
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:476
bool isBinaryOp() const
Definition: Instruction.h:279
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:276
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:473
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1814
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
Type * getRecurrenceType() const
Returns the type of the recurrence.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyzed scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:289
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2844
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2894
iterator end()
Definition: VPlan.h:2878
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2907
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1990
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1995
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:1987
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:497
size_t getNumSuccessors() const
Definition: VPlan.h:542
VPlan * getPlan()
Definition: VPlan.cpp:149
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:154
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:532
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2266
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:308
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:396
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:408
unsigned getVPDefID() const
Definition: VPlanValue.h:428
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2782
VPValue * getStartValue() const
Definition: VPlan.h:2781
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1686
bool hasResult() const
Definition: VPlan.h:1311
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1186
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1196
@ CalculateTripCountMinusVF
Definition: VPlan.h:1194
unsigned getOpcode() const
Definition: VPlan.h:1287
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:184
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:170
static VPLane getFirstLane()
Definition: VPlan.h:168
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:711
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:726
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:751
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:817
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:915
ExactFlagsTy ExactFlags
Definition: VPlan.h:971
FastMathFlagsTy FMFs
Definition: VPlan.h:974
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:973
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1100
bool isInBounds() const
Definition: VPlan.h:1142
GEPFlagsTy GEPFlags
Definition: VPlan.h:972
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1149
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:970
WrapFlagsTy WrapFlags
Definition: VPlan.h:969
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1153
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1136
bool hasNoSignedWrap() const
Definition: VPlan.h:1159
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1959
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2150
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2152
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2148
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3019
const VPBlockBase * getEntry() const
Definition: VPlan.h:3058
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:2230
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1429
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2831
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:906
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:449
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:203
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1324
operand_range operands()
Definition: VPlanValue.h:273
unsigned getNumOperands() const
Definition: VPlanValue.h:252
operand_iterator op_begin()
Definition: VPlanValue.h:269
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:253
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:119
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1320
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:173
user_range users()
Definition: VPlanValue.h:133
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1502
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1506
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1425
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1770
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1765
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1784
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1776
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3120
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3315
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5105
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:676
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1484
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3668
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1472
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1467
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
@ Offset
Definition: DWP.cpp:480
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2159
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1037
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:54
static bool isDbgInfoIntrinsic(Intrinsic::ID ID)
Check if ID corresponds to a debug info intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1211
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc. The target is queried to determi...
Definition: LoopUtils.cpp:1195
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:226
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:372
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:380
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:354
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:243
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:253
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:417
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:420
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:367
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:310
struct llvm::VPTransformState::CFGState CFG
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:295
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:255
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:397
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:283
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:277
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:249
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:406
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:378
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1549
VPValue * getCond() const
Definition: VPlan.h:1545
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.