LLVM 19.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
33#include <cassert>
34
35using namespace llvm;
36
38
39namespace llvm {
41}
43
44#define LV_NAME "loop-vectorize"
45#define DEBUG_TYPE LV_NAME
46
// NOTE(review): doxygen-scrape fragment. The original file-line numbers are
// fused into each line and the hyperlinked signature line (presumably
// `bool VPRecipeBase::mayWriteToMemory() const {`, original line 47) was
// stripped — confirm against the upstream file before editing.
// Classifies this recipe by whether its generated code may write to memory,
// keyed on the concrete recipe kind (VPDef ID).
48 switch (getVPDefID()) {
// An interleave group writes iff it contains at least one store member.
49 case VPInterleaveSC:
50 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
// Widened stores (EVL-predicated or plain) always write.
51 case VPWidenStoreEVLSC:
52 case VPWidenStoreSC:
53 return true;
// A replicated scalar recipe defers to its underlying IR instruction.
54 case VPReplicateSC:
55 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
56 ->mayWriteToMemory();
// A widened call may write unless the scalar callee is read-only.
57 case VPWidenCallSC:
58 return !cast<VPWidenCallRecipe>(this)
59 ->getCalledScalarFunction()
60 ->onlyReadsMemory();
// Pure control-flow / scalar-step bookkeeping recipes never write.
61 case VPBranchOnMaskSC:
62 case VPScalarIVStepsSC:
63 case VPPredInstPHISC:
64 return false;
// These widened recipes are known not to write; the assert cross-checks the
// claim against the underlying instruction when one is attached.
65 case VPBlendSC:
66 case VPReductionSC:
67 case VPWidenCanonicalIVSC:
68 case VPWidenCastSC:
69 case VPWidenGEPSC:
70 case VPWidenIntOrFpInductionSC:
71 case VPWidenLoadEVLSC:
72 case VPWidenLoadSC:
73 case VPWidenPHISC:
74 case VPWidenSC:
75 case VPWidenSelectSC: {
76 const Instruction *I =
77 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
// `(void)I` silences the unused-variable warning in NDEBUG builds, where the
// assert below compiles away.
78 (void)I;
79 assert((!I || !I->mayWriteToMemory()) &&
80 "underlying instruction may write to memory");
81 return false;
82 }
// Conservative default: unknown recipe kinds are assumed to write.
83 default:
84 return true;
85 }
86}
87
// NOTE(review): doxygen-scrape fragment. The stripped signature line
// (original line 88) is presumably
// `bool VPRecipeBase::mayReadFromMemory() const {` — confirm upstream.
// Mirror of mayWriteToMemory: classifies the recipe by whether its generated
// code may read from memory.
89 switch (getVPDefID()) {
// Widened loads (EVL-predicated or plain) always read.
90 case VPWidenLoadEVLSC:
91 case VPWidenLoadSC:
92 return true;
// A replicated scalar recipe defers to its underlying IR instruction.
93 case VPReplicateSC:
94 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
95 ->mayReadFromMemory();
// A widened call may read unless the scalar callee is write-only.
96 case VPWidenCallSC:
97 return !cast<VPWidenCallRecipe>(this)
98 ->getCalledScalarFunction()
99 ->onlyWritesMemory();
// Control-flow bookkeeping and stores never read.
100 case VPBranchOnMaskSC:
101 case VPPredInstPHISC:
102 case VPScalarIVStepsSC:
103 case VPWidenStoreEVLSC:
104 case VPWidenStoreSC:
105 return false;
// These widened recipes are known not to read; the assert cross-checks the
// claim against the underlying instruction when one is attached.
106 case VPBlendSC:
107 case VPReductionSC:
108 case VPWidenCanonicalIVSC:
109 case VPWidenCastSC:
110 case VPWidenGEPSC:
111 case VPWidenIntOrFpInductionSC:
112 case VPWidenPHISC:
113 case VPWidenSC:
114 case VPWidenSelectSC: {
115 const Instruction *I =
116 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
// Only referenced by the assert; keep NDEBUG builds warning-free.
117 (void)I;
118 assert((!I || !I->mayReadFromMemory()) &&
119 "underlying instruction may read from memory");
120 return false;
121 }
// Conservative default: unknown recipe kinds are assumed to read.
122 default:
123 return true;
124 }
125}
126
// NOTE(review): doxygen-scrape fragment. The stripped signature line
// (original line 127) is presumably
// `bool VPRecipeBase::mayHaveSideEffects() const {`. Interior lines 138-144
// (additional side-effect-free VPInstruction opcodes) and 179 (the second
// operand of the ingredient assert) were also stripped — restore from the
// upstream file before editing.
// Returns whether executing this recipe can have observable side effects
// (memory writes, traps, non-termination).
128 switch (getVPDefID()) {
// Derived-IV, predicated-PHI and scalar-cast recipes are pure.
129 case VPDerivedIVSC:
130 case VPPredInstPHISC:
131 case VPScalarCastSC:
132 return false;
// For abstract VPInstructions, whitelist side-effect-free opcodes.
133 case VPInstructionSC:
134 switch (cast<VPInstruction>(this)->getOpcode()) {
135 case Instruction::Or:
136 case Instruction::ICmp:
137 case Instruction::Select:
// NOTE(review): case labels from original lines 138-144 are missing here.
145 return false;
146 default:
147 return true;
148 }
// A call has side effects if it may write, may throw, or may not return.
149 case VPWidenCallSC: {
150 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
151 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
152 }
// Known side-effect-free widened recipes; assert cross-checks the underlying
// instruction when one is attached.
153 case VPBlendSC:
154 case VPReductionSC:
155 case VPScalarIVStepsSC:
156 case VPWidenCanonicalIVSC:
157 case VPWidenCastSC:
158 case VPWidenGEPSC:
159 case VPWidenIntOrFpInductionSC:
160 case VPWidenPHISC:
161 case VPWidenPointerInductionSC:
162 case VPWidenSC:
163 case VPWidenSelectSC: {
164 const Instruction *I =
165 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
166 (void)I;
167 assert((!I || !I->mayHaveSideEffects()) &&
168 "underlying instruction has side-effects");
169 return false;
170 }
// Interleave groups have side effects exactly when they write.
171 case VPInterleaveSC:
172 return mayWriteToMemory();
// Memory recipes: sanity-check against the ingredient, then defer to writes.
173 case VPWidenLoadEVLSC:
174 case VPWidenLoadSC:
175 case VPWidenStoreEVLSC:
176 case VPWidenStoreSC:
177 assert(
178 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
// NOTE(review): original line 179 (the right-hand side of this comparison,
// presumably `mayWriteToMemory() &&`) is missing from this scrape.
180 "mayHaveSideffects result for ingredient differs from this "
181 "implementation");
182 return mayWriteToMemory();
// Replicated recipes defer to the underlying scalar instruction.
183 case VPReplicateSC: {
184 auto *R = cast<VPReplicateRecipe>(this);
185 return R->getUnderlyingInstr()->mayHaveSideEffects();
186 }
// Conservative default.
187 default:
188 return true;
189 }
190}
191
193 VPValue *ExitValue = getOperand(0);
194 auto Lane = vputils::isUniformAfterVectorization(ExitValue)
197 VPBasicBlock *MiddleVPBB =
198 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
199 VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
200 auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
201 // Values leaving the vector loop reach live out phi's in the exiting block
202 // via middle block.
203 auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
204 ? MiddleVPBB
205 : ExitingVPBB;
206 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
207 // Set insertion point in PredBB in case an extract needs to be generated.
208 // TODO: Model extracts explicitly.
209 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
210 Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
211 if (Phi->getBasicBlockIndex(PredBB) != -1)
212 Phi->setIncomingValueForBlock(PredBB, V);
213 else
214 Phi->addIncoming(V, PredBB);
215}
216
217#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
219 O << "Live-out ";
221 O << " = ";
223 O << "\n";
224}
225#endif
226
// NOTE(review): stripped signature (original line 227) is presumably
// `void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {`.
// Inserts this (currently unlinked) recipe immediately before InsertPos,
// adopting InsertPos's parent VPBasicBlock.
228 assert(!Parent && "Recipe already in some VPBasicBlock");
229 assert(InsertPos->getParent() &&
230 "Insertion position not in any VPBasicBlock");
231 InsertPos->getParent()->insert(this, InsertPos->getIterator());
232}
233
// NOTE(review): stripped signature (original lines 234-235) presumably takes
// a VPBasicBlock &BB and an iterator I — confirm upstream.
// Inserts this (currently unlinked) recipe into BB at position I; I may be
// BB.end(), otherwise it must point into BB.
236 assert(!Parent && "Recipe already in some VPBasicBlock");
237 assert(I == BB.end() || I->getParent() == &BB);
238 BB.insert(this, I);
239}
240
// NOTE(review): stripped signature (original line 241) is presumably
// `void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {`.
// Inserts this (currently unlinked) recipe immediately after InsertPos —
// std::next moves past InsertPos before inserting.
242 assert(!Parent && "Recipe already in some VPBasicBlock");
243 assert(InsertPos->getParent() &&
244 "Insertion position not in any VPBasicBlock");
245 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
246}
247
249 assert(getParent() && "Recipe not in any VPBasicBlock");
251 Parent = nullptr;
252}
253
255 assert(getParent() && "Recipe not in any VPBasicBlock");
257}
258
261 insertAfter(InsertPos);
262}
263
267 insertBefore(BB, I);
268}
269
270/// Return the underlying instruction to be used for computing \p R's cost via
271/// the legacy cost model. Return nullptr if there's no suitable instruction.
// NOTE(review): the stripped signature (original line 272) is presumably
// `static Instruction *getInstructionForCost(const VPRecipeBase *R) {`.
// Single-def recipes: use the attached underlying IR value, if any.
273 if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
274 return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
// Interleave groups: cost is keyed on the group's insertion-point member.
275 if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
276 return IG->getInsertPos();
// Memory recipes: cost the original load/store "ingredient".
277 if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
278 return &WidenMem->getIngredient();
279 return nullptr;
280}
281
// NOTE(review): stripped signature (original line 282) is presumably
// `InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {`.
// Public cost entry point: skips instructions already costed, delegates to
// computeCost, and honors the -force-target-instruction-cost override.
283 if (auto *UI = getInstructionForCost(this))
284 if (Ctx.skipCostComputation(UI, VF.isVector()))
285 return 0;
286
287 InstructionCost RecipeCost = computeCost(VF, Ctx);
288 if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
289 RecipeCost.isValid())
// NOTE(review): the body of this if (original line 290, presumably assigning
// the forced cost to RecipeCost) was stripped from this scrape.
291
292 LLVM_DEBUG({
293 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
294 dump();
295 });
296 return RecipeCost;
297}
298
// NOTE(review): the first signature line (original 299, presumably
// `InstructionCost VPRecipeBase::computeCost(ElementCount VF,`) and line 304
// (presumably `Instruction *UI = getInstructionForCost(this);`, which
// defines the UI used below) were stripped from this scrape.
300 VPCostContext &Ctx) const {
301 // Compute the cost for the recipe falling back to the legacy cost model using
302 // the underlying instruction. If there is no underlying instruction, returns
303 // 0.
305 if (UI && isa<VPReplicateRecipe>(this)) {
306 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
307 // transform, avoid computing their cost multiple times for now.
308 Ctx.SkipCostComputation.insert(UI);
309 }
310 return UI ? Ctx.getLegacyCost(UI, VF) : 0;
311}
312
// NOTE(review): stripped signature (original line 313) is presumably
// `FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {`.
// Rebuilds an llvm::FastMathFlags value from the recipe's packed FMFs
// representation; only valid for FPMathOp recipes (asserted).
314 assert(OpType == OperationType::FPMathOp &&
315 "recipe doesn't have fast math flags");
316 FastMathFlags Res;
317 Res.setAllowReassoc(FMFs.AllowReassoc);
318 Res.setNoNaNs(FMFs.NoNaNs);
319 Res.setNoInfs(FMFs.NoInfs);
320 Res.setNoSignedZeros(FMFs.NoSignedZeros);
321 Res.setAllowReciprocal(FMFs.AllowReciprocal);
322 Res.setAllowContract(FMFs.AllowContract);
323 Res.setApproxFunc(FMFs.ApproxFunc);
324 return Res;
325}
326
329 const Twine &Name)
330 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
331 Pred, DL),
332 Opcode(Opcode), Name(Name.str()) {
333 assert(Opcode == Instruction::ICmp &&
334 "only ICmp predicates supported at the moment");
335}
336
338 std::initializer_list<VPValue *> Operands,
339 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
340 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
341 Opcode(Opcode), Name(Name.str()) {
342 // Make sure the VPInstruction is a floating-point operation.
343 assert(isFPMathOp() && "this op can't take fast-math flags");
344}
345
346bool VPInstruction::doesGeneratePerAllLanes() const {
347 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
348}
349
350bool VPInstruction::canGenerateScalarForFirstLane() const {
352 return true;
354 return true;
355 switch (Opcode) {
356 case Instruction::ICmp:
363 return true;
364 default:
365 return false;
366 }
367}
368
369Value *VPInstruction::generatePerLane(VPTransformState &State,
370 const VPIteration &Lane) {
371 IRBuilderBase &Builder = State.Builder;
372
374 "only PtrAdd opcodes are supported for now");
375 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
376 State.get(getOperand(1), Lane), Name);
377}
378
379Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
380 IRBuilderBase &Builder = State.Builder;
381
383 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
384 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
385 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
386 auto *Res =
387 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
388 if (auto *I = dyn_cast<Instruction>(Res))
389 setFlags(I);
390 return Res;
391 }
392
393 switch (getOpcode()) {
394 case VPInstruction::Not: {
395 Value *A = State.get(getOperand(0), Part);
396 return Builder.CreateNot(A, Name);
397 }
398 case Instruction::ICmp: {
399 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
400 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
401 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
402 return Builder.CreateCmp(getPredicate(), A, B, Name);
403 }
404 case Instruction::Select: {
405 Value *Cond = State.get(getOperand(0), Part);
406 Value *Op1 = State.get(getOperand(1), Part);
407 Value *Op2 = State.get(getOperand(2), Part);
408 return Builder.CreateSelect(Cond, Op1, Op2, Name);
409 }
411 // Get first lane of vector induction variable.
412 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
413 // Get the original loop tripcount.
414 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
415
416 // If this part of the active lane mask is scalar, generate the CMP directly
417 // to avoid unnecessary extracts.
418 if (State.VF.isScalar())
419 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
420 Name);
421
422 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
423 auto *PredTy = VectorType::get(Int1Ty, State.VF);
424 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
425 {PredTy, ScalarTC->getType()},
426 {VIVElem0, ScalarTC}, nullptr, Name);
427 }
429 // Generate code to combine the previous and current values in vector v3.
430 //
431 // vector.ph:
432 // v_init = vector(..., ..., ..., a[-1])
433 // br vector.body
434 //
435 // vector.body
436 // i = phi [0, vector.ph], [i+4, vector.body]
437 // v1 = phi [v_init, vector.ph], [v2, vector.body]
438 // v2 = a[i, i+1, i+2, i+3];
439 // v3 = vector(v1(3), v2(0, 1, 2))
440
441 // For the first part, use the recurrence phi (v1), otherwise v2.
442 auto *V1 = State.get(getOperand(0), 0);
443 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
444 if (!PartMinus1->getType()->isVectorTy())
445 return PartMinus1;
446 Value *V2 = State.get(getOperand(1), Part);
447 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
448 }
450 if (Part != 0)
451 return State.get(this, 0, /*IsScalar*/ true);
452
453 Value *ScalarTC = State.get(getOperand(0), {0, 0});
454 Value *Step =
455 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
456 Value *Sub = Builder.CreateSub(ScalarTC, Step);
457 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
458 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
459 return Builder.CreateSelect(Cmp, Sub, Zero);
460 }
462 // Compute EVL
463 auto GetEVL = [=](VPTransformState &State, Value *AVL) {
464 assert(AVL->getType()->isIntegerTy() &&
465 "Requested vector length should be an integer.");
466
467 // TODO: Add support for MaxSafeDist for correct loop emission.
468 assert(State.VF.isScalable() && "Expected scalable vector factor.");
469 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
470
471 Value *EVL = State.Builder.CreateIntrinsic(
472 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
473 {AVL, VFArg, State.Builder.getTrue()});
474 return EVL;
475 };
476 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
477 // be outside of the main loop.
478 assert(Part == 0 && "No unrolling expected for predicated vectorization.");
479 // Compute VTC - IV as the AVL (requested vector length).
480 Value *Index = State.get(getOperand(0), VPIteration(0, 0));
481 Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
482 Value *AVL = State.Builder.CreateSub(TripCount, Index);
483 Value *EVL = GetEVL(State, AVL);
484 return EVL;
485 }
487 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
488 if (Part == 0)
489 return IV;
490
491 // The canonical IV is incremented by the vectorization factor (num of SIMD
492 // elements) times the unroll part.
493 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
494 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
496 }
498 if (Part != 0)
499 return nullptr;
500
501 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
502 // Replace the temporary unreachable terminator with a new conditional
503 // branch, hooking it up to backward destination for exiting blocks now and
504 // to forward destination(s) later when they are created.
505 BranchInst *CondBr =
506 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
507 CondBr->setSuccessor(0, nullptr);
509
510 if (!getParent()->isExiting())
511 return CondBr;
512
513 VPRegionBlock *ParentRegion = getParent()->getParent();
514 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
515 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
516 return CondBr;
517 }
519 if (Part != 0)
520 return nullptr;
521 // First create the compare.
522 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
523 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
524 Value *Cond = Builder.CreateICmpEQ(IV, TC);
525
526 // Now create the branch.
527 auto *Plan = getParent()->getPlan();
528 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
529 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
530
531 // Replace the temporary unreachable terminator with a new conditional
532 // branch, hooking it up to backward destination (the header) now and to the
533 // forward destination (the exit/middle block) later when it is created.
534 // Note that CreateCondBr expects a valid BB as first argument, so we need
535 // to set it to nullptr later.
536 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
537 State.CFG.VPBB2IRBB[Header]);
538 CondBr->setSuccessor(0, nullptr);
540 return CondBr;
541 }
543 if (Part != 0)
544 return State.get(this, 0, /*IsScalar*/ true);
545
546 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
547 // and will be removed by breaking up the recipe further.
548 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
549 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
550 // Get its reduction variable descriptor.
551 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
552
553 RecurKind RK = RdxDesc.getRecurrenceKind();
554
555 VPValue *LoopExitingDef = getOperand(1);
556 Type *PhiTy = OrigPhi->getType();
557 VectorParts RdxParts(State.UF);
558 for (unsigned Part = 0; Part < State.UF; ++Part)
559 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
560
561 // If the vector reduction can be performed in a smaller type, we truncate
562 // then extend the loop exit value to enable InstCombine to evaluate the
563 // entire expression in the smaller type.
564 // TODO: Handle this in truncateToMinBW.
565 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
566 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
567 for (unsigned Part = 0; Part < State.UF; ++Part)
568 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
569 }
570 // Reduce all of the unrolled parts into a single vector.
571 Value *ReducedPartRdx = RdxParts[0];
572 unsigned Op = RecurrenceDescriptor::getOpcode(RK);
574 Op = Instruction::Or;
575
576 if (PhiR->isOrdered()) {
577 ReducedPartRdx = RdxParts[State.UF - 1];
578 } else {
579 // Floating-point operations should have some FMF to enable the reduction.
581 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
582 for (unsigned Part = 1; Part < State.UF; ++Part) {
583 Value *RdxPart = RdxParts[Part];
584 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
585 ReducedPartRdx = Builder.CreateBinOp(
586 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
587 else
588 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
589 }
590 }
591
592 // Create the reduction after the loop. Note that inloop reductions create
593 // the target reduction in the loop using a Reduction recipe.
594 if ((State.VF.isVector() ||
596 !PhiR->isInLoop()) {
597 ReducedPartRdx =
598 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
599 // If the reduction can be performed in a smaller type, we need to extend
600 // the reduction to the wider type before we branch to the original loop.
601 if (PhiTy != RdxDesc.getRecurrenceType())
602 ReducedPartRdx = RdxDesc.isSigned()
603 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
604 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
605 }
606
607 // If there were stores of the reduction value to a uniform memory address
608 // inside the loop, create the final store here.
609 if (StoreInst *SI = RdxDesc.IntermediateStore) {
610 auto *NewSI = Builder.CreateAlignedStore(
611 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
612 propagateMetadata(NewSI, SI);
613 }
614
615 return ReducedPartRdx;
616 }
618 if (Part != 0)
619 return State.get(this, 0, /*IsScalar*/ true);
620
621 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
622 unsigned Offset = CI->getZExtValue();
623 assert(Offset > 0 && "Offset from end must be positive");
624 Value *Res;
625 if (State.VF.isVector()) {
626 assert(Offset <= State.VF.getKnownMinValue() &&
627 "invalid offset to extract from");
628 // Extract lane VF - Offset from the operand.
629 Res = State.get(
630 getOperand(0),
631 VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset)));
632 } else {
633 assert(Offset <= State.UF && "invalid offset to extract from");
634 // When loop is unrolled without vectorizing, retrieve UF - Offset.
635 Res = State.get(getOperand(0), State.UF - Offset);
636 }
637 if (isa<ExtractElementInst>(Res))
638 Res->setName(Name);
639 return Res;
640 }
642 Value *A = State.get(getOperand(0), Part);
643 Value *B = State.get(getOperand(1), Part);
644 return Builder.CreateLogicalAnd(A, B, Name);
645 }
648 "can only generate first lane for PtrAdd");
649 Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
650 Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
651 return Builder.CreatePtrAdd(Ptr, Addend, Name);
652 }
654 if (Part != 0)
655 return State.get(this, 0, /*IsScalar*/ true);
656 Value *IncomingFromVPlanPred =
657 State.get(getOperand(0), Part, /* IsScalar */ true);
658 Value *IncomingFromOtherPreds =
659 State.get(getOperand(1), Part, /* IsScalar */ true);
660 auto *NewPhi =
661 Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
662 BasicBlock *VPlanPred =
663 State.CFG
664 .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
665 NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
666 for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
667 assert(OtherPred != VPlanPred &&
668 "VPlan predecessors should not be connected yet");
669 NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
670 }
671 return NewPhi;
672 }
673
674 default:
675 llvm_unreachable("Unsupported opcode for instruction");
676 }
677}
678
682}
683
686}
687
688#if !defined(NDEBUG)
689bool VPInstruction::isFPMathOp() const {
690 // Inspired by FPMathOperator::classof. Notable differences are that we don't
691 // support Call, PHI and Select opcodes here yet.
692 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
693 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
694 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
695 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
696}
697#endif
698
700 assert(!State.Instance && "VPInstruction executing an Instance");
702 assert((hasFastMathFlags() == isFPMathOp() ||
703 getOpcode() == Instruction::Select) &&
704 "Recipe not a FPMathOp but has fast-math flags?");
705 if (hasFastMathFlags())
708 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
711 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
712 bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this);
713 for (unsigned Part = 0; Part < State.UF; ++Part) {
714 if (GeneratesPerAllLanes) {
715 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
716 Lane != NumLanes; ++Lane) {
717 Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
718 assert(GeneratedValue && "generatePerLane must produce a value");
719 State.set(this, GeneratedValue, VPIteration(Part, Lane));
720 }
721 continue;
722 }
723
724 if (Part != 0 && OnlyFirstPartUsed && hasResult()) {
725 Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly);
726 State.set(this, Part0, Part,
727 /*IsScalar*/ GeneratesPerFirstLaneOnly);
728 continue;
729 }
730
731 Value *GeneratedValue = generatePerPart(State, Part);
732 if (!hasResult())
733 continue;
734 assert(GeneratedValue && "generatePerPart must produce a value");
735 assert((GeneratedValue->getType()->isVectorTy() ==
736 !GeneratesPerFirstLaneOnly ||
737 State.VF.isScalar()) &&
738 "scalar value but not only first lane defined");
739 State.set(this, GeneratedValue, Part,
740 /*IsScalar*/ GeneratesPerFirstLaneOnly);
741 }
742}
743
745 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
747 return vputils::onlyFirstLaneUsed(this);
748
749 switch (getOpcode()) {
750 default:
751 return false;
752 case Instruction::ICmp:
754 // TODO: Cover additional opcodes.
755 return vputils::onlyFirstLaneUsed(this);
763 return true;
764 };
765 llvm_unreachable("switch should return");
766}
767
769 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
771 return vputils::onlyFirstPartUsed(this);
772
773 switch (getOpcode()) {
774 default:
775 return false;
776 case Instruction::ICmp:
777 case Instruction::Select:
778 return vputils::onlyFirstPartUsed(this);
782 return true;
783 };
784 llvm_unreachable("switch should return");
785}
786
787#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
789 VPSlotTracker SlotTracker(getParent()->getPlan());
790 print(dbgs(), "", SlotTracker);
791}
792
794 VPSlotTracker &SlotTracker) const {
795 O << Indent << "EMIT ";
796
797 if (hasResult()) {
799 O << " = ";
800 }
801
802 switch (getOpcode()) {
804 O << "not";
805 break;
807 O << "combined load";
808 break;
810 O << "combined store";
811 break;
813 O << "active lane mask";
814 break;
816 O << "resume-phi";
817 break;
819 O << "EXPLICIT-VECTOR-LENGTH";
820 break;
822 O << "first-order splice";
823 break;
825 O << "branch-on-cond";
826 break;
828 O << "TC > VF ? TC - VF : 0";
829 break;
831 O << "VF * Part +";
832 break;
834 O << "branch-on-count";
835 break;
837 O << "extract-from-end";
838 break;
840 O << "compute-reduction-result";
841 break;
843 O << "logical-and";
844 break;
846 O << "ptradd";
847 break;
848 default:
850 }
851
852 printFlags(O);
854
855 if (auto DL = getDebugLoc()) {
856 O << ", !dbg ";
857 DL.print(O);
858 }
859}
860#endif
861
863 assert(State.VF.isVector() && "not widening");
864 Function *CalledScalarFn = getCalledScalarFunction();
865 assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
866 "DbgInfoIntrinsic should have been dropped during VPlan construction");
868
869 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
870 FunctionType *VFTy = nullptr;
871 if (Variant)
872 VFTy = Variant->getFunctionType();
873 for (unsigned Part = 0; Part < State.UF; ++Part) {
874 SmallVector<Type *, 2> TysForDecl;
875 // Add return type if intrinsic is overloaded on it.
876 if (UseIntrinsic &&
877 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
878 TysForDecl.push_back(VectorType::get(
879 CalledScalarFn->getReturnType()->getScalarType(), State.VF));
881 for (const auto &I : enumerate(arg_operands())) {
882 // Some intrinsics have a scalar argument - don't replace it with a
883 // vector.
884 Value *Arg;
885 if (UseIntrinsic &&
886 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
887 Arg = State.get(I.value(), VPIteration(0, 0));
888 // Some vectorized function variants may also take a scalar argument,
889 // e.g. linear parameters for pointers. This needs to be the scalar value
890 // from the start of the respective part when interleaving.
891 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
892 Arg = State.get(I.value(), VPIteration(Part, 0));
893 else
894 Arg = State.get(I.value(), Part);
895 if (UseIntrinsic &&
896 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
897 TysForDecl.push_back(Arg->getType());
898 Args.push_back(Arg);
899 }
900
901 Function *VectorF;
902 if (UseIntrinsic) {
903 // Use vector version of the intrinsic.
904 Module *M = State.Builder.GetInsertBlock()->getModule();
905 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
906 assert(VectorF && "Can't retrieve vector intrinsic.");
907 } else {
908#ifndef NDEBUG
909 assert(Variant != nullptr && "Can't create vector function.");
910#endif
911 VectorF = Variant;
912 }
913
914 auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
916 if (CI)
917 CI->getOperandBundlesAsDefs(OpBundles);
918
919 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
920
921 if (isa<FPMathOperator>(V))
922 V->copyFastMathFlags(CI);
923
924 if (!V->getType()->isVoidTy())
925 State.set(this, V, Part);
926 State.addMetadata(V, CI);
927 }
928}
929
930#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
932 VPSlotTracker &SlotTracker) const {
933 O << Indent << "WIDEN-CALL ";
934
935 Function *CalledFn = getCalledScalarFunction();
936 if (CalledFn->getReturnType()->isVoidTy())
937 O << "void ";
938 else {
940 O << " = ";
941 }
942
943 O << "call @" << CalledFn->getName() << "(";
945 Op->printAsOperand(O, SlotTracker);
946 });
947 O << ")";
948
949 if (VectorIntrinsicID)
950 O << " (using vector intrinsic)";
951 else {
952 O << " (using library function";
953 if (Variant->hasName())
954 O << ": " << Variant->getName();
955 O << ")";
956 }
957}
958
960 VPSlotTracker &SlotTracker) const {
961 O << Indent << "WIDEN-SELECT ";
963 O << " = select ";
965 O << ", ";
967 O << ", ";
969 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
970}
971#endif
972
975
976 // The condition can be loop invariant but still defined inside the
977 // loop. This means that we can't just use the original 'cond' value.
978 // We have to take the 'vectorized' value and pick the first lane.
979 // Instcombine will make this a no-op.
980 auto *InvarCond =
981 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
982
983 for (unsigned Part = 0; Part < State.UF; ++Part) {
984 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
985 Value *Op0 = State.get(getOperand(1), Part);
986 Value *Op1 = State.get(getOperand(2), Part);
987 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
988 State.set(this, Sel, Part);
989 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
990 }
991}
992
993VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
994 const FastMathFlags &FMF) {
995 AllowReassoc = FMF.allowReassoc();
996 NoNaNs = FMF.noNaNs();
997 NoInfs = FMF.noInfs();
998 NoSignedZeros = FMF.noSignedZeros();
999 AllowReciprocal = FMF.allowReciprocal();
1000 AllowContract = FMF.allowContract();
1001 ApproxFunc = FMF.approxFunc();
1002}
1003
1004#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1006 switch (OpType) {
1007 case OperationType::Cmp:
1009 break;
1010 case OperationType::DisjointOp:
1012 O << " disjoint";
1013 break;
1014 case OperationType::PossiblyExactOp:
1015 if (ExactFlags.IsExact)
1016 O << " exact";
1017 break;
1018 case OperationType::OverflowingBinOp:
1019 if (WrapFlags.HasNUW)
1020 O << " nuw";
1021 if (WrapFlags.HasNSW)
1022 O << " nsw";
1023 break;
1024 case OperationType::FPMathOp:
1026 break;
1027 case OperationType::GEPOp:
1028 if (GEPFlags.IsInBounds)
1029 O << " inbounds";
1030 break;
1031 case OperationType::NonNegOp:
1032 if (NonNegFlags.NonNeg)
1033 O << " nneg";
1034 break;
1035 case OperationType::Other:
1036 break;
1037 }
1038 if (getNumOperands() > 0)
1039 O << " ";
1040}
1041#endif
1042
// NOTE(review): body of what appears to be VPWidenRecipe::execute (the
// signature line was lost in extraction; the "WIDEN" printer below matches).
// Widens a scalar unop/binop/freeze/compare across all unroll parts by
// emitting one vector instruction per part via State.Builder.
 1045 auto &Builder = State.Builder;
 1046 switch (Opcode) {
// These opcodes have dedicated recipes and must never reach this switch.
 1047 case Instruction::Call:
 1048 case Instruction::Br:
 1049 case Instruction::PHI:
 1050 case Instruction::GetElementPtr:
 1051 case Instruction::Select:
 1052 llvm_unreachable("This instruction is handled by a different recipe.");
 1053 case Instruction::UDiv:
 1054 case Instruction::SDiv:
 1055 case Instruction::SRem:
 1056 case Instruction::URem:
 1057 case Instruction::Add:
 1058 case Instruction::FAdd:
 1059 case Instruction::Sub:
 1060 case Instruction::FSub:
 1061 case Instruction::FNeg:
 1062 case Instruction::Mul:
 1063 case Instruction::FMul:
 1064 case Instruction::FDiv:
 1065 case Instruction::FRem:
 1066 case Instruction::Shl:
 1067 case Instruction::LShr:
 1068 case Instruction::AShr:
 1069 case Instruction::And:
 1070 case Instruction::Or:
 1071 case Instruction::Xor: {
 1072 // Just widen unops and binops.
 1073 for (unsigned Part = 0; Part < State.UF; ++Part) {
// NOTE(review): the declaration of `Ops` (original line 1074, presumably a
// SmallVector<Value *, N>) was dropped by the extraction — confirm upstream.
 1075 for (VPValue *VPOp : operands())
 1076 Ops.push_back(State.get(VPOp, Part));
 1077
 1078 Value *V = Builder.CreateNAryOp(Opcode, Ops);
 1079
// Transfer wrapping/FP flags onto the generated instruction, if it is one
// (constant folding may yield a non-Instruction Value).
 1080 if (auto *VecOp = dyn_cast<Instruction>(V))
 1081 setFlags(VecOp);
 1082
 1083 // Use this vector value for all users of the original instruction.
 1084 State.set(this, V, Part);
 1085 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
 1086 }
 1087
 1088 break;
 1089 }
 1090 case Instruction::Freeze: {
 1091 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1092 Value *Op = State.get(getOperand(0), Part);
 1093
 1094 Value *Freeze = Builder.CreateFreeze(Op);
 1095 State.set(this, Freeze, Part);
 1096 }
 1097 break;
 1098 }
 1099 case Instruction::ICmp:
 1100 case Instruction::FCmp: {
 1101 // Widen compares. Generate vector compares.
 1102 bool FCmp = Opcode == Instruction::FCmp;
 1103 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1104 Value *A = State.get(getOperand(0), Part);
 1105 Value *B = State.get(getOperand(1), Part);
 1106 Value *C = nullptr;
 1107 if (FCmp) {
 1108 // Propagate fast math flags.
// FMF guard restores the builder's previous fast-math flags on scope exit.
 1109 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
 1110 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
 1111 Builder.setFastMathFlags(I->getFastMathFlags());
 1112 C = Builder.CreateFCmp(getPredicate(), A, B);
 1113 } else {
 1114 C = Builder.CreateICmp(getPredicate(), A, B);
 1115 }
 1116 State.set(this, C, Part);
 1117 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
 1118 }
 1119
 1120 break;
 1121 }
 1122 default:
 1123 // This instruction is not vectorized by simple widening.
 1124 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
 1125 << Instruction::getOpcodeName(Opcode));
 1126 llvm_unreachable("Unhandled instruction!");
 1127 } // end of switch.
 1128
 1129#if !defined(NDEBUG)
 1130 // Verify that VPlan type inference results agree with the type of the
 1131 // generated values.
 1132 for (unsigned Part = 0; Part < State.UF; ++Part) {
// NOTE(review): the first line of this assert (original line 1133) was
// dropped by the extraction; it presumably builds the inferred vector type.
 1134 State.VF) == State.get(this, Part)->getType() &&
 1135 "inferred type and type from generated instructions do not match");
 1136 }
 1137#endif
 1138}
1139
 1140#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer; its signature line and the printAsOperand /
// printOperands call lines (1141, 1144, 1147) were dropped by the extraction.
// Emits "WIDEN <result> = <opcode> <flags> <operands>".
 1142 VPSlotTracker &SlotTracker) const {
 1143 O << Indent << "WIDEN ";
 1145 O << " = " << Instruction::getOpcodeName(Opcode);
 1146 printFlags(O);
 1148}
 1149#endif
1150
// NOTE(review): body of the widen-cast recipe's execute (signature line lost
// in extraction; the "WIDEN-CAST" printer below matches). Emits one vector
// cast per unroll part, reusing the Part-0 value for live-in operands.
 1153 auto &Builder = State.Builder;
 1154 /// Vectorize casts.
 1155 assert(State.VF.isVector() && "Not vectorizing?");
 1156 Type *DestTy = VectorType::get(getResultType(), State.VF);
 1157 VPValue *Op = getOperand(0);
 1158 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1159 if (Part > 0 && Op->isLiveIn()) {
// A live-in operand is identical for all parts, so part 0's cast suffices.
 1160 // FIXME: Remove once explicit unrolling is implemented using VPlan.
 1161 State.set(this, State.get(this, 0), Part);
 1162 continue;
 1163 }
 1164 Value *A = State.get(Op, Part);
 1165 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
 1166 State.set(this, Cast, Part);
 1167 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
 1168 }
 1169}
1170
 1171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer for the widen-cast recipe; signature and
// operand-printing lines (1172, 1175, 1178) were dropped by the extraction.
// Emits "WIDEN-CAST <result> = <opcode> <flags> <operands> to <type>".
 1173 VPSlotTracker &SlotTracker) const {
 1174 O << Indent << "WIDEN-CAST ";
 1176 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
 1177 printFlags(O);
 1179 O << " to " << *getResultType();
 1180}
 1181#endif
1182
 1183/// This function adds
 1184/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
 1185/// to each vector element of Val. The sequence starts at StartIndex.
 1186/// \p Opcode is relevant for FP induction variable.
 1187static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
// NOTE(review): the middle parameter line (original 1188, presumably
// `Instruction::BinaryOps BinOp, ElementCount VF,`) was dropped by the
// extraction — the callers below pass an opcode and VF in that position.
 1189 IRBuilderBase &Builder) {
 1190 assert(VF.isVector() && "only vector VFs are supported");
 1191
 1192 // Create and check the types.
 1193 auto *ValVTy = cast<VectorType>(Val->getType());
 1194 ElementCount VLen = ValVTy->getElementCount();
 1195
 1196 Type *STy = Val->getType()->getScalarType();
 1197 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
 1198 "Induction Step must be an integer or FP");
 1199 assert(Step->getType() == STy && "Step has wrong type");
 1200
 1202
 1203 // Create a vector of consecutive numbers from zero to VF.
// For FP inductions the step-vector is first built over an equally-sized
// integer type and converted to FP via uitofp below.
 1204 VectorType *InitVecValVTy = ValVTy;
 1205 if (STy->isFloatingPointTy()) {
 1206 Type *InitVecValSTy =
// NOTE(review): the IntegerType::get(...) line (original 1207) was dropped
// by the extraction.
 1208 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
 1209 }
 1210 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
 1211
 1212 // Splat the StartIdx
 1213 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
 1214
 1215 if (STy->isIntegerTy()) {
 1216 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
 1217 Step = Builder.CreateVectorSplat(VLen, Step);
 1218 assert(Step->getType() == Val->getType() && "Invalid step vec");
 1219 // FIXME: The newly created binary instructions should contain nsw/nuw
 1220 // flags, which can be found from the original scalar operations.
 1221 Step = Builder.CreateMul(InitVec, Step);
 1222 return Builder.CreateAdd(Val, Step, "induction");
 1223 }
 1224
 1225 // Floating point induction.
 1226 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
 1227 "Binary Opcode should be specified for FP induction");
 1228 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
 1229 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
 1230
 1231 Step = Builder.CreateVectorSplat(VLen, Step);
 1232 Value *MulOp = Builder.CreateFMul(InitVec, Step);
 1233 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
 1234}
1235
 1236/// A helper function that returns an integer or floating-point constant with
 1237/// value C.
// NOTE(review): the signature line (original 1238, presumably
// `static Value *getSignedIntOrFpConstant(Type *Ty, int64_t C)`) was dropped
// by the extraction; callers below pass a Type* and an integer.
 1239 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
 1240 : ConstantFP::get(Ty, C);
 1241}
1242
// NOTE(review): the first signature line (original 1243, presumably
// `static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,`) was
// dropped by the extraction. Computes the runtime VF as an integer of the
// same bit width as FTy, then converts it to floating point via uitofp.
 1244 ElementCount VF) {
 1245 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
 1246 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
 1247 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
 1248 return B.CreateUIToFP(RuntimeVF, FTy);
 1249}
1250
// NOTE(review): body of the widen int/FP induction recipe's execute (the
// signature line was lost in extraction; the "WIDEN-INDUCTION" printer
// below matches). Builds the initial stepped vector IV in the preheader,
// then a vector PHI plus one step-add per unroll part.
 1252 assert(!State.Instance && "Int or FP induction being replicated.");
 1253
 1254 Value *Start = getStartValue()->getLiveInIRValue();
// NOTE(review): original line 1255 (presumably fetching the induction
// descriptor `ID`) was dropped by the extraction; `ID` and `IV` are used
// below without a visible definition.
 1256 TruncInst *Trunc = getTruncInst();
 1257 IRBuilderBase &Builder = State.Builder;
 1258 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
 1259 assert(State.VF.isVector() && "must have vector VF");
 1260
 1261 // The value from the original loop to which we are mapping the new induction
 1262 // variable.
 1263 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
 1264
 1265 // Fast-math-flags propagate from the original induction instruction.
 1266 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
 1267 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
 1268 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
 1269
 1270 // Now do the actual transformations, and start with fetching the step value.
 1271 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
 1272
 1273 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
 1274 "Expected either an induction phi-node or a truncate of it!");
 1275
 1276 // Construct the initial value of the vector IV in the vector loop preheader
 1277 auto CurrIP = Builder.saveIP();
 1278 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
 1279 Builder.SetInsertPoint(VectorPH->getTerminator());
 1280 if (isa<TruncInst>(EntryVal)) {
 1281 assert(Start->getType()->isIntegerTy() &&
 1282 "Truncation requires an integer type");
 1283 auto *TruncType = cast<IntegerType>(EntryVal->getType());
// Narrow both the step and the start so the whole IV is computed in the
// truncated type.
 1284 Step = Builder.CreateTrunc(Step, TruncType);
 1285 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
 1286 }
 1287
 1288 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
 1289 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
 1290 Value *SteppedStart = getStepVector(
 1291 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
 1292
 1293 // We create vector phi nodes for both integer and floating-point induction
 1294 // variables. Here, we determine the kind of arithmetic we will perform.
// NOTE(review): the declarations of AddOp/MulOp (original lines 1295-1296)
// were dropped by the extraction.
 1297 if (Step->getType()->isIntegerTy()) {
 1298 AddOp = Instruction::Add;
 1299 MulOp = Instruction::Mul;
 1300 } else {
 1301 AddOp = ID.getInductionOpcode();
 1302 MulOp = Instruction::FMul;
 1303 }
 1304
 1305 // Multiply the vectorization factor by the step using integer or
 1306 // floating-point arithmetic as appropriate.
 1307 Type *StepType = Step->getType();
 1308 Value *RuntimeVF;
 1309 if (Step->getType()->isFloatingPointTy())
 1310 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
 1311 else
 1312 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
 1313 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
 1314
 1315 // Create a vector splat to use in the induction update.
 1316 //
 1317 // FIXME: If the step is non-constant, we create the vector splat with
 1318 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
 1319 // handle a constant vector splat.
 1320 Value *SplatVF = isa<Constant>(Mul)
 1321 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
 1322 : Builder.CreateVectorSplat(State.VF, Mul);
// Return to the position saved before we detoured into the preheader.
 1323 Builder.restoreIP(CurrIP);
 1324
 1325 // We may need to add the step a number of times, depending on the unroll
 1326 // factor. The last of those goes into the PHI.
 1327 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
 1328 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
 1329 VecInd->setDebugLoc(EntryVal->getDebugLoc());
 1330 Instruction *LastInduction = VecInd;
 1331 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1332 State.set(this, LastInduction, Part);
 1333
 1334 if (isa<TruncInst>(EntryVal))
 1335 State.addMetadata(LastInduction, EntryVal);
 1336
 1337 LastInduction = cast<Instruction>(
 1338 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
 1339 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
 1340 }
 1341
 1342 LastInduction->setName("vec.ind.next");
 1343 VecInd->addIncoming(SteppedStart, VectorPH);
 1344 // Add induction update using an incorrect block temporarily. The phi node
 1345 // will be fixed after VPlan execution. Note that at this point the latch
 1346 // block cannot be used, as it does not exist yet.
 1347 // TODO: Model increment value in VPlan, by turning the recipe into a
 1348 // multi-def and a subclass of VPHeaderPHIRecipe.
 1349 VecInd->addIncoming(LastInduction, VectorPH);
 1350}
1351
 1352#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer for the widen-induction recipe; its signature
// line and some operand-printing lines (1353, 1360, 1365) were dropped by
// the extraction. The "\l" escapes format multi-line labels for DOT output.
 1354 VPSlotTracker &SlotTracker) const {
 1355 O << Indent << "WIDEN-INDUCTION";
 1356 if (getTruncInst()) {
 1357 O << "\\l\"";
 1358 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
 1359 O << " +\n" << Indent << "\" ";
 1361 } else
 1362 O << " " << VPlanIngredient(IV);
 1363
 1364 O << ", ";
 1366}
 1367#endif
1368
// NOTE(review): predicate body (signature line lost in extraction);
// returns true when this derived IV is equivalent to the canonical IV:
// start 0, step 1, and the same scalar type as the canonical IV PHI.
 1370 // The step may be defined by a recipe in the preheader (e.g. if it requires
 1371 // SCEV expansion), but for the canonical induction the step is required to be
 1372 // 1, which is represented as live-in.
// NOTE(review): the guard condition line (original 1373, presumably checking
// that the step value is defined by a recipe) was dropped by the extraction.
 1374 return false;
 1375 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
 1376 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
// The canonical IV is by construction the first recipe in the header block.
 1377 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
 1378 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
 1379 getScalarType() == CanIV->getScalarType();
 1380}
1381
 1382#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer; signature and the per-operand print lines
// (1383, 1386, 1388, 1390, 1392) were dropped by the extraction. Emits
// "<result> = DERIVED-IV <start> + <iv> * <step>".
 1384 VPSlotTracker &SlotTracker) const {
 1385 O << Indent;
 1387 O << Indent << "= DERIVED-IV ";
 1389 O << " + ";
 1391 O << " * ";
 1393}
 1394#endif
1395
// NOTE(review): body of the scalar-IV-steps recipe's execute (signature line
// lost in extraction; the "SCALAR-STEPS" printer below matches). Generates
// per-lane scalar IV values BaseIV + (Part*VF + Lane) * Step, plus a full
// vector of steps for scalable VFs when more than the first lane is used.
 1396
 1397 // Fast-math-flags propagate from the original induction instruction.
 1399 if (hasFastMathFlags())
// NOTE(review): the FMF-guard declaration (1398) and the setFastMathFlags
// call (1400) were dropped by the extraction.
 1401
 1402 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
 1403 /// variable on which to base the steps, \p Step is the size of the step.
 1404
 1405 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
 1406 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
 1407 IRBuilderBase &Builder = State.Builder;
 1408
 1409 // Ensure step has the same type as that of scalar IV.
 1410 Type *BaseIVTy = BaseIV->getType()->getScalarType();
 1411 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
 1412
 1413 // We build scalar steps for both integer and floating-point induction
 1414 // variables. Here, we determine the kind of arithmetic we will perform.
// NOTE(review): the AddOp/MulOp declarations (1415-1416) were dropped by
// the extraction.
 1417 if (BaseIVTy->isIntegerTy()) {
 1418 AddOp = Instruction::Add;
 1419 MulOp = Instruction::Mul;
 1420 } else {
 1421 AddOp = InductionOpcode;
 1422 MulOp = Instruction::FMul;
 1423 }
 1424
 1425 // Determine the number of scalars we need to generate for each unroll
 1426 // iteration.
 1427 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
 1428 // Compute the scalar steps and save the results in State.
 1429 Type *IntStepTy =
 1430 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
 1431 Type *VecIVTy = nullptr;
 1432 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
// For scalable VFs we cannot enumerate lanes at compile time, so prepare
// splatted/step-vector helpers to compute all lanes with vector ops.
 1433 if (!FirstLaneOnly && State.VF.isScalable()) {
 1434 VecIVTy = VectorType::get(BaseIVTy, State.VF);
 1435 UnitStepVec =
 1436 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
 1437 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
 1438 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
 1439 }
 1440
// When replicating a single instance, restrict generation to exactly that
// part and lane.
 1441 unsigned StartPart = 0;
 1442 unsigned EndPart = State.UF;
 1443 unsigned StartLane = 0;
 1444 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
 1445 if (State.Instance) {
 1446 StartPart = State.Instance->Part;
 1447 EndPart = StartPart + 1;
 1448 StartLane = State.Instance->Lane.getKnownLane();
 1449 EndLane = StartLane + 1;
 1450 }
 1451 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
 1452 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
 1453
 1454 if (!FirstLaneOnly && State.VF.isScalable()) {
 1455 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
 1456 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
 1457 if (BaseIVTy->isFloatingPointTy())
 1458 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
 1459 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
 1460 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
 1461 State.set(this, Add, Part);
 1462 // It's useful to record the lane values too for the known minimum number
 1463 // of elements so we do those below. This improves the code quality when
 1464 // trying to extract the first element, for example.
 1465 }
 1466
 1467 if (BaseIVTy->isFloatingPointTy())
 1468 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy)
 1469
 1470 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
 1471 Value *StartIdx = Builder.CreateBinOp(
 1472 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
 1473 // The step returned by `createStepForVF` is a runtime-evaluated value
 1474 // when VF is scalable. Otherwise, it should be folded into a Constant.
 1475 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
 1476 "Expected StartIdx to be folded to a constant when VF is not "
 1477 "scalable");
 1478 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
 1479 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
 1480 State.set(this, Add, VPIteration(Part, Lane));
 1481 }
 1482 }
 1483}
1484
 1485#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer; signature and printAsOperand/printOperands
// lines (1486, 1489, 1491) were dropped by the extraction. Emits
// "<result> = SCALAR-STEPS <operands>".
 1487 VPSlotTracker &SlotTracker) const {
 1488 O << Indent;
 1490 O << " = SCALAR-STEPS ";
 1492}
 1493#endif
1494
// NOTE(review): body of the widen-GEP recipe's execute (signature line lost
// in extraction; the "WIDEN-GEP" printer below matches). Produces a vector
// of pointers per unroll part, splatting a scalar GEP when all operands are
// loop-invariant and otherwise widening only the loop-varying operands.
 1496 assert(State.VF.isVector() && "not widening");
 1497 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
 1498 // Construct a vector GEP by widening the operands of the scalar GEP as
 1499 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
 1500 // results in a vector of pointers when at least one operand of the GEP
 1501 // is vector-typed. Thus, to keep the representation compact, we only use
 1502 // vector-typed operands for loop-varying values.
 1503
 1504 if (areAllOperandsInvariant()) {
 1505 // If we are vectorizing, but the GEP has only loop-invariant operands,
 1506 // the GEP we build (by only using vector-typed operands for
 1507 // loop-varying values) would be a scalar pointer. Thus, to ensure we
 1508 // produce a vector of pointers, we need to either arbitrarily pick an
 1509 // operand to broadcast, or broadcast a clone of the original GEP.
 1510 // Here, we broadcast a clone of the original.
 1511 //
 1512 // TODO: If at some point we decide to scalarize instructions having
 1513 // loop-invariant operands, this special case will no longer be
 1514 // required. We would add the scalarization decision to
 1515 // collectLoopScalars() and teach getVectorValue() to broadcast
 1516 // the lane-zero scalar value.
// NOTE(review): the declaration of `Ops` (original line 1517, presumably a
// SmallVector<Value *, 4>) was dropped by the extraction.
 1518 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
 1519 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
 1520
 1521 auto *NewGEP =
 1522 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
 1523 ArrayRef(Ops).drop_front(), "", isInBounds());
 1524 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1525 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
 1526 State.set(this, EntryPart, Part);
 1527 State.addMetadata(EntryPart, GEP);
 1528 }
 1529 } else {
 1530 // If the GEP has at least one loop-varying operand, we are sure to
 1531 // produce a vector of pointers. But if we are only unrolling, we want
 1532 // to produce a scalar GEP for each unroll part. Thus, the GEP we
 1533 // produce with the code below will be scalar (if VF == 1) or vector
 1534 // (otherwise). Note that for the unroll-only case, we still maintain
 1535 // values in the vector mapping with initVector, as we do for other
 1536 // instructions.
 1537 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1538 // The pointer operand of the new GEP. If it's loop-invariant, we
 1539 // won't broadcast it.
 1540 auto *Ptr = isPointerLoopInvariant()
 1541 ? State.get(getOperand(0), VPIteration(0, 0))
 1542 : State.get(getOperand(0), Part);
 1543
 1544 // Collect all the indices for the new GEP. If any index is
 1545 // loop-invariant, we won't broadcast it.
// NOTE(review): the declaration of `Indices` (original line 1546) was
// dropped by the extraction.
 1547 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
 1548 VPValue *Operand = getOperand(I);
 1549 if (isIndexLoopInvariant(I - 1))
 1550 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
 1551 else
 1552 Indices.push_back(State.get(Operand, Part));
 1553 }
 1554
 1555 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
 1556 // but it should be a vector, otherwise.
 1557 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
 1558 Indices, "", isInBounds());
 1559 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
 1560 "NewGEP is not a pointer vector");
 1561 State.set(this, NewGEP, Part);
 1562 State.addMetadata(NewGEP, GEP);
 1563 }
 1564 }
 1565}
1566
 1567#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer; signature and operand-printing lines (1568,
// 1576, 1579) were dropped by the extraction. Emits "WIDEN-GEP" followed by
// Inv/Var tags for the pointer and each index operand.
 1569 VPSlotTracker &SlotTracker) const {
 1570 O << Indent << "WIDEN-GEP ";
 1571 O << (isPointerLoopInvariant() ? "Inv" : "Var");
 1572 for (size_t I = 0; I < getNumOperands() - 1; ++I)
 1573 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
 1574
 1575 O << " ";
 1577 O << " = getelementptr";
 1578 printFlags(O);
 1580}
 1581#endif
1582
/// Computes, per unroll part, the pointer at which this part's consecutive
/// vector access starts. For reversed accesses the pointer is adjusted so the
/// wide load/store begins at the last element of the reversed range.
void VPVectorPointerRecipe ::execute(VPTransformState &State) {
 1584 auto &Builder = State.Builder;
// NOTE(review): original line 1585 (presumably setting the builder's debug
// location from this recipe) was dropped by the extraction.
 1586 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1587 // Calculate the pointer for the specific unroll-part.
 1588 Value *PartPtr = nullptr;
 1589 // Use i32 for the gep index type when the value is constant,
 1590 // or query DataLayout for a more suitable index type otherwise.
 1591 const DataLayout &DL =
 1592 Builder.GetInsertBlock()->getDataLayout();
 1593 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
 1594 ? DL.getIndexType(IndexedTy->getPointerTo())
 1595 : Builder.getInt32Ty();
 1596 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
 1597 bool InBounds = isInBounds();
 1598 if (IsReverse) {
 1599 // If the address is consecutive but reversed, then the
 1600 // wide store needs to start at the last vector element.
 1601 // RunTimeVF = VScale * VF.getKnownMinValue()
 1602 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
 1603 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
 1604 // NumElt = -Part * RunTimeVF
 1605 Value *NumElt = Builder.CreateMul(
 1606 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
 1607 // LastLane = 1 - RunTimeVF
 1608 Value *LastLane =
 1609 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
// Two GEPs: first step back over whole parts, then to the last lane.
 1610 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
 1611 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
 1612 } else {
 1613 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
 1614 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
 1615 }
 1616
// The per-part pointer is a scalar value (one pointer per part).
 1617 State.set(this, PartPtr, Part, /*IsScalar*/ true);
 1618 }
 1619}
1620
 1621#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer; signature and printAsOperand/printOperands
// lines (1622, 1625, 1630) were dropped by the extraction. Emits
// "<result> = vector-pointer [(reverse)] <operands>".
 1623 VPSlotTracker &SlotTracker) const {
 1624 O << Indent;
 1626 O << " = vector-pointer ";
 1627 if (IsReverse)
 1628 O << "(reverse) ";
 1629
 1631}
 1632#endif
1633
// NOTE(review): body of the blend recipe's execute (signature line lost in
// extraction; the "BLEND" printer below matches). Lowers a multi-incoming
// PHI into a chain of selects keyed on the per-edge masks.
 1636 // We know that all PHIs in non-header blocks are converted into
 1637 // selects, so we don't have to worry about the insertion order and we
 1638 // can just use the builder.
 1639 // At this point we generate the predication tree. There may be
 1640 // duplications since this is a simple recursive scan, but future
 1641 // optimizations will clean it up.
 1642
 1643 unsigned NumIncoming = getNumIncomingValues();
 1644
 1645 // Generate a sequence of selects of the form:
 1646 // SELECT(Mask3, In3,
 1647 // SELECT(Mask2, In2,
 1648 // SELECT(Mask1, In1,
 1649 // In0)))
 1650 // Note that Mask0 is never used: lanes for which no path reaches this phi and
 1651 // are essentially undef are taken from In0.
 1652 VectorParts Entry(State.UF);
 1653 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
 1654 for (unsigned In = 0; In < NumIncoming; ++In) {
 1655 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1656 // We might have single edge PHIs (blocks) - use an identity
 1657 // 'select' for the first PHI operand.
 1658 Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
 1659 if (In == 0)
 1660 Entry[Part] = In0; // Initialize with the first incoming value.
 1661 else {
 1662 // Select between the current value and the previous incoming edge
 1663 // based on the incoming mask.
 1664 Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
 1665 Entry[Part] =
 1666 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
 1667 }
 1668 }
 1669 }
 1670 for (unsigned Part = 0; Part < State.UF; ++Part)
 1671 State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
 1672}
1673
 1674#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer; signature and per-operand print lines (1675,
// 1678, 1684, 1688, 1692) were dropped by the extraction. Prints
// "BLEND <result> = <value>/<mask> ..." or a plain copy for a
// single-predecessor phi.
 1676 VPSlotTracker &SlotTracker) const {
 1677 O << Indent << "BLEND ";
 1679 O << " =";
 1680 if (getNumIncomingValues() == 1) {
 1681 // Not a User of any mask: not really blending, this is a
 1682 // single-predecessor phi.
 1683 O << " ";
 1685 } else {
 1686 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
 1687 O << " ";
// The first incoming value has no mask printed (Mask0 is unused).
 1689 if (I == 0)
 1690 continue;
 1691 O << "/";
 1693 }
 1694 }
 1695}
1697
// NOTE(review): body of the reduction recipe's execute (signature line lost
// in extraction; the "REDUCE" printer below matches). Per part: optionally
// masks out inactive lanes with the reduction identity, then reduces either
// in order (chained) or via a target reduction intrinsic.
 1699 assert(!State.Instance && "Reduction being replicated.");
 1700 Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
 1701 RecurKind Kind = RdxDesc.getRecurrenceKind();
 1702 // Propagate the fast-math flags carried by the underlying instruction.
// NOTE(review): the FMF-guard declaration (original line 1703) was dropped
// by the extraction.
 1704 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
 1705 for (unsigned Part = 0; Part < State.UF; ++Part) {
 1706 Value *NewVecOp = State.get(getVecOp(), Part);
 1707 if (VPValue *Cond = getCondOp()) {
 1708 Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
 1709 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
 1710 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
// Inactive lanes are replaced by the reduction's identity element so they
// do not affect the reduced result.
 1711 Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
 1712 RdxDesc.getFastMathFlags());
 1713 if (State.VF.isVector()) {
 1714 Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
 1715 }
 1716
 1717 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
 1718 NewVecOp = Select;
 1719 }
 1720 Value *NewRed;
 1721 Value *NextInChain;
 1722 if (IsOrdered) {
 1723 if (State.VF.isVector())
 1724 NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
 1725 PrevInChain);
 1726 else
 1727 NewRed = State.Builder.CreateBinOp(
 1728 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
 1729 NewVecOp);
 1730 PrevInChain = NewRed;
 1731 } else {
 1732 PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
 1733 NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
 1734 }
// NOTE(review): the condition line (original 1735, presumably testing for a
// min/max recurrence kind) was dropped by the extraction.
 1736 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
 1737 NewRed, PrevInChain);
 1738 } else if (IsOrdered)
 1739 NextInChain = NewRed;
 1740 else
 1741 NextInChain = State.Builder.CreateBinOp(
 1742 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
 1743 State.set(this, NextInChain, Part, /*IsScalar*/ true);
 1744 }
 1745}
1746
 1747#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer; signature and several operand/FMF print
// lines (1748, 1751, 1753, 1756, 1758, 1761) were dropped by the
// extraction. Emits "REDUCE <result> = <chain> + reduce.<op> (<vec> [, <cond>])".
 1749 VPSlotTracker &SlotTracker) const {
 1750 O << Indent << "REDUCE ";
 1752 O << " = ";
 1754 O << " +";
 1755 if (isa<FPMathOperator>(getUnderlyingInstr()))
 1757 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
 1759 if (getCondOp()) {
 1760 O << ", ";
 1762 }
 1763 O << ")";
 1764 if (RdxDesc.IntermediateStore)
 1765 O << " (with final reduction value stored in invariant address sank "
 1766 "outside of loop)";
 1767}
 1768#endif
1769
// NOTE(review): predicate body (signature line lost in extraction; the
// "(S->V)" printer below uses it). Returns true when the replicated scalar
// values must additionally be packed into a vector.
 1771 // Find if the recipe is used by a widened recipe via an intervening
 1772 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
 1773 return any_of(users(), [](const VPUser *U) {
 1774 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
// A user of the PHI that does not use scalars needs the packed vector form.
 1775 return any_of(PredR->users(), [PredR](const VPUser *U) {
 1776 return !U->usesScalars(PredR);
 1777 });
 1778 return false;
 1779 });
 1780}
1781
 1782#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): debug printer; signature and several print lines (1783,
// 1788, 1795, 1801, 1803) were dropped by the extraction. Emits
// "CLONE"/"REPLICATE" with call or opcode form, and "(S->V)" when the
// scalars are additionally packed into a vector (see shouldPack above... no:
// see the packing predicate earlier in this file).
 1784 VPSlotTracker &SlotTracker) const {
 1785 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
 1786
 1787 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
 1789 O << " = ";
 1790 }
 1791 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
 1792 O << "call";
 1793 printFlags(O);
 1794 O << "@" << CB->getCalledFunction()->getName() << "(";
 1796 O, [&O, &SlotTracker](VPValue *Op) {
 1797 Op->printAsOperand(O, SlotTracker);
 1798 });
 1799 O << ")";
 1800 } else {
 1802 printFlags(O);
 1804 }
 1805
 1806 if (shouldPack())
 1807 O << " (S->V)";
 1808}
1810
 1811/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
 1812/// if it is either defined outside the vector region or its operand is known to
 1813/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
 1814/// TODO: Uniformity should be associated with a VPValue and there should be a
 1815/// generic way to check.
// NOTE(review): the signature line (original 1816, presumably
// `static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {`) was
// dropped by the extraction; the caller below passes `this` of that type.
 1817 return C->isDefinedOutsideVectorRegions() ||
 1818 isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
 1819 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
 1820}
1821
/// Emits the scalar cast for a single unroll \p Part, reading lane 0 of the
/// operand. Only SExt/ZExt/Trunc are handled.
Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
// NOTE(review): the first line of this assert (original 1823, presumably
// checking that only the first lane is used) was dropped by the extraction.
 1824 "Codegen only implemented for first lane.");
 1825 switch (Opcode) {
 1826 case Instruction::SExt:
 1827 case Instruction::ZExt:
 1828 case Instruction::Trunc: {
 1829 // Note: SExt/ZExt not used yet.
 1830 Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
 1831 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
 1832 }
 1833 default:
 1834 llvm_unreachable("opcode not implemented yet");
 1835 }
 1836}
1837
1838void VPScalarCastRecipe ::execute(VPTransformState &State) {
1839 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1840 for (unsigned Part = 0; Part != State.UF; ++Part) {
1841 Value *Res;
1842 // Only generate a single instance, if the recipe is uniform across UFs and
1843 // VFs.
1844 if (Part > 0 && IsUniformAcrossVFsAndUFs)
1845 Res = State.get(this, VPIteration(0, 0));
1846 else
1847 Res = generate(State, Part);
1848 State.set(this, Res, VPIteration(Part, 0));
1849 }
1850}
1851
1852#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1853void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1854 VPSlotTracker &SlotTracker) const {
1855 O << Indent << "SCALAR-CAST ";
1856 printAsOperand(O, SlotTracker);
1857 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1858 printOperands(O, SlotTracker);
1859 O << " to " << *ResultTy;
1860}
1861#endif
1862
// NOTE(review): body of the branch-on-mask recipe's execute (signature line
// lost in extraction). Replaces the current block's placeholder unreachable
// terminator with a conditional branch on this instance's mask bit.
 1864 assert(State.Instance && "Branch on Mask works only on single instance.");
 1865
 1866 unsigned Part = State.Instance->Part;
 1867 unsigned Lane = State.Instance->Lane.getKnownLane();
 1868
 1869 Value *ConditionBit = nullptr;
 1870 VPValue *BlockInMask = getMask();
 1871 if (BlockInMask) {
 1872 ConditionBit = State.get(BlockInMask, Part);
// Extract this lane's bit when the mask is a vector value.
 1873 if (ConditionBit->getType()->isVectorTy())
 1874 ConditionBit = State.Builder.CreateExtractElement(
 1875 ConditionBit, State.Builder.getInt32(Lane));
 1876 } else // Block in mask is all-one.
 1877 ConditionBit = State.Builder.getTrue();
 1878
 1879 // Replace the temporary unreachable terminator with a new conditional branch,
 1880 // whose two destinations will be set later when they are created.
 1881 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
 1882 assert(isa<UnreachableInst>(CurrentTerminator) &&
 1883 "Expected to replace unreachable terminator with conditional branch.");
 1884 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
// Successors are filled in later once the target blocks exist.
 1885 CondBr->setSuccessor(0, nullptr);
 1886 ReplaceInstWithInst(CurrentTerminator, CondBr);
 1887}
1888
// NOTE(review): body of the predicated-instruction PHI recipe's execute
// (signature line lost in extraction; the printer below matches). Creates a
// PHI merging the predicated block's value with the bypass path — either a
// vector PHI over an insertelement, or a scalar PHI with poison on bypass.
 1890 assert(State.Instance && "Predicated instruction PHI works per instance.");
 1891 Instruction *ScalarPredInst =
 1892 cast<Instruction>(State.get(getOperand(0), *State.Instance));
 1893 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
 1894 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
 1895 assert(PredicatingBB && "Predicated block has no single predecessor.");
 1896 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
 1897 "operand must be VPReplicateRecipe");
 1898
 1899 // By current pack/unpack logic we need to generate only a single phi node: if
 1900 // a vector value for the predicated instruction exists at this point it means
 1901 // the instruction has vector users only, and a phi for the vector value is
 1902 // needed. In this case the recipe of the predicated instruction is marked to
 1903 // also do that packing, thereby "hoisting" the insert-element sequence.
 1904 // Otherwise, a phi node for the scalar value is needed.
 1905 unsigned Part = State.Instance->Part;
 1906 if (State.hasVectorValue(getOperand(0), Part)) {
 1907 Value *VectorValue = State.get(getOperand(0), Part);
 1908 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
 1909 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
 1910 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
 1911 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
 1912 if (State.hasVectorValue(this, Part))
 1913 State.reset(this, VPhi, Part);
 1914 else
 1915 State.set(this, VPhi, Part);
 1916 // NOTE: Currently we need to update the value of the operand, so the next
 1917 // predicated iteration inserts its generated value in the correct vector.
 1918 State.reset(getOperand(0), VPhi, Part);
 1919 } else {
 1920 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
 1921 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
// Bypass path contributes poison: lanes that skip the predicated block have
// no defined scalar value.
 1922 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
 1923 PredicatingBB);
 1924 Phi->addIncoming(ScalarPredInst, PredicatedBB);
 1925 if (State.hasScalarValue(this, *State.Instance))
 1926 State.reset(this, Phi, *State.Instance);
 1927 else
 1928 State.set(this, Phi, *State.Instance);
 1929 // NOTE: Currently we need to update the value of the operand, so the next
 1930 // predicated iteration inserts its generated value in the correct vector.
 1931 State.reset(getOperand(0), Phi, *State.Instance);
 1932 }
 1933}
1934
 1935#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): the extraction dropped the signature line of each debug
// printer in this cluster; from their output strings these are the printers
// for the predicated-instruction PHI, the widened load / vp.load, and the
// widened store / vp.store recipes.
 1937 VPSlotTracker &SlotTracker) const {
 1938 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
 1940 O << " = ";
 1942}
 1943
 1945 VPSlotTracker &SlotTracker) const {
 1946 O << Indent << "WIDEN ";
 1948 O << " = load ";
 1950}
 1951
 1953 VPSlotTracker &SlotTracker) const {
 1954 O << Indent << "WIDEN ";
 1956 O << " = vp.load ";
 1958}
 1959
 1961 VPSlotTracker &SlotTracker) const {
 1962 O << Indent << "WIDEN store ";
 1964}
 1965
 1967 VPSlotTracker &SlotTracker) const {
 1968 O << Indent << "WIDEN vp.store ";
 1970}
 1971#endif
1972
// NOTE(review): body of the canonical IV PHI recipe's execute (signature
// line lost in extraction; the "CANONICAL-INDUCTION" printer below matches).
// Creates the scalar "index" PHI seeded from the preheader; the same scalar
// value is registered for every unroll part.
 1974 Value *Start = getStartValue()->getLiveInIRValue();
 1975 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
 1976 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
 1977
 1978 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
 1979 EntryPart->addIncoming(Start, VectorPH);
 1980 EntryPart->setDebugLoc(getDebugLoc());
// The backedge incoming value is added elsewhere, after the latch exists.
 1981 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
 1982 State.set(this, EntryPart, Part, /*IsScalar*/ true);
 1983}
1984
1985#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1987 VPSlotTracker &SlotTracker) const {
1988 O << Indent << "EMIT ";
1990 O << " = CANONICAL-INDUCTION ";
1992}
1993#endif
1994
1997 VPValue *Step) const {
1998 // Must be an integer induction.
2000 return false;
2001 // Start must match the start value of this canonical induction.
2002 if (Start != getStartValue())
2003 return false;
2004
2005 // If the step is defined by a recipe, it is not a ConstantInt.
2006 if (Step->getDefiningRecipe())
2007 return false;
2008
2009 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
2010 return StepC && StepC->isOne();
2011}
2012
2014 return IsScalarAfterVectorization &&
2015 (!IsScalable || vputils::onlyFirstLaneUsed(this));
2016}
2017
2018#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2020 VPSlotTracker &SlotTracker) const {
2021 O << Indent << "EMIT ";
2023 O << " = WIDEN-POINTER-INDUCTION ";
2025 O << ", " << *IndDesc.getStep();
2026}
2027#endif
2028
2030 assert(!State.Instance && "cannot be used in per-lane");
2031 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2032 SCEVExpander Exp(SE, DL, "induction");
2033
2034 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
2035 &*State.Builder.GetInsertPoint());
2036 assert(!State.ExpandedSCEVs.contains(Expr) &&
2037 "Same SCEV expanded multiple times");
2038 State.ExpandedSCEVs[Expr] = Res;
2039 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
2040 State.set(this, Res, {Part, 0});
2041}
2042
2043#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2045 VPSlotTracker &SlotTracker) const {
2046 O << Indent << "EMIT ";
2048 O << " = EXPAND SCEV " << *Expr;
2049}
2050#endif
2051
2053 Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
2054 Type *STy = CanonicalIV->getType();
2055 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
2056 ElementCount VF = State.VF;
2057 Value *VStart = VF.isScalar()
2058 ? CanonicalIV
2059 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
2060 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2061 Value *VStep = createStepForVF(Builder, STy, VF, Part);
2062 if (VF.isVector()) {
2063 VStep = Builder.CreateVectorSplat(VF, VStep);
2064 VStep =
2065 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
2066 }
2067 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
2068 State.set(this, CanonicalVectorIV, Part);
2069 }
2070}
2071
2072#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2074 VPSlotTracker &SlotTracker) const {
2075 O << Indent << "EMIT ";
2077 O << " = WIDEN-CANONICAL-INDUCTION ";
2079}
2080#endif
2081
2083 auto &Builder = State.Builder;
2084 // Create a vector from the initial value.
2085 auto *VectorInit = getStartValue()->getLiveInIRValue();
2086
2087 Type *VecTy = State.VF.isScalar()
2088 ? VectorInit->getType()
2089 : VectorType::get(VectorInit->getType(), State.VF);
2090
2091 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2092 if (State.VF.isVector()) {
2093 auto *IdxTy = Builder.getInt32Ty();
2094 auto *One = ConstantInt::get(IdxTy, 1);
2095 IRBuilder<>::InsertPointGuard Guard(Builder);
2096 Builder.SetInsertPoint(VectorPH->getTerminator());
2097 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
2098 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
2099 VectorInit = Builder.CreateInsertElement(
2100 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
2101 }
2102
2103 // Create a phi node for the new recurrence.
2104 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
2105 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
2106 EntryPart->addIncoming(VectorInit, VectorPH);
2107 State.set(this, EntryPart, 0);
2108}
2109
2110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2112 VPSlotTracker &SlotTracker) const {
2113 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
2115 O << " = phi ";
2117}
2118#endif
2119
2121 auto &Builder = State.Builder;
2122
2123 // Reductions do not have to start at zero. They can start with
2124 // any loop invariant values.
2125 VPValue *StartVPV = getStartValue();
2126 Value *StartV = StartVPV->getLiveInIRValue();
2127
2128 // In order to support recurrences we need to be able to vectorize Phi nodes.
2129 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
2130 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
2131 // this value when we vectorize all of the instructions that use the PHI.
2132 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
2133 Type *VecTy = ScalarPHI ? StartV->getType()
2134 : VectorType::get(StartV->getType(), State.VF);
2135
2136 BasicBlock *HeaderBB = State.CFG.PrevBB;
2137 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
2138 "recipe must be in the vector loop header");
2139 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
2140 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2141 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
2142 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
2143 State.set(this, EntryPart, Part, IsInLoop);
2144 }
2145
2146 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2147
2148 Value *Iden = nullptr;
2149 RecurKind RK = RdxDesc.getRecurrenceKind();
2152 // MinMax and AnyOf reductions have the start value as their identity.
2153 if (ScalarPHI) {
2154 Iden = StartV;
2155 } else {
2156 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2157 Builder.SetInsertPoint(VectorPH->getTerminator());
2158 StartV = Iden =
2159 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
2160 }
2161 } else {
2162 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
2163 RdxDesc.getFastMathFlags());
2164
2165 if (!ScalarPHI) {
2166 Iden = Builder.CreateVectorSplat(State.VF, Iden);
2167 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2168 Builder.SetInsertPoint(VectorPH->getTerminator());
2169 Constant *Zero = Builder.getInt32(0);
2170 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
2171 }
2172 }
2173
2174 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2175 Value *EntryPart = State.get(this, Part, IsInLoop);
2176 // Make sure to add the reduction start value only to the
2177 // first unroll part.
2178 Value *StartVal = (Part == 0) ? StartV : Iden;
2179 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
2180 }
2181}
2182
2183#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2185 VPSlotTracker &SlotTracker) const {
2186 O << Indent << "WIDEN-REDUCTION-PHI ";
2187
2189 O << " = phi ";
2191}
2192#endif
2193
2196 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2197
2198 Value *Op0 = State.get(getOperand(0), 0);
2199 Type *VecTy = Op0->getType();
2200 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2201 State.set(this, VecPhi, 0);
2202}
2203
2204#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2206 VPSlotTracker &SlotTracker) const {
2207 O << Indent << "WIDEN-PHI ";
2208
2209 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2210 // Unless all incoming values are modeled in VPlan print the original PHI
2211 // directly.
2212 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2213 // values as VPValues.
2214 if (getNumOperands() != OriginalPhi->getNumOperands()) {
2215 O << VPlanIngredient(OriginalPhi);
2216 return;
2217 }
2218
2220 O << " = phi ";
2222}
2223#endif
2224
2225// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2226// remove VPActiveLaneMaskPHIRecipe.
2228 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2229 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2230 Value *StartMask = State.get(getOperand(0), Part);
2231 PHINode *EntryPart =
2232 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2233 EntryPart->addIncoming(StartMask, VectorPH);
2234 EntryPart->setDebugLoc(getDebugLoc());
2235 State.set(this, EntryPart, Part);
2236 }
2237}
2238
2239#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2241 VPSlotTracker &SlotTracker) const {
2242 O << Indent << "ACTIVE-LANE-MASK-PHI ";
2243
2245 O << " = phi ";
2247}
2248#endif
2249
2251 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2252 assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2253 Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2254 PHINode *EntryPart =
2255 State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2256 EntryPart->addIncoming(Start, VectorPH);
2257 EntryPart->setDebugLoc(getDebugLoc());
2258 State.set(this, EntryPart, 0, /*IsScalar=*/true);
2259}
2260
2261#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2263 VPSlotTracker &SlotTracker) const {
2264 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2265
2267 O << " = phi ";
2269}
2270#endif
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
Hexagon Common GEP
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
cl::opt< unsigned > ForceTargetInstructionCost
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C)
Checks if C is uniform across all VFs and UFs.
static Instruction * getInstructionForCost(const VPRecipeBase *R)
Return the underlying instruction to be used for computing R's cost via the legacy cost model.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:372
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:457
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:294
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:290
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:124
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1450
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:260
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:207
bool willReturn() const
Determine if the function will return.
Definition: Function.h:653
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:242
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:586
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2477
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2099
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2465
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1538
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1166
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1193
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:463
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2038
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2540
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1981
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2086
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1871
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:274
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2371
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2402
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1754
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2246
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1349
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1125
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:1006
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2026
LLVMContext & getContext() const
Definition: IRBuilder.h:173
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1332
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1681
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2166
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:286
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1831
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2417
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2356
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1592
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:110
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1366
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:476
bool isBinaryOp() const
Definition: Instruction.h:279
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:276
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:473
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
Type * getRecurrenceType() const
Returns the type of the recurrence.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2901
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2951
iterator end()
Definition: VPlan.h:2935
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:574
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2964
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2045
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2050
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:2042
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:509
VPlan * getPlan()
Definition: VPlan.cpp:150
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:155
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:544
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2323
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, /p Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:307
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:111
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:395
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:407
unsigned getVPDefID() const
Definition: VPlanValue.h:427
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2839
VPValue * getStartValue() const
Definition: VPlan.h:2838
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1741
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1246
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1234
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1249
@ CalculateTripCountMinusVF
Definition: VPlan.h:1247
bool hasResult() const
Definition: VPlan.h:1364
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1340
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:196
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:182
static VPLane getFirstLane()
Definition: VPlan.h:180
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:728
void fixPhi(VPlan &Plan, VPTransformState &State)
Fix the wrapped phi node.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:764
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe using the legacy cost model and the underlying instructions.
VPBasicBlock * getParent()
Definition: VPlan.h:789
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:860
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:963
ExactFlagsTy ExactFlags
Definition: VPlan.h:1019
FastMathFlagsTy FMFs
Definition: VPlan.h:1022
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:1021
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1148
bool isInBounds() const
Definition: VPlan.h:1190
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1197
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:1018
WrapFlagsTy WrapFlags
Definition: VPlan.h:1017
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1201
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1184
bool hasNoSignedWrap() const
Definition: VPlan.h:1207
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2014
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2207
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2209
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2205
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3079
const VPBlockBase * getEntry() const
Definition: VPlan.h:3118
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:2287
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1484
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2888
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:954
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:448
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:202
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1456
operand_range operands()
Definition: VPlanValue.h:272
unsigned getNumOperands() const
Definition: VPlanValue.h:251
operand_iterator op_begin()
Definition: VPlanValue.h:268
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:252
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:120
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1452
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
user_range users()
Definition: VPlanValue.h:132
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1557
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1561
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1480
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1825
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1820
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1839
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1831
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3183
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3385
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5105
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3738
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1604
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1599
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
@ Offset
Definition: DWP.cpp:480
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2400
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2159
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1037
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:55
static bool isDbgInfoIntrinsic(Intrinsic::ID ID)
Check if ID corresponds to a debug info intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1211
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1195
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:737
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:742
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:238
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:384
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:392
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:355
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:255
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:254
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:429
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:432
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:368
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:322
struct llvm::VPTransformState::CFGState CFG
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:307
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:267
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:409
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:295
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:289
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:261
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:418
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:379
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1604
VPValue * getCond() const
Definition: VPlan.h:1600
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.